mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
parent
9bd13fe5bb
commit
bfbecd1174
3 changed files with 57 additions and 2 deletions
|
@ -1083,6 +1083,7 @@
|
||||||
NewgroundsPlaylistIE,
|
NewgroundsPlaylistIE,
|
||||||
NewgroundsUserIE,
|
NewgroundsUserIE,
|
||||||
)
|
)
|
||||||
|
from .newspicks import NewsPicksIE
|
||||||
from .newstube import NewstubeIE
|
from .newstube import NewstubeIE
|
||||||
from .newsy import NewsyIE
|
from .newsy import NewsyIE
|
||||||
from .nextmedia import (
|
from .nextmedia import (
|
||||||
|
|
|
@ -3260,7 +3260,7 @@ def _media_formats(src, cur_media_type, type_info=None):
|
||||||
'subtitles': {},
|
'subtitles': {},
|
||||||
}
|
}
|
||||||
media_attributes = extract_attributes(media_tag)
|
media_attributes = extract_attributes(media_tag)
|
||||||
src = strip_or_none(media_attributes.get('src'))
|
src = strip_or_none(dict_get(media_attributes, ('src', 'data-video-src', 'data-src', 'data-source')))
|
||||||
if src:
|
if src:
|
||||||
f = parse_content_type(media_attributes.get('type'))
|
f = parse_content_type(media_attributes.get('type'))
|
||||||
_, formats = _media_formats(src, media_type, f)
|
_, formats = _media_formats(src, media_type, f)
|
||||||
|
@ -3271,7 +3271,7 @@ def _media_formats(src, cur_media_type, type_info=None):
|
||||||
s_attr = extract_attributes(source_tag)
|
s_attr = extract_attributes(source_tag)
|
||||||
# data-video-src and data-src are non standard but seen
|
# data-video-src and data-src are non standard but seen
|
||||||
# several times in the wild
|
# several times in the wild
|
||||||
src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
|
src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src', 'data-source')))
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
f = parse_content_type(s_attr.get('type'))
|
f = parse_content_type(s_attr.get('type'))
|
||||||
|
|
54
yt_dlp/extractor/newspicks.py
Normal file
54
yt_dlp/extractor/newspicks.py
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class NewsPicksIE(InfoExtractor):
    """Extractor for NewsPicks movie-series pages.

    Matches URLs of the form
    ``https://newspicks.com/movie-series/<channel_id>?movieId=<id>`` and
    builds the info dict from the HTML5 media element found in the page plus
    metadata scraped from the page markup.
    """

    # The dot in the host name is escaped so it only matches a literal '.'.
    _VALID_URL = r'https://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://newspicks.com/movie-series/11?movieId=1813',
        'info_dict': {
            'id': '1813',
            'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
            'description': 'md5:09397aad46d6ded6487ff13f138acadf',
            'channel': 'HORIE ONE',
            'channel_id': '11',
            'release_date': '20220117',
            'thumbnail': r're:https://.+jpg',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Download the page at *url* and return its info dict.

        Raises ExtractorError when no HTML5 media entry is found in the page.
        """
        video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
        webpage = self._download_webpage(url, video_id)
        # NOTE(review): the 'movie-for-pc' -> 'movie' substitution is applied
        # to the whole page before media parsing; its exact purpose is not
        # visible from this file - confirm against the live markup.
        entries = self._parse_html5_media_entries(
            url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
        if not entries:
            raise ExtractorError('No HTML5 media elements found')
        info = entries[0]
        self._sort_formats(info['formats'])

        title = self._html_search_meta('og:title', webpage, fatal=False)
        description = self._html_search_meta(
            ('og:description', 'twitter:title'), webpage, fatal=False)
        # Look the channel name up by the channel id taken from the URL rather
        # than a hard-coded id, so pages of other channels resolve as well.
        channel = self._html_search_regex(
            r'value="%s".+?<div\s+class="title">(.+?)</div' % re.escape(channel_id),
            webpage, 'channel name', fatal=False)
        if not title or not channel:
            # Fall back to the <title> element, presumably "<title> | <channel>".
            # The pipe must be escaped: unescaped it is a regex alternation
            # that matches the empty string and splits between every character.
            page_title = self._html_extract_title(webpage) or ''
            parts = re.split(r'\s*\|\s*', page_title.strip())
            # Only fill in what is still missing; keep values already found.
            title = title or parts[0] or None
            channel = channel or (parts[1] if len(parts) > 1 else None)

        # Groups are (year, month, day); None when the date span is absent.
        release_date = self._search_regex(
            r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
            webpage, 'release date', fatal=False, group=(1, 2, 3))

        info.update({
            'id': video_id,
            'title': title,
            'description': description,
            'channel': channel,
            'channel_id': channel_id,
            'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
        })
        return info
|
Loading…
Reference in a new issue