mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-07 20:30:41 -05:00
[ie/orf:on] Improve extraction (#9677)
Closes #9652 Authored by: TuxCoder
This commit is contained in:
parent
be7db1a5a8
commit
0dd53faeca
1 changed file with 33 additions and 9 deletions
|
@@ -14,6 +14,7 @@
|
||||||
make_archive_id,
|
make_archive_id,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_age_limit,
|
||||||
remove_end,
|
remove_end,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
|
@@ -569,7 +570,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class ORFONIE(InfoExtractor):
|
class ORFONIE(InfoExtractor):
|
||||||
IE_NAME = 'orf:on'
|
IE_NAME = 'orf:on'
|
||||||
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
|
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
|
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@@ -583,32 +584,55 @@ class ORFONIE(InfoExtractor):
|
||||||
'timestamp': 1706472362,
|
'timestamp': 1706472362,
|
||||||
'upload_date': '20240128',
|
'upload_date': '20240128',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://on.orf.at/video/3220355',
|
||||||
|
'md5': 'f94d98e667cf9a3851317efb4e136662',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3220355',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'duration': 445.04,
|
||||||
|
'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png',
|
||||||
|
'title': '50 Jahre Burgenland: Der Festumzug',
|
||||||
|
'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0',
|
||||||
|
'media_type': 'episode',
|
||||||
|
'timestamp': 52916400,
|
||||||
|
'upload_date': '19710905',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_video(self, video_id, display_id):
|
def _extract_video(self, video_id):
|
||||||
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
|
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
|
||||||
api_json = self._download_json(
|
api_json = self._download_json(
|
||||||
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)
|
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id)
|
||||||
|
|
||||||
|
if traverse_obj(api_json, 'is_drm_protected'):
|
||||||
|
self.report_drm(video_id)
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
|
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
|
||||||
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
|
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
|
||||||
if manifest_type == 'hls':
|
if manifest_type == 'hls':
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
manifest_url, display_id, fatal=False, m3u8_id='hls')
|
manifest_url, video_id, fatal=False, m3u8_id='hls')
|
||||||
elif manifest_type == 'dash':
|
elif manifest_type == 'dash':
|
||||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||||
manifest_url, display_id, fatal=False, mpd_id='dash')
|
manifest_url, video_id, fatal=False, mpd_id='dash')
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
|
for sub_url in traverse_obj(api_json, (
|
||||||
|
'_embedded', 'subtitle',
|
||||||
|
('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url'), {url_or_none})):
|
||||||
|
self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
**traverse_obj(api_json, {
|
**traverse_obj(api_json, {
|
||||||
|
'age_limit': ('age_classification', {parse_age_limit}),
|
||||||
'duration': ('duration_second', {float_or_none}),
|
'duration': ('duration_second', {float_or_none}),
|
||||||
'title': (('title', 'headline'), {str}),
|
'title': (('title', 'headline'), {str}),
|
||||||
'description': (('description', 'teaser_text'), {str}),
|
'description': (('description', 'teaser_text'), {str}),
|
||||||
|
@@ -617,14 +641,14 @@ def _extract_video(self, video_id, display_id):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, display_id = self._match_valid_url(url).group('id', 'slug')
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
|
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
|
||||||
'description': self._html_search_meta(
|
'description': self._html_search_meta(
|
||||||
['description', 'og:description', 'twitter:description'], webpage, default=None),
|
['description', 'og:description', 'twitter:description'], webpage, default=None),
|
||||||
**self._search_json_ld(webpage, display_id, fatal=False),
|
**self._search_json_ld(webpage, video_id, fatal=False),
|
||||||
**self._extract_video(video_id, display_id),
|
**self._extract_video(video_id),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue