mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-11 17:55:25 -05:00
[extractor/biliIntl] Add fallback to video_data
(#5971)
Authored by: HobbyistDev
This commit is contained in:
parent
933ed882e9
commit
d37422f1db
1 changed file with 27 additions and 2 deletions
|
@ -16,6 +16,7 @@
|
||||||
format_field,
|
format_field,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
make_archive_id,
|
make_archive_id,
|
||||||
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
|
@ -934,6 +935,10 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||||
'title': 'E2 - The First Night',
|
'title': 'E2 - The First Night',
|
||||||
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
|
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
|
||||||
'episode_number': 2,
|
'episode_number': 2,
|
||||||
|
'upload_date': '20201009',
|
||||||
|
'episode': 'Episode 2',
|
||||||
|
'timestamp': 1602259500,
|
||||||
|
'description': 'md5:297b5a17155eb645e14a14b385ab547e',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# Non-Bstation page
|
# Non-Bstation page
|
||||||
|
@ -944,6 +949,10 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||||
'title': 'E3 - Who?',
|
'title': 'E3 - Who?',
|
||||||
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
|
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
|
||||||
'episode_number': 3,
|
'episode_number': 3,
|
||||||
|
'description': 'md5:e1a775e71a35c43f141484715470ad09',
|
||||||
|
'episode': 'Episode 3',
|
||||||
|
'upload_date': '20211219',
|
||||||
|
'timestamp': 1639928700,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# Subtitle with empty content
|
# Subtitle with empty content
|
||||||
|
@ -956,6 +965,17 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||||
'episode_number': 140,
|
'episode_number': 140,
|
||||||
},
|
},
|
||||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
|
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bilibili.tv/en/video/2041863208',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2041863208',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'timestamp': 1670874843,
|
||||||
|
'description': 'Scheduled for April 2023.\nStudio: ufotable',
|
||||||
|
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
||||||
|
'upload_date': '20221212',
|
||||||
|
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.biliintl.com/en/play/34613/341736',
|
'url': 'https://www.biliintl.com/en/play/34613/341736',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -989,7 +1009,7 @@ def _extract_video_metadata(self, url, video_id, season_id):
|
||||||
self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
|
self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
|
||||||
or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
|
or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
|
||||||
video_data = traverse_obj(
|
video_data = traverse_obj(
|
||||||
initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict)
|
initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
|
||||||
|
|
||||||
if season_id and not video_data:
|
if season_id and not video_data:
|
||||||
# Non-Bstation layout, read through episode list
|
# Non-Bstation layout, read through episode list
|
||||||
|
@ -998,7 +1018,12 @@ def _extract_video_metadata(self, url, video_id, season_id):
|
||||||
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
|
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
|
||||||
), expected_type=dict, get_all=False)
|
), expected_type=dict, get_all=False)
|
||||||
|
|
||||||
return self._parse_video_metadata(video_data)
|
# XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
|
||||||
|
return merge_dicts(
|
||||||
|
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
|
||||||
|
'title': self._html_search_meta('og:title', webpage),
|
||||||
|
'description': self._html_search_meta('og:description', webpage)
|
||||||
|
})
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
|
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
|
||||||
|
|
Loading…
Reference in a new issue