[bilibili] Fix extractor

Closes #2599, Closes #2562
Fixes https://github.com/yt-dlp/yt-dlp/pull/1716#issuecomment-980512982
This commit is contained in:
pukkandan 2022-02-02 18:06:04 +05:30
parent c5332d7fbb
commit 54bb39065c
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698

View file

@@ -52,7 +52,7 @@ class BiliBiliIE(InfoExtractor):
'url': 'http://www.bilibili.com/video/av1074402/', 'url': 'http://www.bilibili.com/video/av1074402/',
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
'info_dict': { 'info_dict': {
'id': '1074402', 'id': '1074402_part1',
'ext': 'mp4', 'ext': 'mp4',
'title': '【金坷垃】金泡沫', 'title': '【金坷垃】金泡沫',
'uploader_id': '156160', 'uploader_id': '156160',
@@ -73,7 +73,7 @@ class BiliBiliIE(InfoExtractor):
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643', 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
'md5': '3f721ad1e75030cc06faf73587cfec57', 'md5': '3f721ad1e75030cc06faf73587cfec57',
'info_dict': { 'info_dict': {
'id': '100643', 'id': '100643_part1',
'ext': 'mp4', 'ext': 'mp4',
'title': 'CHAOS;CHILD', 'title': 'CHAOS;CHILD',
'description': '如果你是神明并且能够让妄想成为现实。那你会进行怎么样的妄想是淫靡的世界独裁社会毁灭性的制裁还是……2015年涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', 'description': '如果你是神明并且能够让妄想成为现实。那你会进行怎么样的妄想是淫靡的世界独裁社会毁灭性的制裁还是……2015年涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
@@ -82,7 +82,7 @@ class BiliBiliIE(InfoExtractor):
}, { }, {
'url': 'http://www.bilibili.com/video/av8903802/', 'url': 'http://www.bilibili.com/video/av8903802/',
'info_dict': { 'info_dict': {
'id': '8903802', 'id': '8903802_part1',
'ext': 'mp4', 'ext': 'mp4',
'title': '阿滴英文|英文歌分享#6 "Closer', 'title': '阿滴英文|英文歌分享#6 "Closer',
'upload_date': '20170301', 'upload_date': '20170301',
@@ -181,8 +181,8 @@ def _real_extract(self, url):
headers.update(self.geo_verification_headers()) headers.update(self.geo_verification_headers())
video_info = self._parse_json( video_info = self._parse_json(
self._search_regex(r'window.__playinfo__\s*=\s*({.+?})</script>', webpage, 'video info', default=None), self._search_regex(r'window.__playinfo__\s*=\s*({.+?})</script>', webpage, 'video info', default=None) or '{}',
video_id, fatal=False) or {} video_id, fatal=False)
video_info = video_info.get('data') or {} video_info = video_info.get('data') or {}
durl = traverse_obj(video_info, ('dash', 'video')) durl = traverse_obj(video_info, ('dash', 'video'))
@@ -257,10 +257,11 @@ def _real_extract(self, url):
self._sort_formats(formats) self._sort_formats(formats)
title = self._html_search_regex( title = self._html_search_regex((
(r'<h1[^>]+title=(["\'])(?P<title>[^"\']+)', r'<h1[^>]+title=(["\'])(?P<content>[^"\']+)',
r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title', r'(?s)<h1[^>]*>(?P<content>.+?)</h1>',
group='title', fatal=False) self._meta_regex('title')
), webpage, 'title', group='content', fatal=False)
# Get part title for anthologies # Get part title for anthologies
if page_id is not None: if page_id is not None:
@@ -279,7 +280,7 @@ def _real_extract(self, url):
# TODO 'view_count' requires deobfuscating Javascript # TODO 'view_count' requires deobfuscating Javascript
info.update({ info.update({
'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id), 'id': f'{video_id}_part{page_id or 1}',
'cid': cid, 'cid': cid,
'title': title, 'title': title,
'description': description, 'description': description,