mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
[extractor/bilibili] Add space.bilibili extractors (#4468)
Authored by: lockmatrix
This commit is contained in:
parent
2314b4d89f
commit
2b9d02167f
2 changed files with 119 additions and 29 deletions
|
@ -190,7 +190,9 @@
|
||||||
BilibiliAudioIE,
|
BilibiliAudioIE,
|
||||||
BilibiliAudioAlbumIE,
|
BilibiliAudioAlbumIE,
|
||||||
BiliBiliPlayerIE,
|
BiliBiliPlayerIE,
|
||||||
BilibiliChannelIE,
|
BilibiliSpaceVideoIE,
|
||||||
|
BilibiliSpaceAudioIE,
|
||||||
|
BilibiliSpacePlaylistIE,
|
||||||
BiliIntlIE,
|
BiliIntlIE,
|
||||||
BiliIntlSeriesIE,
|
BiliIntlSeriesIE,
|
||||||
BiliLiveIE,
|
BiliLiveIE,
|
||||||
|
|
|
@ -2,8 +2,8 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
import functools
|
import functools
|
||||||
import re
|
|
||||||
import math
|
import math
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
@ -13,23 +13,24 @@
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
InAdvancePagedList,
|
||||||
|
OnDemandPagedList,
|
||||||
filter_dict,
|
filter_dict,
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
parse_count,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
qualities,
|
qualities,
|
||||||
traverse_obj,
|
|
||||||
parse_count,
|
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
|
traverse_obj,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
OnDemandPagedList
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -505,39 +506,126 @@ def _real_extract(self, url):
|
||||||
season_info.get('bangumi_title'), season_info.get('evaluate'))
|
season_info.get('bangumi_title'), season_info.get('evaluate'))
|
||||||
|
|
||||||
|
|
||||||
class BilibiliChannelIE(InfoExtractor):
|
class BilibiliSpaceBaseIE(InfoExtractor):
    """Shared paging helper for the space.bilibili.com playlist extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Build a lazily evaluated playlist from a paginated API.

        @param fetch_page    callable taking a 1-based page number and
                             returning the data of that page
        @param get_metadata  callable taking the data of the first page and
                             returning a dict that contains at least
                             'page_count' and 'page_size'
        @param get_entries   callable taking the data of a page and returning
                             an iterable with the entries of that page
        @returns             tuple (metadata, paged_list)
        """
        # The first page is always needed, since it carries the paging metadata
        first_page = fetch_page(1)
        metadata = get_metadata(first_page)

        def get_page_entries(idx):
            # InAdvancePagedList hands over 0-based page indices while
            # fetch_page expects 1-based page numbers. Index 0 is the page
            # that has already been downloaded above, so it is reused
            # instead of being requested a second time.
            return get_entries(first_page if idx == 0 else fetch_page(idx + 1))

        paged_list = InAdvancePagedList(
            get_page_entries, metadata['page_count'], metadata['page_size'])

        return metadata, paged_list
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Playlist made of the videos listed for a space.bilibili.com user."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }]

    def _real_extract(self, url):
        """Return a playlist result with every video of the matched user."""
        mobj = self._match_valid_url(url)
        playlist_id = mobj.group('id')
        if not mobj.group('video'):
            # Bare channel URL: tell the user which part of the channel is used
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        def fetch_page(page_idx):
            # Download the "data" object of one page of the video search API
            query = {'mid': playlist_id, 'pn': page_idx, 'jsonp': 'jsonp'}
            response = self._download_json(
                'https://api.bilibili.com/x/space/arc/search', playlist_id,
                note=f'Downloading page {page_idx}', query=query)
            return response['data']

        def get_metadata(page_data):
            # Derive the number of pages from the total count and the page size
            paging = page_data['page']
            page_size = paging['ps']
            entry_count = paging['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            # A missing or empty video list simply yields nothing
            videos = traverse_obj(page_data, ('list', 'vlist')) or []
            for video in videos:
                bvid = video['bvid']
                yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
|
||||||
return
|
|
||||||
|
|
||||||
|
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Playlist made of the audios listed for a space.bilibili.com user."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/audio',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return a playlist result with every audio of the matched user."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # Download the "data" object of one page of the song list API
            query = {'uid': playlist_id, 'pn': page_idx, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
            response = self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}', query=query)
            return response['data']

        def get_metadata(page_data):
            # The paging information is read directly from the response
            page_count = page_data['pageCount']
            page_size = page_data['pageSize']
            return {
                'page_count': page_count,
                'page_size': page_size,
            }

        def get_entries(page_data):
            # One url_result per song listed under the page's "data" key
            for song in page_data.get('data', []):
                song_id = song['id']
                yield self.url_result(f'https://www.bilibili.com/audio/au{song_id}', BilibiliAudioIE, song_id)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
|
||||||
|
|
||||||
|
|
||||||
|
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
    """Playlist made of the videos of a collection on space.bilibili.com."""

    # Every dot of the host name is escaped so that it only matches a literal
    # ".", in line with the other space.bilibili.com extractors
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '《底特律 变人》'
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Return a playlist result with every video of the matched collection.

        The playlist id is built as "<mid>_<sid>" and the title is read from
        the metadata of the first page.
        """
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            # Download the "data" object of one page of the collection API
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx, 'page_size': 30})['data']

        def get_metadata(page_data):
            # Derive the number of pages from the total count and the page size
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'title': traverse_obj(page_data, ('meta', 'name'))
            }

        def get_entries(page_data):
            # One url_result per video listed under the page's "archives" key
            for entry in page_data.get('archives', []):
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
                                      BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, metadata['title'])
|
||||||
|
|
||||||
|
|
||||||
class BilibiliCategoryIE(InfoExtractor):
|
class BilibiliCategoryIE(InfoExtractor):
|
||||||
|
|
Loading…
Reference in a new issue