mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 04:43:45 -05:00
[extractor/youtube:tab] Update tab handling for redesign (#5439)
Closes #5432, #5430, #5419. Authored by: coletdjnz, pukkandan
This commit is contained in:
parent
c61473c1d6
commit
86973308cd
3 changed files with 250 additions and 89 deletions
|
@ -88,7 +88,7 @@ # NEW FEATURES
|
||||||
* Supports some (but not all) age-gated content without cookies
|
* Supports some (but not all) age-gated content without cookies
|
||||||
* Download livestreams from the start using `--live-from-start` (*experimental*)
|
* Download livestreams from the start using `--live-from-start` (*experimental*)
|
||||||
* `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
|
* `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
|
||||||
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour
|
* Channel URLs download all uploads of the channel, including shorts and live streams
|
||||||
|
|
||||||
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
|
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
|
||||||
|
|
||||||
|
@ -142,7 +142,7 @@ ### Differences in default behavior
|
||||||
* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
|
* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
|
||||||
* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
|
* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
|
||||||
* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
|
* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
|
||||||
* YouTube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
|
* YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
|
||||||
* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
|
* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
|
||||||
* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
|
* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
|
||||||
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
|
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
|
|
||||||
from test.helper import FakeYDL, is_download_test
|
from test.helper import FakeYDL, is_download_test
|
||||||
from yt_dlp.extractor import YoutubeIE, YoutubeTabIE
|
from yt_dlp.extractor import YoutubeIE, YoutubeTabIE
|
||||||
|
from yt_dlp.utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
@is_download_test
|
@is_download_test
|
||||||
|
@ -53,6 +54,18 @@ def test_youtube_flat_playlist_extraction(self):
|
||||||
self.assertEqual(video['duration'], 10)
|
self.assertEqual(video['duration'], 10)
|
||||||
self.assertEqual(video['uploader'], 'Philipp Hagemeister')
|
self.assertEqual(video['uploader'], 'Philipp Hagemeister')
|
||||||
|
|
||||||
|
def test_youtube_channel_no_uploads(self):
|
||||||
|
dl = FakeYDL()
|
||||||
|
dl.params['extract_flat'] = True
|
||||||
|
ie = YoutubeTabIE(dl)
|
||||||
|
# no uploads
|
||||||
|
with self.assertRaisesRegex(ExtractorError, r'no uploads'):
|
||||||
|
ie.extract('https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA')
|
||||||
|
|
||||||
|
# no uploads and no UCID given
|
||||||
|
with self.assertRaisesRegex(ExtractorError, r'no uploads'):
|
||||||
|
ie.extract('https://www.youtube.com/news')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -4589,14 +4589,17 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_selected_tab(tabs, fatal=True):
|
def _extract_selected_tab(tabs, fatal=True):
|
||||||
for tab in tabs:
|
for tab_renderer in tabs:
|
||||||
renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
|
if tab_renderer.get('selected'):
|
||||||
if renderer.get('selected') is True:
|
return tab_renderer
|
||||||
return renderer
|
|
||||||
else:
|
|
||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError('Unable to find selected tab')
|
raise ExtractorError('Unable to find selected tab')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_tab_renderers(response):
|
||||||
|
return traverse_obj(
|
||||||
|
response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer')), expected_type=dict)
|
||||||
|
|
||||||
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
|
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
|
||||||
playlist_id = title = description = channel_url = channel_name = channel_id = None
|
playlist_id = title = description = channel_url = channel_name = channel_id = None
|
||||||
tags = []
|
tags = []
|
||||||
|
@ -4897,8 +4900,7 @@ def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=Fals
|
||||||
webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
|
webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
|
||||||
ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
|
ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
|
||||||
# Reject webpage data if redirected to home page without explicitly requesting
|
# Reject webpage data if redirected to home page without explicitly requesting
|
||||||
selected_tab = self._extract_selected_tab(traverse_obj(
|
selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
|
||||||
data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[]), fatal=False) or {}
|
|
||||||
if (url != 'https://www.youtube.com/feed/recommended'
|
if (url != 'https://www.youtube.com/feed/recommended'
|
||||||
and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page
|
and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page
|
||||||
and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
|
and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
|
||||||
|
@ -5392,18 +5394,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
'categories': ['News & Politics'],
|
'categories': ['News & Politics'],
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'release_timestamp': 1642502819,
|
'release_timestamp': int,
|
||||||
'channel': 'Sky News',
|
'channel': 'Sky News',
|
||||||
'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
|
'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
|
'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
|
||||||
'playable_in_embed': True,
|
'playable_in_embed': True,
|
||||||
'release_date': '20220118',
|
'release_date': r're:\d+',
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
'live_status': 'is_live',
|
'live_status': 'is_live',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
|
'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
|
||||||
'channel_follower_count': int
|
'channel_follower_count': int,
|
||||||
|
'concurrent_view_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -5538,16 +5541,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
],
|
],
|
||||||
'playlist_mincount': 101,
|
'playlist_mincount': 101,
|
||||||
}, {
|
}, {
|
||||||
'note': 'Topic without a UU playlist',
|
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
|
||||||
|
# Treat as a general feed
|
||||||
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
|
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
|
'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
|
||||||
'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
|
'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
},
|
},
|
||||||
'expected_warnings': [
|
|
||||||
'the playlist redirect gave error',
|
|
||||||
],
|
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 9,
|
||||||
}, {
|
}, {
|
||||||
'note': 'Youtube music Album',
|
'note': 'Youtube music Album',
|
||||||
|
@ -5615,6 +5616,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
'extractor_args': {'youtubetab': {'skip': ['webpage']}}
|
'extractor_args': {'youtubetab': {'skip': ['webpage']}}
|
||||||
},
|
},
|
||||||
|
'skip': 'Query for sorting no longer works',
|
||||||
}, {
|
}, {
|
||||||
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
|
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
|
||||||
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
|
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
|
||||||
|
@ -5633,10 +5635,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
|
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
},
|
},
|
||||||
'expected_warnings': [
|
|
||||||
'does not have a videos tab',
|
|
||||||
r'[Uu]navailable videos (are|will be) hidden',
|
|
||||||
],
|
|
||||||
'playlist_mincount': 101,
|
'playlist_mincount': 101,
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -5715,13 +5713,155 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_mincount': 50,
|
'playlist_mincount': 50,
|
||||||
|
|
||||||
|
}, {
|
||||||
|
# Channel with a real live tab (not to be mistaken with streams tab)
|
||||||
|
# Do not treat like it should redirect to live stream
|
||||||
|
'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
|
||||||
|
'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
'playlist_mincount': 20,
|
||||||
|
}, {
|
||||||
|
# Tab name is not the same as tab id
|
||||||
|
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
|
||||||
|
'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
'playlist_mincount': 8,
|
||||||
|
}, {
|
||||||
|
# Home tab id is literally home. Not to get mistaken with featured
|
||||||
|
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
|
||||||
|
'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
'playlist_mincount': 8,
|
||||||
|
}, {
|
||||||
|
# Should get three playlists for videos, shorts and streams tabs
|
||||||
|
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
||||||
|
'title': 'Uploads for UCK9V2B22uJYu3N7eR_BT9QA'
|
||||||
|
},
|
||||||
|
'playlist_count': 3,
|
||||||
|
}, {
|
||||||
|
# Shorts tab with channel with handle
|
||||||
|
'url': 'https://www.youtube.com/@NotJustBikes/shorts',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||||
|
'title': 'Not Just Bikes - Shorts',
|
||||||
|
'tags': 'count:12',
|
||||||
|
'uploader': 'Not Just Bikes',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
|
||||||
|
'description': 'md5:7513148b1f02b924783157d84c4ea555',
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||||
|
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||||
|
'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
|
||||||
|
'channel': 'Not Just Bikes',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 10,
|
||||||
|
}, {
|
||||||
|
# Streams tab
|
||||||
|
'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
|
||||||
|
'title': '中村悠一 - Live',
|
||||||
|
'tags': 'count:7',
|
||||||
|
'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
|
||||||
|
'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
|
||||||
|
'channel': '中村悠一',
|
||||||
|
'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'uploader': '中村悠一',
|
||||||
|
'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 60,
|
||||||
|
}, {
|
||||||
|
# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
|
||||||
|
# See test_youtube_lists
|
||||||
|
'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# No uploads and no UCID given. Should fail with no uploads error
|
||||||
|
# See test_youtube_lists
|
||||||
|
'url': 'https://www.youtube.com/news',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
# No videos tab but has a shorts tab
|
||||||
|
'url': 'https://www.youtube.com/c/TKFShorts',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
|
||||||
|
'title': 'Shorts Break - Shorts',
|
||||||
|
'tags': 'count:32',
|
||||||
|
'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
|
||||||
|
'channel': 'Shorts Break',
|
||||||
|
'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
|
||||||
|
'uploader': 'Shorts Break',
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
|
||||||
|
'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 30,
|
||||||
|
}, {
|
||||||
|
# Trending Now Tab. tab id is empty
|
||||||
|
'url': 'https://www.youtube.com/feed/trending',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'trending',
|
||||||
|
'title': 'trending - Now',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
'playlist_mincount': 30,
|
||||||
|
}, {
|
||||||
|
# Trending Gaming Tab. tab id is empty
|
||||||
|
'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'trending',
|
||||||
|
'title': 'trending - Gaming',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
'playlist_mincount': 30,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if YoutubeIE.suitable(url) else super().suitable(url)
|
return False if YoutubeIE.suitable(url) else super().suitable(url)
|
||||||
|
|
||||||
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
|
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
|
||||||
|
|
||||||
|
def _get_url_mobj(self, url):
|
||||||
|
mobj = self._URL_RE.match(url).groupdict()
|
||||||
|
mobj.update((k, '') for k, v in mobj.items() if v is None)
|
||||||
|
return mobj
|
||||||
|
|
||||||
|
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
|
||||||
|
tab_name = (tab.get('title') or '').lower()
|
||||||
|
tab_url = urljoin(base_url, traverse_obj(
|
||||||
|
tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
|
||||||
|
|
||||||
|
tab_id = (traverse_obj(tab, 'tabIdentifier', expected_type=str)
|
||||||
|
or tab_url and self._get_url_mobj(tab_url)['tab'][1:])
|
||||||
|
if tab_id:
|
||||||
|
return tab_id, tab_name
|
||||||
|
|
||||||
|
# Fallback to tab name if we cannot get the tab id.
|
||||||
|
# XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
|
||||||
|
# Note that in the case of translated tab name this may result in an empty string, which we don't want.
|
||||||
|
self.write_debug(f'Falling back to selected tab name: {tab_name}')
|
||||||
|
return {
|
||||||
|
'home': 'featured',
|
||||||
|
'live': 'streams',
|
||||||
|
}.get(tab_name, tab_name), tab_name
|
||||||
|
|
||||||
|
def _has_tab(self, tabs, tab_id):
|
||||||
|
return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
|
||||||
|
|
||||||
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
|
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
|
||||||
def _real_extract(self, url, smuggled_data):
|
def _real_extract(self, url, smuggled_data):
|
||||||
|
@ -5730,14 +5870,8 @@ def _real_extract(self, url, smuggled_data):
|
||||||
urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
|
urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
|
||||||
compat_opts = self.get_param('compat_opts', [])
|
compat_opts = self.get_param('compat_opts', [])
|
||||||
|
|
||||||
def get_mobj(url):
|
mobj = self._get_url_mobj(url)
|
||||||
mobj = self._URL_RE.match(url).groupdict()
|
pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
|
||||||
mobj.update((k, '') for k, v in mobj.items() if v is None)
|
|
||||||
return mobj
|
|
||||||
|
|
||||||
mobj, redirect_warning = get_mobj(url), None
|
|
||||||
# Youtube returns incomplete data if tabname is not lower case
|
|
||||||
pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
|
|
||||||
if is_channel:
|
if is_channel:
|
||||||
if smuggled_data.get('is_music_url'):
|
if smuggled_data.get('is_music_url'):
|
||||||
if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
|
if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
|
||||||
|
@ -5750,19 +5884,16 @@ def get_mobj(url):
|
||||||
get_all=False, expected_type=str)
|
get_all=False, expected_type=str)
|
||||||
if not murl:
|
if not murl:
|
||||||
raise ExtractorError('Failed to resolve album to playlist')
|
raise ExtractorError('Failed to resolve album to playlist')
|
||||||
return self.url_result(murl, ie=YoutubeTabIE.ie_key())
|
return self.url_result(murl, YoutubeTabIE)
|
||||||
elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
|
elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
|
||||||
pre = f'https://www.youtube.com/channel/{item_id}'
|
pre = f'https://www.youtube.com/channel/{item_id}'
|
||||||
|
|
||||||
original_tab_name = tab
|
original_tab_id = tab[1:]
|
||||||
if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
|
if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
|
||||||
# Home URLs should redirect to /videos/
|
|
||||||
redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
|
|
||||||
'To download only the videos in the home page, add a "/featured" to the URL')
|
|
||||||
tab = '/videos'
|
tab = '/videos'
|
||||||
|
|
||||||
url = ''.join((pre, tab, post))
|
url = ''.join((pre, tab, post))
|
||||||
mobj = get_mobj(url)
|
mobj = self._get_url_mobj(url)
|
||||||
|
|
||||||
# Handle both video/playlist URLs
|
# Handle both video/playlist URLs
|
||||||
qs = parse_qs(url)
|
qs = parse_qs(url)
|
||||||
|
@ -5775,77 +5906,94 @@ def get_mobj(url):
|
||||||
# Common mistake: https://www.youtube.com/watch?list=playlist_id
|
# Common mistake: https://www.youtube.com/watch?list=playlist_id
|
||||||
self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
|
self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
|
||||||
url = f'https://www.youtube.com/playlist?list={playlist_id}'
|
url = f'https://www.youtube.com/playlist?list={playlist_id}'
|
||||||
mobj = get_mobj(url)
|
mobj = self._get_url_mobj(url)
|
||||||
|
|
||||||
if video_id and playlist_id:
|
if not self._yes_playlist(playlist_id, video_id):
|
||||||
if self.get_param('noplaylist'):
|
return self.url_result(
|
||||||
self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
|
f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
|
||||||
return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
|
|
||||||
ie=YoutubeIE.ie_key(), video_id=video_id)
|
|
||||||
self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
|
|
||||||
|
|
||||||
data, ytcfg = self._extract_data(url, item_id)
|
data, ytcfg = self._extract_data(url, item_id)
|
||||||
|
|
||||||
# YouTube may provide a non-standard redirect to the regional channel
|
# YouTube may provide a non-standard redirect to the regional channel
|
||||||
# See: https://github.com/yt-dlp/yt-dlp/issues/2694
|
# See: https://github.com/yt-dlp/yt-dlp/issues/2694
|
||||||
|
# https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
|
||||||
redirect_url = traverse_obj(
|
redirect_url = traverse_obj(
|
||||||
data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
|
data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
|
||||||
if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
|
if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
|
||||||
redirect_url = ''.join((
|
redirect_url = ''.join((
|
||||||
urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
|
urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
|
||||||
self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
|
self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
|
||||||
return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())
|
return self.url_result(redirect_url, YoutubeTabIE)
|
||||||
|
|
||||||
tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
|
tab_results = []
|
||||||
if tabs:
|
tabs = self._extract_tab_renderers(data)
|
||||||
|
if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
|
||||||
selected_tab = self._extract_selected_tab(tabs)
|
selected_tab = self._extract_selected_tab(tabs)
|
||||||
selected_tab_url = urljoin(
|
selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
|
||||||
url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
|
self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
|
||||||
translated_tab_name = selected_tab.get('title', '').lower()
|
|
||||||
|
|
||||||
# Prefer tab name from tab url as it is always in en,
|
if not original_tab_id and selected_tab_name:
|
||||||
# but only when preferred lang is set as it may not extract reliably in all cases.
|
self.to_screen('Channel URLs download all uploads of the channel. '
|
||||||
selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name
|
'To download only the videos in a specific tab, pass the tab\'s URL')
|
||||||
or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:] # primary
|
if self._has_tab(tabs, 'streams'):
|
||||||
or translated_tab_name)
|
tab_results.append(self.url_result(''.join((pre, '/streams', post))))
|
||||||
|
if self._has_tab(tabs, 'shorts'):
|
||||||
|
tab_results.append(self.url_result(''.join((pre, '/shorts', post))))
|
||||||
|
# XXX: Members-only tab should also be extracted
|
||||||
|
|
||||||
if selected_tab_name == 'home':
|
if not tab_results and selected_tab_id != 'videos':
|
||||||
selected_tab_name = 'featured'
|
# Channel does not have streams, shorts or videos tabs
|
||||||
requested_tab_name = mobj['tab'][1:]
|
if item_id[:2] != 'UC':
|
||||||
|
raise ExtractorError('This channel has no uploads', expected=True)
|
||||||
|
|
||||||
if 'no-youtube-channel-redirect' not in compat_opts:
|
|
||||||
if requested_tab_name == 'live': # Live tab should have redirected to the video
|
|
||||||
raise UserNotLive(video_id=mobj['id'])
|
|
||||||
if requested_tab_name not in ('', selected_tab_name):
|
|
||||||
redirect_warning = f'The channel does not have a {requested_tab_name} tab'
|
|
||||||
if not original_tab_name:
|
|
||||||
if item_id[:2] == 'UC':
|
|
||||||
# Topic channels don't have /videos. Use the equivalent playlist instead
|
# Topic channels don't have /videos. Use the equivalent playlist instead
|
||||||
pl_id = f'UU{item_id[2:]}'
|
pl_id = f'UU{item_id[2:]}'
|
||||||
pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
|
pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
|
||||||
try:
|
try:
|
||||||
data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
|
data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
redirect_warning += ' and the playlist redirect gave error'
|
raise ExtractorError('This channel has no uploads', expected=True)
|
||||||
else:
|
else:
|
||||||
item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
|
item_id, url = pl_id, pl_url
|
||||||
redirect_warning += f'. Redirecting to playlist {pl_id} instead'
|
self.to_screen(
|
||||||
if selected_tab_name and selected_tab_name != requested_tab_name:
|
f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
|
||||||
redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
|
|
||||||
else:
|
elif tab_results and selected_tab_id != 'videos':
|
||||||
raise ExtractorError(redirect_warning, expected=True)
|
# When there are shorts/live tabs but not videos tab
|
||||||
|
url, data = ''.join((pre, post)), None
|
||||||
|
|
||||||
|
elif (original_tab_id or 'videos') != selected_tab_id:
|
||||||
|
if original_tab_id == 'live':
|
||||||
|
# Live tab should have redirected to the video
|
||||||
|
# Except in the case the channel has an actual live tab
|
||||||
|
# Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
|
||||||
|
raise UserNotLive(video_id=mobj['id'])
|
||||||
|
elif selected_tab_name:
|
||||||
|
raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)
|
||||||
|
|
||||||
|
# For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
|
||||||
|
url = f'{pre}{post}'
|
||||||
|
|
||||||
if redirect_warning:
|
|
||||||
self.to_screen(redirect_warning)
|
|
||||||
self.write_debug(f'Final URL: {url}')
|
self.write_debug(f'Final URL: {url}')
|
||||||
|
|
||||||
# YouTube sometimes provides a button to reload playlist with unavailable videos.
|
# YouTube sometimes provides a button to reload playlist with unavailable videos.
|
||||||
if 'no-youtube-unavailable-videos' not in compat_opts:
|
if 'no-youtube-unavailable-videos' not in compat_opts:
|
||||||
data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
|
data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
|
||||||
self._extract_and_report_alerts(data, only_once=True)
|
self._extract_and_report_alerts(data, only_once=True)
|
||||||
tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
|
|
||||||
|
tabs = self._extract_tab_renderers(data)
|
||||||
if tabs:
|
if tabs:
|
||||||
return self._extract_from_tabs(item_id, ytcfg, data, tabs)
|
tab_results[:0] = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
|
||||||
|
tab_results[0].update({
|
||||||
|
'extractor_key': YoutubeTabIE.ie_key(),
|
||||||
|
'extractor': YoutubeTabIE.IE_NAME,
|
||||||
|
'webpage_url': url,
|
||||||
|
})
|
||||||
|
|
||||||
|
if len(tab_results) == 1:
|
||||||
|
return tab_results[0]
|
||||||
|
elif len(tab_results) > 1:
|
||||||
|
return self.playlist_result(tab_results, item_id, title=f'Uploads for {item_id}')
|
||||||
|
|
||||||
playlist = traverse_obj(
|
playlist = traverse_obj(
|
||||||
data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
|
data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
|
||||||
|
@ -5857,8 +6005,7 @@ def get_mobj(url):
|
||||||
if video_id:
|
if video_id:
|
||||||
if mobj['tab'] != '/live': # live tab is expected to redirect to video
|
if mobj['tab'] != '/live': # live tab is expected to redirect to video
|
||||||
self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
|
self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
|
||||||
return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
|
return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
|
||||||
ie=YoutubeIE.ie_key(), video_id=video_id)
|
|
||||||
|
|
||||||
raise ExtractorError('Unable to recognize tab page')
|
raise ExtractorError('Unable to recognize tab page')
|
||||||
|
|
||||||
|
@ -5891,12 +6038,13 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||||
'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
|
'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
|
||||||
'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
|
'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'uploader_url': 'https://www.youtube.com/user/Wickydoo',
|
'uploader_url': 'https://www.youtube.com/c/WickmanVT',
|
||||||
'modified_date': r're:\d{8}',
|
'modified_date': r're:\d{8}',
|
||||||
'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
|
'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
|
||||||
'channel': 'Wickman',
|
'channel': 'Wickman',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'channel_url': 'https://www.youtube.com/user/Wickydoo',
|
'channel_url': 'https://www.youtube.com/c/WickmanVT',
|
||||||
|
'availability': 'public',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 29,
|
'playlist_mincount': 29,
|
||||||
}, {
|
}, {
|
||||||
|
@ -5926,7 +6074,7 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||||
'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
|
'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
},
|
},
|
||||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
|
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
|
||||||
'playlist_mincount': 455,
|
'playlist_mincount': 455,
|
||||||
|
|
Loading…
Reference in a new issue