mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-27 01:01:17 +00:00
[youtube:history] Fix extraction (fixes #5702)
It uses the same method as YoutubeSubscriptionsIE; if other feeds start using it, we should consider using a base class.
This commit is contained in:
parent
12675275a1
commit
2bc4330303
1 changed files with 33 additions and 4 deletions
|
@ -1667,13 +1667,42 @@ def _real_extract(self, url):
|
|||
return self._extract_playlist('WL')
|
||||
|
||||
|
||||
class YoutubeHistoryIE(YoutubePlaylistIE):
    """Extract the videos listed on the YouTube watch-history feed page.

    The feed page is scraped directly: video ids are collected from the
    ``/watch?v=`` links of the first page and of every "load more"
    continuation the page advertises.
    """

    IE_NAME = 'youtube:history'
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    # Raw string so the escaped dots reach the regex engine unchanged instead
    # of being parsed as (invalid) string escape sequences.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _TESTS = []

    def _real_extract(self, url):
        """Return a playlist result built from the ids found in the history.

        ``url`` is not used: the history page is always fetched from its
        fixed address, whichever alternative of ``_VALID_URL`` matched.
        """
        title = 'Youtube History'
        page = self._download_webpage('https://www.youtube.com/feed/history', title)

        # The extraction process is the same as for playlists, but the regex
        # for the video ids doesn't contain an index
        ids = []
        # On the first pass the whole page is searched both for video links
        # and for the "load more" widget; later passes use the two HTML
        # fragments returned by the continuation request.
        more_widget_html = content_html = page

        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
            # orderedSet is applied per page only, so an id that shows up on
            # several pages is kept once per page.
            # NOTE(review): presumably orderedSet keeps first-seen order - confirm.
            new_ids = orderedSet(matches)
            ids.extend(new_ids)

            # No "load more" link means this was the last page.
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
            if not mobj:
                break

            more = self._download_json(
                'https://youtube.com/%s' % mobj.group('more'), title,
                'Downloading page #%s' % page_num,
                transform_source=uppercase_escape)
            content_html = more['content_html']
            more_widget_html = more['load_more_widget_html']

        return {
            '_type': 'playlist',
            'title': title,
            'entries': self._ids_to_results(ids),
        }
|
||||
|
||||
|
||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||
|
|
Loading…
Reference in a new issue