From 76d1700b283ee482288eec12a6903a345742eead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 27 Nov 2013 20:01:51 +0100 Subject: [PATCH 1/3] [youtube:playlist] Fix the extraction of the title for some mixes (#1844) Like https://www.youtube.com/watch?v=g8jDB5xOiuE&list=RDIh2gxLqR7HM --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9ef5fecce..fb61f47e8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1547,7 +1547,9 @@ def _extract_mix(self, playlist_id): # the id of the playlist is just 'RD' + video_id url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id) webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix') - title = clean_html(get_element_by_attribute('class', 'title long-title', webpage)) + title_span = (get_element_by_attribute('class', 'title long-title', webpage) or + get_element_by_attribute('class', 'title ', webpage)) + title = clean_html(title_span) video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id) ids = orderedSet(re.findall(video_re, webpage)) url_results = self._ids_to_results(ids) From 35907e23ec4d7e754ff239693500e05886b80ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 27 Nov 2013 21:24:55 +0100 Subject: [PATCH 2/3] [yahoo] Fix video extraction and use the new format system exclusively --- youtube_dl/extractor/yahoo.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 34e6afb20..617e3bb06 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -46,7 +46,7 @@ def _real_extract(self, url): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$', + items_json = self._search_regex(r'mediaItems: ({.*?})$', webpage, u'items', flags=re.MULTILINE) items = json.loads(items_json) info = items['mediaItems']['query']['results']['mediaObj'][0] @@ -91,17 +91,13 @@ def _real_extract(self, url): formats.append(format_info) formats = sorted(formats, key=lambda f:(f['height'], f['width'])) - info = { + return { 'id': video_id, 'title': meta['title'], 'formats': formats, 'description': clean_html(meta['description']), 'thumbnail': meta['thumbnail'], } - # TODO: Remove when #980 has been merged - info.update(formats[-1]) - - return info class YahooSearchIE(SearchInfoExtractor): From 0e44d8381a439c84dd23477d32f7da4bb0a06293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 28 Nov 2013 00:33:27 +0100 Subject: [PATCH 3/3] [youtube:feeds] Use the 'paging' value from the downloaded json information (fixes #1845) --- youtube_dl/extractor/youtube.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index fb61f47e8..765b4a9bf 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1791,7 +1791,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties. """ _LOGIN_REQUIRED = True - _PAGING_STEP = 30 # use action_load_personal_feed instead of action_load_system_feed _PERSONAL_FEED = False @@ -1811,9 +1810,8 @@ def _real_initialize(self): def _real_extract(self, url): feed_entries = [] - # The step argument is available only in 2.7 or higher - for i in itertools.count(0): - paging = i*self._PAGING_STEP + paging = 0 + for i in itertools.count(1): info = self._download_webpage(self._FEED_TEMPLATE % paging, u'%s feed' % self._FEED_NAME, u'Downloading page %s' % i) @@ -1826,6 +1824,7 @@ def _real_extract(self, url): for video_id in ids) if info['paging'] is None: break + paging = info['paging'] return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): @@ -1845,7 +1844,6 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor): _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater' _FEED_NAME = 'watch_later' _PLAYLIST_TITLE = u'Youtube Watch Later' - _PAGING_STEP = 100 _PERSONAL_FEED = True class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): @@ -1855,13 +1853,6 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): _PERSONAL_FEED = True _PLAYLIST_TITLE = u'Youtube Watch History' - def _real_extract(self, url): - webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History') - data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging') - # The step is actually a ridiculously big number (like 1374343569725646) - self._PAGING_STEP = int(data_paging) - return super(YoutubeHistoryIE, self)._real_extract(url) - class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = u'youtube:favorites' IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'