mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
Allow iterators for playlist result entries
This commit is contained in:
parent
158f8cadc0
commit
b82f815f37
3 changed files with 26 additions and 16 deletions
|
@ -7,6 +7,7 @@
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import io
|
import io
|
||||||
|
import itertools
|
||||||
import json
|
import json
|
||||||
import locale
|
import locale
|
||||||
import os
|
import os
|
||||||
|
@ -654,21 +655,28 @@ def make_result(embedded_info):
|
||||||
if playlistend == -1:
|
if playlistend == -1:
|
||||||
playlistend = None
|
playlistend = None
|
||||||
|
|
||||||
if isinstance(ie_result['entries'], list):
|
ie_entries = ie_result['entries']
|
||||||
n_all_entries = len(ie_result['entries'])
|
if isinstance(ie_entries, list):
|
||||||
entries = ie_result['entries'][playliststart:playlistend]
|
n_all_entries = len(ie_entries)
|
||||||
|
entries = ie_entries[playliststart:playlistend]
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
|
||||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||||
else:
|
elif isinstance(ie_entries, PagedList):
|
||||||
assert isinstance(ie_result['entries'], PagedList)
|
entries = ie_entries.getslice(
|
||||||
entries = ie_result['entries'].getslice(
|
|
||||||
playliststart, playlistend)
|
playliststart, playlistend)
|
||||||
n_entries = len(entries)
|
n_entries = len(entries)
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
"[%s] playlist %s: Downloading %d videos" %
|
"[%s] playlist %s: Downloading %d videos" %
|
||||||
(ie_result['extractor'], playlist, n_entries))
|
(ie_result['extractor'], playlist, n_entries))
|
||||||
|
else: # iterable
|
||||||
|
entries = list(itertools.islice(
|
||||||
|
ie_entries, playliststart, playlistend))
|
||||||
|
n_entries = len(entries)
|
||||||
|
self.to_screen(
|
||||||
|
"[%s] playlist %s: Downloading %d videos" %
|
||||||
|
(ie_result['extractor'], playlist, n_entries))
|
||||||
|
|
||||||
for i, entry in enumerate(entries, 1):
|
for i, entry in enumerate(entries, 1):
|
||||||
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
|
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
|
||||||
|
|
|
@ -158,8 +158,8 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
|
|
||||||
_type "playlist" indicates multiple videos.
|
_type "playlist" indicates multiple videos.
|
||||||
There must be a key "entries", which is a list or a PagedList object, each
|
There must be a key "entries", which is a list, an iterable, or a PagedList
|
||||||
element of which is a valid dictionary under this specfication.
|
object, each element of which is a valid dictionary by this specification.
|
||||||
|
|
||||||
Additionally, playlists can have "title" and "id" attributes with the same
|
Additionally, playlists can have "title" and "id" attributes with the same
|
||||||
semantics as videos (see above).
|
semantics as videos (see above).
|
||||||
|
|
|
@ -1262,8 +1262,12 @@ def _real_extract(self, url):
|
||||||
# The videos are contained in a single page
|
# The videos are contained in a single page
|
||||||
# the ajax pages can't be used, they are empty
|
# the ajax pages can't be used, they are empty
|
||||||
video_ids = self.extract_videos_from_page(channel_page)
|
video_ids = self.extract_videos_from_page(channel_page)
|
||||||
else:
|
entries = [
|
||||||
# Download all channel pages using the json-based channel_ajax query
|
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
|
for video_id in video_ids]
|
||||||
|
return self.playlist_result(entries, channel_id)
|
||||||
|
|
||||||
|
def _entries():
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||||
page = self._download_json(
|
page = self._download_json(
|
||||||
|
@ -1271,16 +1275,14 @@ def _real_extract(self, url):
|
||||||
transform_source=uppercase_escape)
|
transform_source=uppercase_escape)
|
||||||
|
|
||||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||||
video_ids.extend(ids_in_page)
|
for video_id in ids_in_page:
|
||||||
|
yield self.url_result(
|
||||||
|
video_id, 'Youtube', video_id=video_id)
|
||||||
|
|
||||||
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
||||||
break
|
break
|
||||||
|
|
||||||
self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
return self.playlist_result(_entries(), channel_id)
|
||||||
|
|
||||||
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
|
||||||
for video_id in video_ids]
|
|
||||||
return self.playlist_result(url_entries, channel_id)
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(InfoExtractor):
|
||||||
|
|
Loading…
Reference in a new issue