Add option --extractor-retries to retry on known extractor errors

* Currently only used by youtube

Fixes https://github.com/ytdl-org/youtube-dl/issues/28194
Possibly also fixes https://github.com/ytdl-org/youtube-dl/issues/28289 (cannot confirm, since that issue isn't reliably reproducible)
This commit is contained in:

parent f0884c8b3f
commit 62bff2c170

4 changed files with 48 additions and 30 deletions
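As wired up in options.py below, the flag takes a count or the literal "infinite"; a usage sketch (the playlist URL is a placeholder):

    yt-dlp --extractor-retries 5 "https://www.youtube.com/playlist?list=EXAMPLE"
    yt-dlp --extractor-retries infinite "https://www.youtube.com/playlist?list=EXAMPLE"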
yt_dlp/YoutubeDL.py

@@ -381,17 +381,18 @@ class YoutubeDL(object):
                        Use 'default' as the name for arguments to passed to all PP
 
     The following options are used by the extractors:
+    extractor_retries: Number of times to retry for known errors
     dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
     hls_split_discontinuity: Split HLS playlists to different formats at
                        discontinuities such as ad breaks (default: False)
     youtube_include_dash_manifest: If True (default), DASH manifests and related
                        data will be downloaded and processed by extractor.
                        You can reduce network I/O by disabling it if you don't
                        care about DASH. (only for youtube)
     youtube_include_hls_manifest: If True (default), HLS manifests and related
                        data will be downloaded and processed by extractor.
                        You can reduce network I/O by disabling it if you don't
                        care about HLS. (only for youtube)
     """
 
     _NUMERIC_FIELDS = set((
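Since extractor_retries travels in the same params dict as the other options documented above (the YouTube extractor reads it via self._downloader.params.get('extractor_retries', 3)), embedders can set it directly. A minimal sketch, not part of this commit; the playlist URL is a placeholder:

    from yt_dlp import YoutubeDL

    # 'extractor_retries' is an ordinary params key; when it is absent,
    # the YouTube extractor falls back to 3 retries (see youtube.py below)
    with YoutubeDL({'extractor_retries': 5}) as ydl:
        ydl.download(['https://www.youtube.com/playlist?list=EXAMPLE'])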
yt_dlp/__init__.py

@@ -181,19 +181,21 @@ def _real_main(argv=None):
         # --yes-overwrites implies --no-continue
         opts.continue_dl = False
 
-    def parse_retries(retries):
+    def parse_retries(retries, name=''):
         if retries in ('inf', 'infinite'):
             parsed_retries = float('inf')
         else:
             try:
                 parsed_retries = int(retries)
             except (TypeError, ValueError):
-                parser.error('invalid retry count specified')
+                parser.error('invalid %sretry count specified' % name)
         return parsed_retries
     if opts.retries is not None:
         opts.retries = parse_retries(opts.retries)
     if opts.fragment_retries is not None:
-        opts.fragment_retries = parse_retries(opts.fragment_retries)
+        opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ')
+    if opts.extractor_retries is not None:
+        opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ')
     if opts.buffersize is not None:
         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
         if numeric_buffersize is None:

@@ -458,6 +460,7 @@ def report_args_compat(arg, name):
         'overwrites': opts.overwrites,
         'retries': opts.retries,
         'fragment_retries': opts.fragment_retries,
+        'extractor_retries': opts.extractor_retries,
         'skip_unavailable_fragments': opts.skip_unavailable_fragments,
         'keep_fragments': opts.keep_fragments,
         'buffersize': opts.buffersize,
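For illustration, the parse_retries behaviour in isolation: a standalone sketch with parser.error swapped for a plain ValueError (the name parameter only decorates the error message):

    def parse_retries(retries, name=''):
        # 'inf'/'infinite' mean an unbounded retry budget;
        # everything else must parse as an integer
        if retries in ('inf', 'infinite'):
            return float('inf')
        try:
            return int(retries)
        except (TypeError, ValueError):
            raise ValueError('invalid %sretry count specified' % name)

    assert parse_retries('infinite') == float('inf')
    assert parse_retries('7', 'extractor ') == 7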
yt_dlp/extractor/youtube.py

@@ -2762,28 +2762,36 @@ def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
         for page_num in itertools.count(1):
             if not continuation:
                 break
-            count = 0
-            retries = 3
-            while count <= retries:
+            retries = self._downloader.params.get('extractor_retries', 3)
+            count = -1
+            last_error = None
+            while count < retries:
+                count += 1
+                if last_error:
+                    self.report_warning('%s. Retrying ...' % last_error)
                 try:
-                    # Downloading page may result in intermittent 5xx HTTP error
-                    # that is usually worked around with a retry
                     browse = self._download_json(
                         'https://www.youtube.com/browse_ajax', None,
                         'Downloading page %d%s'
                         % (page_num, ' (retry #%d)' % count if count else ''),
                         headers=headers, query=continuation)
-                    break
                 except ExtractorError as e:
-                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
-                        count += 1
-                        if count <= retries:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
+                        # Downloading page may result in intermittent 5xx HTTP error
+                        # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+                        last_error = 'HTTP Error %s' % e.cause.code
+                        if count < retries:
                             continue
                     raise
-            if not browse:
-                break
-            response = try_get(browse, lambda x: x[1]['response'], dict)
-            if not response:
+                else:
+                    response = try_get(browse, lambda x: x[1]['response'], dict)
+
+                    # Youtube sometimes sends incomplete data
+                    # See: https://github.com/ytdl-org/youtube-dl/issues/28194
+                    if response.get('continuationContents') or response.get('onResponseReceivedActions'):
+                        break
+                    last_error = 'Incomplete data recieved'
+            if not browse or not response:
                 break
 
             known_continuation_renderers = {
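The loop above follows a reusable shape: one attempt budget shared between transient HTTP failures and structurally incomplete payloads. A self-contained sketch of that pattern, with fetch, is_complete and TransientHTTPError standing in for _download_json, the continuation check and the compat_HTTPError cases:

    class TransientHTTPError(Exception):
        def __init__(self, code):
            super().__init__('HTTP Error %s' % code)
            self.code = code

    def fetch_with_retries(fetch, is_complete, retries=3, report=print):
        # count starts at -1 so the first pass is attempt #0 and the
        # warning is printed only before an actual retry
        count, last_error = -1, None
        while count < retries:
            count += 1
            if last_error:
                report('%s. Retrying ...' % last_error)
            try:
                result = fetch(count)
            except TransientHTTPError as e:
                # only known transient codes are retried; on the final
                # attempt the error propagates
                last_error = 'HTTP Error %s' % e.code
                if count < retries:
                    continue
                raise
            else:
                if is_complete(result):
                    return result
                # a well-formed but incomplete payload also burns a retry
                last_error = 'Incomplete data received'
        return None

With retries=0 this degrades to a single attempt whose transient error re-raises immediately, matching the count < retries guards in the hunk.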
@@ -3004,11 +3012,16 @@ def _real_extract(self, url):
             return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
         self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        count = 0
-        retries = 3
+        retries = self._downloader.params.get('extractor_retries', 3)
+        count = -1
         while count < retries:
+            count += 1
             # Sometimes youtube returns a webpage with incomplete ytInitialData
-            webpage = self._download_webpage(url, item_id)
+            # See: https://github.com/yt-dlp/yt-dlp/issues/116
+            if count:
+                self.report_warning('Incomplete yt initial data recieved. Retrying ...')
+            webpage = self._download_webpage(url, item_id,
+                'Downloading webpage%s' % ' (retry #%d)' % count if count else '')
             identity_token = self._extract_identity_token(webpage, item_id)
             data = self._extract_yt_initial_data(item_id, webpage)
             err_msg = None

@@ -3023,9 +3036,6 @@ def _real_extract(self, url):
                 raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
             if data.get('contents') or data.get('currentVideoEndpoint'):
                 break
-            count += 1
-            self.to_screen(
-                'Incomplete yt initial data recieved. Retrying (attempt %d of %d)...' % (count, retries))
 
         tabs = try_get(
             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
yt_dlp/options.py

@@ -1218,6 +1218,10 @@ def _dict_from_multiple_values_options_callback(
         help=optparse.SUPPRESS_HELP)
 
     extractor = optparse.OptionGroup(parser, 'Extractor Options')
+    extractor.add_option(
+        '--extractor-retries',
+        dest='extractor_retries', metavar='RETRIES', default=10,
+        help='Number of retries for known extractor errors (default is %default), or "infinite"')
     extractor.add_option(
         '--allow-dynamic-mpd', '--no-ignore-dynamic-mpd',
         action='store_true', dest='dynamic_mpd', default=True,
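Note the two defaults visible in this diff: the CLI option defaults to 10 retries, while the extractor's params.get('extractor_retries', 3) falls back to 3 when the key is absent, i.e. when YoutubeDL is driven programmatically without going through options.py.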