From 9f1a1c36e60b14f9ff47d83234b4ea61c5f5e2f7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 24 Oct 2021 14:46:07 +0530 Subject: [PATCH] Separate `--check-all-formats` from `--check-formats` Previously, `--check-formats` tested only the selected video formats, but ALL thumbnails --- yt_dlp/YoutubeDL.py | 109 ++++++++++++++++++++++++-------------------- yt_dlp/options.py | 10 ++-- yt_dlp/utils.py | 2 + 3 files changed, 68 insertions(+), 53 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 27fac6263..071f2e943 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -224,7 +224,8 @@ class YoutubeDL(object): allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file check_formats Whether to test if the formats are downloadable. - Can be True (check all), False (check none) + Can be True (check all), False (check none), + 'selected' (check selected formats), or None (check only if requested by extractor) paths: Dictionary of output paths. The allowed keys are 'home' 'temp' and the keys of OUTTMPL_TYPES (in utils.py) @@ -1720,6 +1721,28 @@ def _filter(f): return op(actual_value, comparison_value) return _filter + def _check_formats(self, formats): + for f in formats: + self.to_screen('[info] Testing format %s' % f['format_id']) + temp_file = tempfile.NamedTemporaryFile( + suffix='.tmp', delete=False, + dir=self.get_output_path('temp') or None) + temp_file.close() + try: + success, _ = self.dl(temp_file.name, f, test=True) + except (DownloadError, IOError, OSError, ValueError) + network_exceptions: + success = False + finally: + if os.path.exists(temp_file.name): + try: + os.remove(temp_file.name) + except OSError: + self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) + if success: + yield f + else: + self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + def _default_format_spec(self, info_dict, download=True): def can_merge(): @@ -1759,7 +1782,7 @@ def syntax_error(note, start): allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), 'video': self.params.get('allow_multiple_video_streams', False)} - check_formats = self.params.get('check_formats') + check_formats = self.params.get('check_formats') == 'selected' def _parse_filter(tokens): filter_parts = [] @@ -1935,26 +1958,7 @@ def _check_formats(formats): if not check_formats: yield from formats return - for f in formats: - self.to_screen('[info] Testing format %s' % f['format_id']) - temp_file = tempfile.NamedTemporaryFile( - suffix='.tmp', delete=False, - dir=self.get_output_path('temp') or None) - temp_file.close() - try: - success, _ = self.dl(temp_file.name, f, test=True) - except (DownloadError, IOError, OSError, ValueError) + network_exceptions: - success = False - finally: - if os.path.exists(temp_file.name): - try: - os.remove(temp_file.name) - except OSError: - self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) - if success: - yield f - else: - self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + yield from self._check_formats(formats) def _build_selector_function(selector): if isinstance(selector, list): # , @@ -2111,42 +2115,45 @@ def _calc_cookies(self, info_dict): self.cookiejar.add_cookie_header(pr) return pr.get_header('Cookie') + def _sort_thumbnails(self, thumbnails): + thumbnails.sort(key=lambda t: ( + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') is not None else -1, + t.get('id') if t.get('id') is not None else '', + t.get('url'))) + def _sanitize_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') if thumbnails is None: thumbnail = info_dict.get('thumbnail') if thumbnail: info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] - if thumbnails: - thumbnails.sort(key=lambda t: ( - t.get('preference') if t.get('preference') is not None else -1, - t.get('width') if t.get('width') is not None else -1, - t.get('height') if t.get('height') is not None else -1, - t.get('id') if t.get('id') is not None else '', - t.get('url'))) + if not thumbnails: + return - def thumbnail_tester(): - def test_thumbnail(t): - self.to_screen(f'[info] Testing thumbnail {t["id"]}') - try: - self.urlopen(HEADRequest(t['url'])) - except network_exceptions as err: - self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') - return False - return True - return test_thumbnail + def check_thumbnails(thumbnails): + for t in thumbnails: + self.to_screen(f'[info] Testing thumbnail {t["id"]}') + try: + self.urlopen(HEADRequest(t['url'])) + except network_exceptions as err: + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') + continue + yield t - for i, t in enumerate(thumbnails): - if t.get('id') is None: - t['id'] = '%d' % i - if t.get('width') and t.get('height'): - t['resolution'] = '%dx%d' % (t['width'], t['height']) - t['url'] = sanitize_url(t['url']) + self._sort_thumbnails(thumbnails) + for i, t in enumerate(thumbnails): + if t.get('id') is None: + t['id'] = '%d' % i + if t.get('width') and t.get('height'): + t['resolution'] = '%dx%d' % (t['width'], t['height']) + t['url'] = sanitize_url(t['url']) - if self.params.get('check_formats'): - info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() - else: - info_dict['thumbnails'] = thumbnails + if self.params.get('check_formats') is True: + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse() + else: + info_dict['thumbnails'] = thumbnails def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -2252,7 +2259,6 @@ def sanitize_numeric_fields(info): info_dict['requested_subtitles'] = self.process_subtitles( info_dict['id'], subtitles, automatic_captions) - # We now pick which formats have to be downloaded if info_dict.get('formats') is None: # There's only one format available formats = [info_dict] @@ -2335,6 +2341,9 @@ def is_wellformed(f): # TODO Central sorting goes here + if self.params.get('check_formats') is True: + formats = LazyList(self._check_formats(formats[::-1])).reverse() + if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 719a1bce4..5499ab13e 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -562,12 +562,16 @@ def _dict_from_options_callback( help="Don't give any special preference to free containers (default)") video_format.add_option( '--check-formats', - action='store_true', dest='check_formats', default=None, - help='Check that the formats selected are actually downloadable') + action='store_const', const='selected', dest='check_formats', default=None, + help='Check that the selected formats are actually downloadable') + video_format.add_option( + '--check-all-formats', + action='store_true', dest='check_formats', + help='Check all formats for whether they are actually downloadable') video_format.add_option( '--no-check-formats', action='store_false', dest='check_formats', - help='Do not check that the formats selected are actually downloadable') + help='Do not check that the formats are actually downloadable') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 08f9a5dc9..2c3ab00dc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4050,6 +4050,8 @@ def __iter__(self): def __exhaust(self): self.__cache.extend(self.__iterable) + # Discard the emptied iterable to make it pickle-able + self.__iterable = [] return self.__cache def exhaust(self):