From c9969434183c562eb9935aa20f147f234aa61e53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Thu, 8 Jun 2017 22:53:14 +0700
Subject: [PATCH] [YoutubeDL] Sanitize more fields (#13313)

---
 youtube_dl/YoutubeDL.py | 52 +++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 7efa0c948..c05103bb6 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -58,6 +58,7 @@
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
+    int_or_none,
     ISO3166Utils,
     locked_file,
     make_HTTPS_handler,
@@ -302,6 +303,17 @@ class YoutubeDL(object):
                         postprocessor.
     """
 
+    _NUMERIC_FIELDS = set((
+        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+        'timestamp', 'upload_year', 'upload_month', 'upload_day',
+        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+        'average_rating', 'comment_count', 'age_limit',
+        'start_time', 'end_time',
+        'chapter_number', 'season_number', 'episode_number',
+        'track_number', 'disc_number', 'release_year',
+        'playlist_index',
+    ))
+
     params = None
     _ies = []
     _pps = []
@@ -639,22 +651,11 @@ def prepare_filename(self, info_dict):
                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                     outtmpl)
 
-            NUMERIC_FIELDS = set((
-                'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
-                'timestamp', 'upload_year', 'upload_month', 'upload_day',
-                'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
-                'average_rating', 'comment_count', 'age_limit',
-                'start_time', 'end_time',
-                'chapter_number', 'season_number', 'episode_number',
-                'track_number', 'disc_number', 'release_year',
-                'playlist_index',
-            ))
-
             # Missing numeric fields used together with integer presentation types
             # in format specification will break the argument substitution since
             # string 'NA' is returned for missing fields. We will patch output
             # template for missing fields to meet string presentation type.
-            for numeric_field in NUMERIC_FIELDS:
+            for numeric_field in self._NUMERIC_FIELDS:
                 if numeric_field not in template_dict:
                     # As of [1] format syntax is:
                     # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
@@ -1345,9 +1346,28 @@ def process_video_result(self, info_dict, download=True):
         if 'title' not in info_dict:
             raise ExtractorError('Missing "title" field in extractor result')
 
-        if not isinstance(info_dict['id'], compat_str):
-            self.report_warning('"id" field is not a string - forcing string conversion')
-            info_dict['id'] = compat_str(info_dict['id'])
+        def report_force_conversion(field, field_not, conversion):
+            self.report_warning(
+                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+                % (field, field_not, conversion))
+
+        def sanitize_string_field(info, string_field):
+            field = info.get(string_field)
+            if field is None or isinstance(field, compat_str):
+                return
+            report_force_conversion(string_field, 'a string', 'string')
+            info[string_field] = compat_str(field)
+
+        def sanitize_numeric_fields(info):
+            for numeric_field in self._NUMERIC_FIELDS:
+                field = info.get(numeric_field)
+                if field is None or isinstance(field, compat_numeric_types):
+                    continue
+                report_force_conversion(numeric_field, 'numeric', 'int')
+                info[numeric_field] = int_or_none(field)
+
+        sanitize_string_field(info_dict, 'id')
+        sanitize_numeric_fields(info_dict)
 
         if 'playlist' not in info_dict:
             # It isn't part of a playlist
@@ -1435,6 +1455,8 @@ def process_video_result(self, info_dict, download=True):
             if 'url' not in format:
                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 
+            sanitize_string_field(format, 'format_id')
+            sanitize_numeric_fields(format)
             format['url'] = sanitize_url(format['url'])
 
             if format.get('format_id') is None: