From cedc70b29277c7ceebee4843bc2f13ebe88109b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Jul 2016 00:28:07 +0700 Subject: [PATCH] [facebook] Fix invalid video being extracted (Closes #9851) --- youtube_dl/extractor/facebook.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f5d4f966a8..0d43acc4ac 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -219,12 +219,23 @@ def _extract_from_url(self, url, video_id, fatal_if_no_video=True): BEFORE = '{swf.addParam(param[0], param[1]);});' AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' - m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage) - if m: - swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"') + PATTERN = re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER) + + for m in re.findall(PATTERN, webpage): + swf_params = m.replace('\\\\', '\\').replace('\\"', '"') data = dict(json.loads(swf_params)) params_raw = compat_urllib_parse_unquote(data['params']) - video_data = json.loads(params_raw)['video_data'] + video_data_candidate = json.loads(params_raw)['video_data'] + for _, f in video_data_candidate.items(): + if not f: + continue + if isinstance(f, dict): + f = [f] + if isinstance(f, list): + continue + if f[0].get('video_id') == video_id: + video_data = video_data_candidate + break def video_data_list2dict(video_data): ret = {}