From 3b52a606881e6adadc33444abdeacce562b79330 Mon Sep 17 00:00:00 2001 From: ringus1 Date: Tue, 9 May 2023 01:19:42 +0200 Subject: [PATCH] [extractor/facebook] Fix metadata extraction (#6856) Closes #3432 Authored by: ringus1 --- yt_dlp/extractor/facebook.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 1404be612..9d871eb28 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -390,7 +390,10 @@ def extract_metadata(webpage): k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) title = get_first(media, ('title', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) - uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} + uploader_data = ( + get_first(media, ('owner', {dict})) + or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name'])) + or get_first(post, ('node', 'actors', ..., {dict})) or {}) page_title = title or self._html_search_regex(( r']*class="uiHeaderTitle"[^>]*>(?P[^<]*)', @@ -415,16 +418,17 @@ def extract_metadata(webpage): # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/ if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail): thumbnail = None - view_count = parse_count(self._search_regex( - r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', - default=None)) info_dict = { 'description': description, 'uploader': uploader, 'uploader_id': uploader_data.get('id'), 'timestamp': timestamp, 'thumbnail': thumbnail, - 'view_count': view_count, + 'view_count': parse_count(self._search_regex( + (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',), + webpage, 'view count', default=None)), + 'concurrent_view_count': get_first(post, ( + ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})), } info_json_ld = self._search_json_ld(webpage, video_id, default={})