From 90f42294096d4fc38fb4355564c083733d638b0d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 23 Apr 2022 22:15:38 +0530 Subject: [PATCH] [telegram] Fix metadata extraction Closes #3528 --- yt_dlp/extractor/telegram.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py index 2dfa261e9..bb9ca8c45 100644 --- a/yt_dlp/extractor/telegram.py +++ b/yt_dlp/extractor/telegram.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..utils import clean_html, get_element_by_class class TelegramEmbedIE(InfoExtractor): @@ -17,8 +18,8 @@ class TelegramEmbedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - webpage_embed = self._download_webpage(f'{url}?embed=1', video_id) + webpage = self._download_webpage(url, video_id, query={'embed': 0}) + webpage_embed = self._download_webpage(url, video_id, query={'embed': 1}, note='Downloading embed page') formats = [{ 'url': self._proto_relative_url(self._search_regex( @@ -29,9 +30,12 @@ def _real_extract(self, url): return { 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True), - 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True), - 'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', - webpage_embed, 'thumbnail'), + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), + 'description': self._html_search_meta( + ['og:description', 'twitter:description'], webpage, + default=clean_html(get_element_by_class('tgme_widget_message_text', webpage_embed))), + 'thumbnail': self._search_regex( + r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', + webpage_embed, 'thumbnail'), 'formats': formats, }