[telegram] Fix metadata extraction

Closes #3528
This commit is contained in:
pukkandan 2022-04-23 22:15:38 +05:30
parent b0f636beb4
commit 90f4229409
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39

View file

@ -1,4 +1,5 @@
from .common import InfoExtractor
from ..utils import clean_html, get_element_by_class
class TelegramEmbedIE(InfoExtractor):
@ -17,8 +18,8 @@ class TelegramEmbedIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage_embed = self._download_webpage(f'{url}?embed=1', video_id)
webpage = self._download_webpage(url, video_id, query={'embed': 0})
webpage_embed = self._download_webpage(url, video_id, query={'embed': 1}, note='Downloading embed page')
formats = [{
'url': self._proto_relative_url(self._search_regex(
@ -29,9 +30,12 @@ def _real_extract(self, url):
return {
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True),
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True),
'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
webpage_embed, 'thumbnail'),
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
'description': self._html_search_meta(
['og:description', 'twitter:description'], webpage,
default=clean_html(get_element_by_class('tgme_widget_message_text', webpage_embed))),
'thumbnail': self._search_regex(
r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
webpage_embed, 'thumbnail'),
'formats': formats,
}