[ie/massengeschmack.tv] Fix title extraction (#7813)

Authored by: sb0stn
This commit is contained in:
Sebastian Koch 2023-09-17 22:54:00 +02:00 committed by GitHub
parent 63e0c5748c
commit 81f46ac573
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -17,11 +17,12 @@ class MassengeschmackTVIE(InfoExtractor):
_TEST = {
'url': 'https://massengeschmack.tv/play/fktv202',
'md5': 'a9e054db9c2b5a08f0a0527cc201e8d3',
'md5': '9996f314994a49fefe5f39aa1b07ae21',
'info_dict': {
'id': 'fktv202',
'ext': 'mp4',
'title': 'Fernsehkritik-TV - Folge 202',
'title': 'Fernsehkritik-TV #202',
'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg'
},
}
@@ -29,9 +30,6 @@ def _real_extract(self, url):
episode = self._match_id(url)
webpage = self._download_webpage(url, episode)
title = clean_html(self._html_search_regex(
'<h3>([^<]+)</h3>', webpage, 'title'))
thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
formats = []
@@ -67,7 +65,8 @@ def _real_extract(self, url):
return {
'id': episode,
'title': title,
'title': clean_html(self._html_search_regex(
r'<span[^>]+\bid=["\']clip-title["\'][^>]*>([^<]+)', webpage, 'title', fatal=False)),
'formats': formats,
'thumbnail': thumbnail,
'thumbnail': self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False),
}