[ceskatelevize] Fetch iframe from nextJS data (#1904)

Closes #1899
Authored by: mkubecek
This commit is contained in:
Michal Kubeček 2021-12-07 17:44:43 +01:00 committed by GitHub
parent 66f4c04e50
commit 443b21dc4e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -12,8 +12,7 @@
ExtractorError,
float_or_none,
sanitized_Request,
unescapeHTML,
update_url_query,
traverse_obj,
urlencode_postdata,
USER_AGENTS,
)
@ -99,11 +98,13 @@ def _real_extract(self, url):
playlist_description = playlist_description.replace('\xa0', ' ')
if parsed_url.path.startswith('/porady/'):
refer_url = update_url_query(unescapeHTML(self._search_regex(
(r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
webpage, 'iframe player url', group='url')), query={'autoStart': 'true'})
webpage = self._download_webpage(refer_url, playlist_id)
next_data = self._search_nextjs_data(webpage, playlist_id)
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
if not idec:
raise ExtractorError('Failed to find IDEC id')
iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if '%s</p>' % NOT_AVAILABLE_STRING in webpage: