From 443b21dc4e499286df3852f88fbd48e7f61b1f6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Kube=C4=8Dek?=
Date: Tue, 7 Dec 2021 17:44:43 +0100
Subject: [PATCH] [ceskatelevize] Fetch iframe from nextJS data (#1904)

Closes #1899
Authored by: mkubecek
---
 yt_dlp/extractor/ceskatelevize.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index f766dfbb7..6ca2f38b5 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -12,8 +12,7 @@
     ExtractorError,
     float_or_none,
     sanitized_Request,
-    unescapeHTML,
-    update_url_query,
+    traverse_obj,
     urlencode_postdata,
     USER_AGENTS,
 )
@@ -99,11 +98,13 @@ def _real_extract(self, url):
             playlist_description = playlist_description.replace('\xa0', ' ')
 
         if parsed_url.path.startswith('/porady/'):
-            refer_url = update_url_query(unescapeHTML(self._search_regex(
-                (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-                 r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
-                webpage, 'iframe player url', group='url')), query={'autoStart': 'true'})
-            webpage = self._download_webpage(refer_url, playlist_id)
+            next_data = self._search_nextjs_data(webpage, playlist_id)
+            idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+            if not idec:
+                raise ExtractorError('Failed to find IDEC id')
+            iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
+            webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
+                                             query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
 
         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
         if '%s</p>' % NOT_AVAILABLE_STRING in webpage: