From bb36a55c415bc3538c841080f1e2366fb361c4a1 Mon Sep 17 00:00:00 2001 From: nikhil Date: Thu, 29 Jul 2021 11:31:27 -0400 Subject: [PATCH] [nbcolympics:stream] Fix extractor PR: https://github.com/ytdl-org/youtube-dl/pull/29688 Closes: #617, https://github.com/ytdl-org/youtube-dl/issues/29665 * Livestreams are untested * If using ffmpeg as downloader, v4.3+ is needed since `-http_seekable` option is necessary * Instead of making a separate key for each arg that needs to be passed to ffmpeg, I made `_ffmpeg_args` * This deprecates `_seekable`, but the option is kept for compatibility Authored by: nchilada, pukkandan --- yt_dlp/downloader/external.py | 3 ++ yt_dlp/extractor/adobepass.py | 4 +- yt_dlp/extractor/nbc.py | 96 ++++++++++++++++++++++++----------- yt_dlp/extractor/turner.py | 2 +- 4 files changed, 72 insertions(+), 33 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 8c8d838e1..f91e10599 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -382,6 +382,9 @@ def _call_downloader(self, tmpfilename, info_dict): if not self.params.get('verbose'): args += ['-hide_banner'] + args += info_dict.get('_ffmpeg_args', []) + + # This option exists only for compatibility. 
Extractors should use `_ffmpeg_args` instead seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 9ac1d7793..3a8dc5b7d 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1581,7 +1581,7 @@ def extract_redirect_url(html, url=None, fatal=False): hidden_data['history'] = 1 provider_login_page_res = self._download_webpage_handle( - urlh.geturl(), video_id, 'Sending first bookend.', + urlh.geturl(), video_id, 'Sending first bookend', query=hidden_data) provider_association_redirect, urlh = post_form( @@ -1600,7 +1600,7 @@ def extract_redirect_url(html, url=None, fatal=False): hidden_data['history'] = 3 mvpd_confirm_page_res = self._download_webpage_handle( - urlh.geturl(), video_id, 'Sending final bookend.', + urlh.geturl(), video_id, 'Sending final bookend', query=hidden_data) post_form(mvpd_confirm_page_res, 'Confirming Login') diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 8c63cf818..5ebb1c869 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -508,43 +508,79 @@ def _real_extract(self, url): class NBCOlympicsStreamIE(AdobePassIE): IE_NAME = 'nbcolympics:stream' _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)' - _TEST = { - 'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8', - 'info_dict': { - 'id': '203493', - 'ext': 'mp4', - 'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + _TESTS = [ + { + 'note': 'Tokenized m3u8 source URL', + 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11', + 'info_dict': { + 'id': '2019740', + 'ext': 'mp4', + 'title': r"re:Women's Group Stage - Netherlands vs\. 
Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$", + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + 'note': 'Plain m3u8 source URL', + 'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars', + 'info_dict': { + 'id': '2021729', + 'ext': 'mp4', + 'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + 'skip_download': 'm3u8', + }, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json' + ] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid') - resource = self._search_regex( - r"resource\s*=\s*'(.+)';", webpage, - 'resource').replace("' + pid + '", pid) + event_config = self._download_json( - self._DATA_URL_TEMPLATE % ('event_config', pid), - pid)['eventConfig'] - title = self._live_title(event_config['eventTitle']) + f'http://stream.nbcolympics.com/data/event_config_{pid}.json', + pid, 'Downloading event config')['eventConfig'] + + title = event_config['eventTitle'] + is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus')) + if is_live: + title = self._live_title(title) + source_url = self._download_json( - self._DATA_URL_TEMPLATE % ('live_sources', pid), - pid)['videoSources'][0]['sourceUrl'] - media_token = self._extract_mvpd_auth( - url, pid, event_config.get('requestorId', 'NBCOlympics'), resource) - formats = self._extract_m3u8_formats(self._download_webpage( - 'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={ - 'cdn': 'akamai', - 'mediaToken': base64.b64encode(media_token.encode()), - 'resource': base64.b64encode(resource.encode()), - 'url': source_url, - }), pid, 'mp4') + 
f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging', + pid, 'Downloading leap config' + )['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl'] + + if event_config.get('cdnToken'): + ap_resource = self._get_mvpd_resource( + event_config.get('resourceId', 'NBCOlympics'), + re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid, + event_config.get('ratingId', 'NO VALUE')) + media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource) + + source_url = self._download_json( + 'https://tokens.playmakerservices.com/', pid, 'Retrieving tokenized URL', + data=json.dumps({ + 'application': 'NBCSports', + 'authentication-type': 'adobe-pass', + 'cdn': 'akamai', + 'pid': pid, + 'platform': 'desktop', + 'requestorId': 'NBCOlympics', + 'resourceId': base64.b64encode(ap_resource.encode()).decode(), + 'token': base64.b64encode(media_token.encode()).decode(), + 'url': source_url, + 'version': 'v1', + }).encode(), + )['akamai'][0]['tokenizedUrl'] + + formats = self._extract_m3u8_formats(source_url, pid, 'mp4', live=is_live) + for f in formats: + # -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to + # download with ffmpeg without this option + f['_ffmpeg_args'] = ['-seekable', '0', '-http_seekable', '0', '-icy', '0'] self._sort_formats(formats) return { @@ -552,5 +588,5 @@ def _real_extract(self, url): 'display_id': display_id, 'title': title, 'formats': formats, - 'is_live': True, + 'is_live': is_live, } diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 3d7a348b0..32125bc79 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -144,7 +144,7 @@ def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal= m3u8_id=format_id or 'hls', fatal=False) if '/secure/' in video_url and '?hdnea=' in video_url: for f in m3u8_formats: - f['_seekable'] = False + f['_ffmpeg_args'] = 
['-seekable', '0'] formats.extend(m3u8_formats) elif ext == 'f4m': formats.extend(self._extract_f4m_formats(