[nbcolympics] Update extractor for 2020 olympics (#621)

Fixes: https://github.com/yt-dlp/yt-dlp/issues/617#issuecomment-891834323

Authored by: wesnm
This commit is contained in:
Wes 2021-08-03 23:19:44 -05:00 committed by GitHub
parent 888299e6ca
commit 3e376d183e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -12,6 +12,7 @@
int_or_none,
parse_age_limit,
parse_duration,
RegexNotFoundError,
smuggle_url,
try_get,
unified_timestamp,
@@ -460,7 +461,7 @@ def _real_extract(self, url):
class NBCOlympicsIE(InfoExtractor):
IE_NAME = 'nbcolympics'
_VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
_VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
_TEST = {
# Geo-restricted to US
@@ -483,6 +484,7 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, display_id)
try:
drupal_settings = self._parse_json(self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings'), display_id)
@@ -490,6 +492,10 @@ def _real_extract(self, url):
iframe_url = drupal_settings['vod']['iframe_url']
theplatform_url = iframe_url.replace(
'vplayer.nbcolympics.com', 'player.theplatform.com')
except RegexNotFoundError:
theplatform_url = self._search_regex(
r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2",
webpage, 'embedding URL', group="embedUrl")
return {
'_type': 'url_transparent',