From 11398b922c0469e4143f72951d3c9c55587ef39d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 30 Sep 2022 15:43:40 +0000 Subject: [PATCH] [extractor/nbc] Add NBCStations extractor (#5077) Closes #4571 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/nbc.py | 172 ++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fa33866df0..76cba4ba22 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1079,6 +1079,7 @@ NBCSportsIE, NBCSportsStreamIE, NBCSportsVPlayerIE, + NBCStationsIE, ) from .ndr import ( NDRIE, diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 910cbedf67..6b482620a7 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -7,14 +7,20 @@ from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..utils import ( + ExtractorError, int_or_none, parse_age_limit, parse_duration, RegexNotFoundError, smuggle_url, + str_or_none, + traverse_obj, try_get, + unified_strdate, unified_timestamp, update_url_query, + url_basename, + variadic, ) @@ -584,3 +590,169 @@ def _real_extract(self, url): 'formats': formats, 'is_live': is_live, } + + +class NBCStationsIE(InfoExtractor): + _DOMAIN_RE = '|'.join(map(re.escape, ( + 'nbcbayarea', 'nbcboston', 'nbcchicago', 'nbcconnecticut', 'nbcdfw', 'nbclosangeles', + 'nbcmiami', 'nbcnewyork', 'nbcphiladelphia', 'nbcsandiego', 'nbcwashington', + 'necn', 'telemundo52', 'telemundoarizona', 'telemundochicago', 'telemundonuevainglaterra', + ))) + _VALID_URL = rf'https?://(?:www\.)?(?P<site>{_DOMAIN_RE})\.com/(?:[^/?#]+/)*(?P<id>[^/?#]+)/?(?:$|[#?])' + + _TESTS = [{ + 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/', + 'md5': '462041d91bd762ef5a38b7d85d6dc18f', + 'info_dict': { + 'id': '2968618', + 'ext': 
'mp4', + 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory', + 'description': None, + 'timestamp': 1661135892, + 'upload_date': '20220821', + 'uploader': 'NBC 4', + 'uploader_id': 'KNBC', + 'channel': 'nbclosangeles', + }, + }, { + 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/', + 'md5': '0917dcf7885be1023a9220630d415f67', + 'info_dict': { + 'id': '2247002', + 'ext': 'mp4', + 'title': 'Huracán complica que televidente de Tucson reciba reembolso', + 'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf', + 'timestamp': 1660886507, + 'upload_date': '20220819', + 'uploader': 'Telemundo Arizona', + 'uploader_id': 'KTAZ', + 'channel': 'telemundoarizona', + }, + }] + + _RESOLUTIONS = { + '1080': '1920', + '720': '1280', + '540': '960', + '360': '640', + '234': '416', + } + + def _real_extract(self, url): + channel, video_id = self._match_valid_url(url).group('site', 'id') + webpage = self._download_webpage(url, video_id) + + nbc_data = self._search_json( + r'