Update to ytdl-commit-d495292

[ard] Relax _VALID_URL and fix video ids (ytdl commit d495292852). Closes #357
2024-11-21 20:46:36 -05:00 · 2021-06-01 02:37:01 +05:30 · 2021-06-01 02:37:01 +05:30 · 14eb1ee1cb
commit 14eb1ee1cb
parent 879e7199bb
4 changed files with 54 additions and 23 deletions
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -290,14 +290,14 @@ def _real_extract(self, url):
 class ARDIE(InfoExtractor):
-    _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-?(?:video-?)?(?P<id>[0-9]+))\.html'
+    _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
    _TESTS = [{
        # available till 7.01.2022
        'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
        'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
        'info_dict': {
-            'display_id': 'maischberger-die-woche',
+            'id': 'maischberger-die-woche-video100',
-            'id': '100',
+            'display_id': 'maischberger-die-woche-video100',
            'ext': 'mp4',
            'duration': 3687.0,
            'title': 'maischberger. die woche vom 7. Januar 2021',
@@ -305,7 +305,10 @@ class ARDIE(InfoExtractor):
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
-        'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
+        'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
        'only_matching': True,
    }, {
        'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/videos/diversity-tag-sanam-afrashteh100.html',
@@ -313,11 +316,17 @@ class ARDIE(InfoExtractor):
    }, {
        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
+        display_id = mobj.group('id')
        player_url = mobj.group('mainurl') + '~playerXml.xml'
        doc = self._download_xml(player_url, display_id)
@@ -368,7 +377,7 @@ def _real_extract(self, url):
        self._sort_formats(formats)
        return {
-            'id': mobj.group('id'),
+            'id': xpath_text(video_node, './videoId', default=display_id),
            'formats': formats,
            'display_id': display_id,
            'title': video_node.find('./title').text,
--- a/yt_dlp/extractor/ted.py
+++ b/yt_dlp/extractor/ted.py
@@ -123,6 +123,10 @@ class TEDIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
    }, {
        # with own formats and private Youtube external
        'url': 'https://www.ted.com/talks/spencer_wells_a_family_tree_for_humanity',
        'only_matching': True,
    }]
    _NATIVE_FORMATS = {
@@ -210,16 +214,6 @@ def _talk_info(self, url, video_name):
        player_talk = talk_info['player_talks'][0]
        external = player_talk.get('external')
        if isinstance(external, dict):
            service = external.get('service')
            if isinstance(service, compat_str):
                ext_url = None
                if service.lower() == 'youtube':
                    ext_url = external.get('code')
                return self.url_result(ext_url or external['uri'])
        resources_ = player_talk.get('resources') or talk_info.get('resources')
        http_url = None
@@ -294,6 +288,16 @@ def _talk_info(self, url, video_name):
                'vcodec': 'none',
            })
        if not formats:
            external = player_talk.get('external')
            if isinstance(external, dict):
                service = external.get('service')
                if isinstance(service, compat_str):
                    ext_url = None
                    if service.lower() == 'youtube':
                        ext_url = external.get('code')
                    return self.url_result(ext_url or external['uri'])
        self._sort_formats(formats)
        video_id = compat_str(talk_info['id'])
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -49,6 +49,7 @@ class TwitchBaseIE(InfoExtractor):
        'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
        'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
        'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
        'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
        'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
        'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
    }
@@ -893,7 +894,25 @@ class TwitchClipsIE(TwitchBaseIE):
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        clip = self._download_base_gql(
+        clip = self._download_gql(
            video_id, [{
                'operationName': 'VideoAccessToken_Clip',
                'variables': {
                    'slug': video_id,
                },
            }],
            'Downloading clip access token GraphQL')[0]['data']['clip']
        if not clip:
            raise ExtractorError(
                'This clip is no longer available', expected=True)
        access_query = {
            'sig': clip['playbackAccessToken']['signature'],
            'token': clip['playbackAccessToken']['value'],
        }
        data = self._download_base_gql(
            video_id, {
                'query': '''{
  clip(slug: "%s") {
@@ -918,11 +937,10 @@ def _real_extract(self, url):
    }
    viewCount
  }
-}''' % video_id}, 'Downloading clip GraphQL')['data']['clip']
+}''' % video_id}, 'Downloading clip GraphQL', fatal=False)
-        if not clip:
+        if data:
-            raise ExtractorError(
+            clip = try_get(data, lambda x: x['data']['clip'], dict) or clip
-                'This clip is no longer available', expected=True)
        formats = []
        for option in clip.get('videoQualities', []):
@@ -932,7 +950,7 @@ def _real_extract(self, url):
            if not source:
                continue
            formats.append({
-                'url': source,
+                'url': update_url_query(source, access_query),
                'format_id': option.get('quality'),
                'height': int_or_none(option.get('quality')),
                'fps': int_or_none(option.get('frameRate')),
--- a/yt_dlp/extractor/ustream.py
+++ b/yt_dlp/extractor/ustream.py
@@ -75,7 +75,7 @@ class UstreamIE(InfoExtractor):
    @staticmethod
    def _extract_url(webpage):
        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
        if mobj is not None:
            return mobj.group('url')