[ted] check for resources validity and extract subtitled downloads(closes #22513)

2024-11-21 20:46:36 -05:00 · 2019-09-26 11:44:57 +01:00 · 2019-09-26 11:44:57 +01:00 · 2a88a0c44d
commit 2a88a0c44d
parent 33c1c7d80f
1 changed files with 47 additions and 35 deletions
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@ -182,20 +182,29 @@ def _talk_info(self, url, video_name):
        title = talk_info['title'].strip()
-        native_downloads = try_get(
+        downloads = talk_info.get('downloads') or {}
-            talk_info,
+        native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {}
            (lambda x: x['downloads']['nativeDownloads'],
             lambda x: x['nativeDownloads']),
            dict) or {}
        formats = [{
            'url': format_url,
            'format_id': format_id,
            'format': format_id,
        } for (format_id, format_url) in native_downloads.items() if format_url is not None]
        subtitled_downloads = downloads.get('subtitledDownloads') or {}
        for lang, subtitled_download in subtitled_downloads.items():
            for q in self._NATIVE_FORMATS:
                q_url = subtitled_download.get(q)
                if not q_url:
                    continue
                formats.append({
                    'url': q_url,
                    'format_id': '%s-%s' % (q, lang),
                    'language': lang,
                })
        if formats:
            for f in formats:
-                finfo = self._NATIVE_FORMATS.get(f['format_id'])
+                finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0])
                if finfo:
                    f.update(finfo)
@ -215,34 +224,7 @@ def _talk_info(self, url, video_name):
        http_url = None
        for format_id, resources in resources_.items():
-            if format_id == 'h264':
+            if format_id == 'hls':
                for resource in resources:
                    h264_url = resource.get('file')
                    if not h264_url:
                        continue
                    bitrate = int_or_none(resource.get('bitrate'))
                    formats.append({
                        'url': h264_url,
                        'format_id': '%s-%sk' % (format_id, bitrate),
                        'tbr': bitrate,
                    })
                    if re.search(r'\d+k', h264_url):
                        http_url = h264_url
            elif format_id == 'rtmp':
                streamer = talk_info.get('streamer')
                if not streamer:
                    continue
                for resource in resources:
                    formats.append({
                        'format_id': '%s-%s' % (format_id, resource.get('name')),
                        'url': streamer,
                        'play_path': resource['file'],
                        'ext': 'flv',
                        'width': int_or_none(resource.get('width')),
                        'height': int_or_none(resource.get('height')),
                        'tbr': int_or_none(resource.get('bitrate')),
                    })
            elif format_id == 'hls':
                if not isinstance(resources, dict):
                    continue
                stream_url = url_or_none(resources.get('stream'))
@ -251,6 +233,36 @@ def _talk_info(self, url, video_name):
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_name, 'mp4', m3u8_id=format_id,
                    fatal=False))
            else:
                if not isinstance(resources, list):
                    continue
                if format_id == 'h264':
                    for resource in resources:
                        h264_url = resource.get('file')
                        if not h264_url:
                            continue
                        bitrate = int_or_none(resource.get('bitrate'))
                        formats.append({
                            'url': h264_url,
                            'format_id': '%s-%sk' % (format_id, bitrate),
                            'tbr': bitrate,
                        })
                        if re.search(r'\d+k', h264_url):
                            http_url = h264_url
                elif format_id == 'rtmp':
                    streamer = talk_info.get('streamer')
                    if not streamer:
                        continue
                    for resource in resources:
                        formats.append({
                            'format_id': '%s-%s' % (format_id, resource.get('name')),
                            'url': streamer,
                            'play_path': resource['file'],
                            'ext': 'flv',
                            'width': int_or_none(resource.get('width')),
                            'height': int_or_none(resource.get('height')),
                            'tbr': int_or_none(resource.get('bitrate')),
                        })
        m3u8_formats = list(filter(
            lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',