yt-dlp/yt_dlp/extractor/ted.py

import itertools
import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
    str_to_int,
    try_get,
    unified_strdate,
    url_or_none,
)


class TedBaseIE(InfoExtractor):
    """Shared URL template and playlist helper for the TED extractors."""

    # URL template: subclasses substitute ``{type}`` with the path segment(s)
    # they handle. An optional ``/lang/<code>`` segment may precede the final
    # slug, which is captured as ``id``.
    _VALID_URL_BASE = r'https?://www\.ted\.com/(?:{type})(?:/lang/[^/#?]+)?/(?P<id>[\w-]+)'

    def _parse_playlist(self, playlist):
        """Yield a TedTalk ``url_result`` for each video node of ``playlist``.

        Nodes are read from ``playlist['videos']['nodes']``. Entries whose
        ``__typename`` is not ``Video`` or that have no ``canonicalUrl`` are
        skipped. A playlist without a node list yields nothing.
        """
        # try_get gives None when the path is missing or is not a list;
        # iterating None would raise TypeError, so fall back to an empty list
        nodes = try_get(playlist, lambda x: x['videos']['nodes'], list) or []
        for entry in nodes:
            if entry.get('__typename') == 'Video' and entry.get('canonicalUrl'):
                yield self.url_result(entry['canonicalUrl'], TedTalkIE.ie_key())


class TedTalkIE(TedBaseIE):
    """Extractor for single talk pages (``https://www.ted.com/talks/<slug>``)."""

    _VALID_URL = TedBaseIE._VALID_URL_BASE.format(type='talks')
    _TESTS = [{
        'url': 'https://www.ted.com/talks/candace_parker_how_to_break_down_barriers_and_not_accept_limits',
        'md5': '47e82c666d9c3261d4fe74748a90aada',
        'info_dict': {
            'id': '86532',
            'ext': 'mp4',
            'title': 'How to break down barriers and not accept limits',
            'description': 'md5:000707cece219d1e165b11550d612331',
            'view_count': int,
            'tags': ['personal growth', 'equality', 'activism', 'motivation', 'social change', 'sports'],
            'uploader': 'Candace Parker',
            'duration': 676.0,
            'upload_date': '20220114',
            'release_date': '20211201',
            'thumbnail': r're:http.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Extract formats and metadata for a single talk.

        Reads ``videoData`` from the page's Next.js data, parses its embedded
        ``playerData`` JSON and builds formats from ``playerData['resources']``
        (``hls``, ``h264`` and ``rtmp`` entries) plus the optional
        ``audioDownload`` URL. When no format is found, the talk is delegated
        to the external host described by ``playerData['external']`` if one
        is given.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        talk_info = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['videoData']
        video_id = talk_info['id']
        # playerData is a JSON document stored as a string. Treat a missing or
        # null value as an empty object instead of handing None to the parser.
        player_data = self._parse_json(talk_info.get('playerData') or '{}', video_id) or {}

        # Last direct h264 URL that carries a "<digits>k" bitrate marker; used
        # below as a template for deriving HTTP variants of the HLS formats
        http_url = None
        formats, subtitles = [], {}
        for format_id, resources in (player_data.get('resources') or {}).items():
            if format_id == 'hls':
                stream_url = url_or_none(try_get(resources, lambda x: x['stream']))
                if not stream_url:
                    continue
                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    stream_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
                formats.extend(m3u8_formats)
                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
                continue

            # Every other resource kind is expected to be a list of dicts
            if not isinstance(resources, list):
                continue
            if format_id == 'h264':
                for resource in resources:
                    h264_url = resource.get('file')
                    if not h264_url:
                        continue
                    bitrate = int_or_none(resource.get('bitrate'))
                    formats.append({
                        'url': h264_url,
                        # Avoid ids such as "h264-Nonek" when the bitrate is unknown
                        'format_id': f'{format_id}-{bitrate}k' if bitrate is not None else format_id,
                        'tbr': bitrate,
                    })
                    if re.search(r'\d+k', h264_url):
                        http_url = h264_url
            elif format_id == 'rtmp':
                streamer = talk_info.get('streamer')
                if not streamer:
                    continue
                for resource in resources:
                    play_path = resource.get('file')
                    if not play_path:
                        continue
                    name = resource.get('name')
                    formats.append({
                        # Avoid ids such as "rtmp-None" when the resource is unnamed
                        'format_id': f'{format_id}-{name}' if name else format_id,
                        'url': streamer,
                        'play_path': play_path,
                        'ext': 'flv',
                        'width': int_or_none(resource.get('width')),
                        'height': int_or_none(resource.get('height')),
                        'tbr': int_or_none(resource.get('bitrate')),
                    })

        if http_url:
            # Derive a direct HTTP variant for each HLS video format by swapping
            # the bitrate marker of the h264 URL for the one found in the HLS URL.
            # NOTE(review): this only selects formats whose protocol is exactly
            # 'm3u8'; if the HLS helper tags its formats differently (e.g.
            # 'm3u8_native'), this block never adds anything - confirm.
            m3u8_formats = [f for f in formats if f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none']
            for m3u8_format in m3u8_formats:
                bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)
                if not bitrate:
                    continue
                bitrate_url = re.sub(r'\d+k', bitrate, http_url)
                if not self._is_valid_url(
                        bitrate_url, video_id, f'{bitrate} bitrate'):
                    continue
                f = m3u8_format.copy()
                f.update({
                    'url': bitrate_url,
                    'format_id': m3u8_format['format_id'].replace('hls', 'http'),
                    'protocol': 'http',
                })
                # The copied HLS entry may be flagged video-only; drop that flag
                # for the HTTP variant rather than carrying it over
                if f.get('acodec') == 'none':
                    del f['acodec']
                formats.append(f)

        audio_download = talk_info.get('audioDownload')
        if audio_download:
            formats.append({
                'url': audio_download,
                'format_id': 'audio',
                'vcodec': 'none',
            })

        if not formats:
            external = player_data.get('external') or {}
            service = external.get('service') or ''
            ext_url = external.get('code') if service.lower() == 'youtube' else None
            ext_url = ext_url or external.get('uri')
            if ext_url:
                return self.url_result(ext_url)
            # Neither formats nor an external reference: fall through and
            # return the metadata with an empty format list instead of raising
            # a bare KeyError on external['uri'].
            # NOTE(review): presumably the caller reports "no formats" - confirm.

        # default=None keeps a missing og: tag from aborting extraction; the
        # thumbnail is optional metadata
        thumbnail = player_data.get('thumb') or self._og_search_property('image', webpage, default=None)
        if thumbnail:
            # trim thumbnail resize parameters
            thumbnail = thumbnail.split('?')[0]

        return {
            'id': video_id,
            'title': talk_info.get('title') or self._og_search_title(webpage),
            'uploader': talk_info.get('presenterDisplayName'),
            'thumbnail': thumbnail,
            'description': talk_info.get('description') or self._og_search_description(webpage),
            'subtitles': subtitles,
            'formats': formats,
            # Optional field: tolerate a page without og:video:duration
            'duration': talk_info.get('duration') or parse_duration(
                self._og_search_property('video:duration', webpage, default=None)),
            'view_count': str_to_int(talk_info.get('viewedCount')),
            'upload_date': unified_strdate(talk_info.get('publishedAt')),
            'release_date': unified_strdate(talk_info.get('recordedOn')),
            'tags': try_get(player_data, lambda x: x['targeting']['tag'].split(',')),
        }


class TedSeriesIE(TedBaseIE):
    """Extractor for TED series pages, optionally limited to one season.

    A ``#season_<n>`` URL fragment restricts the playlist to that season.
    """

    _VALID_URL = fr'{TedBaseIE._VALID_URL_BASE.format(type=r"series")}(?:#season_(?P<season>\d+))?'
    _TESTS = [{
        'url': 'https://www.ted.com/series/small_thing_big_idea',
        'info_dict': {
            'id': '3',
            'title': 'Small Thing Big Idea',
            'series': 'Small Thing Big Idea',
            'description': 'md5:6869ca52cec661aef72b3e9f7441c55c',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://www.ted.com/series/the_way_we_work#season_2',
        'info_dict': {
            'id': '8_2',
            'title': 'The Way We Work Season 2',
            'series': 'The Way We Work',
            'description': 'md5:59469256e533e1a48c4aa926a382234c',
            'season_number': 2,
        },
        'playlist_mincount': 8,
    }]

    def _real_extract(self, url):
        """Return a playlist of the talks in a series (or in one season of it).

        The playlist id is ``<series id>_<season>`` when a season was requested
        and a series id is available, otherwise the series id alone.
        """
        display_id, season = self._match_valid_url(url).group('id', 'season')
        webpage = self._download_webpage(url, display_id, 'Downloading series webpage')
        info = self._search_nextjs_data(webpage, display_id)['props']['pageProps']

        def is_requested(season_info):
            # ``season`` is the digit string captured from the URL, or None when
            # no season was requested. Compare as strings so the filter matches
            # whether the page data stores seasonNumber as 2 or as "2".
            return season is None or str(season_info.get('seasonNumber')) == season

        entries = itertools.chain.from_iterable(
            self._parse_playlist(s) for s in info['seasons'] if is_requested(s))

        series_id = try_get(info, lambda x: x['series']['id'])
        series_name = try_get(info, lambda x: x['series']['name']) or self._og_search_title(webpage, fatal=False)

        return self.playlist_result(
            entries,
            f'{series_id}_{season}' if season and series_id else series_id,
            f'{series_name} Season {season}' if season else series_name,
            self._og_search_description(webpage),
            series=series_name, season_number=int_or_none(season))


class TedPlaylistIE(TedBaseIE):
    """Extractor for TED playlist pages (``/playlists/[<number>/]<slug>``)."""

    _VALID_URL = TedBaseIE._VALID_URL_BASE.format(type=r'playlists(?:/\d+)?')
    _TESTS = [{
        'url': 'https://www.ted.com/playlists/171/the_most_popular_talks_of_all',
        'info_dict': {
            'id': '171',
            'title': 'The most popular talks of all time',
            'description': 'md5:d2f22831dc86c7040e733a3cb3993d78',
        },
        'playlist_mincount': 25,
    }]

    def _real_extract(self, url):
        """Return a playlist result built from the page's Next.js playlist data."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        next_data = self._search_nextjs_data(page, slug)
        playlist = next_data['props']['pageProps']['playlist']

        talks = self._parse_playlist(playlist)
        playlist_id = playlist.get('id')

        # Prefer the title from the page data; otherwise use the og:title with
        # the site suffix removed, or None when that leaves nothing
        playlist_title = playlist.get('title')
        if not playlist_title:
            og_title = self._og_search_title(page, default='')
            playlist_title = og_title.replace(' | TED Talks', '') or None

        playlist_description = self._og_search_description(page)

        return self.playlist_result(talks, playlist_id, playlist_title, playlist_description)


class TedEmbedIE(InfoExtractor):
    """Extractor for ``embed.ted.com`` / ``embed-ssl.ted.com`` URLs.

    The embed URL is rewritten to its ``www`` counterpart and handed to the
    TedTalk extractor.
    """

    _VALID_URL = r'https?://embed(?:-ssl)?\.ted\.com/'
    # Matches an <iframe> whose quoted src starts with an embed URL
    _EMBED_REGEX = [rf'<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL}.+?)\1']

    _TESTS = [{
        'url': 'https://embed.ted.com/talks/janet_stovall_how_to_get_serious_about_diversity_and_inclusion_in_the_workplace',
        'info_dict': {
            'id': '21802',
            'ext': 'mp4',
            'title': 'How to get serious about diversity and inclusion in the workplace',
            'description': 'md5:0978aafe396e05341f8ecc795d22189d',
            'view_count': int,
            'tags': list,
            'uploader': 'Janet Stovall',
            'duration': 664.0,
            'upload_date': '20180822',
            'release_date': '20180719',
            'thumbnail': r're:http.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Delegate to the TedTalk extractor using the ``www`` form of ``url``."""
        # Swap the "embed" / "embed-ssl" host prefix for "www"
        talk_url = re.sub(r'://embed(-ssl)?', '://www', url)
        talk_extractor_key = TedTalkIE.ie_key()
        return self.url_result(talk_url, talk_extractor_key)
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`import itertools`
Move TED IE into its own file 2013-06-23 15:55:53 -04:00			`import re`

Improve subtitles support For each language the extractor builds a list with the available formats sorted (like for video formats), then YoutubeDL selects one of them using the '--sub-format' option which now allows giving the format preferences (for example 'ass/srt/best'). For each format the 'url' field can be set so that we only download the contents if needed, or if the contents needs to be processed (like in crunchyroll) the 'data' field can be used. The reasons for this change are: * We weren't checking that the format given with '--sub-format' was available, checking it in each extractor would be repetitive. * It allows to easily support giving a format preference. * The subtitles were automatically downloaded in the extractor, but I think that if you use for example the '--dump-json' option you want to finish as fast as possible. Currently only the ted extractor has been updated, but the old system still works. 2015-02-15 12:03:41 -05:00			`from .common import InfoExtractor`
[ted] Fix extraction (closes #13535)) 2017-07-01 07:39:01 -04:00			`from ..utils import (`
			`int_or_none,`
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409) Authored by: bashonly, seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> 2024-05-26 15:27:21 -04:00			`parse_duration,`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`str_to_int,`
[ted] Fix extraction (closes #13535)) 2017-07-01 07:39:01 -04:00			`try_get,`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`unified_strdate,`
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409) Authored by: bashonly, seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> 2024-05-26 15:27:21 -04:00			`url_or_none,`
[ted] Fix extraction (closes #13535)) 2017-07-01 07:39:01 -04:00			`)`
[ted] fixed error in case of no subtitles present I created a test, but I leave it commented since TED videos get new subtitles frequently. 2013-11-05 06:00:13 -05:00
[ted] Use unicode_literals 2014-01-16 21:52:17 -05:00
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`class TedBaseIE(InfoExtractor):`
			`_VALID_URL_BASE = r'https?://www\.ted\.com/(?:{type})(?:/lang/[^/#?]+)?/(?P<id>[\w-]+)'`

			`def _parse_playlist(self, playlist):`
			`for entry in try_get(playlist, lambda x: x['videos']['nodes'], list):`
			`if entry.get('__typename') == 'Video' and entry.get('canonicalUrl'):`
			`yield self.url_result(entry['canonicalUrl'], TedTalkIE.ie_key())`


			`class TedTalkIE(TedBaseIE):`
			`_VALID_URL = TedBaseIE._VALID_URL_BASE.format(type='talks')`
[ted] Add support for watch/ URLs (Fixes #2637) 2014-03-26 21:22:40 -04:00			`_TESTS = [{`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`'url': 'https://www.ted.com/talks/candace_parker_how_to_break_down_barriers_and_not_accept_limits',`
			`'md5': '47e82c666d9c3261d4fe74748a90aada',`
[ted] Fix extraction for videos without nativeDownloads (closes #16756, closes #17085) 2018-07-28 10:26:23 -04:00			`'info_dict': {`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`'id': '86532',`
[ted] Fix extraction for videos without nativeDownloads (closes #16756, closes #17085) 2018-07-28 10:26:23 -04:00			`'ext': 'mp4',`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`'title': 'How to break down barriers and not accept limits',`
			`'description': 'md5:000707cece219d1e165b11550d612331',`
[ted] Improve extraction and update tests 2018-07-28 11:09:53 -04:00			`'view_count': int,`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`'tags': ['personal growth', 'equality', 'activism', 'motivation', 'social change', 'sports'],`
			`'uploader': 'Candace Parker',`
			`'duration': 676.0,`
			`'upload_date': '20220114',`
			`'release_date': '20211201',`
			`'thumbnail': r're:http.*\.jpg',`
[ted] Fix extraction for videos without nativeDownloads (closes #16756, closes #17085) 2018-07-28 10:26:23 -04:00			`},`
[ted] Add support for watch/ URLs (Fixes #2637) 2014-03-26 21:22:40 -04:00			`}]`
Move TED IE into its own file 2013-06-23 15:55:53 -04:00
			`def _real_extract(self, url):`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`display_id = self._match_id(url)`
			`webpage = self._download_webpage(url, display_id)`
			`talk_info = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['videoData']`
			`video_id = talk_info['id']`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`player_data = self._parse_json(talk_info.get('playerData'), video_id)`
[ted] Fix extraction (closes #13535)) 2017-07-01 07:39:01 -04:00
[ted] extract all http formats 2016-04-30 10:41:22 -04:00			`http_url = None`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`formats, subtitles = [], {}`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`for format_id, resources in (player_data.get('resources') or {}).items():`
[ted] check for resources validity and extract subtitled downloads(closes #22513) 2019-09-26 06:44:57 -04:00			`if format_id == 'hls':`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`stream_url = url_or_none(try_get(resources, lambda x: x['stream']))`
[ted] Improve extraction and update tests 2018-07-28 11:09:53 -04:00			`if not stream_url:`
			`continue`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(`
			`stream_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)`
			`formats.extend(m3u8_formats)`
			`subtitles = self._merge_subtitles(subtitles, m3u8_subs)`
			`continue`

			`if not isinstance(resources, list):`
			`continue`
			`if format_id == 'h264':`
			`for resource in resources:`
			`h264_url = resource.get('file')`
			`if not h264_url:`
[ted] check for resources validity and extract subtitled downloads(closes #22513) 2019-09-26 06:44:57 -04:00			`continue`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`bitrate = int_or_none(resource.get('bitrate'))`
			`formats.append({`
			`'url': h264_url,`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`'format_id': f'{format_id}-{bitrate}k',`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`'tbr': bitrate,`
			`})`
			`if re.search(r'\d+k', h264_url):`
			`http_url = h264_url`
			`elif format_id == 'rtmp':`
			`streamer = talk_info.get('streamer')`
			`if not streamer:`
			`continue`
			`formats.extend({`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`'format_id': '{}-{}'.format(format_id, resource.get('name')),`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`'url': streamer,`
			`'play_path': resource['file'],`
			`'ext': 'flv',`
			`'width': int_or_none(resource.get('width')),`
			`'height': int_or_none(resource.get('height')),`
			`'tbr': int_or_none(resource.get('bitrate')),`
			`} for resource in resources if resource.get('file'))`
[ted] extract all http formats 2016-04-30 10:41:22 -04:00
			`if http_url:`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`m3u8_formats = [f for f in formats if f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none']`
[ted] extract all http formats 2016-04-30 10:41:22 -04:00			`for m3u8_format in m3u8_formats:`
			`bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)`
			`if not bitrate:`
			`continue`
[ted] Improve extraction and update tests 2018-07-28 11:09:53 -04:00			`bitrate_url = re.sub(r'\d+k', bitrate, http_url)`
			`if not self._is_valid_url(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`bitrate_url, video_id, f'{bitrate} bitrate'):`
[ted] Improve extraction and update tests 2018-07-28 11:09:53 -04:00			`continue`
[ted] extract all http formats 2016-04-30 10:41:22 -04:00			`f = m3u8_format.copy()`
			`f.update({`
[ted] Improve extraction and update tests 2018-07-28 11:09:53 -04:00			`'url': bitrate_url,`
[ted] extract all http formats 2016-04-30 10:41:22 -04:00			`'format_id': m3u8_format['format_id'].replace('hls', 'http'),`
			`'protocol': 'http',`
			`})`
[ted] correct acodec for http formats(#18923) 2019-01-19 15:25:53 -05:00			`if f.get('acodec') == 'none':`
			`del f['acodec']`
[ted] extract all http formats 2016-04-30 10:41:22 -04:00			`formats.append(f)`
[ted] Extract all formats (Closes #5397) 2015-04-10 13:36:28 -04:00
			`audio_download = talk_info.get('audioDownload')`
			`if audio_download:`
			`formats.append({`
			`'url': audio_download,`
			`'format_id': 'audio',`
[ted] Clarify audio/video-only formats 2015-04-20 11:42:20 -04:00			`'vcodec': 'none',`
[ted] Extract all formats (Closes #5397) 2015-04-10 13:36:28 -04:00			`})`

Update to ytdl-commit-d495292 [ard] Relax _VALID_URL and fix video ids https://github.com/ytdl-org/youtube-dl/commit/d495292852b6c2f1bd58bc2141ff2b0265c952cf Closes #357 2021-05-31 17:07:01 -04:00			`if not formats:`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`external = player_data.get('external') or {}`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`service = external.get('service') or ''`
			`ext_url = external.get('code') if service.lower() == 'youtube' else None`
			`return self.url_result(ext_url or external['uri'])`
Update to ytdl-commit-d495292 [ard] Relax _VALID_URL and fix video ids https://github.com/ytdl-org/youtube-dl/commit/d495292852b6c2f1bd58bc2141ff2b0265c952cf Closes #357 2021-05-31 17:07:01 -04:00
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`thumbnail = player_data.get('thumb') or self._og_search_property('image', webpage)`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`if thumbnail:`
			`# trim thumbnail resize parameters`
			`thumbnail = thumbnail.split('?')[0]`
[ted] Added support for subtitle download 2013-11-02 14:48:39 -04:00
[ted] simplify 2013-11-15 08:06:38 -05:00			`return {`
[ted] Added support for subtitle download 2013-11-02 14:48:39 -04:00			`'id': video_id,`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`'title': talk_info.get('title') or self._og_search_title(webpage),`
			`'uploader': talk_info.get('presenterDisplayName'),`
			`'thumbnail': thumbnail,`
			`'description': talk_info.get('description') or self._og_search_description(webpage),`
			`'subtitles': subtitles,`
[ted] Prepare #980 merge 2013-10-04 04:32:34 -04:00			`'formats': formats,`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`'duration': talk_info.get('duration') or parse_duration(self._og_search_property('video:duration', webpage)),`
			`'view_count': str_to_int(talk_info.get('viewedCount')),`
			`'upload_date': unified_strdate(talk_info.get('publishedAt')),`
			`'release_date': unified_strdate(talk_info.get('recordedOn')),`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`'tags': try_get(player_data, lambda x: x['targeting']['tag'].split(',')),`
[ted] Prepare #980 merge 2013-10-04 04:32:34 -04:00			`}`

[ted] Add support for watch/ URLs (Fixes #2637) 2014-03-26 21:22:40 -04:00
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`class TedSeriesIE(TedBaseIE):`
			`_VALID_URL = fr'{TedBaseIE._VALID_URL_BASE.format(type=r"series")}(?:#season_(?P<season>\d+))?'`
			`_TESTS = [{`
			`'url': 'https://www.ted.com/series/small_thing_big_idea',`
			`'info_dict': {`
			`'id': '3',`
			`'title': 'Small Thing Big Idea',`
			`'series': 'Small Thing Big Idea',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`'description': 'md5:6869ca52cec661aef72b3e9f7441c55c',`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`},`
			`'playlist_mincount': 16,`
			`}, {`
			`'url': 'https://www.ted.com/series/the_way_we_work#season_2',`
			`'info_dict': {`
			`'id': '8_2',`
			`'title': 'The Way We Work Season 2',`
			`'series': 'The Way We Work',`
			`'description': 'md5:59469256e533e1a48c4aa926a382234c',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`'season_number': 2,`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`},`
			`'playlist_mincount': 8,`
			`}]`
[ted] Add support for watch/ URLs (Fixes #2637) 2014-03-26 21:22:40 -04:00
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`def _real_extract(self, url):`
			`display_id, season = self._match_valid_url(url).group('id', 'season')`
			`webpage = self._download_webpage(url, display_id, 'Downloading series webpage')`
			`info = self._search_nextjs_data(webpage, display_id)['props']['pageProps']`
[ted] Add support for watch/ URLs (Fixes #2637) 2014-03-26 21:22:40 -04:00
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`entries = itertools.chain.from_iterable(`
			`self._parse_playlist(s) for s in info['seasons'] if season in [None, s.get('seasonNumber')])`
[ted] Add support for watch/ URLs (Fixes #2637) 2014-03-26 21:22:40 -04:00
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`series_id = try_get(info, lambda x: x['series']['id'])`
			`series_name = try_get(info, lambda x: x['series']['name']) or self._og_search_title(webpage, fatal=False)`

			`return self.playlist_result(`
			`entries,`
			`f'{series_id}_{season}' if season and series_id else series_id,`
			`f'{series_name} Season {season}' if season else series_name,`
			`self._og_search_description(webpage),`
			`series=series_name, season_number=int_or_none(season))`


			`class TedPlaylistIE(TedBaseIE):`
			`_VALID_URL = TedBaseIE._VALID_URL_BASE.format(type=r'playlists(?:/\d+)?')`
			`_TESTS = [{`
			`'url': 'https://www.ted.com/playlists/171/the_most_popular_talks_of_all',`
			`'info_dict': {`
			`'id': '171',`
			`'title': 'The most popular talks of all time',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`'description': 'md5:d2f22831dc86c7040e733a3cb3993d78',`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00			`},`
			`'playlist_mincount': 25,`
			`}]`

			`def _real_extract(self, url):`
			`display_id = self._match_id(url)`
			`webpage = self._download_webpage(url, display_id)`
			`playlist = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['playlist']`

			`return self.playlist_result(`
			`self._parse_playlist(playlist), playlist.get('id'),`
			`playlist.get('title') or self._og_search_title(webpage, default='').replace(' | TED Talks', '') or None,`
			`self._og_search_description(webpage))`


			`class TedEmbedIE(InfoExtractor):`
			`_VALID_URL = r'https?://embed(?:-ssl)?\.ted\.com/'`
[extractors] Use new framework for existing embeds (#4307) `Brightcove` is difficult to migrate because it's subclasses may depend on the signature of the current functions. So it is left as-is for now Note: Tests have not been migrated 2022-07-31 21:23:25 -04:00			`_EMBED_REGEX = [rf'<iframe[^>]+?src=(["\'])(?P<url>{_VALID_URL}.+?)\1']`
[Ted] Rewrite extractor (#2359) Closes #2343 Authored by: pukkandan, trassshhub 2022-01-19 11:04:20 -05:00
			`_TESTS = [{`
			`'url': 'https://embed.ted.com/talks/janet_stovall_how_to_get_serious_about_diversity_and_inclusion_in_the_workplace',`
			`'info_dict': {`
			`'id': '21802',`
			`'ext': 'mp4',`
			`'title': 'How to get serious about diversity and inclusion in the workplace',`
			`'description': 'md5:0978aafe396e05341f8ecc795d22189d',`
			`'view_count': int,`
			`'tags': list,`
			`'uploader': 'Janet Stovall',`
			`'duration': 664.0,`
			`'upload_date': '20180822',`
			`'release_date': '20180719',`
			`'thumbnail': r're:http.*\.jpg',`
			`},`
			`}]`

			`def _real_extract(self, url):`
			`return self.url_result(re.sub(r'://embed(-ssl)?', '://www', url), TedTalkIE.ie_key())`