yt-dlp/yt_dlp/extractor/audimedia.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
)


class AudiMediaIE(InfoExtractor):
    """Extractor for ``audimediatv`` video pages on audi-mediacenter.com.

    The page embeds a player element whose ``id`` encodes the video; the
    metadata and format URLs are then fetched from the audimedia.tv JSON API.
    """

    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
        'md5': '79a8b71c46d49042609795ab59779b66',
        'info_dict': {
            'id': '1565',
            'ext': 'mp4',
            'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
            'description': 'md5:60e5d30a78ced725f7b8d34370762941',
            'upload_date': '20151124',
            'timestamp': 1448354940,
            'duration': 74022,
            'view_count': int,
        },
    }, {
        'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the page at *url*.

        Raises ExtractorError when the embed id cannot be parsed or when the
        page refers to a live stream / ended live stream, which this
        extractor does not handle.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The embed id is searched both in plain markup and in markup whose
        # quotes are backslash-escaped; the last pattern is a fallback that
        # matches the id shape directly (amtve-<letter>-<digits>-<2 letters>).
        raw_payload = self._search_regex([
            r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
            r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
            r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
            r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
            r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
        ], webpage, 'raw payload')

        # The first four patterns accept any number of dashes, so validate the
        # shape here instead of letting tuple unpacking fail with ValueError.
        payload_parts = raw_payload.split('-')
        if len(payload_parts) != 4:
            raise ExtractorError(f'Unexpected embed payload format: {raw_payload!r}')
        _, stage_mode, video_id, _ = payload_parts

        # TODO: handle s and e stage_mode (live streams and ended live streams)
        if stage_mode in ('s', 'e'):
            # Fail explicitly rather than implicitly returning None.
            raise ExtractorError(
                'Live streams and ended live streams are not supported', expected=True)

        video_data = self._download_json(
            f'https://www.audimedia.tv/api/video/v1/videos/{video_id}',
            video_id, query={
                'embed[]': ['video_versions', 'thumbnail_image'],
            })['results']
        formats = []

        stream_url_hls = video_data.get('stream_url_hls')
        if stream_url_hls:
            formats.extend(self._extract_m3u8_formats(
                stream_url_hls, video_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))

        stream_url_hds = video_data.get('stream_url_hds')
        if stream_url_hds:
            formats.extend(self._extract_f4m_formats(
                stream_url_hds + '?hdcore=3.4.0',
                video_id, f4m_id='hds', fatal=False))

        # `or []` also covers an explicit JSON null, which a .get() default
        # would pass through unchanged.
        for video_version in video_data.get('video_versions') or []:
            video_version_url = video_version.get('download_url') or video_version.get('stream_url')
            if not video_version_url:
                continue
            f = {
                'url': video_version_url,
                'width': int_or_none(video_version.get('width')),
                'height': int_or_none(video_version.get('height')),
                'abr': int_or_none(video_version.get('audio_bitrate')),
                'vbr': int_or_none(video_version.get('video_bitrate')),
            }
            # Name the format after the "<digits>k" token in its URL, if any.
            bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
            if bitrate:
                f['format_id'] = f'http-{bitrate}'
            formats.append(f)

        return {
            'id': video_id,
            'title': video_data['title'],
            'description': video_data.get('subtitle'),
            # Guard against "thumbnail_image": null as well as a missing key.
            'thumbnail': (video_data.get('thumbnail_image') or {}).get('file'),
            'timestamp': parse_iso8601(video_data.get('publication_date')),
            # NOTE(review): the test fixture expects 74022 for a clip titled
            # "60 Seconds ...", so the API presumably reports milliseconds -
            # confirm against a live response before scaling.
            'duration': int_or_none(video_data.get('duration')),
            'view_count': int_or_none(video_data.get('view_count')),
            'formats': formats,
        }
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`from .common import InfoExtractor`
			`from ..utils import (`
			`int_or_none,`
			`parse_iso8601,`
			`)`


			`class AudiMediaIE(InfoExtractor):`
[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en\|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'`
			`_TESTS = [{`
[audimedia] update _VALID_URL and video_id regex and improve http format_id 2016-03-04 11:55:50 -05:00			`'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`'md5': '79a8b71c46d49042609795ab59779b66',`
			`'info_dict': {`
[audimedia] correct test case id 2015-12-21 17:02:55 -05:00			`'id': '1565',`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`'ext': 'mp4',`
			`'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',`
			`'description': 'md5:60e5d30a78ced725f7b8d34370762941',`
			`'upload_date': '20151124',`
			`'timestamp': 1448354940,`
			`'duration': 74022,`
			`'view_count': int,`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`},`
[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`}, {`
			`'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',`
			`'only_matching': True,`
			`}]`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00
			`def _real_extract(self, url):`
			`display_id = self._match_id(url)`
			`webpage = self._download_webpage(url, display_id)`
[audimedia] flake8 2015-12-03 16:25:08 -05:00
[audimedia] update _VALID_URL and video_id regex and improve http format_id 2016-03-04 11:55:50 -05:00			`raw_payload = self._search_regex([`
[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',`
			`r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',`
			`r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',`
			`r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',`
			`r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',`
[audimedia] update _VALID_URL and video_id regex and improve http format_id 2016-03-04 11:55:50 -05:00			`], webpage, 'raw payload')`
[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`_, stage_mode, video_id, _ = raw_payload.split('-')`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00
			`# TODO: handle s and e stage_mode (live streams and ended live streams)`
			`if stage_mode not in ('s', 'e'):`
[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`video_data = self._download_json(`
			`'https://www.audimedia.tv/api/video/v1/videos/' + video_id,`
			`video_id, query={`
			`'embed[]': ['video_versions', 'thumbnail_image'],`
			`})['results']`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`formats = []`

[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`stream_url_hls = video_data.get('stream_url_hls')`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`if stream_url_hls:`
[audimedia] split long lines 2015-12-29 08:53:06 -05:00			`formats.extend(self._extract_m3u8_formats(`
			`stream_url_hls, video_id, 'mp4',`
			`entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00
[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`stream_url_hds = video_data.get('stream_url_hds')`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`if stream_url_hds:`
[audimedia] split long lines 2015-12-29 08:53:06 -05:00			`formats.extend(self._extract_f4m_formats(`
			`stream_url_hds + '?hdcore=3.4.0',`
			`video_id, f4m_id='hds', fatal=False))`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00
[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`for video_version in video_data.get('video_versions', []):`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`video_version_url = video_version.get('download_url') or video_version.get('stream_url')`
			`if not video_version_url:`
			`continue`
[audimedia] update _VALID_URL and video_id regex and improve http format_id 2016-03-04 11:55:50 -05:00			`f = {`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`'url': video_version_url,`
			`'width': int_or_none(video_version.get('width')),`
			`'height': int_or_none(video_version.get('height')),`
			`'abr': int_or_none(video_version.get('audio_bitrate')),`
			`'vbr': int_or_none(video_version.get('video_bitrate')),`
[audimedia] update _VALID_URL and video_id regex and improve http format_id 2016-03-04 11:55:50 -05:00			`}`
			`bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)`
			`if bitrate:`
			`f.update({`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-11 19:09:58 -04:00			`'format_id': f'http-{bitrate}',`
[audimedia] update _VALID_URL and video_id regex and improve http format_id 2016-03-04 11:55:50 -05:00			`})`
			`formats.append(f)`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00
			`return {`
			`'id': video_id,`
[audimedia] fix extraction(closes #15309) 2018-05-31 07:39:45 -04:00			`'title': video_data['title'],`
			`'description': video_data.get('subtitle'),`
			`'thumbnail': video_data.get('thumbnail_image', {}).get('file'),`
			`'timestamp': parse_iso8601(video_data.get('publication_date')),`
			`'duration': int_or_none(video_data.get('duration')),`
			`'view_count': int_or_none(video_data.get('view_count')),`
[audimedia] Add new extractor(closes #7654) 2015-11-26 15:24:10 -05:00			`'formats': formats,`
			`}`