[Reddit] Add support for 1080p videos (#1682)

Fixes: https://github.com/ytdl-org/youtube-dl/issues/29565 Authored by: xenova
2024-11-21 20:46:36 -05:00 · 2021-11-19 00:48:48 +02:00 · 2021-11-19 00:48:48 +02:00 · e16fefd869
commit e16fefd869
parent c6118ca2cc
3 changed files with 71 additions and 44 deletions
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@ -1206,10 +1206,7 @@
    RedBullTVRrnContentIE,
    RedBullIE,
 )
-from .reddit import (
+from .reddit import RedditIE
    RedditIE,
    RedditRIE,
 )
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
 from .rentv import (
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@ -2344,6 +2344,34 @@ class GenericIE(InfoExtractor):
                'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
            }
        },
        {
            # Reddit-hosted video that will redirect and be processed by RedditIE
            # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
            'url': 'https://v.redd.it/zv89llsvexdz',
            'md5': '87f5f02f6c1582654146f830f21f8662',
            'info_dict': {
                'id': 'zv89llsvexdz',
                'ext': 'mp4',
                'timestamp': 1501941939.0,
                'title': 'That small heart attack.',
                'upload_date': '20170805',
                'uploader': 'Antw87'
            }
        },
        {
            # 1080p Reddit-hosted video that will redirect and be processed by RedditIE
            'url': 'https://v.redd.it/33hgok7dfbz71/',
            'md5': '7a1d587940242c9bb3bd6eb320b39258',
            'info_dict': {
                'id': '33hgok7dfbz71',
                'ext': 'mp4',
                'title': "The game Didn't want me to Knife that Guy I guess",
                'uploader': 'paraf1ve',
                'timestamp': 1636788683.0,
                'upload_date': '20211113'
            }
        }
        #
    ]
    def report_following_redirect(self, new_url):
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@ -8,43 +8,11 @@
    try_get,
    unescapeHTML,
    url_or_none,
    traverse_obj
 )
 class RedditIE(InfoExtractor):
    _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
    _TEST = {
        # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
        'url': 'https://v.redd.it/zv89llsvexdz',
        'md5': '0a070c53eba7ec4534d95a5a1259e253',
        'info_dict': {
            'id': 'zv89llsvexdz',
            'ext': 'mp4',
            'title': 'zv89llsvexdz',
        },
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)
        formats = self._extract_m3u8_formats(
            'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
            'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
        formats.extend(self._extract_mpd_formats(
            'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
            mpd_id='dash', fatal=False))
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
 class RedditRIE(InfoExtractor):
    _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
@ -147,19 +115,53 @@ def add_thumbnail(src):
                for resolution in resolutions:
                    add_thumbnail(resolution)
-        return {
+        info = {
            '_type': 'url_transparent',
            'url': video_url,
            'title': data.get('title'),
            'thumbnails': thumbnails,
            'timestamp': float_or_none(data.get('created_utc')),
            'uploader': data.get('author'),
            'duration': int_or_none(try_get(
                data,
                (lambda x: x['media']['reddit_video']['duration'],
                 lambda x: x['secure_media']['reddit_video']['duration']))),
            'like_count': int_or_none(data.get('ups')),
            'dislike_count': int_or_none(data.get('downs')),
            'comment_count': int_or_none(data.get('num_comments')),
            'age_limit': age_limit,
        }
        # Check if media is hosted on reddit:
        reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False)
        if reddit_video:
            playlist_urls = [
                try_get(reddit_video, lambda x: unescapeHTML(x[y]))
                for y in ('dash_url', 'hls_url')
            ]
            # Update video_id
            display_id = video_id
            video_id = self._search_regex(
                r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'],
                'video_id', default=display_id)
            dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
            hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
            formats = self._extract_m3u8_formats(
                hls_playlist_url, display_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
            formats.extend(self._extract_mpd_formats(
                dash_playlist_url, display_id, mpd_id='dash', fatal=False))
            self._sort_formats(formats)
            return {
                **info,
                'id': video_id,
                'display_id': display_id,
                'formats': formats,
                'duration': int_or_none(reddit_video.get('duration')),
            }
        # Not hosted on reddit, must continue extraction
        return {
            **info,
            'display_id': video_id,
            '_type': 'url_transparent',
            'url': video_url,
        }