[9gag] Add extractor

2024-11-30 10:29:57 -05:00 · 2013-12-05 14:29:08 +01:00 · 2013-12-05 14:29:08 +01:00 · 7fc3fa0545
commit 7fc3fa0545
parent 29030c0a4c
3 changed files with 73 additions and 3 deletions
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -405,7 +405,8 @@ def add_extra_info(info_dict, extra_info):
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)
-    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
+    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
@ -441,7 +442,10 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}):
                        'webpage_url': url,
                        'extractor_key': ie.ie_key(),
                    })
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
@ -474,8 +478,32 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
-        elif result_type == 'playlist':
+        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)
            def make_result(embedded_info):
                # Build a merged result: start from a copy of the embedding
                # page's result (ie_result, from the enclosing scope), then
                # for each field below drop the embedding page's value and,
                # when the embedded result has one, take that instead.
                embedded_fields = (
                    '_type', 'url', 'ext', 'player_url', 'formats',
                    'entries', 'urlhandle', 'ie_key', 'duration',
                    'subtitles', 'annotations', 'format')
                merged = ie_result.copy()
                for field in embedded_fields:
                    # Remove unconditionally so a stale value never survives
                    # when the embedded result lacks the field.
                    merged.pop(field, None)
                    if field in embedded_info:
                        merged[field] = embedded_info[field]
                return merged
            new_result = make_result(info)
            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -102,6 +102,7 @@
 from .newgrounds import NewgroundsIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
 from .nowvideo import NowVideoIE
 from .ooyala import OoyalaIE
 from .orf import ORFIE
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dl/extractor/ninegag.py
@ -0,0 +1,41 @@
 import json
 import re
 from .common import InfoExtractor
 class NineGagIE(InfoExtractor):
    """Extractor for 9gag.tv video pages whose player source is YouTube.

    The page carries its metadata as a JSON blob in the ``data-video-meta``
    attribute of the ``tv-video`` div.  The result is returned as a
    ``url_transparent`` entry that points at the YouTube extractor, carrying
    the title, description, view count and thumbnail found on the 9gag page.
    """
    IE_NAME = '9gag'
    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
    _TEST = {
        u"url": u"http://9gag.tv/v/1912",
        u"file": u"1912.mp4",
        u"info_dict": {
            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
        },
        u'add_ie': [u'Youtube']
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the 9gag.tv page at *url*.

        Raises KeyError if the page metadata lacks the mandatory
        ``youtubeVideoId`` or ``title`` entries.  Optional metadata
        (description, view count, thumbnail) is reported as None when it is
        missing or malformed instead of aborting the extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        data_json = self._html_search_regex(r'''(?x)
            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
                data-video-meta="([^"]+)"''', webpage, u'video metadata')
        data = json.loads(data_json)

        # The view count is optional: a missing, null or non-numeric value
        # must not make the whole extraction fail.
        try:
            view_count = int(data.get('view_count'))
        except (TypeError, ValueError):
            view_count = None

        return {
            '_type': 'url_transparent',
            # A bare YouTube video id; 'ie_key' routes it to that extractor.
            'url': data['youtubeVideoId'],
            'ie_key': 'Youtube',
            'id': video_id,
            'title': data['title'],
            'description': data.get('description'),
            'view_count': view_count,
            'thumbnail': data.get('thumbnail_url'),
        }