mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-30 10:29:57 -05:00
[9gag] Add extractor
This commit is contained in:
parent
29030c0a4c
commit
7fc3fa0545
3 changed files with 73 additions and 3 deletions
|
@ -405,7 +405,8 @@ def add_extra_info(info_dict, extra_info):
|
||||||
for key, value in extra_info.items():
|
for key, value in extra_info.items():
|
||||||
info_dict.setdefault(key, value)
|
info_dict.setdefault(key, value)
|
||||||
|
|
||||||
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||||
|
process=True):
|
||||||
'''
|
'''
|
||||||
Returns a list with a dictionary for each video we find.
|
Returns a list with a dictionary for each video we find.
|
||||||
If 'download', also downloads the videos.
|
If 'download', also downloads the videos.
|
||||||
|
@ -441,7 +442,10 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}):
|
||||||
'webpage_url': url,
|
'webpage_url': url,
|
||||||
'extractor_key': ie.ie_key(),
|
'extractor_key': ie.ie_key(),
|
||||||
})
|
})
|
||||||
|
if process:
|
||||||
return self.process_ie_result(ie_result, download, extra_info)
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
|
else:
|
||||||
|
return ie_result
|
||||||
except ExtractorError as de: # An error we somewhat expected
|
except ExtractorError as de: # An error we somewhat expected
|
||||||
self.report_error(compat_str(de), de.format_traceback())
|
self.report_error(compat_str(de), de.format_traceback())
|
||||||
break
|
break
|
||||||
|
@ -474,8 +478,32 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||||
download,
|
download,
|
||||||
ie_key=ie_result.get('ie_key'),
|
ie_key=ie_result.get('ie_key'),
|
||||||
extra_info=extra_info)
|
extra_info=extra_info)
|
||||||
elif result_type == 'playlist':
|
elif result_type == 'url_transparent':
|
||||||
|
# Use the information from the embedding page
|
||||||
|
info = self.extract_info(
|
||||||
|
ie_result['url'], ie_key=ie_result.get('ie_key'),
|
||||||
|
extra_info=extra_info, download=False, process=False)
|
||||||
|
|
||||||
|
def make_result(embedded_info):
|
||||||
|
new_result = ie_result.copy()
|
||||||
|
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
|
||||||
|
'entries', 'urlhandle', 'ie_key', 'duration',
|
||||||
|
'subtitles', 'annotations', 'format'):
|
||||||
|
if f in new_result:
|
||||||
|
del new_result[f]
|
||||||
|
if f in embedded_info:
|
||||||
|
new_result[f] = embedded_info[f]
|
||||||
|
return new_result
|
||||||
|
new_result = make_result(info)
|
||||||
|
|
||||||
|
assert new_result.get('_type') != 'url_transparent'
|
||||||
|
if new_result.get('_type') == 'compat_list':
|
||||||
|
new_result['entries'] = [
|
||||||
|
make_result(e) for e in new_result['entries']]
|
||||||
|
|
||||||
|
return self.process_ie_result(
|
||||||
|
new_result, download=download, extra_info=extra_info)
|
||||||
|
elif result_type == 'playlist':
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||||
|
|
|
@ -102,6 +102,7 @@
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
from .niconico import NiconicoIE
|
from .niconico import NiconicoIE
|
||||||
|
from .ninegag import NineGagIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import ORFIE
|
from .orf import ORFIE
|
||||||
|
|
41
youtube_dl/extractor/ninegag.py
Normal file
41
youtube_dl/extractor/ninegag.py
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class NineGagIE(InfoExtractor):
    """Information extractor for 9gag.tv video pages.

    The page carries a JSON metadata blob in the ``data-video-meta``
    attribute of the ``tv-video`` element.  The extractor reads that blob
    and returns a ``url_transparent`` result that points at the embedded
    YouTube video while supplying the 9gag-side id, title, description,
    view count and thumbnail.
    """
    IE_NAME = '9gag'
    _VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'

    _TEST = {
        u"url": u"http://9gag.tv/v/1912",
        u"file": u"1912.mp4",
        u"info_dict": {
            u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
        },
        u'add_ie': [u'Youtube']
    }

    def _real_extract(self, url):
        """Build the ``url_transparent`` result for a 9gag.tv video URL.

        ``youtubeVideoId`` and ``title`` are required in the page metadata
        and a missing key still raises ``KeyError``.  ``description``,
        ``view_count`` and ``thumbnail_url`` are treated as optional: when
        absent (or, for the view count, not convertible to an integer) the
        corresponding result field is ``None`` instead of aborting the
        extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # Verbose-mode pattern: literal whitespace inside it is ignored,
        # only the explicit \s* tokens match whitespace in the page.
        data_json = self._html_search_regex(r'''(?x)
            <div\s*id="tv-video"\s*data-video-source="youtube"\s*
            data-video-meta="([^"]+)"''', webpage, u'video metadata')

        data = json.loads(data_json)

        # The view count is optional metadata; do not let a missing or
        # malformed value prevent the video itself from being extracted.
        try:
            view_count = int(data.get('view_count'))
        except (TypeError, ValueError):
            view_count = None

        return {
            '_type': 'url_transparent',
            'url': data['youtubeVideoId'],
            'ie_key': 'Youtube',
            'id': video_id,
            'title': data['title'],
            'description': data.get('description'),
            'view_count': view_count,
            'thumbnail': data.get('thumbnail_url'),
        }
|
Loading…
Reference in a new issue