mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
[ninegag] Fix extraction
This commit is contained in:
parent
9d2ecdbc71
commit
84769e708c
2 changed files with 24 additions and 12 deletions
|
@@ -144,6 +144,10 @@ def try_rm_tcs_files():
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
isinstance(got, compat_str) and match_rex.match(got),
|
isinstance(got, compat_str) and match_rex.match(got),
|
||||||
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
u'field %s (value: %r) should match %r' % (info_field, got, match_str))
|
||||||
|
elif isinstance(expected, type):
|
||||||
|
got = info_dict.get(info_field)
|
||||||
|
self.assertTrue(isinstance(got, expected),
|
||||||
|
u'Expected type %r, but got value %r of type %r' % (expected, got, type(got)))
|
||||||
else:
|
else:
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
got = 'md5:' + md5(info_dict.get(info_field))
|
got = 'md5:' + md5(info_dict.get(info_field))
|
||||||
|
|
|
@@ -15,7 +15,9 @@ class NineGagIE(InfoExtractor):
|
||||||
"file": "1912.mp4",
|
"file": "1912.mp4",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
"description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
|
||||||
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome"
|
"title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
|
||||||
|
"view_count": int,
|
||||||
|
"thumbnail": "re:^https?://",
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube']
|
'add_ie': ['Youtube']
|
||||||
}
|
}
|
||||||
|
@@ -25,21 +27,27 @@ def _real_extract(self, url):
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
data_json = self._html_search_regex(r'''(?x)
|
|
||||||
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
|
|
||||||
data-video-meta="([^"]+)"''', webpage, 'video metadata')
|
|
||||||
|
|
||||||
data = json.loads(data_json)
|
youtube_id = self._html_search_regex(
|
||||||
|
r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
|
||||||
|
webpage, 'video ID')
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
|
||||||
|
'description', fatal=False)
|
||||||
|
view_count_str = self._html_search_regex(
|
||||||
|
r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
|
||||||
|
fatal=False)
|
||||||
|
view_count = (
|
||||||
|
None if view_count_str is None
|
||||||
|
else int(view_count_str.replace(',', '')))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': data['youtubeVideoId'],
|
'url': youtube_id,
|
||||||
'ie_key': 'Youtube',
|
'ie_key': 'Youtube',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': data['title'],
|
'title': self._og_search_title(webpage),
|
||||||
'description': data['description'],
|
'description': description,
|
||||||
'view_count': int(data['view_count']),
|
'view_count': view_count,
|
||||||
'like_count': int(data['statistic']['like']),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'dislike_count': int(data['statistic']['dislike']),
|
|
||||||
'thumbnail': data['thumbnail_url'],
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue