Merge pull request #7045 from remitamine/ign

[ign] add support for pcmag and extract all formats and more metadata (fixes #5335) (fixes #7006)
This commit is contained in:
remitamine 2015-12-25 20:06:27 +01:00
commit 9f0ee2a388
2 changed files with 115 additions and 32 deletions

View file

@ -259,7 +259,11 @@
from .huffpost import HuffPostIE from .huffpost import HuffPostIE
from .hypem import HypemIE from .hypem import HypemIE
from .iconosquare import IconosquareIE from .iconosquare import IconosquareIE
from .ign import IGNIE, OneUPIE from .ign import (
IGNIE,
OneUPIE,
PCMagIE,
)
from .imdb import ( from .imdb import (
ImdbIE, ImdbIE,
ImdbListIE ImdbListIE

View file

@ -3,6 +3,10 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
)
class IGNIE(InfoExtractor): class IGNIE(InfoExtractor):
@ -11,25 +15,24 @@ class IGNIE(InfoExtractor):
Some videos of it.ign.com are also supported Some videos of it.ign.com are also supported
""" """
_VALID_URL = r'https?://.+?\.ign\.com/(?P<type>videos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P<name_or_id>.+)' _VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)'
IE_NAME = 'ign.com' IE_NAME = 'ign.com'
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' _API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s'
_DESCRIPTION_RE = [ _EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']'
r'<span class="page-object-description">(.+?)</span>',
r'id="my_show_video">.*?<p>(.*?)</p>',
r'<meta name="description" content="(.*?)"',
]
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', 'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
'md5': 'eac8bdc1890980122c3b66f14bdd02e9', 'md5': 'febda82c4bafecd2d44b6e1a18a595f8',
'info_dict': { 'info_dict': {
'id': '8f862beef863986b2785559b9e1aa599', 'id': '8f862beef863986b2785559b9e1aa599',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Last of Us Review', 'title': 'The Last of Us Review',
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c', 'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
'timestamp': 1370440800,
'upload_date': '20130605',
'uploader_id': 'cberidon@ign.com',
} }
}, },
{ {
@ -44,6 +47,9 @@ class IGNIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'GTA 5 Video Review', 'title': 'GTA 5 Video Review',
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', 'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
'timestamp': 1379339880,
'upload_date': '20130916',
'uploader_id': 'danieljkrupa@gmail.com',
}, },
}, },
{ {
@ -52,6 +58,9 @@ class IGNIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '26 Twisted Moments from GTA 5 in Slow Motion', 'title': '26 Twisted Moments from GTA 5 in Slow Motion',
'description': 'The twisted beauty of GTA 5 in stunning slow motion.', 'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
'timestamp': 1386878820,
'upload_date': '20131212',
'uploader_id': 'togilvie@ign.com',
}, },
}, },
], ],
@ -66,12 +75,20 @@ class IGNIE(InfoExtractor):
'id': '078fdd005f6d3c02f63d795faa1b984f', 'id': '078fdd005f6d3c02f63d795faa1b984f',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Rewind Theater - Wild Trailer Gamescom 2014', 'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
'description': ( 'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
'Giant skeletons, bloody hunts, and captivating' 'timestamp': 1408047180,
' natural beauty take our breath away.' 'upload_date': '20140814',
), 'uploader_id': 'jamesduggan1990@gmail.com',
}, },
}, },
{
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
'only_matching': True,
},
{
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
'only_matching': True,
},
] ]
def _find_video_id(self, webpage): def _find_video_id(self, webpage):
@ -82,7 +99,7 @@ def _find_video_id(self, webpage):
r'<object id="vid_(.+?)"', r'<object id="vid_(.+?)"',
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"', r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
] ]
return self._search_regex(res_id, webpage, 'video id') return self._search_regex(res_id, webpage, 'video id', default=None)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -91,8 +108,8 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, name_or_id) webpage = self._download_webpage(url, name_or_id)
if page_type != 'video': if page_type != 'video':
multiple_urls = re.findall( multiple_urls = re.findall(
'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]', r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
webpage) webpage)
if multiple_urls: if multiple_urls:
entries = [self.url_result(u, ie='IGN') for u in multiple_urls] entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
return { return {
@ -102,22 +119,50 @@ def _real_extract(self, url):
} }
video_id = self._find_video_id(webpage) video_id = self._find_video_id(webpage)
result = self._get_video_info(video_id) if not video_id:
description = self._html_search_regex(self._DESCRIPTION_RE, return self.url_result(self._search_regex(self._EMBED_RE, webpage, 'embed url'))
webpage, 'video description', flags=re.DOTALL) return self._get_video_info(video_id)
result['description'] = description
return result
def _get_video_info(self, video_id): def _get_video_info(self, video_id):
config_url = self._CONFIG_URL_TEMPLATE % video_id api_data = self._download_json(self._API_URL_TEMPLATE % video_id, video_id)
config = self._download_json(config_url, video_id)
media = config['playlist']['media'] formats = []
m3u8_url = api_data['refs'].get('m3uUrl')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
f4m_url = api_data['refs'].get('f4mUrl')
if f4m_url:
f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)
if f4m_formats:
formats.extend(f4m_formats)
for asset in api_data['assets']:
formats.append({
'url': asset['url'],
'tbr': asset.get('actual_bitrate_kbps'),
'fps': asset.get('frame_rate'),
'height': int_or_none(asset.get('height')),
'width': int_or_none(asset.get('width')),
})
self._sort_formats(formats)
thumbnails = [{
'url': thumbnail['url']
} for thumbnail in api_data.get('thumbnails', [])]
metadata = api_data['metadata']
return { return {
'id': media['metadata']['videoId'], 'id': api_data.get('videoId') or video_id,
'url': media['url'], 'title': metadata.get('longTitle') or metadata.get('name') or metadata.get('title'),
'title': media['metadata']['title'], 'description': metadata.get('description'),
'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'), 'timestamp': parse_iso8601(metadata.get('publishDate')),
'duration': int_or_none(metadata.get('duration')),
'display_id': metadata.get('slug') or video_id,
'uploader_id': metadata.get('creator'),
'thumbnails': thumbnails,
'formats': formats,
} }
@ -125,16 +170,17 @@ class OneUPIE(IGNIE):
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html' _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
IE_NAME = '1up.com' IE_NAME = '1up.com'
_DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'
_TESTS = [{ _TESTS = [{
'url': 'http://gamevideos.1up.com/video/id/34976.html', 'url': 'http://gamevideos.1up.com/video/id/34976.html',
'md5': '68a54ce4ebc772e4b71e3123d413163d', 'md5': 'c9cc69e07acb675c31a16719f909e347',
'info_dict': { 'info_dict': {
'id': '34976', 'id': '34976',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Sniper Elite V2 - Trailer', 'title': 'Sniper Elite V2 - Trailer',
'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf', 'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826',
'timestamp': 1313099220,
'upload_date': '20110811',
'uploader_id': 'IGN',
} }
}] }]
@ -143,3 +189,36 @@ def _real_extract(self, url):
result = super(OneUPIE, self)._real_extract(url) result = super(OneUPIE, self)._real_extract(url)
result['id'] = mobj.group('name_or_id') result['id'] = mobj.group('name_or_id')
return result return result
class PCMagIE(IGNIE):
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = 'pcmag'
_EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]'
_TESTS = [{
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
'md5': '212d6154fd0361a2781075f1febbe9ad',
'info_dict': {
'id': 'ee10d774b508c9b8ec07e763b9125b91',
'ext': 'mp4',
'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?',
'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3',
'timestamp': 1420571160,
'upload_date': '20150106',
'uploader_id': 'cozzipix@gmail.com',
}
},{
'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp',
'md5': '94130c1ca07ba0adb6088350681f16c1',
'info_dict': {
'id': '042e560ba94823d43afcb12ddf7142ca',
'ext': 'mp4',
'title': 'HTC\'s Weird New Re Camera - What\'s New Now',
'description': 'md5:53433c45df96d2ea5d0fda18be2ca908',
'timestamp': 1412953920,
'upload_date': '20141010',
'uploader_id': 'chris_snyder@pcmag.com',
}
}]