TwitterCard: support the vmapUrl method

This commit is contained in:
fnord 2015-07-21 16:45:36 -05:00
parent f57f84f606
commit c3dea3f878

View file

@ -12,17 +12,30 @@
class TwitterCardIE(InfoExtractor): class TwitterCardIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
_TEST = { _TESTS = [
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', {
'md5': 'a74f50b310c83170319ba16de6955192', 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
'info_dict': { 'md5': 'a74f50b310c83170319ba16de6955192',
'id': '560070183650213889', 'info_dict': {
'ext': 'mp4', 'id': '560070183650213889',
'title': 'TwitterCard', 'ext': 'mp4',
'thumbnail': 're:^https?://.*\.jpg$', 'title': 'TwitterCard',
'duration': 30.033, 'thumbnail': 're:^https?://.*\.jpg$',
'duration': 30.033,
}
}, },
} {
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
'md5': '7ee2a553b63d1bccba97fbed97d9e1c8',
'info_dict': {
'id': '623160978427936768',
'ext': 'mp4',
'title': 'TwitterCard',
'thumbnail': 're:^https?://.*\.jpg',
'duration': 80.155,
},
}
]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -44,6 +57,20 @@ def _real_extract(self, url):
unescapeHTML(self._search_regex( unescapeHTML(self._search_regex(
r'data-player-config="([^"]+)"', webpage, 'data player config')), r'data-player-config="([^"]+)"', webpage, 'data player config')),
video_id) video_id)
if 'playlist' not in config:
if 'vmapUrl' in config:
webpage = self._download_webpage(config['vmapUrl'], video_id + ' (xml)')
video_url = self._search_regex(
r'<MediaFile>\s*<!\[CDATA\[(https?://.+?)\]\]>', webpage, 'data player config (xml)')
f = {
'url': video_url,
}
ext = re.search(r'\.([a-z0-9]{2,4})(\?.+)?$', video_url)
if ext:
f['ext'] = ext.group(1)
formats.append(f)
break # same video regardless of UA
continue
video_url = config['playlist'][0]['source'] video_url = config['playlist'][0]['source']