[yahoo] Bypass geo restriction for brightcove (#14210)

This commit is contained in:
Sergey M․ 2017-10-01 04:49:27 +07:00
parent b69ca0ccfc
commit c8da40d834
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -17,6 +17,7 @@
extract_attributes, extract_attributes,
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
smuggle_url,
unescapeHTML, unescapeHTML,
) )
@ -29,7 +30,7 @@
class YahooIE(InfoExtractor): class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies' IE_DESC = 'Yahoo screen and movies'
_VALID_URL = r'(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P<display_id>.+)?-)?(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?' _VALID_URL = r'(?P<host>https?://(?:(?P<country>[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P<display_id>.+)?-)?(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?'
_TESTS = [ _TESTS = [
{ {
'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
@ -244,6 +245,11 @@ class YahooIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
},
{
# custom brightcove, geo-restricted to Australia, bypassable
'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/',
'only_matching': True,
} }
] ]
@ -274,10 +280,15 @@ def _real_extract(self, url):
if bc_url: if bc_url:
return self.url_result(bc_url, BrightcoveLegacyIE.ie_key()) return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
def brightcove_url_result(bc_url):
return self.url_result(
smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}),
BrightcoveNewIE.ie_key())
# Look for Brightcove New Studio embeds # Look for Brightcove New Studio embeds
bc_url = BrightcoveNewIE._extract_url(self, webpage) bc_url = BrightcoveNewIE._extract_url(self, webpage)
if bc_url: if bc_url:
return self.url_result(bc_url, BrightcoveNewIE.ie_key()) return brightcove_url_result(bc_url)
brightcove_iframe = self._search_regex( brightcove_iframe = self._search_regex(
r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage, r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage,
@ -291,9 +302,9 @@ def _real_extract(self, url):
account_id = qs.get('accountId', ['2376984109001'])[0] account_id = qs.get('accountId', ['2376984109001'])[0]
brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0] brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0]
if account_id and brightcove_id: if account_id and brightcove_id:
return self.url_result( return brightcove_url_result(
'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (account_id, brightcove_id), 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
BrightcoveNewIE.ie_key()) % (account_id, brightcove_id))
# Query result is often embedded in webpage as JSON. Sometimes explicit requests # Query result is often embedded in webpage as JSON. Sometimes explicit requests
# to video API results in a failure with geo restriction reason therefore using # to video API results in a failure with geo restriction reason therefore using