[ooyala] Try mobile player JS URLs for all available devices (Closes #3498)

It looks like some videos are only available for particular devices
(e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
is only available for iPad).
Work around this by fetching the mobile player JS URL for each device found in the player JS,
trying 'unknown' first, until one request succeeds or every device has failed.
This commit is contained in:
Sergey M․ 2014-08-12 20:54:08 +07:00
parent 90e075da3a
commit 6f600ff5d6

View file

@ -3,23 +3,38 @@
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unescapeHTML from ..utils import (
unescapeHTML,
ExtractorError,
)
class OoyalaIE(InfoExtractor): class OoyalaIE(InfoExtractor):
_VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)' _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
_TEST = { _TESTS = [
# From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video {
'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
'md5': '3f5cceb3a7bf461d6c29dc466cf8033c', 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
'info_dict': { 'md5': '3f5cceb3a7bf461d6c29dc466cf8033c',
'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', 'info_dict': {
'ext': 'mp4', 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
'title': 'Explaining Data Recovery from Hard Drives and SSDs', 'ext': 'mp4',
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
},
}, {
# Only available for ipad
'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
'md5': '4b9754921fddb68106e48c142e2a01e6',
'info_dict': {
'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
'ext': 'mp4',
'title': 'Simulation Overview - Levels of Simulation',
'description': '',
},
}, },
} ]
@staticmethod @staticmethod
def _url_for_embed_code(embed_code): def _url_for_embed_code(embed_code):
@ -47,13 +62,30 @@ def _real_extract(self, url):
player = self._download_webpage(player_url, embedCode) player = self._download_webpage(player_url, embedCode)
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, 'mobile player url') player, 'mobile player url')
mobile_player = self._download_webpage(mobile_url, embedCode) # Looks like some videos are only available for particular devices
videos_info = self._search_regex( # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', # is only available for ipad)
mobile_player, 'info').replace('\\"','"') # Working around with fetching URLs for all the devices found starting with 'unknown'
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"') # until we succeed or eventually fail for each device.
devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
devices.remove('unknown')
devices.insert(0, 'unknown')
for device in devices:
mobile_player = self._download_webpage(
'%s&device=%s' % (mobile_url, device), embedCode,
'Downloading mobile player JS for %s device' % device)
videos_info = self._search_regex(
r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
mobile_player, 'info', fatal=False, default=None)
if videos_info:
break
if not videos_info:
raise ExtractorError('Unable to extract info')
videos_info = videos_info.replace('\\"', '"')
videos_more_info = self._search_regex(
r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
videos_info = json.loads(videos_info) videos_info = json.loads(videos_info)
videos_more_info =json.loads(videos_more_info) videos_more_info = json.loads(videos_more_info)
if videos_more_info.get('lineup'): if videos_more_info.get('lineup'):
videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]