[shahid] try to bypass geo restriction and extract more metadata(closes #10062)

This commit is contained in:
Remita Amine 2016-07-12 23:15:38 +01:00
parent 381ff44756
commit 41aa44259d

View file

@ -2,11 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
str_or_none,
) )
@ -33,45 +33,27 @@ class ShahidIE(InfoExtractor):
'only_matching': True 'only_matching': True
}] }]
def _handle_error(self, response): def _call_api(self, path, video_id, note):
if not isinstance(response, dict): data = self._download_json(
return 'http://api.shahid.net/api/v1_1/' + path, video_id, note, query={
error = response.get('error') 'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
}).get('data', {})
error = data.get('error')
if error: if error:
raise ExtractorError( raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
expected=True) expected=True)
def _download_json(self, url, video_id, note='Downloading JSON metadata'): return data
response = super(ShahidIE, self)._download_json(url, video_id, note)['data']
self._handle_error(response)
return response
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) player = self._call_api(
'Content/Episode/%s' % video_id,
api_vars = { video_id, 'Downloading player JSON')
'id': video_id,
'type': 'player',
'url': 'http://api.shahid.net/api/v1_1',
'playerType': 'episode',
}
flashvars = self._search_regex(
r'var\s+flashvars\s*=\s*({[^}]+})', webpage, 'flashvars', default=None)
if flashvars:
for key in api_vars.keys():
value = self._search_regex(
r'\b%s\s*:\s*(?P<q>["\'])(?P<value>.+?)(?P=q)' % key,
flashvars, 'type', default=None, group='value')
if value:
api_vars[key] = value
player = self._download_json(
'https://shahid.mbc.net/arContent/getPlayerContent-param-.id-%s.type-%s.html'
% (video_id, api_vars['type']), video_id, 'Downloading player JSON')
if player.get('drm'): if player.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True) raise ExtractorError('This video is DRM protected.', expected=True)
@ -79,22 +61,11 @@ def _real_extract(self, url):
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats) self._sort_formats(formats)
video = self._download_json( video = self._call_api(
'%s/%s/%s?%s' % ( 'episode/%s' % video_id, video_id,
api_vars['url'], api_vars['playerType'], api_vars['id'], 'Downloading video JSON')['episode']
compat_urllib_parse_urlencode({
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
})),
video_id, 'Downloading video JSON')
video = video[api_vars['playerType']]
title = video['title'] title = video['title']
description = video.get('description')
thumbnail = video.get('thumbnailUrl')
duration = int_or_none(video.get('duration'))
timestamp = parse_iso8601(video.get('referenceDate'))
categories = [ categories = [
category['name'] category['name']
for category in video.get('genres', []) if 'name' in category] for category in video.get('genres', []) if 'name' in category]
@ -102,10 +73,16 @@ def _real_extract(self, url):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': video.get('description'),
'thumbnail': thumbnail, 'thumbnail': video.get('thumbnailUrl'),
'duration': duration, 'duration': int_or_none(video.get('duration')),
'timestamp': timestamp, 'timestamp': parse_iso8601(video.get('referenceDate')),
'categories': categories, 'categories': categories,
'series': video.get('showTitle') or video.get('showName'),
'season': video.get('seasonTitle'),
'season_number': int_or_none(video.get('seasonNumber')),
'season_id': str_or_none(video.get('seasonId')),
'episode_number': int_or_none(video.get('number')),
'episode_id': video_id,
'formats': formats, 'formats': formats,
} }