[crackle] Bypass geo restriction

This commit is contained in:
Sergey M․ 2018-03-24 01:49:50 +07:00
parent b9f5a41207
commit 7d34016fb0
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -4,20 +4,24 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_str,
compat_HTTPError,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
ExtractorError
) )
class CrackleIE(InfoExtractor): class CrackleIE(InfoExtractor):
_GEO_COUNTRIES = ['US']
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TEST = { _TEST = {
# geo restricted to CA
'url': 'https://www.crackle.com/andromeda/2502343', 'url': 'https://www.crackle.com/andromeda/2502343',
'info_dict': { 'info_dict': {
'id': '2502343', 'id': '2502343',
@ -46,93 +50,115 @@ class CrackleIE(InfoExtractor):
def _real_extract(self, url):
    """Extract metadata, formats, subtitles and thumbnails for one video.

    The media details endpoint is requested once per candidate country
    code until a response carrying a non-empty ``MediaURLs`` list is
    obtained.  If the ``geo_bypass_country`` downloader parameter is set,
    only that country is tried; otherwise a built-in list of country
    codes is walked in order.

    Returns the info dict built from the first usable response.

    Raises ExtractorError: the last HTTP 401 failure if at least one
    country was rejected that way and none succeeded, a generic error if
    no country produced any media URLs, or immediately for any download
    failure that is not an HTTP 401.
    """
    video_id = self._match_id(url)

    # An explicitly requested country overrides the built-in candidates.
    country_code = self._downloader.params.get('geo_bypass_country', None)
    countries = [country_code] if country_code else (
        'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')

    last_e = None

    for country in countries:
        try:
            media = self._download_json(
                'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
                % (video_id, country), video_id,
                'Downloading media JSON as %s' % country,
                'Unable to download media JSON', query={
                    'disableProtocols': 'true',
                    'format': 'json'
                })
        except ExtractorError as e:
            # 401 means geo restriction, trying next country
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                last_e = e
                continue
            raise

        media_urls = media.get('MediaURLs')
        if not media_urls or not isinstance(media_urls, list):
            # Nothing playable reported for this country, try the next one.
            continue

        title = media['Title']

        formats = []
        for media_url in media_urls:
            # Skip malformed entries instead of crashing on .get().
            if not isinstance(media_url, dict):
                continue
            if media_url.get('UseDRM') is True:
                continue
            format_url = media_url.get('Path')
            if not format_url or not isinstance(format_url, compat_str):
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        description = media.get('Description')
        duration = int_or_none(media.get(
            'DurationInSeconds')) or parse_duration(media.get('Duration'))
        view_count = int_or_none(media.get('CountViews'))
        average_rating = float_or_none(media.get('UserRating'))
        age_limit = parse_age_limit(media.get('Rating'))
        genre = media.get('Genre')
        release_year = int_or_none(media.get('ReleaseYear'))
        creator = media.get('Directors')
        artist = media.get('Cast')

        # Series/episode fields are only filled in for full episodes.
        if media.get('MediaTypeDisplayValue') == 'Full Episode':
            series = media.get('ShowName')
            episode = title
            season_number = int_or_none(media.get('Season'))
            episode_number = int_or_none(media.get('Episode'))
        else:
            series = episode = season_number = episode_number = None

        subtitles = {}
        cc_files = media.get('ClosedCaptionFiles')
        if isinstance(cc_files, list):
            for cc_file in cc_files:
                if not isinstance(cc_file, dict):
                    continue
                cc_url = cc_file.get('Path')
                if not cc_url or not isinstance(cc_url, compat_str):
                    continue
                lang = cc_file.get('Locale') or 'en'
                subtitles.setdefault(lang, []).append({'url': cc_url})

        thumbnails = []
        images = media.get('Images')
        # The loop below needs key/value pairs, so the container must be a
        # dict; the previous list check could only skip or crash here.
        if isinstance(images, dict):
            for image_key, image_url in images.items():
                # Dimensions are taken from keys matching Img_<W>x<H>.
                mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
                if not mobj:
                    continue
                thumbnails.append({
                    'url': image_url,
                    'width': int(mobj.group(1)),
                    'height': int(mobj.group(2)),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'age_limit': age_limit,
            'genre': genre,
            'creator': creator,
            'artist': artist,
            'release_year': release_year,
            'series': series,
            'episode': episode,
            'season_number': season_number,
            'episode_number': episode_number,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }

    # No country yielded usable media.  Prefer reporting the last 401; if
    # none was seen, raise a real error rather than `raise None`.
    if last_e is not None:
        raise last_e
    raise ExtractorError('Unable to find any media URLs for %s' % video_id)