From 80f772c28a3277376620ed7f50308e12437e358d Mon Sep 17 00:00:00 2001
From: remitamine
Date: Wed, 10 Feb 2016 22:16:21 +0100
Subject: [PATCH] [crackle] Add new extractor

Add CrackleIE, which handles crackle.com video pages as well as the
internal `crackle:<id>` URL form.

ComCarCoffIE no longer extracts formats itself: it now returns a
url_transparent result pointing at `crackle:<mediaId>`, uses the media id
as the video id, and additionally reports season and episode numbers.

---
Review notes (this section is ignored by git-am):
 * Line structure of the patch was restored; hunk line counts are unchanged.
 * crackle.py: restored the `id` group name in _VALID_URL (`(?P\d+)` is not
   a valid Python regular expression).
 * crackle.py: caption tracks are now appended per locale instead of each
   one overwriting the previous entry for that locale.
 * The blob ids on the `index` lines predate these review changes.

 youtube_dl/extractor/__init__.py   |  1 +
 youtube_dl/extractor/comcarcoff.py | 16 +++---
 youtube_dl/extractor/crackle.py    | 92 ++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 7 deletions(-)
 create mode 100644 youtube_dl/extractor/crackle.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 18951c287..f08f27480 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -126,6 +126,7 @@
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
+from .crackle import CrackleIE
 from .criterion import CriterionIE
 from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
index 2efa200b5..7dff68492 100644
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     int_or_none,
     parse_duration,
@@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
     _TESTS = [{
         'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
         'info_dict': {
-            'id': 'miranda-sings-happy-thanksgiving-miranda',
+            'id': '2494164',
             'ext': 'mp4',
             'upload_date': '20141127',
             'timestamp': 1417107600,
             'duration': 1232,
             'title': 'Happy Thanksgiving Miranda',
             'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
-            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
         },
         'params': {
             'skip_download': 'requires ffmpeg',
@@ -39,15 +39,14 @@ def _real_extract(self, url):
                 r'window\.app\s*=\s*({.+?});\n',
                 webpage, 'full data json'), display_id)['videoData']
 
-        video_id = full_data['activeVideo']['video']
-        video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
+        display_id = full_data['activeVideo']['video']
+        video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
+        video_id = compat_str(video_data['mediaId'])
         thumbnails = [{
             'url': video_data['images']['thumb'],
         }, {
             'url': video_data['images']['poster'],
         }]
-        formats = self._extract_m3u8_formats(
-            video_data['mediaUrl'], video_id, ext='mp4')
 
         timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
             video_data.get('pubDate'))
@@ -55,6 +54,8 @@ def _real_extract(self, url):
             video_data.get('duration'))
 
         return {
+            '_type': 'url_transparent',
+            'url': 'crackle:%s' % video_id,
             'id': video_id,
             'display_id': display_id,
             'title': video_data['title'],
@@ -62,6 +63,7 @@ def _real_extract(self, url):
             'timestamp': timestamp,
             'duration': duration,
             'thumbnails': thumbnails,
-            'formats': formats,
+            'season_number': int_or_none(video_data.get('season')),
+            'episode_number': int_or_none(video_data.get('episode')),
             'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
         }
diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py
new file mode 100644
index 000000000..a478333a2
--- /dev/null
+++ b/youtube_dl/extractor/crackle.py
@@ -0,0 +1,92 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class CrackleIE(InfoExtractor):
+    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.crackle.com/the-art-of-more/2496419',
+        'info_dict': {
+            'id': '2496419',
+            'ext': 'mp4',
+            'title': 'Heavy Lies the Head',
+            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }
+
+    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
+    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
+    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
+    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
+
+    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
+    _MEDIA_FILE_SLOTS = {
+        'c544.flv': {
+            'width': 544,
+            'height': 306,
+        },
+        '360p.mp4': {
+            'width': 640,
+            'height': 360,
+        },
+        '480p.mp4': {
+            'width': 852,
+            'height': 478,
+        },
+        '480p_1mbps.mp4': {
+            'width': 852,
+            'height': 478,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        item = self._download_xml(
+            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id, video_id).find('i')
+        title = item.attrib['t']
+
+        thumbnail = None
+        subtitles = {}
+        formats = self._extract_m3u8_formats('http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id), video_id, 'mp4', fatal=None)
+        path = item.attrib.get('p')
+        if path:
+            thumbnail = self._THUMBNAIL_TEMPLATE % path
+            http_base_url = 'http://ahttp.crackle.com/' + path
+            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
+                formats.append({
+                    'url': http_base_url + mfs_path,
+                    'format_id': mfs_path.split('.')[0],
+                    'width': mfs_info['width'],
+                    'height': mfs_info['height'],
+                })
+            for cc in item.findall('cc'):
+                locale = cc.attrib.get('l')
+                v = cc.attrib.get('v')
+                if locale and v:
+                    # keep every caption track of a locale, not only the last
+                    subtitles.setdefault(locale, []).append({
+                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
+                        'ext': 'ttml',
+                    })
+        self._sort_formats(formats, ('width', 'height', 'tbr'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': item.attrib.get('d'),
+            # the 'r' attribute is parsed as a base-16 integer
+            'duration': int(item.attrib.get('r'), 16) if item.attrib.get('r') else None,
+            'series': item.attrib.get('sn'),
+            'season_number': int_or_none(item.attrib.get('se')),
+            'episode_number': int_or_none(item.attrib.get('ep')),
+            'thumbnail': thumbnail,
+            'subtitles': subtitles,
+            'formats': formats,
+        }