From 893f8832b52926847353f2b678e313687806a775 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 24 Mar 2014 22:01:47 +0100 Subject: [PATCH] [arte] Add support for embedded videos (Fixes #2620) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/arte.py | 25 ++++++++++++++++++++++--- youtube_dl/extractor/generic.py | 22 ++++++++++++++++++++++ 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a7a339e7d..39b250b10 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -14,6 +14,7 @@ ArteTVConcertIE, ArteTVFutureIE, ArteTVDDCIE, + ArteTVEmbedIE, ) from .auengine import AUEngineIE from .bambuser import BambuserIE, BambuserChannelIE diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 49fb64077..257dc1f61 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -75,9 +75,7 @@ def _extract_from_webpage(self, webpage, video_id, lang): return self._extract_from_json_url(json_url, video_id, lang) def _extract_from_json_url(self, json_url, video_id, lang): - json_info = self._download_webpage(json_url, video_id, 'Downloading info json') - self.report_extraction(video_id) - info = json.loads(json_info) + info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] info_dict = { @@ -99,6 +97,8 @@ def _match_lang(f): l = 'F' elif lang == 'de': l = 'A' + else: + l = lang regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url @@ -228,3 +228,22 @@ class ArteTVConcertIE(ArteTVPlus7IE): 'description': 'md5:486eb08f991552ade77439fe6d82c305', }, } + + +class ArteTVEmbedIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:embed' + _VALID_URL = r'''(?x) + http://www\.arte\.tv + /playerv2/embed\.php\?json_url= + (?P<json_url> + http://arte\.tv/papi/tvguide/videos/stream/player/
(?P<lang>[^/]+)/(?P<id>[^/]+)[^&]* + ) + ''' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + lang = mobj.group('lang') + json_url = mobj.group('json_url') + return self._extract_from_json_url(json_url, video_id, lang) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4d649fe71..e7ee31877 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -197,6 +197,21 @@ class GenericIE(InfoExtractor): 'description': 'No description', }, }, + # arte embed + { + 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html', + 'md5': '7653032cbb25bf6c80d80f217055fa43', + 'info_dict': { + 'id': '048195-004_PLUS7-F', + 'ext': 'flv', + 'title': 'X:enius', + 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168', + 'upload_date': '20140320', + }, + 'params': { + 'skip_download': 'Requires rtmpdump' + } + }, ] def report_download_webpage(self, video_id): @@ -525,6 +540,13 @@ def _real_extract(self, url): if mobj is not None: return self.url_result(mobj.group('url'), 'TED') + # Look for embedded arte.tv player + mobj = re.search( + r'