From a47b602b0877dcde1b795bf53bfe3629c6595870 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Thu, 23 Jul 2015 01:01:04 +0600
Subject: [PATCH] [tagesschau] Add support for audio
---
youtube_dl/extractor/tagesschau.py | 37 +++++++++++++++++++-----------
1 file changed, 24 insertions(+), 13 deletions(-)
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py
index b84892364..7fd0ba987 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dl/extractor/tagesschau.py
@@ -30,6 +30,16 @@ class TagesschauIE(InfoExtractor):
'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
'thumbnail': 're:^http:.*\.jpg$',
},
+ }, {
+ 'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
+ 'md5': 'aef45de271c4bf0a5db834aa40bf774c',
+ 'info_dict': {
+ 'id': '18407',
+ 'ext': 'mp3',
+ 'title': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
+ 'description': 'Flüchtlingsdebatte: Hitzig, aber wenig hilfreich',
+ 'thumbnail': 're:^https?:.*\.jpg$',
+ },
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
'only_matching': True,
@@ -51,9 +61,6 @@ class TagesschauIE(InfoExtractor):
}, {
'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
'only_matching': True,
- }, {
- 'url': 'http://www.tagesschau.de/multimedia/politikimradio/audio-18407.html',
- 'only_matching': True,
}]
_FORMATS = {
@@ -73,19 +80,26 @@ def _real_extract(self, url):
playerpage = self._download_webpage(
player_url, display_id, 'Downloading player page')
- medias = re.findall(
- r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"',
- playerpage)
formats = []
- for url, ext, res in medias:
+ for media in re.finditer(
+ r'''(?x)
+ (?P<q_url>["\'])(?P<url>http://media.+?)(?P=q_url)
+ ,\s*type:(?P<q_type>["\'])(?P<type>video|audio)/(?P<ext>.+?)(?P=q_type)
+ (?:,\s*quality:(?P<q_quality>["\'])(?P<quality>.+?)(?P=q_quality))?
+ ''', playerpage):
+ url = media.group('url')
+ type_ = media.group('type')
+ ext = media.group('ext')
+ res = media.group('quality')
f = {
- 'format_id': res + '_' + ext,
+ 'format_id': '%s_%s' % (res, ext) if res else ext,
'url': url,
'ext': ext,
+ 'vcodec': 'none' if type_ == 'audio' else None,
}
f.update(self._FORMATS.get(res, {}))
formats.append(f)
- thumbnail_fn = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1]
+ thumbnail = self._og_search_thumbnail(playerpage)
title = self._og_search_title(webpage).strip()
description = self._og_search_description(webpage).strip()
else:
@@ -123,9 +137,7 @@ def _real_extract(self, url):
'filesize_approx': parse_filesize(m.group('filesize_approx')),
})
formats.append(format)
- thumbnail_fn = self._search_regex(
- r'(?s)(.*?)
',
webpage, 'description', default=None)
@@ -133,7 +145,6 @@ def _real_extract(self, url):
r'(.*?)', webpage, 'title')
self._sort_formats(formats)
- thumbnail = 'http://www.tagesschau.de' + thumbnail_fn
return {
'id': display_id,