[3sat] Add support (Fixes #1001)

This commit is contained in:
Philipp Hagemeister 2013-07-08 01:13:55 +02:00
parent 9d7b44b4cc
commit 73e79f2a1b
4 changed files with 96 additions and 1 deletions

View file

@ -11,6 +11,7 @@
from .cspan import CSpanIE from .cspan import CSpanIE
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from .depositfiles import DepositFilesIE from .depositfiles import DepositFilesIE
from .dreisat import DreiSatIE
from .eighttracks import EightTracksIE from .eighttracks import EightTracksIE
from .escapist import EscapistIE from .escapist import EscapistIE
from .facebook import FacebookIE from .facebook import FacebookIE

View file

@ -36,6 +36,8 @@ class InfoExtractor(object):
The following fields are optional: The following fields are optional:
format: The video format, defaults to ext (used for --get-format) format: The video format, defaults to ext (used for --get-format)
thumbnails: A list of dictionaries (with the entries "resolution" and
"url") for the varying thumbnails
thumbnail: Full URL to a video thumbnail image. thumbnail: Full URL to a video thumbnail image.
description: One-line video description. description: One-line video description.
uploader: Full name of the video uploader. uploader: Full name of the video uploader.

View file

@ -0,0 +1,85 @@
# coding: utf-8
import re
import xml.etree.ElementTree
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
unified_strdate,
)
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat.de/mediathek/index.php\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TEST = {
u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983",
u'file': u'36983.webm',
u'md5': u'57c97d0469d71cf874f6815aa2b7c944',
u'info_dict': {
u"title": u"Kaffeeland Schweiz",
u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...",
u"uploader": u"3sat",
u"upload_date": u"20130622"
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
thumbnail_els = details_doc.findall('.//teaserimage')
thumbnails = [{
'width': te.attrib['key'].partition('x')[0],
'height': te.attrib['key'].partition('x')[2],
'url': te.text,
} for te in thumbnail_els]
information_el = details_doc.find('.//information')
video_title = information_el.find('./title').text
video_description = information_el.find('./detail').text
details_el = details_doc.find('.//details')
video_uploader = details_el.find('./channel').text
upload_date = unified_strdate(details_el.find('./airtime').text)
format_els = details_doc.findall('.//formitaet')
formats = [{
'format_id': fe.attrib['basetype'],
'width': int(fe.find('./width').text),
'height': int(fe.find('./height').text),
'url': fe.find('./url').text,
'filesize': int(fe.find('./filesize').text),
'video_bitrate': int(fe.find('./videoBitrate').text),
'3sat_qualityname': fe.find('./quality').text,
} for fe in format_els
if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')]
def _sortkey(format):
qidx = ['low', 'med', 'high', 'veryhigh'].index(format['3sat_qualityname'])
prefer_http = 1 if 'rtmp' in format['url'] else 0
return (qidx, prefer_http, format['video_bitrate'])
formats.sort(key=_sortkey)
info = {
'id': video_id,
'title': video_title,
'formats': formats,
'description': video_description,
'thumbnails': thumbnails,
'thumbnail': thumbnails[-1]['url'],
'uploader': video_uploader,
'upload_date': upload_date,
}
# TODO: Remove when #980 has been merged
info['url'] = formats[-1]['url']
info['ext'] = determine_ext(formats[-1]['url'])
return self.video_result(info)

View file

@ -623,7 +623,7 @@ def unified_strdate(date_str):
date_str = date_str.replace(',',' ') date_str = date_str.replace(',',' ')
# %z (UTC offset) is only supported in python>=3.2 # %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S'] format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
for expression in format_expressions: for expression in format_expressions:
try: try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@ -631,6 +631,13 @@ def unified_strdate(date_str):
pass pass
return upload_date return upload_date
def determine_ext(url):
guess = url.partition(u'?')[0].rpartition(u'.')[2]
if re.match(r'^[A-Za-z0-9]+$', guess):
return guess
else:
return u'unknown_video'
def date_from_str(date_str): def date_from_str(date_str):
""" """
Return a datetime object from a string in the format YYYYMMDD or Return a datetime object from a string in the format YYYYMMDD or