2014-01-27 12:40:10 -05:00
# coding: utf-8
from __future__ import unicode_literals
2013-06-23 14:24:07 -04:00
import re
from . common import InfoExtractor
from . . utils import (
2014-01-27 12:40:10 -05:00
determine_ext ,
2013-06-23 14:24:07 -04:00
ExtractorError ,
)
2014-01-27 12:40:10 -05:00
2013-06-23 14:24:07 -04:00
class ARDIE(InfoExtractor):
    """Extractor for video pages on ardmediathek.de and mediathek.daserste.de.

    The page itself supplies title, description and thumbnail; the stream
    URLs come from a separate JSON document fetched per video id.
    """

    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'

    _TEST = {
        'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
        'file': '19288786.mp4',
        'md5': '515bf47ce209fb3f5a61b7aad364634c',
        'info_dict': {
            'title': 'Edward Snowden im Interview - Held oder Verräter?',
            'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
            'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
        },
        'skip': 'Blocked outside of Germany',
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, description, formats, thumbnail) for url.

        Raises ExtractorError when the media document lists no streams and
        the page contains an "fsk" marker (presumably the age-rating
        restriction; the message reports availability only after 20:00).
        """
        # Determine the video id from the url: the numeric documentId query
        # parameter wins, otherwise use the last path segment.
        numid = re.search(r'documentId=([0-9]+)', url)
        if numid:
            video_id = numid.group(1)
        else:
            video_id = re.match(self._VALID_URL, url).group('video_id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<meta name="dcterms.title" content="(.*?)"/>',
             r'<h4 class="headline">(.*?)</h4>'],
            webpage, 'title')
        description = self._html_search_meta(
            'dcterms.abstract', webpage, 'description')
        thumbnail = self._og_search_thumbnail(webpage)

        media_info = self._download_json(
            'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
        # The second element of the _mediaArray contains the standard http urls
        streams = media_info['_mediaArray'][1]['_mediaStreamArray']
        if not streams:
            if '"fsk"' in webpage:
                raise ExtractorError('This video is only available after 20:00')

        formats = []
        for stream in streams:
            stream_urls = stream['_stream']
            if isinstance(stream_urls, list):
                # Several URLs share one declared quality. Walk them in
                # reverse and add the position to the base quality so each
                # URL gets its own rank. enumerate() (rather than
                # list.index()) keeps the ranks distinct even when the same
                # URL is listed twice.
                for offset, stream_url in enumerate(stream_urls[::-1]):
                    quality = stream['_quality'] + offset
                    formats.append({
                        'quality': quality,
                        'url': stream_url,
                        'format_id': '%s-%s' % (
                            determine_ext(stream_url), quality),
                    })
                continue

            # Single URL: use the declared quality unchanged.
            formats.append({
                'quality': stream['_quality'],
                'url': stream_urls,
                'format_id': '%s-%s' % (
                    determine_ext(stream_urls), stream['_quality']),
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
        }