2016-10-02 07:39:18 -04:00
# coding: utf-8
2014-03-01 05:47:52 -05:00
from __future__ import unicode_literals
import re
2015-02-17 15:17:47 -05:00
from . common import InfoExtractor
2014-12-13 06:24:42 -05:00
from . . compat import (
2015-07-17 13:36:11 -04:00
compat_urllib_parse_unquote ,
2014-03-01 10:17:29 -05:00
compat_urllib_parse_urlparse ,
2014-12-13 06:24:42 -05:00
)
from . . utils import (
2014-03-01 10:17:29 -05:00
ExtractorError ,
2015-01-01 09:01:55 -05:00
float_or_none ,
2015-11-21 11:18:17 -05:00
sanitized_Request ,
2017-04-08 08:42:09 -04:00
unescapeHTML ,
2018-03-19 12:28:37 -04:00
update_url_query ,
2016-03-25 16:19:24 -04:00
urlencode_postdata ,
2017-02-14 10:56:39 -05:00
USER_AGENTS ,
2014-03-01 05:47:52 -05:00
)
2015-02-17 15:17:47 -05:00
class CeskaTelevizeIE(InfoExtractor):
    """Extractor for ceskatelevize.cz ``/ivysilani/`` and ``/porady/`` pages.

    A page is resolved to a playlist: the page's ``getPlaylistUrl([...])``
    call supplies a type/id pair, which is posted to the
    ``get-client-playlist`` AJAX endpoint; the JSON document it points to
    lists the individual items together with their stream URLs.
    """

    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494877311053',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 11.9,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _extract_item_formats(self, item, playlist_id):
        """Collect the formats of every stream URL of one playlist item.

        Stream URLs containing ``playerType=flash`` are parsed as HLS
        manifests, all others as DASH manifests. Extraction is non-fatal,
        so a stream that fails to download contributes no formats.
        """
        formats = []
        for format_id, stream_url in item.get('streamUrls', {}).items():
            if 'playerType=flash' in stream_url:
                stream_formats = self._extract_m3u8_formats(
                    stream_url, playlist_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls-%s' % format_id, fatal=False)
            else:
                stream_formats = self._extract_mpd_formats(
                    stream_url, playlist_id,
                    mpd_id='dash-%s' % format_id, fatal=False)
            if 'drmOnly=true' in stream_url:
                for f in stream_formats:
                    f['has_drm'] = True
            # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
            if format_id == 'audioDescription':
                for f in stream_formats:
                    f['source_preference'] = -10
            formats.extend(stream_formats)
        return formats

    def _real_extract(self, url):
        """Build a playlist result for the page at ``url``.

        Raises ExtractorError (expected) when the page or the playlist
        endpoint reports that the content is not available in the
        requester's territory.
        """
        playlist_id = self._match_id(url)
        parsed_url = compat_urllib_parse_urlparse(url)
        webpage = self._download_webpage(url, playlist_id)

        site_name = self._og_search_property('site_name', webpage, fatal=False, default=None)
        playlist_title = self._og_search_title(webpage, default=None)
        if site_name and playlist_title:
            # Drop the " — <site name>" part from the og:title.
            playlist_title = playlist_title.replace(f' — {site_name}', '', 1)
        playlist_description = self._og_search_description(webpage, default=None)
        if playlist_description:
            playlist_description = playlist_description.replace('\xa0', ' ')

        if parsed_url.path.startswith('/porady/'):
            # /porady/ pages only reference the player; fetch the referenced
            # player page and continue with that one.
            refer_url = update_url_query(unescapeHTML(self._search_regex(
                (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
                 r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
                webpage, 'iframe player url', group='url')), query={'autoStart': 'true'})
            webpage = self._download_webpage(refer_url, playlist_id)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

        type_ = None
        episode_id = None

        playlist = self._parse_json(
            self._search_regex(
                r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
                default='{}'), playlist_id)
        if playlist:
            type_ = playlist.get('type')
            episode_id = playlist.get('id')

        # Fall back to stricter (fatal) regexes when the JSON object was not
        # found or lacked one of the two fields.
        if not type_:
            type_ = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
                webpage, 'type')
        if not episode_id:
            episode_id = self._html_search_regex(
                r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
                webpage, 'episode_id')

        data = {
            'playlist[0][type]': type_,
            'playlist[0][id]': episode_id,
            'requestUrl': parsed_url.path,
            'requestSource': 'iVysilani',
        }

        entries = []

        # The playlist is requested twice: once with the default User-Agent
        # and once with a Safari one. Formats found on the second pass are
        # merged into the entries created by the first pass.
        # NOTE(review): presumably the two user agents are served different
        # stream types - confirm against the linked upstream issue.
        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            req.add_header('x-addr', '127.0.0.1')
            req.add_header('X-Requested-With', 'XMLHttpRequest')
            if user_agent:
                req.add_header('User-Agent', user_agent)
            req.add_header('Referer', url)

            playlistpage = self._download_json(req, playlist_id, fatal=False)
            if not playlistpage:
                continue

            playlist_url = playlistpage['url']
            if playlist_url == 'error_region':
                raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
            req.add_header('Referer', url)

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:
                continue

            playlist = playlist.get('playlist')
            if not isinstance(playlist, list):
                continue

            playlist_len = len(playlist)

            for num, item in enumerate(playlist):
                is_live = item.get('type') == 'LIVE'
                formats = self._extract_item_formats(item, playlist_id)

                if user_agent and len(entries) == playlist_len:
                    # Second pass over an already populated playlist: only
                    # add the newly found formats to the existing entry.
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = item.get('id') or item['assetId']
                title = item['title']

                duration = float_or_none(item.get('duration'))
                thumbnail = item.get('previewImageUrl')

                subtitles = {}
                if item.get('type') == 'VOD':
                    subs = item.get('subtitles')
                    if subs:
                        subtitles = self.extract_subtitles(episode_id, subs)

                if playlist_len == 1:
                    final_title = playlist_title or title
                    if is_live:
                        final_title = self._live_title(final_title)
                elif playlist_title:
                    final_title = '%s (%s)' % (playlist_title, title)
                else:
                    # The page had no og:title; use the item's own title
                    # rather than formatting the literal "None" into it.
                    final_title = title

                entries.append({
                    'id': item_id,
                    'title': final_title,
                    'description': playlist_description if playlist_len == 1 else None,
                    'thumbnail': thumbnail,
                    'duration': duration,
                    'formats': formats,
                    'subtitles': subtitles,
                    'is_live': is_live,
                })

        for e in entries:
            self._sort_formats(e['formats'])

        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
        """Download the first subtitle track in ``subs`` and return it as SRT.

        Only ``subs[0]['url']`` is used; the result is always filed under
        the ``cs`` language key.
        """
        original_subtitles = self._download_webpage(
            subs[0]['url'], episode_id, 'Downloading subtitles')
        srt_subs = self._fix_subtitles(original_subtitles)
        return {
            'cs': [{
                'ext': 'srt',
                'data': srt_subs,
            }]
        }

    @staticmethod
    def _fix_subtitles(subtitles):
        """Convert millisecond-based subtitles to SRT.

        A line of the form ``<index>; <start_ms> <stop_ms>`` becomes two
        lines: the index and ``HH:MM:SS,mmm --> HH:MM:SS,mmm``. Every other
        line is passed through unchanged. The resulting lines are joined
        with CRLF and returned as a single string.
        """
        # Compiled once per call instead of being looked up for every line.
        cue_header_re = re.compile(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$')

        def _msectotimecode(msec):
            """Helper utility to convert milliseconds to timecode."""
            rest, millis = divmod(msec, 1000)
            rest, seconds = divmod(rest, 60)
            hours, minutes = divmod(rest, 60)
            # The hours field wraps at 100, as in the original divider list.
            hours %= 100
            return '{0:02}:{1:02}:{2:02},{3:03}'.format(hours, minutes, seconds, millis)

        def _fix_subtitle(subtitle):
            for line in subtitle.splitlines():
                m = cue_header_re.match(line)
                if m:
                    yield m.group(1)
                    start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
                    yield '{0} --> {1}'.format(start, stop)
                else:
                    yield line

        return '\r\n'.join(_fix_subtitle(subtitles))