2015-05-16 17:32:53 -04:00
import re
2015-05-16 17:01:52 -04:00
from . common import InfoExtractor
2023-07-09 03:53:02 -04:00
from . . networking . exceptions import HTTPError
2015-05-16 17:01:52 -04:00
from . . utils import (
2019-11-30 09:26:12 -05:00
ExtractorError ,
2024-05-26 15:27:21 -04:00
determine_ext ,
2015-05-16 17:01:52 -04:00
float_or_none ,
2024-05-26 15:27:21 -04:00
int_or_none ,
2016-07-29 10:43:17 -04:00
js_to_json ,
2015-05-16 17:01:52 -04:00
parse_iso8601 ,
2015-05-16 17:32:53 -04:00
remove_end ,
2019-11-30 09:26:12 -05:00
strip_or_none ,
2019-10-29 15:21:52 -04:00
try_get ,
2015-05-16 17:01:52 -04:00
)
class TV2IE(InfoExtractor):
    """Extractor for individual videos on tv2.no.

    Metadata comes from the ``sumo.tv2.no`` asset endpoint; playback data is
    requested from ``api.sumo.tv2.no`` once per entry in ``_PROTOCOLS``.
    """

    _VALID_URL = r'https?://(?:www\.)?tv2\.no/v(?:ideo)?\d*/(?:[^?#]+/)*(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.tv2.no/v/1791207/',
        'info_dict': {
            'id': '1791207',
            'ext': 'mp4',
            'title': 'Her kolliderer romsonden med asteroiden ',
            'description': 'En romsonde har krasjet inn i en asteroide i verdensrommet. Kollisjonen skjedde klokken 01:14 natt til tirsdag 27. september norsk tid. \n\nNasa kaller det sitt første forsøk på planetforsvar.',
            'timestamp': 1664238190,
            'upload_date': '20220927',
            'duration': 146,
            'thumbnail': r're:^https://.*$',
            'view_count': int,
            'categories': list,
        },
    }, {
        'url': 'http://www.tv2.no/v2/916509',
        'only_matching': True,
    }, {
        'url': 'https://www.tv2.no/video/nyhetene/her-kolliderer-romsonden-med-asteroiden/1791207/',
        'only_matching': True,
    }]
    # Each value is sent as the ``stream`` query parameter of the play API.
    _PROTOCOLS = ('HLS', 'DASH')
    _GEO_COUNTRIES = ['NO']

    def _real_extract(self, url):
        """Return the info dict for the video identified by *url*.

        Raises:
            ExtractorError: when the play API answers HTTP 401. The error
                codes ``ASSET_PLAYBACK_INVALID_GEO_LOCATION`` and
                ``SESSION_NOT_AUTHENTICATED`` are routed to the geo-restriction
                and login-required helpers first; otherwise the API's own
                description is used as the message. Any other failure of the
                play request is re-raised unchanged.
        """
        video_id = self._match_id(url)
        asset = self._download_json(
            'https://sumo.tv2.no/rest/assets/' + video_id, video_id,
            'Downloading metadata JSON')
        title = asset['title']
        # Only a literal JSON ``true`` counts as live.
        is_live = asset.get('live') is True

        formats = []
        # URLs already handled, so the same stream offered under both
        # protocols is only processed once.
        format_urls = []
        # Bound up front: the return value below reads it, and the stream loop
        # may never run (no streams for any protocol).
        video_url = None
        for protocol in self._PROTOCOLS:
            try:
                data = self._download_json(
                    f'https://api.sumo.tv2.no/play/{video_id}?stream={protocol}',
                    video_id, 'Downloading playback JSON',
                    headers={'content-type': 'application/json'},
                    data=b'{"device":{"id":"1-1-1","name":"Nettleser (HTML)"}}')['playback']
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    error = self._parse_json(e.cause.response.read().decode(), video_id)['error']
                    error_code = error.get('code')
                    if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
                        self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                    elif error_code == 'SESSION_NOT_AUTHENTICATED':
                        self.raise_login_required()
                    raise ExtractorError(error['description'])
                raise
            # ``or []`` also covers an explicit null for "streams".
            for item in data.get('streams') or []:
                video_url = item.get('url')
                if not video_url or video_url in format_urls:
                    continue
                format_id = '{}-{}'.format(protocol.lower(), item.get('type'))
                if not self._is_valid_url(video_url, video_id, format_id):
                    continue
                format_urls.append(video_url)
                ext = determine_ext(video_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        video_url, video_id, f4m_id=format_id, fatal=False))
                elif ext == 'm3u8':
                    # DRM-protected HLS manifests are skipped entirely.
                    if not data.get('drmProtected'):
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
                        video_url, video_id, format_id, fatal=False))
                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
                    # Smooth Streaming manifests are deliberately ignored.
                    pass
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': format_id,
                    })

        # ``data`` is the playback payload of the last protocol requested.
        if not formats and data.get('drmProtected'):
            self.report_drm(video_id)

        thumbnails = [{
            'id': thumb_type,
            'url': thumb_url,
        } for thumb_type, thumb_url in (asset.get('images') or {}).items()]

        return {
            'id': video_id,
            # Last stream URL seen, or None when there was none.
            # NOTE(review): presumably redundant next to 'formats'; kept so the
            # shape of the returned dict does not change.
            'url': video_url,
            'title': title,
            'description': strip_or_none(asset.get('description')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(asset.get('live_broadcast_time') or asset.get('update_time')),
            'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
            'view_count': int_or_none(asset.get('views')),
            # ``or ''`` keeps a null "tags" value from breaking ``split``.
            'categories': (asset.get('tags') or '').split(','),
            'formats': formats,
            'is_live': is_live,
        }
2015-05-16 17:32:53 -04:00
class TV2ArticleIE(InfoExtractor):
    """Extractor for tv2.no article pages, returned as a playlist of embedded videos."""

    _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?!v(?:ideo)?\d*/)[^?#]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.tv2.no/underholdning/forraeder/katarina-flatland-angrer-etter-forraeder-exit/15095188/',
        'info_dict': {
            'id': '15095188',
            'title': 'Katarina Flatland angrer etter Forræder-exit',
            'description': 'SANDEFJORD (TV 2): Katarina Flatland (33) måtte følge i sine fars fotspor, da hun ble forvist fra Forræder.',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://www.tv2.no/a/6930542',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect the asset ids embedded in the article and wrap them in a playlist."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Old embed pattern (looks unused nowadays)
        asset_ids = re.findall(r'data-assetid=["\'](\d+)', webpage)
        if not asset_ids:
            # New embed pattern: a JS object literal passed to the player constructor
            embed_configs = re.findall(
                r'(?s)(?:TV2ContentboxVideo|TV2\.TV2Video)\(({.+?})\)', webpage)
            for raw_config in embed_configs:
                config = self._parse_json(
                    raw_config, playlist_id, transform_source=js_to_json, fatal=False)
                # Unparsable or empty configs are skipped, as are ones without an id.
                found_id = config.get('assetId') if config else None
                if found_id:
                    asset_ids.append(found_id)

        entries = []
        for asset_id in asset_ids:
            entries.append(self.url_result(f'http://www.tv2.no/v/{asset_id}', 'TV2'))

        # Drop the site suffix from the Open Graph title and description.
        site_suffix = ' - TV2.no'
        title = remove_end(self._og_search_title(webpage), site_suffix)
        description = remove_end(self._og_search_description(webpage), site_suffix)

        return self.playlist_result(entries, playlist_id, title, description)
2019-11-30 09:26:12 -05:00
2021-08-23 12:02:33 -04:00
class KatsomoIE(InfoExtractor):
    """Extractor for katsomo.fi / mtv.fi / mtvuutiset.fi videos (marked as not working).

    Asset metadata and playback data are both read from the JSON API on
    ``_API_DOMAIN``; playback is requested once per entry in ``_PROTOCOLS``.
    """

    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
        'info_dict': {
            'id': '1181321',
            'ext': 'mp4',
            'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
            'description': 'Päätöksen teki Pelicansin hallitus.',
            'timestamp': 1575116484,
            'upload_date': '20191130',
            'duration': 37.12,
            'view_count': int,
            'categories': list,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
        'only_matching': True,
    }, {
        'url': 'https://www.mtvuutiset.fi/video/prog1311159',
        'only_matching': True,
    }, {
        'url': 'https://www.katsomo.fi/#!/jakso/1311159',
        'only_matching': True,
    }]
    _API_DOMAIN = 'api.katsomo.fi'
    # Each value is sent as the ``protocol`` query parameter of play.json.
    _PROTOCOLS = ('HLS', 'MPD')
    _GEO_COUNTRIES = ['FI']

    def _real_extract(self, url):
        """Return the info dict for the video identified by *url*.

        Raises:
            ExtractorError: when the play request answers HTTP 401. The error
                codes ``ASSET_PLAYBACK_INVALID_GEO_LOCATION`` and
                ``SESSION_NOT_AUTHENTICATED`` are routed to the geo-restriction
                and login-required helpers first; otherwise the API's own
                description is used as the message. Any other failure of the
                play request is re-raised unchanged.
        """
        video_id = self._match_id(url)
        api_base = f'http://{self._API_DOMAIN}/api/web/asset/{video_id}'

        asset = self._download_json(
            api_base + '.json', video_id,
            'Downloading metadata JSON')['asset']

        # The subtitle, when present, is preferred over the title.
        title = asset.get('subtitle') or asset['title']
        # Only a literal JSON ``true`` counts as live.
        is_live = asset.get('live') is True

        formats = []
        # URLs already handled, so a stream listed under both protocols is
        # only processed once.
        format_urls = []
        # Bound up front: the return value below reads it, and the item loop
        # may never run (every protocol can hit the ``continue`` below).
        video_url = None
        for protocol in self._PROTOCOLS:
            try:
                data = self._download_json(
                    api_base + f'/play.json?protocol={protocol}&videoFormat=SMIL+ISMUSP',
                    video_id, 'Downloading play JSON')['playback']
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    error = self._parse_json(e.cause.response.read().decode(), video_id)['error']
                    error_code = error.get('code')
                    if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
                        self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                    elif error_code == 'SESSION_NOT_AUTHENTICATED':
                        self.raise_login_required()
                    raise ExtractorError(error['description'])
                raise
            items = try_get(data, lambda x: x['items']['item'])
            if not items:
                continue
            # A lone item is not wrapped in a list; normalise it.
            if not isinstance(items, list):
                items = [items]
            for item in items:
                if not isinstance(item, dict):
                    continue
                video_url = item.get('url')
                if not video_url or video_url in format_urls:
                    continue
                format_id = '{}-{}'.format(protocol.lower(), item.get('mediaFormat'))
                if not self._is_valid_url(video_url, video_id, format_id):
                    continue
                format_urls.append(video_url)
                ext = determine_ext(video_url)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        video_url, video_id, f4m_id=format_id, fatal=False))
                elif ext == 'm3u8':
                    # DRM-protected HLS manifests are skipped entirely.
                    if not data.get('drmProtected'):
                        formats.extend(self._extract_m3u8_formats(
                            video_url, video_id, 'mp4', live=is_live, m3u8_id=format_id, fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
                        video_url, video_id, format_id, fatal=False))
                elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
                    # Smooth Streaming manifests are deliberately ignored.
                    pass
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': format_id,
                        'tbr': int_or_none(item.get('bitrate')),
                        'filesize': int_or_none(item.get('fileSize')),
                    })

        # ``data`` is the playback payload of the last protocol requested.
        if not formats and data.get('drmProtected'):
            self.report_drm(video_id)

        # Only the values of "imageVersions" are used; non-dict entries are
        # skipped instead of raising AttributeError.
        thumbnails = [{
            'id': thumbnail.get('@type'),
            'url': thumbnail.get('url'),
        } for thumbnail in (asset.get('imageVersions') or {}).values()
            if isinstance(thumbnail, dict)]

        return {
            'id': video_id,
            # Last item URL seen, or None when there was none.
            # NOTE(review): presumably redundant next to 'formats'; kept so the
            # shape of the returned dict does not change.
            'url': video_url,
            'title': title,
            'description': strip_or_none(asset.get('description')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(asset.get('createTime')),
            'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
            'view_count': int_or_none(asset.get('views')),
            # ``or ''`` keeps a null "keywords" value from breaking ``split``.
            'categories': (asset.get('keywords') or '').split(','),
            'formats': formats,
            'is_live': is_live,
        }
2021-02-04 02:56:01 -05:00
class MTVUutisetArticleIE(InfoExtractor):
    """Extractor for mtvuutiset.fi articles (marked as not working).

    The article JSON is fetched from ``api.mtvuutiset.fi`` and every listed
    Katsomo or YouTube video is yielded as a playlist entry.
    """

    _WORKING = False
    # ``www.`` is optional, matching the other URL patterns in this module.
    _VALID_URL = r'https?://(?:www\.)?mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
        'info_dict': {
            'id': '1311159',
            'ext': 'mp4',
            'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
            'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
            'timestamp': 1600608966,
            'upload_date': '20200920',
            'duration': 153.7886666,
            'view_count': int,
            'categories': list,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # multiple Youtube embeds
        'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the supported videos listed in the article JSON."""
        article_id = self._match_id(url)
        article = self._download_json(
            'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
            article_id)

        def entries():
            # ``or []`` covers both a missing and a null "videos" field.
            for video in article.get('videos') or []:
                video_type = video.get('videotype')
                video_url = video.get('url')
                # Only Katsomo and YouTube videos with a URL are handed on.
                if not video_url or video_type not in ('katsomo', 'youtube'):
                    continue
                # The capitalised type ('Katsomo' / 'Youtube') is passed as the
                # second argument of url_result -- presumably the extractor key.
                yield self.url_result(
                    video_url, video_type.capitalize(), video.get('video_id'))

        return self.playlist_result(
            entries(), article_id, article.get('title'), article.get('description'))