2022-11-05 06:18:15 -04:00
import base64
2021-06-05 17:51:52 -04:00
import itertools
2018-10-26 14:40:44 -04:00
import re
2021-01-01 07:26:37 -05:00
from . common import InfoExtractor
2022-04-20 15:05:57 -04:00
from . . dependencies import websockets
2021-01-01 07:26:37 -05:00
from . . utils import (
clean_html ,
2022-01-13 06:47:33 -05:00
ExtractorError ,
2021-01-01 07:26:37 -05:00
float_or_none ,
get_element_by_class ,
get_element_by_id ,
parse_duration ,
2021-06-21 13:23:55 -04:00
qualities ,
2021-01-01 07:26:37 -05:00
str_to_int ,
2022-01-11 10:07:51 -05:00
traverse_obj ,
2021-06-05 17:51:52 -04:00
try_get ,
2021-01-01 07:26:37 -05:00
unified_timestamp ,
urlencode_postdata ,
2021-06-05 17:51:52 -04:00
urljoin ,
2021-01-01 07:26:37 -05:00
)
2018-10-26 14:40:44 -04:00
2018-11-02 13:27:36 -04:00
class TwitCastingIE(InfoExtractor):
    """Extractor for a single TwitCasting movie page.

    Handles ``/<uploader_id>/movie/<id>`` and ``/<uploader_id>/twplayer/<id>``
    URLs. Depending on the page this yields a live broadcast, an archive backed
    by one HLS manifest, or a ``multi_video`` result when the archive is split
    across several manifests.
    """

    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/(?:movie|twplayer)/(?P<id>\d+)'
    # Sent with every HLS manifest request issued by this extractor
    _M3U8_HEADERS = {
        'Origin': 'https://twitcasting.tv',
        'Referer': 'https://twitcasting.tv/',
    }
    _TESTS = [{
        'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
        'md5': '745243cad58c4681dc752490f7540d7f',
        'info_dict': {
            'id': '2357609',
            'ext': 'mp4',
            'title': 'Live #2357609',
            'uploader_id': 'ivetesangalo',
            'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20110822',
            'timestamp': 1314010824,
            'duration': 32,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://twitcasting.tv/mttbernardini/movie/3689740',
        'info_dict': {
            'id': '3689740',
            'ext': 'mp4',
            'title': 'Live playing something #3689740',
            'uploader_id': 'mttbernardini',
            'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20120212',
            'timestamp': 1329028024,
            'duration': 681,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
            'videopassword': 'abc',
        },
    }, {
        'note': 'archive is split in 2 parts',
        'url': 'https://twitcasting.tv/loft_heaven/movie/685979292',
        'info_dict': {
            'id': '685979292',
            'ext': 'mp4',
            'title': '南波一海のhear_here “ナタリー望月哲さんに聞く編集と「渋谷系狂騒曲」”',
            'duration': 6964.599334,
        },
        'playlist_mincount': 2,
    }]

    def _parse_data_movie_playlist(self, dmp, video_id):
        """Decode the value of a page's ``data-movie-playlist`` attribute.

        The value is first tried as plain JSON. If that raises
        ``ExtractorError``, the string is reversed, base64-decoded and the
        result is parsed as JSON instead.

        @param dmp       raw attribute value taken from the webpage
        @param video_id  ID passed through to ``_parse_json``
        @returns         the parsed JSON document
        """
        try:
            return self._parse_json(dmp, video_id)
        except ExtractorError:
            # Not plain JSON; fall through to the reversed-base64 form
            pass
        return self._parse_json(base64.b64decode(dmp[::-1]), video_id)

    def _real_extract(self, url):
        """Build the info dict for the movie at *url*.

        Raises ``ExtractorError`` when the page still shows the password form
        or when no m3u8 URL can be found; live pages without ``llfmp4`` stream
        data trigger ``raise_login_required``.
        """
        uploader_id, video_id = self._match_valid_url(url).groups()

        # A form body is only POSTed when a video password was supplied
        video_password = self.get_param('videopassword')
        request_data = (
            urlencode_postdata({'password': video_password}, encoding='utf-8')
            if video_password else None)

        webpage, urlh = self._download_webpage_handle(
            url, video_id, data=request_data,
            headers={'Origin': 'https://twitcasting.tv'})
        final_url = urlh.geturl()
        if final_url != url and request_data:
            # The request ended up at another URL; send the password there too
            webpage = self._download_webpage(
                final_url, video_id, data=request_data,
                headers={'Origin': 'https://twitcasting.tv'},
                note='Retrying authentication')
        # has to check here as the first request can contain password input form even if the password is correct
        if re.search(r'<form\s+method="POST">\s*<input\s+[^>]+?name="password"', webpage):
            raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)

        title = clean_html(get_element_by_id('movietitle', webpage))
        if not title:
            title = self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True)

        def read_playlist(page):
            # Entry '2' of the decoded playlist is the one used below
            raw_playlist = self._search_regex(
                r'data-movie-playlist=\'([^\']+?)\'',
                page, 'movie playlist', default=None)
            return self._parse_data_movie_playlist(raw_playlist, video_id)['2']

        video_js_data = try_get(webpage, read_playlist, list)

        thumbnail = traverse_obj(video_js_data, (0, 'thumbnailUrl'))
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage)

        description = clean_html(get_element_by_id('authorcomment', webpage))
        if not description:
            description = self._html_search_meta(
                ['description', 'og:description', 'twitter:description'], webpage)

        # Per-part durations are summed and divided by 1000; the player widget is the fallback
        duration = try_get(
            video_js_data,
            lambda parts: sum(float_or_none(part.get('duration')) for part in parts) / 1000)
        if not duration:
            duration = parse_duration(clean_html(
                get_element_by_class('tw-player-duration-time', webpage)))

        raw_view_count = self._search_regex(
            (r'Total\s*:\s*([\d,]+)\s*Views', r'総視聴者\s*:\s*([\d,]+)\s*</'),
            webpage, 'views', default=None)
        view_count = str_to_int(raw_view_count)

        raw_datetime = self._search_regex(
            r'data-toggle="true"[^>]+datetime="([^"]+)"',
            webpage, 'datetime', default=None)
        timestamp = unified_timestamp(raw_datetime)

        stream_server_data = self._download_json(
            f'https://twitcasting.tv/streamserver.php?target={uploader_id}&mode=client',
            video_id, 'Downloading live info', fatal=False)

        is_live = 'data-status="online"' in webpage
        if is_live and not traverse_obj(stream_server_data, 'llfmp4'):
            self.raise_login_required(method='cookies')

        # Metadata shared by the top-level result and by every multi_video entry
        base_dict = {
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'is_live': is_live,
        }

        def find_dmu(page):
            movie_url = self._search_regex(
                r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
                page, 'm3u8 url', group='url', default=None)
            if movie_url:
                return [movie_url]

        # Candidate sources in order: data-movie-url, playlist entries, live metastream
        m3u8_urls = try_get(webpage, find_dmu, list)
        if not m3u8_urls:
            m3u8_urls = traverse_obj(video_js_data, (..., 'source', 'url'))
        if not m3u8_urls:
            m3u8_urls = [f'https://twitcasting.tv/{uploader_id}/metastream.m3u8'] if is_live else None
        if not m3u8_urls:
            raise ExtractorError('Failed to get m3u8 playlist')

        if is_live:
            live_url = m3u8_urls[0]
            formats = self._extract_m3u8_formats(
                live_url, video_id, ext='mp4', m3u8_id='hls',
                live=True, headers=self._M3U8_HEADERS)

            if traverse_obj(stream_server_data, ('hls', 'source')):
                formats.extend(self._extract_m3u8_formats(
                    live_url, video_id, ext='mp4', m3u8_id='source',
                    live=True, query={'mode': 'source'},
                    note='Downloading source quality m3u8',
                    headers=self._M3U8_HEADERS, fatal=False))

            # Websocket formats are only offered when the optional dependency is present
            if websockets:
                rank = qualities(['base', 'mobilesource', 'main'])
                ws_streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {}
                formats.extend({
                    'url': ws_url,
                    'format_id': f'ws-{mode}',
                    'ext': 'mp4',
                    'quality': rank(mode),
                    'source_preference': -10,
                    # TwitCasting simply sends moof atom directly over WS
                    'protocol': 'websocket_frag',
                } for mode, ws_url in ws_streams.items())

            infodict = {
                'formats': formats,
                '_format_sort_fields': ('source', ),
            }
        elif len(m3u8_urls) == 1:
            infodict = {
                # No problem here since there's only one manifest
                'formats': self._extract_m3u8_formats(
                    m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS),
                'http_headers': self._M3U8_HEADERS,
            }
        else:
            parts = []
            for part_num, part_url in enumerate(m3u8_urls):
                parts.append({
                    'id': f'{video_id}-{part_num}',
                    'url': part_url,
                    'ext': 'mp4',
                    # Requesting the manifests here will cause download to fail.
                    # So use ffmpeg instead. See: https://github.com/yt-dlp/yt-dlp/issues/382
                    'protocol': 'm3u8',
                    'http_headers': self._M3U8_HEADERS,
                    **base_dict,
                })
            infodict = {
                '_type': 'multi_video',
                'entries': parts,
            }

        return {
            'id': video_id,
            **base_dict,
            **infodict,
        }
2021-06-05 17:51:52 -04:00
class TwitCastingLiveIE(InfoExtractor):
    """Resolve a TwitCasting user page to the movie URL of the running broadcast.

    The user page is searched for the ID of the current live movie; if none is
    found there, the user's ``/show/`` history page is checked for an entry
    carrying the live badge. The result is a URL result pointing at
    ``https://twitcasting.tv/<uploader_id>/movie/<id>``.
    """

    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://twitcasting.tv/ivetesangalo',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a URL result for the user's current live movie.

        Raises ``ExtractorError`` (flagged as expected) when no running
        broadcast can be located.
        """
        uploader_id = self._match_id(url)
        self.to_screen(
            'Downloading live video of user {0}. '
            'Pass "https://twitcasting.tv/{0}/show" to download the history'.format(uploader_id))

        webpage = self._download_webpage(url, uploader_id)
        current_live = self._search_regex(
            (r'data-type="movie" data-id="(\d+)">',
             r'tw-sound-flag-open-link" data-id="(\d+)" style=',),
            webpage, 'current live ID', default=None)
        if not current_live:
            # fetch unfiltered /show to find running livestreams; we can't get ID of the password-protected livestream above
            webpage = self._download_webpage(
                f'https://twitcasting.tv/{uploader_id}/show/', uploader_id,
                note='Downloading live history')
            is_live = self._search_regex(
                r'(?s)(<span\s*class="tw-movie-thumbnail-badge"\s*data-status="live">\s*LIVE)',
                webpage, 'is live?', default=None)
            if is_live:
                # get the first live; running live is always at the first
                current_live = self._search_regex(
                    r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>',
                    webpage, 'current live ID 2', default=None, group='video_id')
        if not current_live:
            # A user being offline is an ordinary condition, not an extractor
            # failure, so mark the error as expected.
            raise ExtractorError('The user is not currently live', expected=True)
        return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))
class TwitCastingUserIE(InfoExtractor):
    """Playlist extractor for a TwitCasting user's ``/show`` history pages."""

    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<id>[^/]+)/show/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://twitcasting.tv/noriyukicas/show',
        'only_matching': True,
    }]

    def _entries(self, uploader_id):
        """Yield a URL result for every movie link found on the history pages.

        Pages are requested with ``filter=watchable`` and followed through the
        ``/<uploader_id>/show/<page_num>-<n>`` link until no such link is found.
        """
        history_url = f'https://twitcasting.tv/{uploader_id}/show'
        page_url = history_url
        page_num = 0
        while page_url:
            page_num += 1
            webpage = self._download_webpage(
                page_url, uploader_id,
                query={'filter': 'watchable'},
                note=f'Downloading page {page_num}')

            for mobj in re.finditer(
                    r'''(?isx)<a\s+class="tw-movie-thumbnail"\s*href="(?P<url>/[^/]+/movie/\d+)"\s*>.+?</a>''',
                    webpage):
                yield self.url_result(urljoin(history_url, mobj.group('url')))

            # The link to the following page embeds the current page number
            next_path = self._search_regex(
                r'<a href="(/%s/show/%d-\d+)[?"]' % (re.escape(uploader_id), page_num),
                webpage, 'next url', default=None)
            # urljoin gives a falsy result when no link was found, ending the loop
            page_url = urljoin(history_url, next_path)

    def _real_extract(self, url):
        """Return a playlist of the user's history, titled '<uploader_id> - Live History'."""
        uploader_id = self._match_id(url)
        entries = self._entries(uploader_id)
        playlist_title = f'{uploader_id} - Live History'
        return self.playlist_result(entries, uploader_id, playlist_title)