2022-10-19 12:01:21 -04:00
import json
2015-06-27 15:22:25 -04:00
import re
from . common import InfoExtractor
2022-10-11 01:54:38 -04:00
from . periscope import PeriscopeBaseIE , PeriscopeIE
2019-11-09 03:23:20 -05:00
from . . compat import (
compat_parse_qs ,
compat_urllib_parse_unquote ,
compat_urllib_parse_urlparse ,
)
2015-06-27 15:22:25 -04:00
from . . utils import (
2017-06-03 12:41:55 -04:00
ExtractorError ,
2022-10-11 01:54:38 -04:00
dict_get ,
2023-07-06 15:39:51 -04:00
filter_dict ,
2015-06-27 15:22:25 -04:00
float_or_none ,
2022-10-11 01:54:38 -04:00
format_field ,
2015-11-11 13:13:42 -05:00
int_or_none ,
2022-10-11 01:54:38 -04:00
make_archive_id ,
2023-05-01 19:55:28 -04:00
remove_end ,
2022-10-11 01:54:38 -04:00
str_or_none ,
strip_or_none ,
2022-01-10 08:20:01 -05:00
traverse_obj ,
2022-10-19 12:01:21 -04:00
try_call ,
2017-06-03 12:41:55 -04:00
try_get ,
2019-11-09 03:23:20 -05:00
unified_timestamp ,
update_url_query ,
2021-05-06 12:01:20 -04:00
url_or_none ,
2017-06-03 12:41:55 -04:00
xpath_text ,
2015-06-27 15:22:25 -04:00
)
2016-02-21 03:41:24 -05:00
class TwitterBaseIE ( InfoExtractor ) :
2023-06-08 14:47:13 -04:00
_NETRC_MACHINE = ' twitter '
2019-11-09 03:23:20 -05:00
_API_BASE = ' https://api.twitter.com/1.1/ '
2022-10-19 12:01:21 -04:00
_GRAPHQL_API_BASE = ' https://twitter.com/i/api/graphql/ '
2022-10-12 00:12:31 -04:00
_BASE_REGEX = r ' https?://(?:(?:www|m(?:obile)?) \ .)?(?:twitter \ .com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid \ .onion)/ '
2023-07-06 15:39:51 -04:00
_AUTH = ' AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs % 3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA '
_LEGACY_AUTH = ' AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4 % 3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE '
2023-06-08 14:47:13 -04:00
_flow_token = None
_LOGIN_INIT_DATA = json . dumps ( {
' input_flow_data ' : {
' flow_context ' : {
' debug_overrides ' : { } ,
' start_location ' : {
' location ' : ' unknown '
}
}
} ,
' subtask_versions ' : {
' action_list ' : 2 ,
' alert_dialog ' : 1 ,
' app_download_cta ' : 1 ,
' check_logged_in_account ' : 1 ,
' choice_selection ' : 3 ,
' contacts_live_sync_permission_prompt ' : 0 ,
' cta ' : 7 ,
' email_verification ' : 2 ,
' end_flow ' : 1 ,
' enter_date ' : 1 ,
' enter_email ' : 2 ,
' enter_password ' : 5 ,
' enter_phone ' : 2 ,
' enter_recaptcha ' : 1 ,
' enter_text ' : 5 ,
' enter_username ' : 2 ,
' generic_urt ' : 3 ,
' in_app_notification ' : 1 ,
' interest_picker ' : 3 ,
' js_instrumentation ' : 1 ,
' menu_dialog ' : 1 ,
' notifications_permission_prompt ' : 2 ,
' open_account ' : 2 ,
' open_home_timeline ' : 1 ,
' open_link ' : 1 ,
' phone_verification ' : 4 ,
' privacy_options ' : 1 ,
' security_key ' : 3 ,
' select_avatar ' : 4 ,
' select_banner ' : 2 ,
' settings_list ' : 7 ,
' show_code ' : 1 ,
' sign_up ' : 2 ,
' sign_up_review ' : 4 ,
' tweet_selection_urt ' : 1 ,
' update_users ' : 1 ,
' upload_media ' : 1 ,
' user_recommendations_list ' : 4 ,
' user_recommendations_urt ' : 1 ,
' wait_spinner ' : 3 ,
' web_modal ' : 1
}
} , separators = ( ' , ' , ' : ' ) ) . encode ( )
2019-11-09 03:23:20 -05:00
def _extract_variant_formats ( self , variant , video_id ) :
variant_url = variant . get ( ' url ' )
if not variant_url :
2021-04-15 08:12:59 -04:00
return [ ] , { }
2019-11-09 03:23:20 -05:00
elif ' .m3u8 ' in variant_url :
2021-04-15 08:12:59 -04:00
return self . _extract_m3u8_formats_and_subtitles (
2019-11-09 03:23:20 -05:00
variant_url , video_id , ' mp4 ' , ' m3u8_native ' ,
m3u8_id = ' hls ' , fatal = False )
else :
tbr = int_or_none ( dict_get ( variant , ( ' bitrate ' , ' bit_rate ' ) ) , 1000 ) or None
f = {
' url ' : variant_url ,
' format_id ' : ' http ' + ( ' - %d ' % tbr if tbr else ' ' ) ,
' tbr ' : tbr ,
}
self . _search_dimensions_in_video_url ( f , variant_url )
2021-04-15 08:12:59 -04:00
return [ f ] , { }
2019-11-09 03:23:20 -05:00
2017-07-11 03:48:34 -04:00
def _extract_formats_from_vmap_url ( self , vmap_url , video_id ) :
2021-05-06 12:01:20 -04:00
vmap_url = url_or_none ( vmap_url )
if not vmap_url :
2022-01-10 08:20:01 -05:00
return [ ] , { }
2016-02-21 03:41:24 -05:00
vmap_data = self . _download_xml ( vmap_url , video_id )
2019-11-09 03:23:20 -05:00
formats = [ ]
2021-04-15 08:12:59 -04:00
subtitles = { }
2019-11-09 03:23:20 -05:00
urls = [ ]
for video_variant in vmap_data . findall ( ' .// { http://twitter.com/schema/videoVMapV2.xsd}videoVariant ' ) :
video_variant . attrib [ ' url ' ] = compat_urllib_parse_unquote (
video_variant . attrib [ ' url ' ] )
urls . append ( video_variant . attrib [ ' url ' ] )
2021-04-15 08:12:59 -04:00
fmts , subs = self . _extract_variant_formats (
video_variant . attrib , video_id )
formats . extend ( fmts )
subtitles = self . _merge_subtitles ( subtitles , subs )
2019-11-09 03:23:20 -05:00
video_url = strip_or_none ( xpath_text ( vmap_data , ' .//MediaFile ' ) )
if video_url not in urls :
2021-04-15 08:12:59 -04:00
fmts , subs = self . _extract_variant_formats ( { ' url ' : video_url } , video_id )
formats . extend ( fmts )
subtitles = self . _merge_subtitles ( subtitles , subs )
return formats , subtitles
2016-02-21 03:41:24 -05:00
2017-06-03 12:41:55 -04:00
@staticmethod
def _search_dimensions_in_video_url ( a_format , video_url ) :
m = re . search ( r ' /(?P<width> \ d+)x(?P<height> \ d+)/ ' , video_url )
if m :
a_format . update ( {
' width ' : int ( m . group ( ' width ' ) ) ,
' height ' : int ( m . group ( ' height ' ) ) ,
} )
2023-06-08 14:47:13 -04:00
@property
2022-10-19 12:01:21 -04:00
def is_logged_in ( self ) :
return bool ( self . _get_cookies ( self . _API_BASE ) . get ( ' auth_token ' ) )
2023-07-06 15:39:51 -04:00
def _fetch_guest_token ( self , display_id ) :
guest_token = traverse_obj ( self . _download_json (
f ' { self . _API_BASE } guest/activate.json ' , display_id , ' Downloading guest token ' , data = b ' ' ,
headers = self . _set_base_headers ( legacy = display_id and self . _configuration_arg ( ' legacy_api ' ) ) ) ,
( ' guest_token ' , { str } ) )
if not guest_token :
2023-07-05 16:30:23 -04:00
raise ExtractorError ( ' Could not retrieve guest token ' )
2023-07-06 15:39:51 -04:00
return guest_token
2023-07-05 16:30:23 -04:00
2023-07-06 15:39:51 -04:00
def _set_base_headers ( self , legacy = False ) :
bearer_token = self . _LEGACY_AUTH if legacy and not self . is_logged_in else self . _AUTH
return filter_dict ( {
' Authorization ' : f ' Bearer { bearer_token } ' ,
' x-csrf-token ' : try_call ( lambda : self . _get_cookies ( self . _API_BASE ) [ ' ct0 ' ] . value ) ,
} )
2023-06-08 14:47:13 -04:00
def _call_login_api ( self , note , headers , query = { } , data = None ) :
response = self . _download_json (
f ' { self . _API_BASE } onboarding/task.json ' , None , note ,
headers = headers , query = query , data = data , expected_status = 400 )
error = traverse_obj ( response , ( ' errors ' , 0 , ' message ' , { str } ) )
if error :
raise ExtractorError ( f ' Login failed, Twitter API says: { error } ' , expected = True )
elif traverse_obj ( response , ' status ' ) != ' success ' :
raise ExtractorError ( ' Login was unsuccessful ' )
subtask = traverse_obj (
response , ( ' subtasks ' , . . . , ' subtask_id ' , { str } ) , get_all = False )
if not subtask :
raise ExtractorError ( ' Twitter API did not return next login subtask ' )
self . _flow_token = response [ ' flow_token ' ]
2022-10-19 12:01:21 -04:00
2023-06-08 14:47:13 -04:00
return subtask
def _perform_login ( self , username , password ) :
if self . is_logged_in :
return
2023-07-06 15:39:51 -04:00
webpage = self . _download_webpage ( ' https://twitter.com/ ' , None , ' Downloading login page ' )
guest_token = self . _search_regex (
r ' \ .cookie \ s*= \ s*[ " \' ]gt=( \ d+); ' , webpage , ' gt ' , default = None ) or self . _fetch_guest_token ( None )
headers = {
* * self . _set_base_headers ( ) ,
2023-06-08 14:47:13 -04:00
' content-type ' : ' application/json ' ,
2023-07-06 15:39:51 -04:00
' x-guest-token ' : guest_token ,
2023-06-08 14:47:13 -04:00
' x-twitter-client-language ' : ' en ' ,
' x-twitter-active-user ' : ' yes ' ,
' Referer ' : ' https://twitter.com/ ' ,
' Origin ' : ' https://twitter.com ' ,
2023-07-06 15:39:51 -04:00
}
2023-06-08 14:47:13 -04:00
def build_login_json ( * subtask_inputs ) :
return json . dumps ( {
' flow_token ' : self . _flow_token ,
' subtask_inputs ' : subtask_inputs
} , separators = ( ' , ' , ' : ' ) ) . encode ( )
def input_dict ( subtask_id , text ) :
return {
' subtask_id ' : subtask_id ,
' enter_text ' : {
' text ' : text ,
' link ' : ' next_link '
}
}
2022-10-19 12:01:21 -04:00
2023-06-08 14:47:13 -04:00
next_subtask = self . _call_login_api (
' Downloading flow token ' , headers , query = { ' flow_name ' : ' login ' } , data = self . _LOGIN_INIT_DATA )
while not self . is_logged_in :
if next_subtask == ' LoginJsInstrumentationSubtask ' :
next_subtask = self . _call_login_api (
' Submitting JS instrumentation response ' , headers , data = build_login_json ( {
' subtask_id ' : next_subtask ,
' js_instrumentation ' : {
' response ' : ' {} ' ,
' link ' : ' next_link '
}
} ) )
elif next_subtask == ' LoginEnterUserIdentifierSSO ' :
next_subtask = self . _call_login_api (
' Submitting username ' , headers , data = build_login_json ( {
' subtask_id ' : next_subtask ,
' settings_list ' : {
' setting_responses ' : [ {
' key ' : ' user_identifier ' ,
' response_data ' : {
' text_data ' : {
' result ' : username
}
}
} ] ,
' link ' : ' next_link '
}
} ) )
elif next_subtask == ' LoginEnterAlternateIdentifierSubtask ' :
next_subtask = self . _call_login_api (
' Submitting alternate identifier ' , headers ,
data = build_login_json ( input_dict ( next_subtask , self . _get_tfa_info (
' one of username, phone number or email that was not used as --username ' ) ) ) )
elif next_subtask == ' LoginEnterPassword ' :
next_subtask = self . _call_login_api (
' Submitting password ' , headers , data = build_login_json ( {
' subtask_id ' : next_subtask ,
' enter_password ' : {
' password ' : password ,
' link ' : ' next_link '
}
} ) )
elif next_subtask == ' AccountDuplicationCheck ' :
next_subtask = self . _call_login_api (
' Submitting account duplication check ' , headers , data = build_login_json ( {
' subtask_id ' : next_subtask ,
' check_logged_in_account ' : {
' link ' : ' AccountDuplicationCheck_false '
}
} ) )
elif next_subtask == ' LoginTwoFactorAuthChallenge ' :
next_subtask = self . _call_login_api (
' Submitting 2FA token ' , headers , data = build_login_json ( input_dict (
next_subtask , self . _get_tfa_info ( ' two-factor authentication token ' ) ) ) )
elif next_subtask == ' LoginAcid ' :
next_subtask = self . _call_login_api (
' Submitting confirmation code ' , headers , data = build_login_json ( input_dict (
next_subtask , self . _get_tfa_info ( ' confirmation code sent to your email or phone ' ) ) ) )
elif next_subtask == ' LoginSuccessSubtask ' :
raise ExtractorError ( ' Twitter API did not grant auth token cookie ' )
else :
raise ExtractorError ( f ' Unrecognized subtask ID " { next_subtask } " ' )
self . report_login ( )
def _call_api ( self , path , video_id , query = { } , graphql = False ) :
2023-07-06 15:39:51 -04:00
headers = self . _set_base_headers ( legacy = not graphql and self . _configuration_arg ( ' legacy_api ' ) )
headers . update ( {
' x-twitter-auth-type ' : ' OAuth2Session ' ,
' x-twitter-client-language ' : ' en ' ,
' x-twitter-active-user ' : ' yes ' ,
} if self . is_logged_in else {
' x-guest-token ' : self . _fetch_guest_token ( video_id )
} )
allowed_status = { 400 , 401 , 403 , 404 } if graphql else { 403 }
result = self . _download_json (
( self . _GRAPHQL_API_BASE if graphql else self . _API_BASE ) + path ,
video_id , headers = headers , query = query , expected_status = allowed_status ,
note = f ' Downloading { " GraphQL " if graphql else " legacy API " } JSON ' )
2023-07-05 16:30:23 -04:00
2023-07-06 15:39:51 -04:00
if result . get ( ' errors ' ) :
errors = ' , ' . join ( set ( traverse_obj ( result , ( ' errors ' , . . . , ' message ' , { str } ) ) ) )
raise ExtractorError (
f ' Error(s) while querying API: { errors or " Unknown error " } ' , expected = True )
2023-07-05 16:30:23 -04:00
2023-07-06 15:39:51 -04:00
return result
2022-10-19 12:01:21 -04:00
def _build_graphql_query ( self , media_id ) :
raise NotImplementedError ( ' Method must be implemented to support GraphQL ' )
def _call_graphql_api ( self , endpoint , media_id ) :
data = self . _build_graphql_query ( media_id )
query = { key : json . dumps ( value , separators = ( ' , ' , ' : ' ) ) for key , value in data . items ( ) }
return traverse_obj ( self . _call_api ( endpoint , media_id , query = query , graphql = True ) , ' data ' )
2019-11-09 03:23:20 -05:00
class TwitterCardIE ( InfoExtractor ) :
2015-10-18 05:13:58 -04:00
IE_NAME = ' twitter:card '
2019-11-09 03:23:20 -05:00
_VALID_URL = TwitterBaseIE . _BASE_REGEX + r ' i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id> \ d+) '
2015-07-21 17:45:36 -04:00
_TESTS = [
{
' url ' : ' https://twitter.com/i/cards/tfw/v1/560070183650213889 ' ,
2016-02-21 03:57:56 -05:00
# MD5 checksums are different in different places
2015-07-21 17:45:36 -04:00
' info_dict ' : {
2022-10-19 12:01:21 -04:00
' id ' : ' 560070131976392705 ' ,
2015-07-21 17:45:36 -04:00
' ext ' : ' mp4 ' ,
2019-11-09 03:23:20 -05:00
' title ' : " Twitter - You can now shoot, edit and share video on Twitter. Capture life ' s most moving moments from your perspective. " ,
' description ' : ' md5:18d3e24bb4f6e5007487dd546e53bd96 ' ,
' uploader ' : ' Twitter ' ,
' uploader_id ' : ' Twitter ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
2015-07-21 17:45:36 -04:00
' duration ' : 30.033 ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1422366112 ,
' upload_date ' : ' 20150127 ' ,
2022-10-19 12:01:21 -04:00
' age_limit ' : 0 ,
' comment_count ' : int ,
' tags ' : [ ] ,
' repost_count ' : int ,
' like_count ' : int ,
' display_id ' : ' 560070183650213889 ' ,
' uploader_url ' : ' https://twitter.com/Twitter ' ,
2017-07-11 04:46:37 -04:00
} ,
2015-06-27 15:22:25 -04:00
} ,
2015-07-21 17:45:36 -04:00
{
' url ' : ' https://twitter.com/i/cards/tfw/v1/623160978427936768 ' ,
2019-11-09 03:23:20 -05:00
' md5 ' : ' 7137eca597f72b9abbe61e5ae0161399 ' ,
2015-07-21 17:45:36 -04:00
' info_dict ' : {
' id ' : ' 623160978427936768 ' ,
' ext ' : ' mp4 ' ,
2019-11-09 03:23:20 -05:00
' title ' : " NASA - Fly over Pluto ' s icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. " ,
' description ' : " Fly over Pluto ' s icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA " ,
' uploader ' : ' NASA ' ,
' uploader_id ' : ' NASA ' ,
' timestamp ' : 1437408129 ,
' upload_date ' : ' 20150720 ' ,
2022-10-19 12:01:21 -04:00
' uploader_url ' : ' https://twitter.com/NASA ' ,
' age_limit ' : 0 ,
' comment_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' tags ' : [ ' PlutoFlyby ' ] ,
2015-07-21 17:45:36 -04:00
} ,
2022-10-19 12:01:21 -04:00
' params ' : { ' format ' : ' [protocol=https] ' }
2015-10-18 07:07:37 -04:00
} ,
{
' url ' : ' https://twitter.com/i/cards/tfw/v1/654001591733886977 ' ,
2016-09-24 07:57:55 -04:00
' md5 ' : ' b6d9683dd3f48e340ded81c0e917ad46 ' ,
2015-10-18 07:07:37 -04:00
' info_dict ' : {
' id ' : ' dq4Oj5quskI ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Ubuntu 11.10 Overview ' ,
2016-09-24 07:57:55 -04:00
' description ' : ' md5:a831e97fa384863d6e26ce48d1c43376 ' ,
2015-10-18 07:07:37 -04:00
' upload_date ' : ' 20111013 ' ,
2019-11-09 03:23:20 -05:00
' uploader ' : ' OMG! UBUNTU! ' ,
2015-10-18 07:07:37 -04:00
' uploader_id ' : ' omgubuntu ' ,
2022-10-19 12:01:21 -04:00
' channel_url ' : ' https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ ' ,
' channel_id ' : ' UCIiSwcm9xiFb3Y4wjzR41eQ ' ,
' channel_follower_count ' : int ,
' chapters ' : ' count:8 ' ,
' uploader_url ' : ' http://www.youtube.com/user/omgubuntu ' ,
' duration ' : 138 ,
' categories ' : [ ' Film & Animation ' ] ,
' age_limit ' : 0 ,
' comment_count ' : int ,
' availability ' : ' public ' ,
' like_count ' : int ,
' thumbnail ' : ' https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg ' ,
' view_count ' : int ,
' tags ' : ' count:12 ' ,
' channel ' : ' OMG! UBUNTU! ' ,
' playable_in_embed ' : True ,
2015-10-18 07:07:37 -04:00
} ,
2015-11-14 04:03:26 -05:00
' add_ie ' : [ ' Youtube ' ] ,
2015-11-14 04:02:07 -05:00
} ,
{
' url ' : ' https://twitter.com/i/cards/tfw/v1/665289828897005568 ' ,
' info_dict ' : {
' id ' : ' iBb2x00UVlv ' ,
' ext ' : ' mp4 ' ,
' upload_date ' : ' 20151113 ' ,
' uploader_id ' : ' 1189339351084113920 ' ,
2016-02-21 03:57:56 -05:00
' uploader ' : ' ArsenalTerje ' ,
' title ' : ' Vine by ArsenalTerje ' ,
2017-07-11 04:05:15 -04:00
' timestamp ' : 1447451307 ,
2022-10-19 12:01:21 -04:00
' alt_title ' : ' Vine by ArsenalTerje ' ,
' comment_count ' : int ,
' like_count ' : int ,
' thumbnail ' : r ' re:^https?://[^?#]+ \ .jpg ' ,
' view_count ' : int ,
' repost_count ' : int ,
2015-11-14 04:02:07 -05:00
} ,
' add_ie ' : [ ' Vine ' ] ,
2022-10-19 12:01:21 -04:00
' params ' : { ' skip_download ' : ' m3u8 ' } ,
} ,
{
2016-03-03 00:39:04 -05:00
' url ' : ' https://twitter.com/i/videos/tweet/705235433198714880 ' ,
2017-07-11 04:46:37 -04:00
' md5 ' : ' 884812a2adc8aaf6fe52b15ccbfa3b88 ' ,
2016-03-03 00:39:04 -05:00
' info_dict ' : {
' id ' : ' 705235433198714880 ' ,
' ext ' : ' mp4 ' ,
2019-11-09 03:23:20 -05:00
' title ' : " Brent Yarina - Khalil Iverson ' s missed highlight dunk. And made highlight dunk. In one highlight. " ,
' description ' : " Khalil Iverson ' s missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns " ,
' uploader ' : ' Brent Yarina ' ,
' uploader_id ' : ' BTNBrentYarina ' ,
' timestamp ' : 1456976204 ,
' upload_date ' : ' 20160303 ' ,
2016-03-03 00:39:04 -05:00
} ,
2019-11-09 03:23:20 -05:00
' skip ' : ' This content is no longer available. ' ,
2022-10-19 12:01:21 -04:00
} ,
{
2016-11-18 13:49:13 -05:00
' url ' : ' https://twitter.com/i/videos/752274308186120192 ' ,
' only_matching ' : True ,
2016-03-03 00:39:04 -05:00
} ,
2015-07-21 17:45:36 -04:00
]
2015-06-27 15:22:25 -04:00
def _real_extract ( self , url ) :
2019-11-09 03:23:20 -05:00
status_id = self . _match_id ( url )
return self . url_result (
' https://twitter.com/statuses/ ' + status_id ,
TwitterIE . ie_key ( ) , status_id )
2016-03-03 01:27:27 -05:00
2016-03-03 01:42:49 -05:00
2019-11-09 03:23:20 -05:00
class TwitterIE ( TwitterBaseIE ) :
2015-10-18 05:13:58 -04:00
IE_NAME = ' twitter '
2023-02-12 08:43:26 -05:00
_VALID_URL = TwitterBaseIE . _BASE_REGEX + r ' (?:(?:i/web|[^/]+)/status|statuses)/(?P<id> \ d+)(?:/(?:video|photo)/(?P<index> \ d+))? '
2015-07-21 17:38:40 -04:00
2015-11-11 13:13:42 -05:00
_TESTS = [ {
2015-10-18 05:07:48 -04:00
' url ' : ' https://twitter.com/freethenipple/status/643211948184596480 ' ,
2015-07-21 17:38:40 -04:00
' info_dict ' : {
2022-10-11 01:54:38 -04:00
' id ' : ' 643211870443208704 ' ,
' display_id ' : ' 643211948184596480 ' ,
2015-07-21 17:38:40 -04:00
' ext ' : ' mp4 ' ,
2015-10-18 06:04:13 -04:00
' title ' : ' FREE THE NIPPLE - FTN supporters on Hollywood Blvd today! ' ,
2017-01-02 07:08:07 -05:00
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
2019-11-09 03:23:20 -05:00
' description ' : ' FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ ' ,
2015-10-18 05:07:48 -04:00
' uploader ' : ' FREE THE NIPPLE ' ,
' uploader_id ' : ' freethenipple ' ,
2017-09-19 16:58:06 -04:00
' duration ' : 12.922 ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1442188653 ,
' upload_date ' : ' 20150913 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/freethenipple ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-10-11 01:54:38 -04:00
' tags ' : [ ] ,
' age_limit ' : 18 ,
2015-07-21 17:38:40 -04:00
} ,
2015-11-11 13:13:42 -05:00
} , {
' url ' : ' https://twitter.com/giphz/status/657991469417025536/photo/1 ' ,
' md5 ' : ' f36dcd5fb92bf7057f155e7d927eeb42 ' ,
' info_dict ' : {
' id ' : ' 657991469417025536 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai ' ,
' description ' : ' Gifs on Twitter: " tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5 " ' ,
2017-01-02 07:08:07 -05:00
' thumbnail ' : r ' re:^https?://.* \ .png ' ,
2015-11-11 13:13:42 -05:00
' uploader ' : ' Gifs ' ,
' uploader_id ' : ' giphz ' ,
} ,
2016-02-21 04:29:28 -05:00
' expected_warnings ' : [ ' height ' , ' width ' ] ,
2016-06-08 09:12:14 -04:00
' skip ' : ' Account suspended ' ,
2015-11-13 13:09:42 -05:00
} , {
' url ' : ' https://twitter.com/starwars/status/665052190608723968 ' ,
' info_dict ' : {
' id ' : ' 665052190608723968 ' ,
2022-10-11 01:54:38 -04:00
' display_id ' : ' 665052190608723968 ' ,
2015-11-13 13:09:42 -05:00
' ext ' : ' mp4 ' ,
2023-02-12 08:43:26 -05:00
' title ' : r ' re:Star Wars.*A new beginning is coming December 18.* ' ,
2019-11-09 03:23:20 -05:00
' description ' : ' A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ ' ,
2015-11-13 13:09:42 -05:00
' uploader_id ' : ' starwars ' ,
2022-10-19 12:01:21 -04:00
' uploader ' : r ' re:Star Wars.* ' ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1447395772 ,
' upload_date ' : ' 20151113 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/starwars ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
' tags ' : [ ' TV ' , ' StarWars ' , ' TheForceAwakens ' ] ,
' age_limit ' : 0 ,
2015-11-13 13:09:42 -05:00
} ,
2016-03-03 00:39:04 -05:00
} , {
' url ' : ' https://twitter.com/BTNBrentYarina/status/705235433198714880 ' ,
' info_dict ' : {
' id ' : ' 705235433198714880 ' ,
' ext ' : ' mp4 ' ,
2019-11-09 03:23:20 -05:00
' title ' : " Brent Yarina - Khalil Iverson ' s missed highlight dunk. And made highlight dunk. In one highlight. " ,
' description ' : " Khalil Iverson ' s missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns " ,
2016-03-03 00:39:04 -05:00
' uploader_id ' : ' BTNBrentYarina ' ,
' uploader ' : ' Brent Yarina ' ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1456976204 ,
' upload_date ' : ' 20160303 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/BTNBrentYarina ' ,
' comment_count ' : int ,
' repost_count ' : int ,
' like_count ' : int ,
' tags ' : [ ] ,
' age_limit ' : 0 ,
2016-03-03 00:39:04 -05:00
} ,
' params ' : {
# The same video as https://twitter.com/i/videos/tweet/705235433198714880
# Test case of TwitterCardIE
' skip_download ' : True ,
} ,
2022-11-17 21:00:11 -05:00
' skip ' : ' Dead external link ' ,
2016-03-03 01:42:49 -05:00
} , {
' url ' : ' https://twitter.com/jaydingeer/status/700207533655363584 ' ,
' info_dict ' : {
2022-10-11 01:54:38 -04:00
' id ' : ' 700207414000242688 ' ,
' display_id ' : ' 700207533655363584 ' ,
2016-03-03 01:42:49 -05:00
' ext ' : ' mp4 ' ,
2022-10-11 01:54:38 -04:00
' title ' : ' jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel ' ,
2019-11-09 03:23:20 -05:00
' description ' : ' BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ ' ,
2017-01-02 07:08:07 -05:00
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
2022-10-11 01:54:38 -04:00
' uploader ' : ' jaydin donte geer ' ,
' uploader_id ' : ' jaydingeer ' ,
2017-09-19 16:58:06 -04:00
' duration ' : 30.0 ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1455777459 ,
' upload_date ' : ' 20160218 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/jaydingeer ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-10-11 01:54:38 -04:00
' tags ' : [ ' Damndaniel ' ] ,
' age_limit ' : 0 ,
2016-03-03 01:42:49 -05:00
} ,
2016-03-26 16:36:02 -04:00
} , {
' url ' : ' https://twitter.com/Filmdrunk/status/713801302971588609 ' ,
' md5 ' : ' 89a15ed345d13b86e9a5a5e051fa308a ' ,
' info_dict ' : {
' id ' : ' MIOxnrUteUd ' ,
' ext ' : ' mp4 ' ,
2019-11-09 03:23:20 -05:00
' title ' : ' Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン ' ,
' uploader ' : ' TAKUMA ' ,
' uploader_id ' : ' 1004126642786242560 ' ,
2017-07-11 04:46:37 -04:00
' timestamp ' : 1402826626 ,
2016-03-26 16:36:02 -04:00
' upload_date ' : ' 20140615 ' ,
2022-10-11 01:54:38 -04:00
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
' alt_title ' : ' Vine by TAKUMA ' ,
' comment_count ' : int ,
' repost_count ' : int ,
' like_count ' : int ,
' view_count ' : int ,
2016-03-26 16:36:02 -04:00
} ,
' add_ie ' : [ ' Vine ' ] ,
2016-04-16 06:57:50 -04:00
} , {
' url ' : ' https://twitter.com/captainamerica/status/719944021058060289 ' ,
' info_dict ' : {
2022-10-11 01:54:38 -04:00
' id ' : ' 717462543795523584 ' ,
' display_id ' : ' 719944021058060289 ' ,
2016-04-16 06:57:50 -04:00
' ext ' : ' mp4 ' ,
' title ' : ' Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters. ' ,
2019-11-09 03:23:20 -05:00
' description ' : ' @King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI ' ,
' uploader_id ' : ' CaptainAmerica ' ,
2016-04-16 06:57:50 -04:00
' uploader ' : ' Captain America ' ,
2017-09-19 16:58:06 -04:00
' duration ' : 3.17 ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1460483005 ,
' upload_date ' : ' 20160412 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/CaptainAmerica ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-10-11 01:54:38 -04:00
' tags ' : [ ] ,
' age_limit ' : 0 ,
2016-04-16 06:57:50 -04:00
} ,
2016-09-24 07:57:55 -04:00
} , {
' url ' : ' https://twitter.com/OPP_HSD/status/779210622571536384 ' ,
' info_dict ' : {
' id ' : ' 1zqKVVlkqLaKB ' ,
' ext ' : ' mp4 ' ,
2019-11-09 03:23:20 -05:00
' title ' : ' Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence ' ,
2016-09-24 07:57:55 -04:00
' upload_date ' : ' 20160923 ' ,
2019-11-09 03:23:20 -05:00
' uploader_id ' : ' 1PmKqpJdOJQoY ' ,
' uploader ' : ' Sgt Kerry Schmidt - Ontario Provincial Police ' ,
2016-09-24 07:57:55 -04:00
' timestamp ' : 1474613214 ,
2022-10-11 01:54:38 -04:00
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
2016-09-24 07:57:55 -04:00
} ,
' add_ie ' : [ ' Periscope ' ] ,
2017-06-03 12:41:55 -04:00
} , {
# has mp4 formats via mobile API
' url ' : ' https://twitter.com/news_al3alm/status/852138619213144067 ' ,
' info_dict ' : {
' id ' : ' 852138619213144067 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة ' ,
2019-11-09 03:23:20 -05:00
' description ' : ' كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN ' ,
2017-06-03 12:41:55 -04:00
' uploader ' : ' عالم الأخبار ' ,
' uploader_id ' : ' news_al3alm ' ,
2017-09-19 16:58:06 -04:00
' duration ' : 277.4 ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1492000653 ,
' upload_date ' : ' 20170412 ' ,
2017-06-03 12:41:55 -04:00
} ,
2021-01-08 11:14:50 -05:00
' skip ' : ' Account suspended ' ,
2017-09-22 19:38:09 -04:00
} , {
' url ' : ' https://twitter.com/i/web/status/910031516746514432 ' ,
' info_dict ' : {
2022-10-11 01:54:38 -04:00
' id ' : ' 910030238373089285 ' ,
' display_id ' : ' 910031516746514432 ' ,
2017-09-22 19:38:09 -04:00
' ext ' : ' mp4 ' ,
' title ' : ' Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
2019-11-09 03:23:20 -05:00
' description ' : ' [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo ' ,
2017-09-22 19:38:09 -04:00
' uploader ' : ' Préfet de Guadeloupe ' ,
' uploader_id ' : ' Prefet971 ' ,
' duration ' : 47.48 ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1505803395 ,
' upload_date ' : ' 20170919 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/Prefet971 ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-10-11 01:54:38 -04:00
' tags ' : [ ' Maria ' ] ,
' age_limit ' : 0 ,
2017-09-22 19:38:09 -04:00
} ,
' params ' : {
' skip_download ' : True , # requires ffmpeg
} ,
2018-05-31 18:16:00 -04:00
} , {
# card via api.twitter.com/1.1/videos/tweet/config
' url ' : ' https://twitter.com/LisPower1/status/1001551623938805763 ' ,
' info_dict ' : {
2022-10-11 01:54:38 -04:00
' id ' : ' 1001551417340022785 ' ,
' display_id ' : ' 1001551623938805763 ' ,
2018-05-31 18:16:00 -04:00
' ext ' : ' mp4 ' ,
' title ' : ' re:.*?Shep is on a roll today.*? ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
2019-11-09 03:23:20 -05:00
' description ' : ' md5:37b9f2ff31720cef23b2bd42ee8a0f09 ' ,
2018-05-31 18:16:00 -04:00
' uploader ' : ' Lis Power ' ,
' uploader_id ' : ' LisPower1 ' ,
' duration ' : 111.278 ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1527623489 ,
' upload_date ' : ' 20180529 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/LisPower1 ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-10-11 01:54:38 -04:00
' tags ' : [ ] ,
' age_limit ' : 0 ,
2018-05-31 18:16:00 -04:00
} ,
' params ' : {
' skip_download ' : True , # requires ffmpeg
} ,
2019-07-13 14:19:17 -04:00
} , {
' url ' : ' https://twitter.com/foobar/status/1087791357756956680 ' ,
' info_dict ' : {
2022-10-11 01:54:38 -04:00
' id ' : ' 1087791272830607360 ' ,
' display_id ' : ' 1087791357756956680 ' ,
2019-07-13 14:19:17 -04:00
' ext ' : ' mp4 ' ,
' title ' : ' Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts! ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
2019-11-09 03:23:20 -05:00
' description ' : ' md5:6dfd341a3310fb97d80d2bf7145df976 ' ,
2019-07-13 14:19:17 -04:00
' uploader ' : ' Twitter ' ,
' uploader_id ' : ' Twitter ' ,
' duration ' : 61.567 ,
2019-11-09 03:23:20 -05:00
' timestamp ' : 1548184644 ,
' upload_date ' : ' 20190122 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/Twitter ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-10-11 01:54:38 -04:00
' tags ' : [ ] ,
' age_limit ' : 0 ,
2019-11-09 03:23:20 -05:00
} ,
} , {
# not available in Periscope
' url ' : ' https://twitter.com/ViviEducation/status/1136534865145286656 ' ,
' info_dict ' : {
' id ' : ' 1vOGwqejwoWxB ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019 ' ,
' uploader ' : ' Vivi ' ,
' uploader_id ' : ' 1eVjYOLGkGrQL ' ,
2022-10-11 01:54:38 -04:00
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
' tags ' : [ ' EduTECH2019 ' ] ,
' view_count ' : int ,
2019-07-13 14:19:17 -04:00
} ,
2019-11-09 03:23:20 -05:00
' add_ie ' : [ ' TwitterBroadcast ' ] ,
2021-01-16 07:40:15 -05:00
} , {
# unified card
' url ' : ' https://twitter.com/BrooklynNets/status/1349794411333394432?s=20 ' ,
' info_dict ' : {
2022-10-11 01:54:38 -04:00
' id ' : ' 1349774757969989634 ' ,
' display_id ' : ' 1349794411333394432 ' ,
2021-01-16 07:40:15 -05:00
' ext ' : ' mp4 ' ,
' title ' : ' md5:d1c4941658e4caaa6cb579260d85dcba ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
' description ' : ' md5:71ead15ec44cee55071547d6447c6a3e ' ,
' uploader ' : ' Brooklyn Nets ' ,
' uploader_id ' : ' BrooklynNets ' ,
' duration ' : 324.484 ,
' timestamp ' : 1610651040 ,
' upload_date ' : ' 20210114 ' ,
2022-10-11 01:54:38 -04:00
' uploader_url ' : ' https://twitter.com/BrooklynNets ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
' tags ' : [ ] ,
' age_limit ' : 0 ,
2021-01-16 07:40:15 -05:00
} ,
' params ' : {
' skip_download ' : True ,
} ,
2022-10-11 01:54:38 -04:00
} , {
' url ' : ' https://twitter.com/oshtru/status/1577855540407197696 ' ,
' info_dict ' : {
' id ' : ' 1577855447914409984 ' ,
' display_id ' : ' 1577855540407197696 ' ,
' ext ' : ' mp4 ' ,
2022-11-17 21:00:11 -05:00
' title ' : ' md5:9d198efb93557b8f8d5b78c480407214 ' ,
' description ' : ' md5:b9c3699335447391d11753ab21c70a74 ' ,
2022-10-11 01:54:38 -04:00
' upload_date ' : ' 20221006 ' ,
2022-11-17 21:00:11 -05:00
' uploader ' : ' oshtru ' ,
2022-10-11 01:54:38 -04:00
' uploader_id ' : ' oshtru ' ,
' uploader_url ' : ' https://twitter.com/oshtru ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
' duration ' : 30.03 ,
2022-10-19 12:01:21 -04:00
' timestamp ' : 1665025050 ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-10-11 01:54:38 -04:00
' tags ' : [ ] ,
' age_limit ' : 0 ,
} ,
' params ' : { ' skip_download ' : True } ,
} , {
' url ' : ' https://twitter.com/UltimaShadowX/status/1577719286659006464 ' ,
' info_dict ' : {
' id ' : ' 1577719286659006464 ' ,
2023-07-06 15:39:51 -04:00
' title ' : ' Ultima📛 | #вʟм - Test ' ,
2022-10-11 01:54:38 -04:00
' description ' : ' Test https://t.co/Y3KEZD7Dad ' ,
2023-07-06 15:39:51 -04:00
' uploader ' : ' Ultima📛 | #вʟм ' ,
2022-10-11 01:54:38 -04:00
' uploader_id ' : ' UltimaShadowX ' ,
' uploader_url ' : ' https://twitter.com/UltimaShadowX ' ,
' upload_date ' : ' 20221005 ' ,
2022-10-19 12:01:21 -04:00
' timestamp ' : 1664992565 ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-11 01:54:38 -04:00
' like_count ' : int ,
' tags ' : [ ] ,
' age_limit ' : 0 ,
} ,
' playlist_count ' : 4 ,
' params ' : { ' skip_download ' : True } ,
2022-10-19 12:01:21 -04:00
} , {
' url ' : ' https://twitter.com/MesoMax919/status/1575560063510810624 ' ,
' info_dict ' : {
' id ' : ' 1575559336759263233 ' ,
' display_id ' : ' 1575560063510810624 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' md5:eec26382babd0f7c18f041db8ae1c9c9 ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
' description ' : ' md5:95aea692fda36a12081b9629b02daa92 ' ,
' uploader ' : ' Max Olson ' ,
' uploader_id ' : ' MesoMax919 ' ,
' uploader_url ' : ' https://twitter.com/MesoMax919 ' ,
' duration ' : 21.321 ,
' timestamp ' : 1664477766 ,
' upload_date ' : ' 20220929 ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-19 12:01:21 -04:00
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-10-19 12:01:21 -04:00
' tags ' : [ ' HurricaneIan ' ] ,
' age_limit ' : 0 ,
} ,
} , {
2023-05-01 19:55:28 -04:00
# Adult content, fails if not logged in (GraphQL)
2022-10-19 12:01:21 -04:00
' url ' : ' https://twitter.com/Rizdraws/status/1575199173472927762 ' ,
' info_dict ' : {
' id ' : ' 1575199163847000068 ' ,
' display_id ' : ' 1575199173472927762 ' ,
' ext ' : ' mp4 ' ,
' title ' : str ,
' description ' : str ,
' uploader ' : str ,
' uploader_id ' : ' Rizdraws ' ,
' uploader_url ' : ' https://twitter.com/Rizdraws ' ,
' upload_date ' : ' 20220928 ' ,
' timestamp ' : 1664391723 ,
2022-12-09 18:41:45 -05:00
' thumbnail ' : r ' re:^https?://.+ \ .jpg ' ,
2022-10-19 12:01:21 -04:00
' like_count ' : int ,
' repost_count ' : int ,
' comment_count ' : int ,
' age_limit ' : 18 ,
' tags ' : [ ]
} ,
2023-05-01 19:55:28 -04:00
' skip ' : ' Requires authentication ' ,
2022-10-19 12:01:21 -04:00
} , {
2023-07-06 15:39:51 -04:00
# Playlist result only with auth
2022-10-19 12:01:21 -04:00
' url ' : ' https://twitter.com/Srirachachau/status/1395079556562706435 ' ,
' playlist_mincount ' : 2 ,
' info_dict ' : {
' id ' : ' 1395079556562706435 ' ,
' title ' : str ,
' tags ' : [ ] ,
' uploader ' : str ,
' like_count ' : int ,
' upload_date ' : ' 20210519 ' ,
' age_limit ' : 0 ,
' repost_count ' : int ,
2023-05-01 19:55:28 -04:00
' description ' : ' Here it is! Finished my gothic western cartoon. Pretty proud of it. It \' s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw ' ,
2022-10-19 12:01:21 -04:00
' uploader_id ' : ' Srirachachau ' ,
' comment_count ' : int ,
' uploader_url ' : ' https://twitter.com/Srirachachau ' ,
' timestamp ' : 1621447860 ,
} ,
} , {
' url ' : ' https://twitter.com/DavidToons_/status/1578353380363501568 ' ,
' playlist_mincount ' : 2 ,
' info_dict ' : {
' id ' : ' 1578353380363501568 ' ,
' title ' : str ,
' uploader_id ' : ' DavidToons_ ' ,
' repost_count ' : int ,
' like_count ' : int ,
' uploader ' : str ,
' timestamp ' : 1665143744 ,
' uploader_url ' : ' https://twitter.com/DavidToons_ ' ,
2023-05-01 19:55:28 -04:00
' description ' : ' Chris sounds like Linda from Bob \' s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w ' ,
2022-10-19 12:01:21 -04:00
' tags ' : [ ] ,
' comment_count ' : int ,
' upload_date ' : ' 20221007 ' ,
' age_limit ' : 0 ,
} ,
} , {
' url ' : ' https://twitter.com/primevideouk/status/1578401165338976258 ' ,
' playlist_count ' : 2 ,
' info_dict ' : {
' id ' : ' 1578401165338976258 ' ,
' title ' : str ,
' description ' : ' md5:659a6b517a034b4cee5d795381a2dc41 ' ,
' uploader ' : str ,
' uploader_id ' : ' primevideouk ' ,
' timestamp ' : 1665155137 ,
' upload_date ' : ' 20221007 ' ,
' age_limit ' : 0 ,
' uploader_url ' : ' https://twitter.com/primevideouk ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' repost_count ' : int ,
2022-10-19 12:01:21 -04:00
' like_count ' : int ,
' tags ' : [ ' TheRingsOfPower ' ] ,
} ,
} , {
# Twitter Spaces
' url ' : ' https://twitter.com/MoniqueCamarra/status/1550101959377551360 ' ,
' info_dict ' : {
' id ' : ' 1lPJqmBeeNAJb ' ,
' ext ' : ' m4a ' ,
' title ' : ' EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West ' ,
' uploader ' : r ' re:Monique Camarra.+? ' ,
' uploader_id ' : ' MoniqueCamarra ' ,
' live_status ' : ' was_live ' ,
2023-06-01 02:35:41 -04:00
' release_timestamp ' : 1658417414 ,
2023-07-04 23:05:52 -04:00
' description ' : ' md5:4dc8e972f1d8b3c6580376fabb02a3ad ' ,
' timestamp ' : 1658407771 ,
' release_date ' : ' 20220721 ' ,
' upload_date ' : ' 20220721 ' ,
2022-10-19 12:01:21 -04:00
} ,
' add_ie ' : [ ' TwitterSpaces ' ] ,
' params ' : { ' skip_download ' : ' m3u8 ' } ,
2023-07-06 15:39:51 -04:00
' skip ' : ' Requires authentication ' ,
2022-12-09 18:41:45 -05:00
} , {
# URL specifies video number but --yes-playlist
' url ' : ' https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1 ' ,
' playlist_mincount ' : 2 ,
' info_dict ' : {
' id ' : ' 1600649710662213632 ' ,
' title ' : ' md5:be05989b0722e114103ed3851a0ffae2 ' ,
' timestamp ' : 1670459604.0 ,
' description ' : ' md5:591c19ce66fadc2359725d5cd0d1052c ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
2022-12-09 18:41:45 -05:00
' uploader_id ' : ' CTVJLaidlaw ' ,
2023-07-05 16:30:23 -04:00
' repost_count ' : int ,
2022-12-09 18:41:45 -05:00
' tags ' : [ ' colorectalcancer ' , ' cancerjourney ' , ' imnotaquitter ' ] ,
' upload_date ' : ' 20221208 ' ,
' age_limit ' : 0 ,
' uploader ' : ' Jocelyn Laidlaw ' ,
' uploader_url ' : ' https://twitter.com/CTVJLaidlaw ' ,
' like_count ' : int ,
} ,
} , {
# URL specifies video number and --no-playlist
' url ' : ' https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2 ' ,
' info_dict ' : {
' id ' : ' 1600649511827013632 ' ,
' ext ' : ' mp4 ' ,
2023-05-01 19:55:28 -04:00
' title ' : ' md5:7662a0a27ce6faa3e5b160340f3cfab1 ' ,
2022-12-09 18:41:45 -05:00
' thumbnail ' : r ' re:^https?://.+ \ .jpg ' ,
' timestamp ' : 1670459604.0 ,
' uploader_id ' : ' CTVJLaidlaw ' ,
' uploader ' : ' Jocelyn Laidlaw ' ,
2023-07-05 16:30:23 -04:00
' repost_count ' : int ,
' comment_count ' : int ,
2022-12-09 18:41:45 -05:00
' tags ' : [ ' colorectalcancer ' , ' cancerjourney ' , ' imnotaquitter ' ] ,
' duration ' : 102.226 ,
' uploader_url ' : ' https://twitter.com/CTVJLaidlaw ' ,
' display_id ' : ' 1600649710662213632 ' ,
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2022-12-09 18:41:45 -05:00
' description ' : ' md5:591c19ce66fadc2359725d5cd0d1052c ' ,
' upload_date ' : ' 20221208 ' ,
' age_limit ' : 0 ,
} ,
' params ' : { ' noplaylist ' : True } ,
2023-02-02 08:32:14 -05:00
} , {
# id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
# note the id different between extraction and url
' url ' : ' https://twitter.com/s2FAKER/status/1621117700482416640 ' ,
' info_dict ' : {
' id ' : ' 1621117577354424321 ' ,
' display_id ' : ' 1621117700482416640 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 뽀 - 아 최우제 이동속도 봐 ' ,
' description ' : ' 아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB ' ,
' duration ' : 24.598 ,
' uploader ' : ' 뽀 ' ,
' uploader_id ' : ' s2FAKER ' ,
' uploader_url ' : ' https://twitter.com/s2FAKER ' ,
' upload_date ' : ' 20230202 ' ,
' timestamp ' : 1675339553.0 ,
' thumbnail ' : r ' re:https?://pbs \ .twimg \ .com/.+ ' ,
' age_limit ' : 18 ,
' tags ' : [ ] ,
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' repost_count ' : int ,
' comment_count ' : int ,
' view_count ' : int ,
2023-02-02 08:32:14 -05:00
} ,
2023-02-12 08:43:26 -05:00
} , {
' url ' : ' https://twitter.com/hlo_again/status/1599108751385972737/video/2 ' ,
' info_dict ' : {
' id ' : ' 1599108643743473680 ' ,
' display_id ' : ' 1599108751385972737 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' \u06ea - \U0001F48B ' ,
' uploader_url ' : ' https://twitter.com/hlo_again ' ,
' like_count ' : int ,
' uploader_id ' : ' hlo_again ' ,
' thumbnail ' : ' https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig ' ,
2023-07-05 16:30:23 -04:00
' repost_count ' : int ,
2023-02-12 08:43:26 -05:00
' duration ' : 9.531 ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
' view_count ' : int ,
2023-02-12 08:43:26 -05:00
' upload_date ' : ' 20221203 ' ,
' age_limit ' : 0 ,
' timestamp ' : 1670092210.0 ,
' tags ' : [ ] ,
' uploader ' : ' \u06ea ' ,
' description ' : ' \U0001F48B https://t.co/bTj9Qz7vQP ' ,
} ,
' params ' : { ' noplaylist ' : True } ,
} , {
' url ' : ' https://twitter.com/MunTheShinobi/status/1600009574919962625 ' ,
' info_dict ' : {
' id ' : ' 1600009362759733248 ' ,
' display_id ' : ' 1600009574919962625 ' ,
' ext ' : ' mp4 ' ,
' uploader_url ' : ' https://twitter.com/MunTheShinobi ' ,
' description ' : ' This is a genius ad by Apple. \U0001f525 \U0001f525 \U0001f525 \U0001f525 \U0001f525 https://t.co/cNsA0MoOml ' ,
2023-07-05 16:30:23 -04:00
' view_count ' : int ,
2023-02-12 08:43:26 -05:00
' thumbnail ' : ' https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig ' ,
' age_limit ' : 0 ,
2023-05-01 19:55:28 -04:00
' uploader ' : ' Mün The Shinobi ' ,
2023-07-05 16:30:23 -04:00
' repost_count ' : int ,
2023-02-12 08:43:26 -05:00
' upload_date ' : ' 20221206 ' ,
2023-05-01 19:55:28 -04:00
' title ' : ' Mün The Shinobi - This is a genius ad by Apple. \U0001f525 \U0001f525 \U0001f525 \U0001f525 \U0001f525 ' ,
2023-07-05 16:30:23 -04:00
' comment_count ' : int ,
2023-02-12 08:43:26 -05:00
' like_count ' : int ,
' tags ' : [ ] ,
' uploader_id ' : ' MunTheShinobi ' ,
' duration ' : 139.987 ,
' timestamp ' : 1670306984.0 ,
} ,
2023-03-04 12:51:33 -05:00
} , {
2023-07-06 15:39:51 -04:00
# url to retweet id w/ legacy api
2023-03-04 12:51:33 -05:00
' url ' : ' https://twitter.com/liberdalau/status/1623739803874349067 ' ,
' info_dict ' : {
' id ' : ' 1623274794488659969 ' ,
' display_id ' : ' 1623739803874349067 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy ' ,
2023-07-06 15:39:51 -04:00
' description ' : ' md5:b06864cd3dc2554821cc327f5348485a ' ,
2023-03-04 12:51:33 -05:00
' uploader ' : ' Johnny Bullets ' ,
' uploader_id ' : ' Johnnybull3ts ' ,
' uploader_url ' : ' https://twitter.com/Johnnybull3ts ' ,
' age_limit ' : 0 ,
' tags ' : [ ] ,
' duration ' : 8.033 ,
' timestamp ' : 1675853859.0 ,
' upload_date ' : ' 20230208 ' ,
' thumbnail ' : r ' re:https://pbs \ .twimg \ .com/ext_tw_video_thumb/.+ ' ,
' like_count ' : int ,
2023-07-05 16:30:23 -04:00
' repost_count ' : int ,
2023-03-04 12:51:33 -05:00
} ,
2023-07-05 16:30:23 -04:00
' params ' : { ' extractor_args ' : { ' twitter ' : { ' legacy_api ' : [ ' ' ] } } } ,
2023-07-06 15:39:51 -04:00
} , {
# orig tweet w/ graphql
' url ' : ' https://twitter.com/liberdalau/status/1623739803874349067 ' ,
' info_dict ' : {
' id ' : ' 1623274794488659969 ' ,
' display_id ' : ' 1623739803874349067 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' @selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people: Whoopsie-daisy ' ,
' description ' : ' md5:9258bdbb54793bdc124fe1cd47e96c6a ' ,
' uploader ' : ' @selfisekai@hackerspace.pl 🐀 ' ,
' uploader_id ' : ' liberdalau ' ,
' uploader_url ' : ' https://twitter.com/liberdalau ' ,
' age_limit ' : 0 ,
' tags ' : [ ] ,
' duration ' : 8.033 ,
' timestamp ' : 1675964711.0 ,
' upload_date ' : ' 20230209 ' ,
' thumbnail ' : r ' re:https://pbs \ .twimg \ .com/ext_tw_video_thumb/.+ ' ,
' like_count ' : int ,
' view_count ' : int ,
' repost_count ' : int ,
' comment_count ' : int ,
} ,
2022-10-12 00:12:31 -04:00
} , {
# onion route
' url ' : ' https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273 ' ,
' only_matching ' : True ,
2019-11-09 03:23:20 -05:00
} , {
# Twitch Clip Embed
' url ' : ' https://twitter.com/GunB1g/status/1163218564784017422 ' ,
' only_matching ' : True ,
2020-01-12 05:55:11 -05:00
} , {
# promo_video_website card
' url ' : ' https://twitter.com/GunB1g/status/1163218564784017422 ' ,
' only_matching ' : True ,
2021-01-08 11:14:50 -05:00
} , {
# promo_video_convo card
' url ' : ' https://twitter.com/poco_dandy/status/1047395834013384704 ' ,
' only_matching ' : True ,
} , {
# appplayer card
' url ' : ' https://twitter.com/poco_dandy/status/1150646424461176832 ' ,
' only_matching ' : True ,
2021-01-16 07:40:15 -05:00
} , {
# video_direct_message card
' url ' : ' https://twitter.com/qarev001/status/1348948114569269251 ' ,
' only_matching ' : True ,
} , {
# poll2choice_video card
' url ' : ' https://twitter.com/CAF_Online/status/1349365911120195585 ' ,
' only_matching ' : True ,
} , {
# poll3choice_video card
' url ' : ' https://twitter.com/SamsungMobileSA/status/1348609186725289984 ' ,
' only_matching ' : True ,
} , {
# poll4choice_video card
' url ' : ' https://twitter.com/SouthamptonFC/status/1347577658079641604 ' ,
' only_matching ' : True ,
2015-11-11 13:13:42 -05:00
} ]
2015-07-21 17:38:40 -04:00
2022-10-19 12:01:21 -04:00
def _graphql_to_legacy ( self , data , twid ) :
result = traverse_obj ( data , (
' threaded_conversation_with_injections_v2 ' , ' instructions ' , 0 , ' entries ' ,
lambda _ , v : v [ ' entryId ' ] == f ' tweet- { twid } ' , ' content ' , ' itemContent ' ,
2023-07-06 15:39:51 -04:00
' tweet_results ' , ' result ' , ( ' tweet ' , None ) , { dict } ,
) , default = { } , get_all = False ) if self . is_logged_in else traverse_obj (
data , ( ' tweetResult ' , ' result ' , { dict } ) , default = { } )
2022-10-19 12:01:21 -04:00
2023-07-06 15:39:51 -04:00
if result . get ( ' __typename ' ) not in ( ' Tweet ' , ' TweetTombstone ' , ' TweetUnavailable ' , None ) :
2023-02-02 08:32:14 -05:00
self . report_warning ( f ' Unknown typename: { result . get ( " __typename " ) } ' , twid , only_once = True )
2022-10-19 12:01:21 -04:00
if ' tombstone ' in result :
2023-05-01 19:55:28 -04:00
cause = remove_end ( traverse_obj ( result , ( ' tombstone ' , ' text ' , ' text ' , { str } ) ) , ' . Learn more ' )
2022-10-19 12:01:21 -04:00
raise ExtractorError ( f ' Twitter API says: { cause or " Unknown error " } ' , expected = True )
2023-07-06 15:39:51 -04:00
elif result . get ( ' __typename ' ) == ' TweetUnavailable ' :
reason = result . get ( ' reason ' )
if reason == ' NsfwLoggedOut ' :
self . raise_login_required ( ' NSFW tweet requires authentication ' )
raise ExtractorError ( reason or ' Requested tweet is unavailable ' , expected = True )
2022-10-19 12:01:21 -04:00
status = result . get ( ' legacy ' , { } )
status . update ( traverse_obj ( result , {
' user ' : ( ' core ' , ' user_results ' , ' result ' , ' legacy ' ) ,
' card ' : ( ' card ' , ' legacy ' ) ,
' quoted_status ' : ( ' quoted_status_result ' , ' result ' , ' legacy ' ) ,
} , expected_type = dict , default = { } ) )
# extra transformation is needed since result does not match legacy format
binding_values = {
binding_value . get ( ' key ' ) : binding_value . get ( ' value ' )
2023-05-01 19:55:28 -04:00
for binding_value in traverse_obj ( status , ( ' card ' , ' binding_values ' , . . . , { dict } ) )
2022-10-19 12:01:21 -04:00
}
if binding_values :
status [ ' card ' ] [ ' binding_values ' ] = binding_values
return status
def _build_graphql_query ( self , media_id ) :
return {
' variables ' : {
' focalTweetId ' : media_id ,
' includePromotedContent ' : True ,
' with_rux_injections ' : False ,
' withBirdwatchNotes ' : True ,
' withCommunity ' : True ,
' withDownvotePerspective ' : False ,
' withQuickPromoteEligibilityTweetFields ' : True ,
' withReactionsMetadata ' : False ,
' withReactionsPerspective ' : False ,
' withSuperFollowsTweetFields ' : True ,
' withSuperFollowsUserFields ' : True ,
' withV2Timeline ' : True ,
' withVoice ' : True ,
} ,
' features ' : {
' graphql_is_translatable_rweb_tweet_is_translatable_enabled ' : False ,
' interactive_text_enabled ' : True ,
' responsive_web_edit_tweet_api_enabled ' : True ,
' responsive_web_enhance_cards_enabled ' : True ,
' responsive_web_graphql_timeline_navigation_enabled ' : False ,
' responsive_web_text_conversations_enabled ' : False ,
' responsive_web_uc_gql_enabled ' : True ,
' standardized_nudges_misinfo ' : True ,
' tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled ' : False ,
' tweetypie_unmention_optimization_enabled ' : True ,
' unified_cards_ad_metadata_container_dynamic_card_content_query_enabled ' : True ,
' verified_phone_label_enabled ' : False ,
' vibe_api_enabled ' : True ,
} ,
2023-07-06 15:39:51 -04:00
} if self . is_logged_in else {
' variables ' : {
' tweetId ' : media_id ,
' withCommunity ' : False ,
' includePromotedContent ' : False ,
' withVoice ' : False ,
} ,
' features ' : {
' creator_subscriptions_tweet_preview_api_enabled ' : True ,
' tweetypie_unmention_optimization_enabled ' : True ,
' responsive_web_edit_tweet_api_enabled ' : True ,
' graphql_is_translatable_rweb_tweet_is_translatable_enabled ' : True ,
' view_counts_everywhere_api_enabled ' : True ,
' longform_notetweets_consumption_enabled ' : True ,
' responsive_web_twitter_article_tweet_consumption_enabled ' : False ,
' tweet_awards_web_tipping_enabled ' : False ,
' freedom_of_speech_not_reach_fetch_enabled ' : True ,
' standardized_nudges_misinfo ' : True ,
' tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled ' : True ,
' longform_notetweets_rich_text_read_enabled ' : True ,
' longform_notetweets_inline_media_enabled ' : True ,
' responsive_web_graphql_exclude_directive_enabled ' : True ,
' verified_phone_label_enabled ' : False ,
' responsive_web_media_download_video_enabled ' : False ,
' responsive_web_graphql_skip_user_profile_image_extensions_enabled ' : False ,
' responsive_web_graphql_timeline_navigation_enabled ' : True ,
' responsive_web_enhance_cards_enabled ' : False
} ,
' fieldToggles ' : {
' withArticleRichContentState ' : False
}
2022-10-19 12:01:21 -04:00
}
2015-07-21 17:38:40 -04:00
def _real_extract ( self , url ) :
2022-12-09 18:41:45 -05:00
twid , selected_index = self . _match_valid_url ( url ) . group ( ' id ' , ' index ' )
2023-07-06 15:39:51 -04:00
if not self . is_logged_in and self . _configuration_arg ( ' legacy_api ' ) :
2023-07-05 16:30:23 -04:00
status = traverse_obj ( self . _call_api ( f ' statuses/show/ { twid } .json ' , twid , {
' cards_platform ' : ' Web-12 ' ,
' include_cards ' : 1 ,
' include_reply_count ' : 1 ,
' include_user_entities ' : 0 ,
' tweet_mode ' : ' extended ' ,
} ) , ' retweeted_status ' , None )
2023-07-06 15:39:51 -04:00
elif not self . is_logged_in :
status = self . _graphql_to_legacy (
self . _call_graphql_api ( ' 2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId ' , twid ) , twid )
2023-05-01 19:55:28 -04:00
else :
2023-07-06 15:39:51 -04:00
status = self . _graphql_to_legacy (
self . _call_graphql_api ( ' zZXycP0V6H7m-2r0mOnFcA/TweetDetail ' , twid ) , twid )
2015-10-18 06:04:13 -04:00
2023-07-06 15:39:51 -04:00
title = description = traverse_obj (
status , ( ( ' full_text ' , ' text ' ) , { lambda x : x . replace ( ' \n ' , ' ' ) } ) , get_all = False ) or ' '
2015-10-18 06:04:13 -04:00
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
2015-11-13 13:09:42 -05:00
title = re . sub ( r ' \ s+(https?://[^ ]+) ' , ' ' , title )
2019-11-09 03:23:20 -05:00
user = status . get ( ' user ' ) or { }
uploader = user . get ( ' name ' )
if uploader :
2022-10-19 12:01:21 -04:00
title = f ' { uploader } - { title } '
2019-11-09 03:23:20 -05:00
uploader_id = user . get ( ' screen_name ' )
2015-11-11 13:13:42 -05:00
info = {
2019-11-09 03:23:20 -05:00
' id ' : twid ,
' title ' : title ,
' description ' : description ,
' uploader ' : uploader ,
' timestamp ' : unified_timestamp ( status . get ( ' created_at ' ) ) ,
' uploader_id ' : uploader_id ,
2022-06-17 22:00:12 -04:00
' uploader_url ' : format_field ( uploader_id , None , ' https://twitter.com/ %s ' ) ,
2019-11-09 03:23:20 -05:00
' like_count ' : int_or_none ( status . get ( ' favorite_count ' ) ) ,
' repost_count ' : int_or_none ( status . get ( ' retweet_count ' ) ) ,
' comment_count ' : int_or_none ( status . get ( ' reply_count ' ) ) ,
' age_limit ' : 18 if status . get ( ' possibly_sensitive ' ) else 0 ,
2023-02-12 08:43:26 -05:00
' tags ' : traverse_obj ( status , ( ' entities ' , ' hashtags ' , . . . , ' text ' ) ) ,
2015-07-21 17:38:40 -04:00
}
2015-11-11 13:13:42 -05:00
2021-01-16 07:40:15 -05:00
def extract_from_video_info ( media ) :
2022-10-11 01:54:38 -04:00
media_id = traverse_obj ( media , ' id_str ' , ' id ' , expected_type = str_or_none )
self . write_debug ( f ' Extracting from video info: { media_id } ' )
2019-11-09 03:23:20 -05:00
formats = [ ]
2021-04-15 08:12:59 -04:00
subtitles = { }
2023-07-06 15:39:51 -04:00
for variant in traverse_obj ( media , ( ' video_info ' , ' variants ' , . . . ) ) :
2021-04-15 08:12:59 -04:00
fmts , subs = self . _extract_variant_formats ( variant , twid )
subtitles = self . _merge_subtitles ( subtitles , subs )
formats . extend ( fmts )
2019-11-09 03:23:20 -05:00
thumbnails = [ ]
media_url = media . get ( ' media_url_https ' ) or media . get ( ' media_url ' )
if media_url :
def add_thumbnail ( name , size ) :
thumbnails . append ( {
' id ' : name ,
' url ' : update_url_query ( media_url , { ' name ' : name } ) ,
' width ' : int_or_none ( size . get ( ' w ' ) or size . get ( ' width ' ) ) ,
' height ' : int_or_none ( size . get ( ' h ' ) or size . get ( ' height ' ) ) ,
} )
for name , size in media . get ( ' sizes ' , { } ) . items ( ) :
add_thumbnail ( name , size )
add_thumbnail ( ' orig ' , media . get ( ' original_info ' ) or { } )
2015-11-11 13:13:42 -05:00
2022-10-11 01:54:38 -04:00
return {
2023-07-05 16:30:23 -04:00
' id ' : media_id ,
2019-11-09 03:23:20 -05:00
' formats ' : formats ,
2021-04-15 08:12:59 -04:00
' subtitles ' : subtitles ,
2019-11-09 03:23:20 -05:00
' thumbnails ' : thumbnails ,
2023-02-12 08:43:26 -05:00
' view_count ' : traverse_obj ( media , ( ' mediaStats ' , ' viewCount ' , { int_or_none } ) ) ,
2023-07-06 15:39:51 -04:00
' duration ' : float_or_none ( traverse_obj ( media , ( ' video_info ' , ' duration_millis ' ) ) , 1000 ) ,
2022-11-17 00:10:03 -05:00
# The codec of http formats are unknown
' _format_sort_fields ' : ( ' res ' , ' br ' , ' size ' , ' proto ' ) ,
2022-10-11 01:54:38 -04:00
}
2021-01-16 07:40:15 -05:00
2022-10-11 01:54:38 -04:00
def extract_from_card_info ( card ) :
if not card :
return
self . write_debug ( f ' Extracting from card info: { card . get ( " url " ) } ' )
binding_values = card [ ' binding_values ' ]
def get_binding_value ( k ) :
o = binding_values . get ( k ) or { }
return try_get ( o , lambda x : x [ x [ ' type ' ] . lower ( ) + ' _value ' ] )
card_name = card [ ' name ' ] . split ( ' : ' ) [ - 1 ]
if card_name == ' player ' :
2022-10-19 12:01:21 -04:00
yield {
2022-10-11 01:54:38 -04:00
' _type ' : ' url ' ,
' url ' : get_binding_value ( ' player_url ' ) ,
}
elif card_name == ' periscope_broadcast ' :
2022-10-19 12:01:21 -04:00
yield {
2022-10-11 01:54:38 -04:00
' _type ' : ' url ' ,
' url ' : get_binding_value ( ' url ' ) or get_binding_value ( ' player_url ' ) ,
' ie_key ' : PeriscopeIE . ie_key ( ) ,
}
elif card_name == ' broadcast ' :
2022-10-19 12:01:21 -04:00
yield {
2022-10-11 01:54:38 -04:00
' _type ' : ' url ' ,
' url ' : get_binding_value ( ' broadcast_url ' ) ,
' ie_key ' : TwitterBroadcastIE . ie_key ( ) ,
}
2022-10-19 12:01:21 -04:00
elif card_name == ' audiospace ' :
yield {
' _type ' : ' url ' ,
' url ' : f ' https://twitter.com/i/spaces/ { get_binding_value ( " id " ) } ' ,
' ie_key ' : TwitterSpacesIE . ie_key ( ) ,
}
2022-10-11 01:54:38 -04:00
elif card_name == ' summary ' :
2022-10-19 12:01:21 -04:00
yield {
2019-11-09 03:23:20 -05:00
' _type ' : ' url ' ,
2022-10-11 01:54:38 -04:00
' url ' : get_binding_value ( ' card_url ' ) ,
}
elif card_name == ' unified_card ' :
2022-10-19 12:01:21 -04:00
unified_card = self . _parse_json ( get_binding_value ( ' unified_card ' ) , twid )
yield from map ( extract_from_video_info , traverse_obj (
unified_card , ( ' media_entities ' , . . . ) , expected_type = dict ) )
2022-10-11 01:54:38 -04:00
# amplify, promo_video_website, promo_video_convo, appplayer,
# video_direct_message, poll2choice_video, poll3choice_video,
# poll4choice_video, ...
else :
is_amplify = card_name == ' amplify '
vmap_url = get_binding_value ( ' amplify_url_vmap ' ) if is_amplify else get_binding_value ( ' player_stream_url ' )
content_id = get_binding_value ( ' %s _content_id ' % ( card_name if is_amplify else ' player ' ) )
formats , subtitles = self . _extract_formats_from_vmap_url ( vmap_url , content_id or twid )
thumbnails = [ ]
for suffix in ( ' _small ' , ' ' , ' _large ' , ' _x_large ' , ' _original ' ) :
image = get_binding_value ( ' player_image ' + suffix ) or { }
image_url = image . get ( ' url ' )
if not image_url or ' /player-placeholder ' in image_url :
continue
thumbnails . append ( {
' id ' : suffix [ 1 : ] if suffix else ' medium ' ,
' url ' : image_url ,
' width ' : int_or_none ( image . get ( ' width ' ) ) ,
' height ' : int_or_none ( image . get ( ' height ' ) ) ,
} )
2022-10-19 12:01:21 -04:00
yield {
2022-10-11 01:54:38 -04:00
' formats ' : formats ,
' subtitles ' : subtitles ,
' thumbnails ' : thumbnails ,
' duration ' : int_or_none ( get_binding_value (
' content_duration_seconds ' ) ) ,
}
2023-02-12 08:43:26 -05:00
videos = traverse_obj ( status , (
2023-07-05 16:30:23 -04:00
( None , ' quoted_status ' ) , ' extended_entities ' , ' media ' , lambda _ , m : m [ ' type ' ] != ' photo ' , { dict } ) )
2022-10-11 01:54:38 -04:00
2023-02-12 08:43:26 -05:00
if self . _yes_playlist ( twid , selected_index , video_label = ' URL-specified video number ' ) :
selected_entries = ( * map ( extract_from_video_info , videos ) , * extract_from_card_info ( status . get ( ' card ' ) ) )
else :
2023-07-06 15:39:51 -04:00
desired_obj = traverse_obj ( status , (
( None , ' quoted_status ' ) , ' extended_entities ' , ' media ' , int ( selected_index ) - 1 , { dict } ) , get_all = False )
2023-02-12 08:43:26 -05:00
if not desired_obj :
raise ExtractorError ( f ' Video # { selected_index } is unavailable ' , expected = True )
elif desired_obj . get ( ' type ' ) != ' video ' :
raise ExtractorError ( f ' Media # { selected_index } is not a video ' , expected = True )
# Restore original archive id and video index in title
for index , entry in enumerate ( videos , 1 ) :
if entry . get ( ' id ' ) != desired_obj . get ( ' id ' ) :
continue
if index == 1 :
info [ ' _old_archive_ids ' ] = [ make_archive_id ( self , twid ) ]
if len ( videos ) != 1 :
info [ ' title ' ] + = f ' # { index } '
break
return { * * info , * * extract_from_video_info ( desired_obj ) , ' display_id ' : twid }
entries = [ { * * info , * * data , ' display_id ' : twid } for data in selected_entries ]
2022-10-11 01:54:38 -04:00
if not entries :
expanded_url = traverse_obj ( status , ( ' entities ' , ' urls ' , 0 , ' expanded_url ' ) , expected_type = url_or_none )
if not expanded_url or expanded_url == url :
2023-05-01 19:55:28 -04:00
self . raise_no_formats ( ' No video could be found in this tweet ' , expected = True )
return info
2022-10-11 01:54:38 -04:00
return self . url_result ( expanded_url , display_id = twid , * * info )
entries [ 0 ] [ ' _old_archive_ids ' ] = [ make_archive_id ( self , twid ) ]
if len ( entries ) == 1 :
return entries [ 0 ]
for index , entry in enumerate ( entries , 1 ) :
entry [ ' title ' ] + = f ' # { index } '
return self . playlist_result ( entries , * * info )
2016-02-21 03:41:24 -05:00
class TwitterAmplifyIE ( TwitterBaseIE ) :
IE_NAME = ' twitter:amplify '
2016-09-08 06:04:57 -04:00
_VALID_URL = r ' https?://amp \ .twimg \ .com/v/(?P<id>[0-9a-f \ -] {36} ) '
2016-02-21 03:41:24 -05:00
_TEST = {
' url ' : ' https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951 ' ,
2022-10-19 12:01:21 -04:00
' md5 ' : ' fec25801d18a4557c5c9f33d2c379ffa ' ,
2016-02-21 03:41:24 -05:00
' info_dict ' : {
' id ' : ' 0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Twitter Video ' ,
2016-02-21 04:16:35 -05:00
' thumbnail ' : ' re:^https?://.* ' ,
2016-02-21 03:41:24 -05:00
} ,
2022-10-19 12:01:21 -04:00
' params ' : { ' format ' : ' [protocol=https] ' } ,
2016-02-21 03:41:24 -05:00
}
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , video_id )
vmap_url = self . _html_search_meta (
' twitter:amplify:vmap ' , webpage , ' vmap url ' )
2022-10-19 12:01:21 -04:00
formats , _ = self . _extract_formats_from_vmap_url ( vmap_url , video_id )
2016-02-21 03:41:24 -05:00
2016-02-21 04:16:35 -05:00
thumbnails = [ ]
thumbnail = self . _html_search_meta (
' twitter:image:src ' , webpage , ' thumbnail ' , fatal = False )
def _find_dimension ( target ) :
w = int_or_none ( self . _html_search_meta (
' twitter: %s :width ' % target , webpage , fatal = False ) )
h = int_or_none ( self . _html_search_meta (
' twitter: %s :height ' % target , webpage , fatal = False ) )
return w , h
if thumbnail :
thumbnail_w , thumbnail_h = _find_dimension ( ' image ' )
thumbnails . append ( {
' url ' : thumbnail ,
' width ' : thumbnail_w ,
' height ' : thumbnail_h ,
} )
video_w , video_h = _find_dimension ( ' player ' )
2017-07-11 03:48:34 -04:00
formats [ 0 ] . update ( {
2016-02-21 04:16:35 -05:00
' width ' : video_w ,
' height ' : video_h ,
2017-07-11 03:48:34 -04:00
} )
2016-02-21 04:16:35 -05:00
2016-02-21 03:41:24 -05:00
return {
' id ' : video_id ,
' title ' : ' Twitter Video ' ,
2016-02-21 04:16:35 -05:00
' formats ' : formats ,
' thumbnails ' : thumbnails ,
2016-02-21 03:41:24 -05:00
}
2019-11-09 03:23:20 -05:00
class TwitterBroadcastIE ( TwitterBaseIE , PeriscopeBaseIE ) :
IE_NAME = ' twitter:broadcast '
_VALID_URL = TwitterBaseIE . _BASE_REGEX + r ' i/broadcasts/(?P<id>[0-9a-zA-Z] {13} ) '
2020-06-01 09:32:57 -04:00
_TEST = {
# untitled Periscope video
' url ' : ' https://twitter.com/i/broadcasts/1yNGaQLWpejGj ' ,
' info_dict ' : {
' id ' : ' 1yNGaQLWpejGj ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Andrea May Sahouri - Periscope Broadcast ' ,
' uploader ' : ' Andrea May Sahouri ' ,
' uploader_id ' : ' 1PXEdBZWpGwKe ' ,
2022-10-19 12:01:21 -04:00
' thumbnail ' : r ' re:^https?://[^?#]+ \ .jpg \ ?token= ' ,
' view_count ' : int ,
2020-06-01 09:32:57 -04:00
} ,
}
2019-11-09 03:23:20 -05:00
def _real_extract ( self , url ) :
broadcast_id = self . _match_id ( url )
broadcast = self . _call_api (
' broadcasts/show.json ' , broadcast_id ,
{ ' ids ' : broadcast_id } ) [ ' broadcasts ' ] [ broadcast_id ]
info = self . _parse_broadcast_data ( broadcast , broadcast_id )
media_key = broadcast [ ' media_key ' ]
source = self . _call_api (
2022-10-19 12:01:21 -04:00
f ' live_video_stream/status/ { media_key } ' , media_key ) [ ' source ' ]
2019-11-09 03:23:20 -05:00
m3u8_url = source . get ( ' noRedirectPlaybackUrl ' ) or source [ ' location ' ]
if ' /live_video_stream/geoblocked/ ' in m3u8_url :
self . raise_geo_restricted ( )
m3u8_id = compat_parse_qs ( compat_urllib_parse_urlparse (
m3u8_url ) . query ) . get ( ' type ' , [ None ] ) [ 0 ]
state , width , height = self . _extract_common_format_info ( broadcast )
info [ ' formats ' ] = self . _extract_pscp_m3u8_formats (
m3u8_url , broadcast_id , m3u8_id , state , width , height )
return info
2020-10-12 19:58:59 -04:00
2022-10-19 12:01:21 -04:00
class TwitterSpacesIE ( TwitterBaseIE ) :
IE_NAME = ' twitter:spaces '
_VALID_URL = TwitterBaseIE . _BASE_REGEX + r ' i/spaces/(?P<id>[0-9a-zA-Z] {13} ) '
_TESTS = [ {
' url ' : ' https://twitter.com/i/spaces/1RDxlgyvNXzJL ' ,
' info_dict ' : {
' id ' : ' 1RDxlgyvNXzJL ' ,
' ext ' : ' m4a ' ,
' title ' : ' King Carlo e la mossa Kansas City per fare il Grande Centro ' ,
' description ' : ' Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe ' ,
' uploader ' : r ' re:Lucio Di Gaetano.*? ' ,
' uploader_id ' : ' luciodigaetano ' ,
' live_status ' : ' was_live ' ,
2023-07-04 23:05:52 -04:00
' timestamp ' : 1659877956 ,
' upload_date ' : ' 20220807 ' ,
' release_timestamp ' : 1659904215 ,
' release_date ' : ' 20220807 ' ,
2022-10-19 12:01:21 -04:00
} ,
' params ' : { ' skip_download ' : ' m3u8 ' } ,
} ]
SPACE_STATUS = {
' notstarted ' : ' is_upcoming ' ,
' ended ' : ' was_live ' ,
' running ' : ' is_live ' ,
' timedout ' : ' post_live ' ,
}
def _build_graphql_query ( self , space_id ) :
return {
' variables ' : {
' id ' : space_id ,
' isMetatagsQuery ' : True ,
' withDownvotePerspective ' : False ,
' withReactionsMetadata ' : False ,
' withReactionsPerspective ' : False ,
' withReplays ' : True ,
' withSuperFollowsUserFields ' : True ,
' withSuperFollowsTweetFields ' : True ,
} ,
' features ' : {
' dont_mention_me_view_api_enabled ' : True ,
' interactive_text_enabled ' : True ,
' responsive_web_edit_tweet_api_enabled ' : True ,
' responsive_web_enhance_cards_enabled ' : True ,
' responsive_web_uc_gql_enabled ' : True ,
' spaces_2022_h2_clipping ' : True ,
' spaces_2022_h2_spaces_communities ' : False ,
' standardized_nudges_misinfo ' : True ,
' tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled ' : False ,
' vibe_api_enabled ' : True ,
} ,
}
def _real_extract ( self , url ) :
space_id = self . _match_id ( url )
2023-07-06 15:39:51 -04:00
if not self . is_logged_in :
self . raise_login_required ( ' Twitter Spaces require authentication ' )
2022-10-19 12:01:21 -04:00
space_data = self . _call_graphql_api ( ' HPEisOmj1epUNLCWTYhUWw/AudioSpaceById ' , space_id ) [ ' audioSpace ' ]
if not space_data :
raise ExtractorError ( ' Twitter Space not found ' , expected = True )
metadata = space_data [ ' metadata ' ]
live_status = try_call ( lambda : self . SPACE_STATUS [ metadata [ ' state ' ] . lower ( ) ] )
2023-07-04 23:05:52 -04:00
is_live = live_status == ' is_live '
2022-10-19 12:01:21 -04:00
formats = [ ]
if live_status == ' is_upcoming ' :
self . raise_no_formats ( ' Twitter Space not started yet ' , expected = True )
2023-07-04 23:05:52 -04:00
elif not is_live and not metadata . get ( ' is_space_available_for_replay ' ) :
self . raise_no_formats ( ' Twitter Space ended and replay is disabled ' , expected = True )
elif metadata . get ( ' media_key ' ) :
source = traverse_obj (
self . _call_api ( f ' live_video_stream/status/ { metadata [ " media_key " ] } ' , metadata [ ' media_key ' ] ) ,
( ' source ' , ( ' noRedirectPlaybackUrl ' , ' location ' ) , { url_or_none } ) , get_all = False )
2022-10-19 12:01:21 -04:00
formats = self . _extract_m3u8_formats (
2023-07-04 23:05:52 -04:00
source , metadata [ ' media_key ' ] , ' m4a ' , live = is_live , fatal = False ,
headers = { ' Referer ' : ' https://twitter.com/ ' } ) if source else [ ]
2022-10-19 12:01:21 -04:00
for fmt in formats :
fmt . update ( { ' vcodec ' : ' none ' , ' acodec ' : ' aac ' } )
2023-07-04 23:05:52 -04:00
if not is_live :
fmt [ ' container ' ] = ' m4a_dash '
2022-10-19 12:01:21 -04:00
participants = ' , ' . join ( traverse_obj (
space_data , ( ' participants ' , ' speakers ' , . . . , ' display_name ' ) ) ) or ' nobody yet '
2023-07-04 23:05:52 -04:00
if not formats and live_status == ' post_live ' :
self . raise_no_formats ( ' Twitter Space ended but not downloadable yet ' , expected = True )
2022-10-19 12:01:21 -04:00
return {
' id ' : space_id ,
' title ' : metadata . get ( ' title ' ) ,
' description ' : f ' Twitter Space participated by { participants } ' ,
' uploader ' : traverse_obj (
metadata , ( ' creator_results ' , ' result ' , ' legacy ' , ' name ' ) ) ,
' uploader_id ' : traverse_obj (
metadata , ( ' creator_results ' , ' result ' , ' legacy ' , ' screen_name ' ) ) ,
' live_status ' : live_status ,
2023-06-01 02:35:41 -04:00
' release_timestamp ' : try_call (
lambda : int_or_none ( metadata [ ' scheduled_start ' ] , scale = 1000 ) ) ,
2023-07-04 23:05:52 -04:00
' timestamp ' : int_or_none ( metadata . get ( ' created_at ' ) , scale = 1000 ) ,
2022-10-19 12:01:21 -04:00
' formats ' : formats ,
}
2020-10-12 19:58:59 -04:00
class TwitterShortenerIE ( TwitterBaseIE ) :
IE_NAME = ' twitter:shortener '
2020-10-17 20:14:13 -04:00
_VALID_URL = r ' https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+) '
_BASE_URL = ' https://t.co/ '
2020-10-12 19:58:59 -04:00
def _real_extract ( self , url ) :
2021-08-18 21:41:24 -04:00
mobj = self . _match_valid_url ( url )
2020-10-17 20:14:13 -04:00
eid , id = mobj . group ( ' eid ' , ' id ' )
if eid :
id = eid
url = self . _BASE_URL + id
2023-07-09 03:53:02 -04:00
new_url = self . _request_webpage ( url , id , headers = { ' User-Agent ' : ' curl ' } ) . url
2020-10-17 20:14:13 -04:00
__UNSAFE_LINK = " https://twitter.com/safety/unsafe_link_warning?unsafe_link= "
if new_url . startswith ( __UNSAFE_LINK ) :
new_url = new_url . replace ( __UNSAFE_LINK , " " )
2020-10-17 04:24:57 -04:00
return self . url_result ( new_url )