[extractor/tiktok] Add TikTokLive extractor (#5637)

Closes #3698
Authored by: JC-Chung
This commit is contained in:
JC-Chung 2023-01-05 19:23:34 +08:00 committed by GitHub
parent a1d9aca338
commit 933ed882e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 0 deletions

View file

@ -1890,6 +1890,7 @@
TikTokEffectIE, TikTokEffectIE,
TikTokTagIE, TikTokTagIE,
TikTokVMIE, TikTokVMIE,
TikTokLiveIE,
DouyinIE, DouyinIE,
) )
from .tinypic import TinyPicIE from .tinypic import TinyPicIE

View file

@ -11,6 +11,7 @@
HEADRequest, HEADRequest,
LazyList, LazyList,
UnsupportedError, UnsupportedError,
UserNotLive,
get_element_by_id, get_element_by_id,
get_first, get_first,
int_or_none, int_or_none,
@ -980,3 +981,42 @@ def _real_extract(self, url):
if self.suitable(new_url): # Prevent infinite loop in case redirect fails if self.suitable(new_url): # Prevent infinite loop in case redirect fails
raise UnsupportedError(new_url) raise UnsupportedError(new_url)
return self.url_result(new_url) return self.url_result(new_url)
class TikTokLiveIE(InfoExtractor):
    """Extract a TikTok live stream from a ``tiktok.com/@<user>/live`` URL."""

    _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/live'
    IE_NAME = 'tiktok:live'

    _TESTS = [{
        'url': 'https://www.tiktok.com/@iris04201/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the live room behind *url* and return its info dict.

        The room ID is scraped from the profile's live page, then the room
        details are fetched from the live-detail API endpoint.

        Raises:
            UserNotLive: no room ID is present in the page, or the room's
                status is anything other than 2.
            ExtractorError: the API response lacks the ``status`` field or
                the ``liveUrl`` stream manifest URL.
        """
        uploader = self._match_id(url)
        # NOTE(review): the header value literally starts with "User-Agent:";
        # this looks like a copy/paste slip, but it is kept as-is because the
        # page served may depend on it -- confirm before changing.
        webpage = self._download_webpage(
            url, uploader, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})

        room_id = self._html_search_regex(
            r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
        if not room_id:
            raise UserNotLive(video_id=uploader)

        api_response = self._download_json(
            'https://www.tiktok.com/api/live/detail/', room_id, query={
                'aid': '1988',
                'roomID': room_id,
            })
        live_info = traverse_obj(
            api_response, 'LiveRoomInfo', expected_type=dict, default={})

        if 'status' not in live_info:
            raise ExtractorError('Unexpected response from TikTok API')

        # status = 2 if live else 4
        if int_or_none(live_info['status']) != 2:
            raise UserNotLive(video_id=uploader)

        # Fail with an explicit extractor error instead of a bare KeyError
        # when the API reports the room as live but omits the manifest URL.
        stream_url = live_info.get('liveUrl')
        if not stream_url:
            raise ExtractorError('Unexpected response from TikTok API')

        # Prefer the API-provided title; fall back to the page's meta tags.
        title = live_info.get('title') or self._html_search_meta(
            ['og:title', 'twitter:title'], webpage, default='')

        return {
            'id': room_id,
            'title': title,
            'uploader': uploader,
            'uploader_id': traverse_obj(live_info, ('ownerInfo', 'id')),
            'creator': traverse_obj(live_info, ('ownerInfo', 'nickname')),
            'concurrent_view_count': traverse_obj(
                live_info, ('liveRoomStats', 'userCount'), expected_type=int),
            'formats': self._extract_m3u8_formats(stream_url, room_id, 'mp4', live=True),
            'is_live': True,
        }