From 6ef5ad9e29ab3648e87af32a2a1abc6063237c3f Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Mon, 2 May 2022 17:13:18 +0200 Subject: [PATCH] [trovo] Update to new API (#3509) Closes #3457 Authored by: nyuszika7h --- yt_dlp/extractor/trovo.py | 234 +++++++++++++++++++++++--------------- 1 file changed, 145 insertions(+), 89 deletions(-) diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 3487f3acc..c049025a3 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -1,5 +1,7 @@ import itertools import json +import random +import string from .common import InfoExtractor from ..utils import ( @@ -15,10 +17,20 @@ class TrovoBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/' _HEADERS = {'Origin': 'https://trovo.live'} - def _call_api(self, video_id, query=None, data=None): - return self._download_json( - 'https://gql.trovo.live/', video_id, query=query, data=data, - headers={'Accept': 'application/json'}) + def _call_api(self, video_id, data): + if 'persistedQuery' in data.get('extensions', {}): + url = 'https://gql.trovo.live' + else: + url = 'https://api-web.trovo.live/graphql' + + resp = self._download_json( + url, video_id, data=json.dumps([data]).encode(), headers={'Accept': 'application/json'}, + query={ + 'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)), + })[0] + if 'errors' in resp: + raise ExtractorError(f'Trovo said: {resp["errors"][0]["message"]}') + return resp['data'][data['operationName']] def _extract_streamer_info(self, data): streamer_info = data.get('streamerInfo') or {} @@ -35,27 +47,14 @@ class TrovoIE(TrovoBaseIE): def _real_extract(self, url): username = self._match_id(url) - live_info = self._call_api(username, query={ - 'query': '''{ - getLiveInfo(params: {userName: "%s"}) { - isLive - programInfo { - coverUrl - id - streamInfo { - desc - playUrl - } - title - } - streamerInfo { - nickName - uid - userName - } - } -}''' % username, - })['data']['getLiveInfo'] + live_info = self._call_api(username, data={ + 'operationName': 'live_LiveReaderService_GetLiveInfo', + 'variables': { + 'params': { + 'userName': username, + }, + }, + }) if live_info.get('isLive') == 0: raise ExtractorError('%s is offline' % username, expected=True) program_info = live_info['programInfo'] @@ -90,56 +89,61 @@ def _real_extract(self, url): class TrovoVodIE(TrovoBaseIE): _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P[^/?&#]+)' _TESTS = [{ - 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', + 'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos', + 'params': {'getcomments': True}, 'info_dict': { - 'id': 'ltv-100095501_100095501_1609596043', + 'id': 'lc-5285890818705062210', 'ext': 'mp4', - 'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!', - 'uploader': 'Exsl', - 'timestamp': 1609640305, - 'upload_date': '20210103', - 'uploader_id': '100095501', - 'duration': 43977, + 'title': 'fatal moaning for a super good🤣🤣', + 'uploader': 'OneTappedYou', + 'timestamp': 1621628019, + 'upload_date': '20210521', + 'uploader_id': '100719456', + 'duration': 31, 'view_count': int, 'like_count': int, 'comment_count': int, - 'comments': 'mincount:8', - 'categories': ['Grand Theft Auto V'], + 'comments': 'mincount:1', + 'categories': ['Call of Duty: Mobile'], + 'uploader_url': 'https://trovo.live/OneTappedYou', + 'thumbnail': r're:^https?://.*\.jpg', }, - 'skip': '404' }, { - 'url': 'https://trovo.live/clip/lc-5285890810184026005', + 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', 'only_matching': True, }] def _real_extract(self, url): vid = self._match_id(url) - resp = self._call_api(vid, data=json.dumps([{ - 'query': '''{ - batchGetVodDetailInfo(params: {vids: ["%s"]}) { - VodDetailInfos - } -}''' % vid, - }, { - 'query': '''{ - getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) { - commentList { - author { - nickName - uid - } - commentID - content - createdAt - parentID - } - } -}''' % vid, - }]).encode()) - vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid] + + # NOTE: It is also possible to extract this info from the Nuxt data on the website, + # however that seems unreliable - sometimes it randomly doesn't return the data, + # at least when using a non-residential IP. + resp = self._call_api(vid, data={ + 'operationName': 'batchGetVodDetailInfo', + 'variables': { + 'params': { + 'vids': [vid], + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'ceae0355d66476e21a1dd8e8af9f68de95b4019da2cda8b177c9a2255dad31d0', + }, + }, + }) + vod_detail_info = resp['VodDetailInfos'][vid] vod_info = vod_detail_info['vodInfo'] title = vod_info['title'] + if try_get(vod_info, lambda x: x['playbackRights']['playbackRights'] != 'Normal'): + playback_rights_setting = vod_info['playbackRights']['playbackRightsSetting'] + if playback_rights_setting == 'SubscriberOnly': + raise ExtractorError('This video is only available for subscribers', expected=True) + else: + raise ExtractorError(f'This video is not available ({playback_rights_setting})', expected=True) + language = vod_info.get('languageName') formats = [] for play_info in (vod_info.get('playInfos') or []): @@ -163,23 +167,6 @@ def _real_extract(self, url): category = vod_info.get('categoryName') get_count = lambda x: int_or_none(vod_info.get(x + 'Num')) - comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or [] - comments = [] - for comment in comment_list: - content = comment.get('content') - if not content: - continue - author = comment.get('author') or {} - parent = comment.get('parentID') - comments.append({ - 'author': author.get('nickName'), - 'author_id': str_or_none(author.get('uid')), - 'id': str_or_none(comment.get('commentID')), - 'text': content, - 'timestamp': int_or_none(comment.get('createdAt')), - 'parent': 'root' if parent == 0 else str_or_none(parent), - }) - info = { 'id': vid, 'title': title, @@ -190,12 +177,51 @@ def _real_extract(self, url): 'view_count': get_count('watch'), 'like_count': get_count('like'), 'comment_count': get_count('comment'), - 'comments': comments, 'categories': [category] if category else None, + '__post_extractor': self.extract_comments(vid), } info.update(self._extract_streamer_info(vod_detail_info)) return info + def _get_comments(self, vid): + for page in itertools.count(1): + comments_json = self._call_api(vid, data={ + 'operationName': 'getCommentList', + 'variables': { + 'params': { + 'appInfo': { + 'postID': vid, + }, + 'preview': {}, + 'pageSize': 99, + 'page': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'be8e5f9522ddac7f7c604c0d284fd22481813263580849926c4c66fb767eed25', + }, + }, + }) + for comment in comments_json['commentList']: + content = comment.get('content') + if not content: + continue + author = comment.get('author') or {} + parent = comment.get('parentID') + yield { + 'author': author.get('nickName'), + 'author_id': str_or_none(author.get('uid')), + 'id': str_or_none(comment.get('commentID')), + 'text': content, + 'timestamp': int_or_none(comment.get('createdAt')), + 'parent': 'root' if parent == 0 else str_or_none(parent), + } + + if comments_json['lastPage']: + break + class TrovoChannelBaseIE(TrovoBaseIE): def _get_vod_json(self, page, uid): @@ -215,9 +241,15 @@ def _entries(self, uid): def _real_extract(self, url): id = self._match_id(url) - uid = str(self._call_api(id, query={ - 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id - })['data']['getLiveInfo']['streamerInfo']['uid']) + live_info = self._call_api(id, data={ + 'operationName': 'live_LiveReaderService_GetLiveInfo', + 'variables': { + 'params': { + 'userName': id, + }, + }, + }) + uid = str(live_info['streamerInfo']['uid']) return self.playlist_result(self._entries(uid), playlist_id=uid) @@ -233,13 +265,25 @@ class TrovoChannelVodIE(TrovoChannelBaseIE): }, }] - _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}' _TYPE = 'video' def _get_vod_json(self, page, uid): - return self._call_api(uid, query={ - 'query': self._QUERY % (page, uid) - })['data']['getChannelLtvVideoInfos'] + return self._call_api(uid, data={ + 'operationName': 'getChannelLtvVideoInfos', + 'variables': { + 'params': { + 'channelID': int(uid), + 'pageSize': 99, + 'currPage': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': '78fe32792005eab7e922cafcdad9c56bed8bbc5f5df3c7cd24fcb84a744f5f78', + }, + }, + }) class TrovoChannelClipIE(TrovoChannelBaseIE): @@ -254,10 +298,22 @@ class TrovoChannelClipIE(TrovoChannelBaseIE): }, }] - _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}' _TYPE = 'clip' def _get_vod_json(self, page, uid): - return self._call_api(uid, query={ - 'query': self._QUERY % (page, uid) - })['data']['getChannelClipVideoInfos'] + return self._call_api(uid, data={ + 'operationName': 'getChannelClipVideoInfos', + 'variables': { + 'params': { + 'channelID': int(uid), + 'pageSize': 99, + 'currPage': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'e7924bfe20059b5c75fc8ff9e7929f43635681a7bdf3befa01072ed22c8eff31', + }, + }, + })