diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index bdc39a7dd..e23fdfd6a 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -1,68 +1,83 @@
 import functools
 import urllib.parse
+import urllib.error
 import hashlib
 import json
+import time
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     OnDemandPagedList,
     int_or_none,
+    jwt_decode_hs256,
     mimetype2ext,
     qualities,
     traverse_obj,
+    try_call,
     unified_timestamp,
 )
 
 
-# https://github.com/yt-dlp/yt-dlp/issues/6671
 class IwaraBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'iwara'
     _USERTOKEN = None
     _MEDIATOKEN = None
-    _NETRC_MACHINE = 'iwara'
 
-    def _get_user_token(self, invalidate=False):
-        if not invalidate and self._USERTOKEN:
-            return self._USERTOKEN
+    def _is_token_expired(self, token, token_type):
+        # User token TTL == ~3 weeks, Media token TTL == ~1 hour
+        if (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 120):
+            self.to_screen(f'{token_type} token has expired')
+            return True
 
+    def _get_user_token(self):
         username, password = self._get_login_info()
-        IwaraBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
-        if not IwaraBaseIE._USERTOKEN or invalidate:
-            IwaraBaseIE._USERTOKEN = self._download_json(
+        if not username or not password:
+            return
+
+        user_token = IwaraBaseIE._USERTOKEN or self.cache.load(self._NETRC_MACHINE, username)
+        if not user_token or self._is_token_expired(user_token, 'User'):
+            response = self._download_json(
                 'https://api.iwara.tv/user/login', None, note='Logging in',
-                data=json.dumps({
+                headers={'Content-Type': 'application/json'}, data=json.dumps({
                     'email': username,
                     'password': password
-                }).encode('utf-8'),
-                headers={
+                }).encode(), expected_status=lambda x: True)
+            user_token = traverse_obj(response, ('token', {str}))
+            if not user_token:
+                error = traverse_obj(response, ('message', {str}))
+                if error and 'invalidLogin' in error:  # error is None when the response carries no string message
+                    raise ExtractorError('Invalid login credentials', expected=True)
+                else:
+                    raise ExtractorError(f'Iwara API said: {error or "nothing"}')
+
+            self.cache.store(self._NETRC_MACHINE, username, user_token)
+
+        IwaraBaseIE._USERTOKEN = user_token
+
+    def _get_media_token(self):
+        self._get_user_token()
+        if not IwaraBaseIE._USERTOKEN:
+            return  # user has not passed credentials
+
+        if not IwaraBaseIE._MEDIATOKEN or self._is_token_expired(IwaraBaseIE._MEDIATOKEN, 'Media'):
+            IwaraBaseIE._MEDIATOKEN = self._download_json(
+                'https://api.iwara.tv/user/token', None, note='Fetching media token',
+                data=b'', headers={
+                    'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}',
                     'Content-Type': 'application/json'
-                })['token']
+                })['accessToken']
 
-            self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
+        return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'}
 
-        return self._USERTOKEN
-
-    def _get_media_token(self, invalidate=False):
-        if not invalidate and self._MEDIATOKEN:
-            return self._MEDIATOKEN
-
-        IwaraBaseIE._MEDIATOKEN = self._download_json(
-            'https://api.iwara.tv/user/token', None, note='Fetching media token',
-            data=b'',  # Need to have some data here, even if it's empty
-            headers={
-                'Authorization': f'Bearer {self._get_user_token()}',
-                'Content-Type': 'application/json'
-            })['accessToken']
-
-        return self._MEDIATOKEN
+    def _perform_login(self, username, password):
+        self._get_media_token()
 
 
 class IwaraIE(IwaraBaseIE):
     IE_NAME = 'iwara'
     _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
     _TESTS = [{
-        # this video cannot be played because of migration
-        'only_matching': True,
         'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
         'info_dict': {
             'id': 'k2ayoueezfkx6gvq',
@@ -79,25 +94,29 @@ class IwaraIE(IwaraBaseIE):
             'timestamp': 1677843869,
             'modified_timestamp': 1679056362,
         },
+        'skip': 'this video cannot be played because of migration',
     }, {
         'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
-        'md5': '20691ce1473ec2766c0788e14c60ce66',
+        'md5': '7645f966f069b8ec9210efd9130c9aad',
         'info_dict': {
             'id': '1ywe1sbkqwumpdxz5',
             'ext': 'mp4',
             'age_limit': 18,
-            'title': 'Aponia 阿波尼亚SEX Party Tonight 手动脱衣 大奶 裸腿',
-            'description': 'md5:0c4c310f2e0592d68b9f771d348329ca',
-            'uploader': '龙也zZZ',
+            'title': 'Aponia アポニア SEX Party Tonight 手の脱衣 巨乳 ',
+            'description': 'md5:3f60016fff22060eef1ef26d430b1f67',
+            'uploader': 'Lyu ya',
             'uploader_id': 'user792540',
             'tags': [
                 'uncategorized'
             ],
-            'like_count': 1809,
-            'view_count': 25156,
-            'comment_count': 1,
+            'like_count': int,
+            'view_count': int,
+            'comment_count': int,
             'timestamp': 1678732213,
-            'modified_timestamp': 1679110271,
+            'modified_timestamp': int,
+            'thumbnail': 'https://files.iwara.tv/image/thumbnail/581d12b5-46f4-4f15-beb2-cfe2cde5d13d/thumbnail-00.jpg',
+            'modified_date': '20230614',
+            'upload_date': '20230313',
         },
     }, {
         'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
@@ -112,12 +131,15 @@ class IwaraIE(IwaraBaseIE):
             'tags': [
                 'pee'
             ],
-            'like_count': 192,
-            'view_count': 12119,
-            'comment_count': 0,
+            'like_count': int,
+            'view_count': int,
+            'comment_count': int,
             'timestamp': 1598880567,
-            'modified_timestamp': 1598908995,
-            'availability': 'needs_auth',
+            'modified_timestamp': int,
+            'upload_date': '20200831',
+            'modified_date': '20230605',
+            'thumbnail': 'https://files.iwara.tv/image/thumbnail/7693e881-d302-42a4-a780-f16d66b5dadd/thumbnail-00.jpg',
+            # 'availability': 'needs_auth',
         },
     }]
 
@@ -142,17 +164,16 @@ def _extract_formats(self, video_id, fileurl):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        username, password = self._get_login_info()
-        headers = {
-            'Authorization': f'Bearer {self._get_media_token()}',
-        } if username and password else None
-        video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True, headers=headers)
+        username, _ = self._get_login_info()
+        video_data = self._download_json(
+            f'https://api.iwara.tv/video/{video_id}', video_id,
+            expected_status=lambda x: True, headers=self._get_media_token())
         errmsg = video_data.get('message')
         # at this point we can actually get uploaded user info, but do we need it?
         if errmsg == 'errors.privateVideo':
-            self.raise_login_required('Private video. Login if you have permissions to watch')
+            self.raise_login_required('Private video. Login if you have permissions to watch', method='password')
         elif errmsg == 'errors.notFound' and not username:
-            self.raise_login_required('Video may need login to view')
+            self.raise_login_required('Video may need login to view', method='password')
         elif errmsg:  # None if success
             raise ExtractorError(f'Iwara says: {errmsg}')
 
@@ -181,15 +202,6 @@ def _real_extract(self, url):
             'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
         }
 
-    def _perform_login(self, username, password):
-        if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
-            self.write_debug('Skipping logging in')
-            return
-
-        IwaraBaseIE._USERTOKEN = self._get_user_token(True)
-        self._get_media_token(True)
-        self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
-
 
 class IwaraUserIE(IwaraBaseIE):
     _VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
@@ -200,12 +212,14 @@ class IwaraUserIE(IwaraBaseIE):
         'url': 'https://iwara.tv/profile/user792540/videos',
         'info_dict': {
             'id': 'user792540',
+            'title': 'Lyu ya',
         },
-        'playlist_mincount': 80,
+        'playlist_mincount': 70,
     }, {
         'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
         'info_dict': {
             'id': 'theblackbirdcalls',
+            'title': 'TheBlackbirdCalls',
         },
         'playlist_mincount': 723,
     }, {
@@ -214,6 +228,13 @@ class IwaraUserIE(IwaraBaseIE):
     }, {
         'url': 'https://iwara.tv/profile/theblackbirdcalls',
         'only_matching': True,
+    }, {
+        'url': 'https://www.iwara.tv/profile/lumymmd',
+        'info_dict': {
+            'id': 'lumymmd',
+            'title': 'Lumy MMD',
+        },
+        'playlist_mincount': 1,
     }]
 
     def _entries(self, playlist_id, user_id, page):
@@ -225,7 +246,7 @@ def _entries(self, playlist_id, user_id, page):
                 'sort': 'date',
                 'user': user_id,
                 'limit': self._PER_PAGE,
-            })
+            }, headers=self._get_media_token())
         for x in traverse_obj(videos, ('results', ..., 'id')):
             yield self.url_result(f'https://iwara.tv/video/{x}')
 
@@ -244,7 +265,6 @@ def _real_extract(self, url):
 
 
 class IwaraPlaylistIE(IwaraBaseIE):
-    # the ID is an UUID but I don't think it's necessary to write concrete regex
     _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
     IE_NAME = 'iwara:playlist'
     _PER_PAGE = 32
@@ -260,7 +280,8 @@ class IwaraPlaylistIE(IwaraBaseIE):
     def _entries(self, playlist_id, first_page, page):
         videos = self._download_json(
             'https://api.iwara.tv/videos', playlist_id, f'Downloading page {page}',
-            query={'page': page, 'limit': self._PER_PAGE}) if page else first_page
+            query={'page': page, 'limit': self._PER_PAGE},
+            headers=self._get_media_token()) if page else first_page
         for x in traverse_obj(videos, ('results', ..., 'id')):
             yield self.url_result(f'https://iwara.tv/video/{x}')
 
@@ -268,7 +289,7 @@ def _real_extract(self, url):
         playlist_id = self._match_id(url)
         page_0 = self._download_json(
             f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
-            note='Requesting playlist info')
+            note='Requesting playlist info', headers=self._get_media_token())
 
         return self.playlist_result(
             OnDemandPagedList(