[ie/weibo] Fix extraction (#8463)

Closes #8445
Authored by: c-basalt
This commit is contained in:
c-basalt 2023-11-11 15:02:59 -05:00 committed by GitHub
parent 312a2d1e8b
commit 15b252dfd2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,3 +1,4 @@
import json
import random import random
import itertools import itertools
import urllib.parse import urllib.parse
@ -18,24 +19,33 @@
class WeiboBaseIE(InfoExtractor): class WeiboBaseIE(InfoExtractor):
def _update_visitor_cookies(self, video_id): def _update_visitor_cookies(self, visitor_url, video_id):
headers = {'Referer': visitor_url}
chrome_ver = self._search_regex(
r'Chrome/(\d+)', self.get_param('http_headers')['User-Agent'], 'user agent version', default='90')
visitor_data = self._download_json( visitor_data = self._download_json(
'https://passport.weibo.com/visitor/genvisitor', video_id, 'https://passport.weibo.com/visitor/genvisitor', video_id,
note='Generating first-visit guest request', note='Generating first-visit guest request',
transform_source=strip_jsonp, headers=headers, transform_source=strip_jsonp,
data=urlencode_postdata({ data=urlencode_postdata({
'cb': 'gen_callback', 'cb': 'gen_callback',
'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}', 'fp': json.dumps({
})) 'os': '1',
'browser': f'Chrome{chrome_ver},0,0,0',
'fonts': 'undefined',
'screenInfo': '1920*1080*24',
'plugins': ''
}, separators=(',', ':'))}))['data']
self._download_webpage( self._download_webpage(
'https://passport.weibo.com/visitor/visitor', video_id, 'https://passport.weibo.com/visitor/visitor', video_id,
note='Running first-visit callback to get guest cookies', note='Running first-visit callback to get guest cookies',
query={ headers=headers, query={
'a': 'incarnate', 'a': 'incarnate',
't': visitor_data['data']['tid'], 't': visitor_data['tid'],
'w': 2, 'w': 3 if visitor_data.get('new_tid') else 2,
'c': '%03d' % visitor_data['data']['confidence'], 'c': f'{visitor_data.get("confidence", 100):03d}',
'gc': '',
'cb': 'cross_domain', 'cb': 'cross_domain',
'from': 'weibo', 'from': 'weibo',
'_rand': random.random(), '_rand': random.random(),
@ -44,7 +54,7 @@ def _update_visitor_cookies(self, video_id):
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs): def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs) webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com': if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
self._update_visitor_cookies(video_id) self._update_visitor_cookies(urlh.url, video_id)
webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs) webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
return self._parse_json(webpage, video_id, fatal=fatal) return self._parse_json(webpage, video_id, fatal=fatal)