[extractor/vk] Fix extractor (#4128)

Closes #4437
Authored by: Mehavoid
This commit is contained in:
Mehavoid 2022-07-27 23:31:03 +03:00 committed by GitHub
parent bfbb5a1bb1
commit 59f63c8f0f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,11 +1,17 @@
import collections import collections
import hashlib
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .dailymotion import DailymotionIE
from .odnoklassniki import OdnoklassnikiIE
from .pladform import PladformIE
from .vimeo import VimeoIE
from .youtube import YoutubeIE
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
clean_html,
ExtractorError, ExtractorError,
clean_html,
get_element_by_class, get_element_by_class,
int_or_none, int_or_none,
orderedSet, orderedSet,
@ -13,19 +19,29 @@
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
unified_timestamp, unified_timestamp,
update_url_query,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from .dailymotion import DailymotionIE
from .odnoklassniki import OdnoklassnikiIE
from .pladform import PladformIE
from .vimeo import VimeoIE
from .youtube import YoutubeIE
class VKBaseIE(InfoExtractor): class VKBaseIE(InfoExtractor):
_NETRC_MACHINE = 'vk' _NETRC_MACHINE = 'vk'
def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
    """Download a page, solving VK's 429 WAF challenge when it is served instead.

    The request is first delegated to the parent implementation. If the final
    URL of the response is the ``https://vk.com/429.html?...`` challenge page
    and a ``hash429`` cookie was set for it, the challenge is answered by
    requesting the challenge URL with ``key`` set to the MD5 hex digest of the
    cookie value, and the original request is then repeated.

    Parameters mirror the parent method; ``fatal`` is honoured for the initial
    request, the challenge request and the retry alike.

    Returns whatever the parent implementation returns for the (possibly
    retried) request.
    """
    response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)

    # A falsy response (e.g. a non-fatal failure) has no final URL to inspect.
    challenge_url = response[1].geturl() if response else ''
    cookie = None
    if challenge_url.startswith('https://vk.com/429.html?'):
        cookie = self._get_cookies(challenge_url).get('hash429')
    if not cookie:
        # Not a challenge page, or nothing to derive the answer from.
        return response

    # MD5 is dictated by the challenge itself; it is not used for security here.
    hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
    self._request_webpage(
        update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
        note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
    # Propagate the caller's `fatal` flag: a caller that asked for a non-fatal
    # download must not get an exception from the retry.
    return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
def _perform_login(self, username, password): def _perform_login(self, username, password):
login_page, url_handle = self._download_webpage_handle( login_page, url_handle = self._download_webpage_handle(
'https://vk.com', None, 'Downloading login page') 'https://vk.com', None, 'Downloading login page')
@ -51,11 +67,14 @@ def _perform_login(self, username, password):
'Unable to login, incorrect username and/or password', expected=True) 'Unable to login, incorrect username and/or password', expected=True)
def _download_payload(self, path, video_id, data, fatal=True): def _download_payload(self, path, video_id, data, fatal=True):
endpoint = f'https://vk.com/{path}.php'
data['al'] = 1 data['al'] = 1
code, payload = self._download_json( code, payload = self._download_json(
'https://vk.com/%s.php' % path, video_id, endpoint, video_id, data=urlencode_postdata(data), fatal=fatal,
data=urlencode_postdata(data), fatal=fatal, headers={
headers={'X-Requested-With': 'XMLHttpRequest'})['payload'] 'Referer': endpoint,
'X-Requested-With': 'XMLHttpRequest',
})['payload']
if code == '3': if code == '3':
self.raise_login_required() self.raise_login_required()
elif code == '8': elif code == '8':
@ -84,17 +103,20 @@ class VKIE(VKBaseIE):
_TESTS = [ _TESTS = [
{ {
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': { 'info_dict': {
'id': '-77521_162222515', 'id': '-77521_162222515',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня', 'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'uploader_id': '-77521', 'uploader_id': '39545378',
'duration': 195, 'duration': 195,
'timestamp': 1329049880, 'timestamp': 1329049880,
'upload_date': '20120212', 'upload_date': '20120212',
'comment_count': int,
'like_count': int,
'thumbnail': r're:https?://.+\.jpg$',
}, },
'params': {'skip_download': 'm3u8'},
}, },
{ {
'url': 'http://vk.com/video205387401_165548505', 'url': 'http://vk.com/video205387401_165548505',
@ -107,12 +129,14 @@ class VKIE(VKBaseIE):
'duration': 9, 'duration': 9,
'timestamp': 1374364108, 'timestamp': 1374364108,
'upload_date': '20130720', 'upload_date': '20130720',
'comment_count': int,
'like_count': int,
'thumbnail': r're:https?://.+\.jpg$',
} }
}, },
{ {
'note': 'Embedded video', 'note': 'Embedded video',
'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa', 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': { 'info_dict': {
'id': '-77521_162222515', 'id': '-77521_162222515',
'ext': 'mp4', 'ext': 'mp4',
@ -121,8 +145,10 @@ class VKIE(VKBaseIE):
'duration': 195, 'duration': 195,
'upload_date': '20120212', 'upload_date': '20120212',
'timestamp': 1329049880, 'timestamp': 1329049880,
'uploader_id': '-77521', 'uploader_id': '39545378',
'thumbnail': r're:https?://.+\.jpg$',
}, },
'params': {'skip_download': 'm3u8'},
}, },
{ {
# VIDEO NOW REMOVED # VIDEO NOW REMOVED
@ -176,8 +202,13 @@ class VKIE(VKBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': '8 серия (озвучка)', 'title': '8 серия (озвучка)',
'duration': 8383, 'duration': 8383,
'comment_count': int,
'uploader': 'Dizi2021',
'like_count': int,
'timestamp': 1640162189,
'upload_date': '20211222', 'upload_date': '20211222',
'view_count': int, 'uploader_id': '-93049196',
'thumbnail': r're:https?://.+\.jpg$',
}, },
}, },
{ {
@ -204,10 +235,23 @@ class VKIE(VKBaseIE):
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 178, 'duration': 178,
'upload_date': '20130116', 'upload_date': '20130117',
'uploader': "Children's Joy Foundation Inc.", 'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf', 'uploader_id': 'thecjf',
'view_count': int, 'view_count': int,
'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
'availability': 'public',
'like_count': int,
'live_status': 'not_live',
'playable_in_embed': True,
'channel': 'Children\'s Joy Foundation Inc.',
'uploader_url': 'http://www.youtube.com/user/thecjf',
'thumbnail': r're:https?://.+\.jpg$',
'tags': 'count:27',
'start_time': 0.0,
'categories': ['Nonprofits & Activism'],
'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
'age_limit': 0,
}, },
}, },
{ {
@ -223,9 +267,7 @@ class VKIE(VKBaseIE):
'uploader_id': 'x1p5vl5', 'uploader_id': 'x1p5vl5',
'timestamp': 1473877246, 'timestamp': 1473877246,
}, },
'params': { 'skip': 'Removed'
'skip_download': True,
},
}, },
{ {
# video key is extra_data not url\d+ # video key is extra_data not url\d+
@ -240,9 +282,7 @@ class VKIE(VKBaseIE):
'timestamp': 1454859345, 'timestamp': 1454859345,
'upload_date': '20160207', 'upload_date': '20160207',
}, },
'params': { 'skip': 'Removed',
'skip_download': True,
},
}, },
{ {
# finished live stream, postlive_mp4 # finished live stream, postlive_mp4
@ -253,11 +293,12 @@ class VKIE(VKBaseIE):
'title': 'ИгроМир 2016 День 1 — Игромания Утром', 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
'uploader': 'Игромания', 'uploader': 'Игромания',
'duration': 5239, 'duration': 5239,
# TODO: use act=show to extract view_count
# 'view_count': int,
'upload_date': '20160929', 'upload_date': '20160929',
'uploader_id': '-387766', 'uploader_id': '-387766',
'timestamp': 1475137527, 'timestamp': 1475137527,
'thumbnail': r're:https?://.+\.jpg$',
'comment_count': int,
'like_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -317,7 +358,7 @@ def _real_extract(self, url):
mv_data = {} mv_data = {}
if video_id: if video_id:
data = { data = {
'act': 'show_inline', 'act': 'show',
'video': video_id, 'video': video_id,
} }
# Some videos (removed?) can only be downloaded with list id specified # Some videos (removed?) can only be downloaded with list id specified