mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
[extractor/vk] Fix extractor (#4128)
Closes #4437 Authored by: Mehavoid
This commit is contained in:
parent
bfbb5a1bb1
commit
59f63c8f0f
1 changed files with 65 additions and 24 deletions
|
@ -1,11 +1,17 @@
|
||||||
import collections
|
import collections
|
||||||
|
import hashlib
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from .dailymotion import DailymotionIE
|
||||||
|
from .odnoklassniki import OdnoklassnikiIE
|
||||||
|
from .pladform import PladformIE
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
@ -13,19 +19,29 @@
|
||||||
str_to_int,
|
str_to_int,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
from .dailymotion import DailymotionIE
|
|
||||||
from .odnoklassniki import OdnoklassnikiIE
|
|
||||||
from .pladform import PladformIE
|
|
||||||
from .vimeo import VimeoIE
|
|
||||||
from .youtube import YoutubeIE
|
|
||||||
|
|
||||||
|
|
||||||
class VKBaseIE(InfoExtractor):
|
class VKBaseIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'vk'
|
_NETRC_MACHINE = 'vk'
|
||||||
|
|
||||||
|
def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
    """Download a page, answering the vk.com 429 challenge when one is served.

    The request is first delegated to the parent implementation. If the
    final URL of the response is the ``https://vk.com/429.html?`` challenge
    page and a ``hash429`` cookie was set for it, the challenge URL is
    requested again with ``key`` set to the MD5 hex digest of that cookie's
    value, and the original download is then retried.

    @param url_or_request  URL or request object, passed through unchanged
    @param video_id        identifier passed through to the parent calls
    @param fatal           passed to every request made here, including the
                           retry, so a non-fatal caller never gets an
                           exception from the second attempt
    @returns               whatever the parent implementation returns: the
                           first response when no challenge (or no cookie)
                           is seen, otherwise the response of the retry
    """
    response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
    # A falsy response (non-fatal failure) has no URL handle to inspect, so
    # fall back to an empty URL, which can never match the challenge prefix.
    challenge_url, cookie = response[1].geturl() if response else '', None
    if challenge_url.startswith('https://vk.com/429.html?'):
        cookie = self._get_cookies(challenge_url).get('hash429')
    if not cookie:
        # Either not a challenge page or nothing to answer it with.
        return response

    hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
    self._request_webpage(
        update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
        note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
    # Retry with the caller's `fatal` rather than a hard-coded True, so that
    # fatal=False callers still get a falsy result instead of an exception.
    return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
login_page, url_handle = self._download_webpage_handle(
|
login_page, url_handle = self._download_webpage_handle(
|
||||||
'https://vk.com', None, 'Downloading login page')
|
'https://vk.com', None, 'Downloading login page')
|
||||||
|
@ -51,11 +67,14 @@ def _perform_login(self, username, password):
|
||||||
'Unable to login, incorrect username and/or password', expected=True)
|
'Unable to login, incorrect username and/or password', expected=True)
|
||||||
|
|
||||||
def _download_payload(self, path, video_id, data, fatal=True):
|
def _download_payload(self, path, video_id, data, fatal=True):
|
||||||
|
endpoint = f'https://vk.com/{path}.php'
|
||||||
data['al'] = 1
|
data['al'] = 1
|
||||||
code, payload = self._download_json(
|
code, payload = self._download_json(
|
||||||
'https://vk.com/%s.php' % path, video_id,
|
endpoint, video_id, data=urlencode_postdata(data), fatal=fatal,
|
||||||
data=urlencode_postdata(data), fatal=fatal,
|
headers={
|
||||||
headers={'X-Requested-With': 'XMLHttpRequest'})['payload']
|
'Referer': endpoint,
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
})['payload']
|
||||||
if code == '3':
|
if code == '3':
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
elif code == '8':
|
elif code == '8':
|
||||||
|
@ -84,17 +103,20 @@ class VKIE(VKBaseIE):
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
|
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
|
||||||
'md5': '7babad3b85ea2e91948005b1b8b0cb84',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '-77521_162222515',
|
'id': '-77521_162222515',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'ProtivoGunz - Хуёвая песня',
|
'title': 'ProtivoGunz - Хуёвая песня',
|
||||||
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
|
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
|
||||||
'uploader_id': '-77521',
|
'uploader_id': '39545378',
|
||||||
'duration': 195,
|
'duration': 195,
|
||||||
'timestamp': 1329049880,
|
'timestamp': 1329049880,
|
||||||
'upload_date': '20120212',
|
'upload_date': '20120212',
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg$',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://vk.com/video205387401_165548505',
|
'url': 'http://vk.com/video205387401_165548505',
|
||||||
|
@ -107,12 +129,14 @@ class VKIE(VKBaseIE):
|
||||||
'duration': 9,
|
'duration': 9,
|
||||||
'timestamp': 1374364108,
|
'timestamp': 1374364108,
|
||||||
'upload_date': '20130720',
|
'upload_date': '20130720',
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg$',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'note': 'Embedded video',
|
'note': 'Embedded video',
|
||||||
'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
|
'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
|
||||||
'md5': '7babad3b85ea2e91948005b1b8b0cb84',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '-77521_162222515',
|
'id': '-77521_162222515',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -121,8 +145,10 @@ class VKIE(VKBaseIE):
|
||||||
'duration': 195,
|
'duration': 195,
|
||||||
'upload_date': '20120212',
|
'upload_date': '20120212',
|
||||||
'timestamp': 1329049880,
|
'timestamp': 1329049880,
|
||||||
'uploader_id': '-77521',
|
'uploader_id': '39545378',
|
||||||
|
'thumbnail': r're:https?://.+\.jpg$',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# VIDEO NOW REMOVED
|
# VIDEO NOW REMOVED
|
||||||
|
@ -176,8 +202,13 @@ class VKIE(VKBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '8 серия (озвучка)',
|
'title': '8 серия (озвучка)',
|
||||||
'duration': 8383,
|
'duration': 8383,
|
||||||
|
'comment_count': int,
|
||||||
|
'uploader': 'Dizi2021',
|
||||||
|
'like_count': int,
|
||||||
|
'timestamp': 1640162189,
|
||||||
'upload_date': '20211222',
|
'upload_date': '20211222',
|
||||||
'view_count': int,
|
'uploader_id': '-93049196',
|
||||||
|
'thumbnail': r're:https?://.+\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -204,10 +235,23 @@ class VKIE(VKBaseIE):
|
||||||
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
||||||
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
||||||
'duration': 178,
|
'duration': 178,
|
||||||
'upload_date': '20130116',
|
'upload_date': '20130117',
|
||||||
'uploader': "Children's Joy Foundation Inc.",
|
'uploader': "Children's Joy Foundation Inc.",
|
||||||
'uploader_id': 'thecjf',
|
'uploader_id': 'thecjf',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
|
||||||
|
'availability': 'public',
|
||||||
|
'like_count': int,
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'channel': 'Children\'s Joy Foundation Inc.',
|
||||||
|
'uploader_url': 'http://www.youtube.com/user/thecjf',
|
||||||
|
'thumbnail': r're:https?://.+\.jpg$',
|
||||||
|
'tags': 'count:27',
|
||||||
|
'start_time': 0.0,
|
||||||
|
'categories': ['Nonprofits & Activism'],
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -223,9 +267,7 @@ class VKIE(VKBaseIE):
|
||||||
'uploader_id': 'x1p5vl5',
|
'uploader_id': 'x1p5vl5',
|
||||||
'timestamp': 1473877246,
|
'timestamp': 1473877246,
|
||||||
},
|
},
|
||||||
'params': {
|
'skip': 'Removed'
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# video key is extra_data not url\d+
|
# video key is extra_data not url\d+
|
||||||
|
@ -240,9 +282,7 @@ class VKIE(VKBaseIE):
|
||||||
'timestamp': 1454859345,
|
'timestamp': 1454859345,
|
||||||
'upload_date': '20160207',
|
'upload_date': '20160207',
|
||||||
},
|
},
|
||||||
'params': {
|
'skip': 'Removed',
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# finished live stream, postlive_mp4
|
# finished live stream, postlive_mp4
|
||||||
|
@ -253,11 +293,12 @@ class VKIE(VKBaseIE):
|
||||||
'title': 'ИгроМир 2016 День 1 — Игромания Утром',
|
'title': 'ИгроМир 2016 День 1 — Игромания Утром',
|
||||||
'uploader': 'Игромания',
|
'uploader': 'Игромания',
|
||||||
'duration': 5239,
|
'duration': 5239,
|
||||||
# TODO: use act=show to extract view_count
|
|
||||||
# 'view_count': int,
|
|
||||||
'upload_date': '20160929',
|
'upload_date': '20160929',
|
||||||
'uploader_id': '-387766',
|
'uploader_id': '-387766',
|
||||||
'timestamp': 1475137527,
|
'timestamp': 1475137527,
|
||||||
|
'thumbnail': r're:https?://.+\.jpg$',
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -317,7 +358,7 @@ def _real_extract(self, url):
|
||||||
mv_data = {}
|
mv_data = {}
|
||||||
if video_id:
|
if video_id:
|
||||||
data = {
|
data = {
|
||||||
'act': 'show_inline',
|
'act': 'show',
|
||||||
'video': video_id,
|
'video': video_id,
|
||||||
}
|
}
|
||||||
# Some videos (removed?) can only be downloaded with list id specified
|
# Some videos (removed?) can only be downloaded with list id specified
|
||||||
|
|
Loading…
Reference in a new issue