Merge branch 'yt-dlp:master' into vrporn-com

This commit is contained in:
horny-is-back 2024-09-09 19:19:20 +02:00 committed by GitHub
commit 6b750c9e75
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 373 additions and 124 deletions

View file

@ -80,5 +80,8 @@ body:
- type: markdown - type: markdown
attributes: attributes:
value: | value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer. > [!CAUTION]
### If you receive any replies asking you download a file, do NOT follow the download links! > ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -92,5 +92,8 @@ body:
- type: markdown - type: markdown
attributes: attributes:
value: | value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer. > [!CAUTION]
### If you receive any replies asking you download a file, do NOT follow the download links! > ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -88,5 +88,8 @@ body:
- type: markdown - type: markdown
attributes: attributes:
value: | value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer. > [!CAUTION]
### If you receive any replies asking you download a file, do NOT follow the download links! > ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -73,5 +73,8 @@ body:
- type: markdown - type: markdown
attributes: attributes:
value: | value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer. > [!CAUTION]
### If you receive any replies asking you download a file, do NOT follow the download links! > ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -67,5 +67,8 @@ body:
- type: markdown - type: markdown
attributes: attributes:
value: | value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer. > [!CAUTION]
### If you receive any replies asking you download a file, do NOT follow the download links! > ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -73,5 +73,8 @@ body:
- type: markdown - type: markdown
attributes: attributes:
value: | value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer. > [!CAUTION]
### If you receive any replies asking you download a file, do NOT follow the download links! > ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -266,7 +266,7 @@ jobs:
# We need to ignore wheels otherwise we break universal2 builds # We need to ignore wheels otherwise we break universal2 builds
python3 -m pip install -U --no-binary :all: -r requirements.txt python3 -m pip install -U --no-binary :all: -r requirements.txt
# We need to fuse our own universal2 wheels for curl_cffi # We need to fuse our own universal2 wheels for curl_cffi
python3 -m pip install -U delocate python3 -m pip install -U 'delocate==0.11.0'
mkdir curl_cffi_whls curl_cffi_universal2 mkdir curl_cffi_whls curl_cffi_universal2
python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do

View file

@ -1,4 +1,4 @@
name: Anti-Spam name: Issue Lockdown
on: on:
issues: issues:
types: [opened] types: [opened]
@ -9,6 +9,7 @@ permissions:
jobs: jobs:
lockdown: lockdown:
name: Issue Lockdown name: Issue Lockdown
if: vars.ISSUE_LOCKDOWN
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: "Lock new issue" - name: "Lock new issue"
@ -17,4 +18,4 @@ jobs:
ISSUE_NUMBER: ${{ github.event.issue.number }} ISSUE_NUMBER: ${{ github.event.issue.number }}
REPOSITORY: ${{ github.repository }} REPOSITORY: ${{ github.repository }}
run: | run: |
gh issue lock "${ISSUE_NUMBER}" -r too_heated -R "${REPOSITORY}" gh issue lock "${ISSUE_NUMBER}" -R "${REPOSITORY}"

17
.github/workflows/sanitize-comment.yml vendored Normal file
View file

@ -0,0 +1,17 @@
name: Sanitize comment
on:
issue_comment:
types: [created, edited]
permissions:
issues: write
jobs:
sanitize-comment:
name: Sanitize comment
if: vars.SANITIZE_COMMENT && !github.event.issue.pull_request
runs-on: ubuntu-latest
steps:
- name: Sanitize comment
uses: yt-dlp/sanitize-comment@v1

View file

@ -49,8 +49,11 @@
- type: markdown - type: markdown
attributes: attributes:
value: | value: |
### NOTE: Due to a recent increase in malicious spam activity, this issue will be automatically locked until it is triaged by a maintainer. > [!CAUTION]
### If you receive any replies asking you download a file, do NOT follow the download links! > ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.
'''.strip() '''.strip()
NO_SKIP = ''' NO_SKIP = '''

View file

@ -822,6 +822,24 @@ def test_remove_logging_handler(self, handler, logger_name):
rh.close() rh.close()
assert len(logging_handlers) == before_count assert len(logging_handlers) == before_count
def test_wrap_request_errors(self):
    """Errors raised from a handler's validate/send must reference that handler."""
    class TestRequestHandler(RequestHandler):
        def _validate(self, request):
            # Only requests carrying the x-fail header are rejected
            if request.headers.get('x-fail'):
                raise UnsupportedRequest('test error')

        def _send(self, request: Request):
            # Sending always fails in this stub handler
            raise RequestError('test error')

    rejected_request = Request('http://example.com', headers={'x-fail': '1'})
    plain_request = Request('http://example.com')

    with TestRequestHandler(logger=FakeLogger()) as request_handler:
        # The asserts sit outside the pytest.raises blocks so they actually run
        with pytest.raises(UnsupportedRequest, match='test error') as validate_exc:
            request_handler.validate(rejected_request)
        assert validate_exc.value.handler is request_handler

        with pytest.raises(RequestError, match='test error') as send_exc:
            request_handler.send(plain_request)
        assert send_exc.value.handler is request_handler
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True) @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
class TestUrllibRequestHandler(TestRequestHandlerBase): class TestUrllibRequestHandler(TestRequestHandlerBase):

View file

@ -945,6 +945,7 @@
) )
from .kicker import KickerIE from .kicker import KickerIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kika import KikaIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE from .kommunetv import KommunetvIE

View file

@ -15,7 +15,7 @@
class KhanAcademyBaseIE(InfoExtractor): class KhanAcademyBaseIE(InfoExtractor):
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)' _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70' _PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
def _parse_video(self, video): def _parse_video(self, video):
return { return {
@ -39,7 +39,7 @@ def _real_extract(self, url):
query={ query={
'fastly_cacheable': 'persist_until_publish', 'fastly_cacheable': 'persist_until_publish',
'pcv': self._PUBLISHED_CONTENT_VERSION, 'pcv': self._PUBLISHED_CONTENT_VERSION,
'hash': '1242644265', 'hash': '3712657851',
'variables': json.dumps({ 'variables': json.dumps({
'path': display_id, 'path': display_id,
'countryCode': 'US', 'countryCode': 'US',

126
yt_dlp/extractor/kika.py Normal file
View file

@ -0,0 +1,126 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class KikaIE(InfoExtractor):
    # Handles kika.de URLs of the form .../videos/<slug-with-trailing-digits>
    IE_DESC = 'KiKA.de'
    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
    _GEO_COUNTRIES = ['DE']

    _TESTS = [{
        'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
        'md5': 'fbfc8da483719ef06f396e5e5b938c69',
        'info_dict': {
            'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
            'ext': 'mp4',
            'upload_date': '20240831',
            'timestamp': 1725126600,
            'season_number': 2024,
            'modified_date': '20240831',
            'episode': 'Episode 476',
            'episode_number': 476,
            'season': 'Season 2024',
            'duration': 634,
            'title': 'logo! vom Samstag, 31. August 2024',
            'modified_timestamp': 1725129983,
        },
    }, {
        'url': 'https://www.kika.de/kaltstart/videos/video92498',
        'md5': '710ece827e5055094afeb474beacb7aa',
        'info_dict': {
            'id': 'video92498',
            'ext': 'mp4',
            'title': '7. Wo ist Leo?',
            'description': 'md5:fb48396a5b75068bcac1df74f1524920',
            'duration': 436,
            'timestamp': 1702926876,
            'upload_date': '20231218',
            'episode_number': 7,
            'modified_date': '20240319',
            'modified_timestamp': 1710880610,
            'episode': 'Episode 7',
            'season_number': 1,
            'season': 'Season 1',
        },
    }, {
        'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
        'md5': 'ffd1b700d7de0a6616a1d08544c77294',
        'info_dict': {
            'id': 'video90088',
            'ext': 'mp4',
            'upload_date': '20221102',
            'timestamp': 1667390580,
            'duration': 197,
            'modified_timestamp': 1711093771,
            'episode_number': 8,
            'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
            'modified_date': '20240322',
            'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
            'season_number': 1,
            'episode': 'Episode 8',
            'season': 'Season 1',
        },
    }]

    def _real_extract(self, url):
        """Download the video document and its asset manifest and build the info dict."""
        video_id = self._match_id(url)

        doc = self._download_json(
            f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
        # The document points at a second JSON resource describing the media assets
        video_assets = self._download_json(doc['assets']['url'], video_id)

        # Both subtitle variants are filed under 'de'; TTML is listed before WebVTT
        german_tracks = []
        ttml_resource = url_or_none(video_assets.get('videoSubtitle'))
        if ttml_resource:
            german_tracks.append({
                'url': ttml_resource,
                'ext': 'ttml',
            })
        webvtt_resource = url_or_none(video_assets.get('webvttUrl'))
        if webvtt_resource:
            german_tracks.append({
                'url': webvtt_resource,
                'ext': 'vtt',
            })
        subtitles = {'de': german_tracks} if german_tracks else {}

        formats = list(self._extract_formats(video_assets, video_id))

        metadata = traverse_obj(doc, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('date', {parse_iso8601}),
            'modified_timestamp': ('modificationDate', {parse_iso8601}),
            # Prefer the numeric duration, fall back to parsing the textual one
            'duration': ((
                ('durationInSeconds', {int_or_none}),
                ('duration', {parse_duration})), any),
            'episode_number': ('episodeNumber', {int_or_none}),
            'season_number': ('season', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

    def _extract_formats(self, media_info, video_id):
        """Yield format dicts for each entry of media_info['assets'] that has a valid URL."""
        def known_bitrate(value):
            # NB: bitrate is -1 if unknown
            return None if value == -1 else value

        def known_filesize(value):
            # NB: filesize is 0 if unknown
            return value or None

        for media in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):
            stream_url = media['url']
            ext = determine_ext(stream_url)

            if ext != 'm3u8':
                # Direct file: describe it from the asset's own fields
                yield {
                    'url': stream_url,
                    'format_id': ext,
                    **traverse_obj(media, {
                        'width': ('frameWidth', {int_or_none}),
                        'height': ('frameHeight', {int_or_none}),
                        'filesize': ('fileSize', {int_or_none}, {known_filesize}),
                        'abr': ('bitrateAudio', {int_or_none}, {known_bitrate}),
                        'vbr': ('bitrateVideo', {int_or_none}, {known_bitrate}),
                    }),
                }
                continue

            # HLS manifest: expand into its variants, tolerating download failures
            yield from self._extract_m3u8_formats(
                stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)

View file

@ -13,8 +13,8 @@
class MDRIE(InfoExtractor): class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA' IE_DESC = 'MDR.DE'
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html' _VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
_GEO_COUNTRIES = ['DE'] _GEO_COUNTRIES = ['DE']
@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
'uploader': 'MITTELDEUTSCHER RUNDFUNK', 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
}, },
'skip': '404 not found', 'skip': '404 not found',
}, {
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
'md5': '4930515e36b06c111213e80d1e4aad0e',
'info_dict': {
'id': '19636',
'ext': 'mp4',
'title': 'Baumhaus vom 30. Oktober 2015',
'duration': 134,
'uploader': 'KIKA',
},
'skip': '404 not found',
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
'info_dict': {
'id': '8182',
'ext': 'mp4',
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
'timestamp': 1482541200,
'upload_date': '20161224',
'duration': 4628,
'uploader': 'KIKA',
},
}, { }, {
# audio with alternative playerURL pattern # audio with alternative playerURL pattern
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html', 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
'duration': 3239, 'duration': 3239,
'uploader': 'MITTELDEUTSCHER RUNDFUNK', 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
}, },
}, { 'skip': '404 not found',
# empty bitrateVideo and bitrateAudio
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
'info_dict': {
'id': '128372',
'ext': 'mp4',
'title': 'Der kleine Wichtel kehrt zurück',
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
'duration': 4876,
'timestamp': 1607823300,
'upload_date': '20201213',
'uploader': 'ZDF',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
'only_matching': True,
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
'only_matching': True,
}, { }, {
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html', 'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
'only_matching': True, 'only_matching': True,

View file

@ -36,7 +36,7 @@ class SampleFocusIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id, impersonate=True)
sample_id = self._search_regex( sample_id = self._search_regex(
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)', r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
@ -82,7 +82,15 @@ def extract_count(klass):
return { return {
'id': sample_id, 'id': sample_id,
'title': title, 'title': title,
'formats': [{
'url': mp3_url, 'url': mp3_url,
'ext': 'mp3',
'vcodec': 'none',
'acodec': 'mp3',
'http_headers': {
'Referer': url,
},
}],
'display_id': display_id, 'display_id': display_id,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader': uploader, 'uploader': uploader,

View file

@ -234,13 +234,30 @@ def _parse_config(self, config, video_id):
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'), '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
} }
def _extract_original_format(self, url, video_id, unlisted_hash=None): def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
return self._download_json(
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
video_id, 'Downloading API JSON', headers={
'Authorization': f'jwt {jwt_token}',
'Accept': 'application/json',
}, query={
'fields': ','.join((
'config_url', 'created_time', 'description', 'download', 'license',
'metadata.connections.comments.total', 'metadata.connections.likes.total',
'release_time', 'stats.plays')),
}, **kwargs)
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
# Original/source formats are only available when logged in
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
return
query = {'action': 'load_download_config'} query = {'action': 'load_download_config'}
if unlisted_hash: if unlisted_hash:
query['unlisted_hash'] = unlisted_hash query['unlisted_hash'] = unlisted_hash
download_data = self._download_json( download_data = self._download_json(
url, video_id, fatal=False, query=query, url, video_id, 'Loading download config JSON', fatal=False,
headers={'X-Requested-With': 'XMLHttpRequest'}, query=query, headers={'X-Requested-With': 'XMLHttpRequest'},
expected_status=(403, 404)) or {} expected_status=(403, 404)) or {}
source_file = download_data.get('source_file') source_file = download_data.get('source_file')
download_url = try_get(source_file, lambda x: x['download_url']) download_url = try_get(source_file, lambda x: x['download_url'])
@ -261,15 +278,13 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None):
'quality': 1, 'quality': 1,
} }
jwt_response = self._download_json( jwt = jwt or traverse_obj(self._download_json(
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {} 'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
if not jwt_response.get('jwt'): if not jwt:
return return
headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'} original_response = api_data or self._call_videos_api(
original_response = self._download_json( video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
f'https://api.vimeo.com/videos/{video_id}', video_id, for download_data in traverse_obj(original_response, ('download', ..., {dict})):
headers=headers, fatal=False, expected_status=(403, 404)) or {}
for download_data in original_response.get('download') or []:
download_url = download_data.get('link') download_url = download_data.get('link')
if not download_url or download_data.get('quality') != 'source': if not download_url or download_data.get('quality') != 'source':
continue continue
@ -354,7 +369,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'skip': 'No longer available', 'skip': 'No longer available',
}, },
{ {
'url': 'http://player.vimeo.com/video/54469442', 'url': 'https://player.vimeo.com/video/54469442',
'md5': '619b811a4417aa4abe78dc653becf511', 'md5': '619b811a4417aa4abe78dc653becf511',
'note': 'Videos that embed the url in the player page', 'note': 'Videos that embed the url in the player page',
'info_dict': { 'info_dict': {
@ -370,6 +385,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': { 'params': {
'format': 'best[protocol=https]', 'format': 'best[protocol=https]',
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
'url': 'http://vimeo.com/68375962', 'url': 'http://vimeo.com/68375962',
@ -379,22 +395,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '68375962', 'id': '68375962',
'ext': 'mp4', 'ext': 'mp4',
'title': 'youtube-dl password protected test video', 'title': 'youtube-dl password protected test video',
'timestamp': 1371200155, 'timestamp': 1371214555,
'upload_date': '20130614', 'upload_date': '20130614',
'release_timestamp': 1371214555,
'release_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128', 'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz', 'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10, 'duration': 10,
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
'view_count': int,
'comment_count': int, 'comment_count': int,
'like_count': int, 'like_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
}, },
'params': { 'params': {
'format': 'best[protocol=https]', 'format': 'best[protocol=https]',
'videopassword': 'youtube-dl', 'videopassword': 'youtube-dl',
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
'url': 'http://vimeo.com/channels/keypeele/75629013', 'url': 'http://vimeo.com/channels/keypeele/75629013',
@ -418,29 +435,38 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': int, 'like_count': int,
}, },
'params': {'format': 'http-1080p'}, 'params': {'format': 'http-1080p'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
'url': 'http://vimeo.com/76979871', 'url': 'http://vimeo.com/76979871',
'note': 'Video with subtitles', 'note': 'Video with subtitles',
'info_dict': { 'info_dict': {
'id': '76979871', 'id': '76979871',
'ext': 'mov', 'ext': 'mp4',
'title': 'The New Vimeo Player (You Know, For Videos)', 'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea', 'description': str, # FIXME: Dynamic SEO spam description
'timestamp': 1381846109, 'timestamp': 1381860509,
'upload_date': '20131015', 'upload_date': '20131015',
'release_timestamp': 1381860509,
'release_date': '20131015',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
'uploader_id': 'staff', 'uploader_id': 'staff',
'uploader': 'Vimeo Staff', 'uploader': 'Vimeo',
'duration': 62, 'duration': 62,
'comment_count': int,
'like_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/452001751-8216e0571c251a09d7a8387550942d89f7f86f6398f8ed886e639b0dd50d3c90-d_1280',
'subtitles': { 'subtitles': {
'de': [{'ext': 'vtt'}], 'de': 'count:3',
'en': [{'ext': 'vtt'}], 'en': 'count:3',
'es': [{'ext': 'vtt'}], 'es': 'count:3',
'fr': [{'ext': 'vtt'}], 'fr': 'count:3',
}, },
}, },
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], 'expected_warnings': [
'Ignoring subtitle tracks found in the HLS manifest',
'Failed to parse XML: not well-formed',
],
}, },
{ {
# from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
@ -456,11 +482,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 118, 'duration': 118,
'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280', 'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
# contains original format # contains Original format
'url': 'https://vimeo.com/33951933', 'url': 'https://vimeo.com/33951933',
'md5': '53c688fa95a55bf4b7293d37a89c5c53', # 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
'info_dict': { 'info_dict': {
'id': '33951933', 'id': '33951933',
'ext': 'mp4', 'ext': 'mp4',
@ -476,15 +503,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
'view_count': int, 'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280', 'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
'like_count': int, 'like_count': int,
'tags': 'count:11',
}, },
# 'params': {'format': 'Original'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
'note': 'Contains original format not accessible in webpage', 'note': 'Contains source format not accessible in webpage',
'url': 'https://vimeo.com/393756517', 'url': 'https://vimeo.com/393756517',
'md5': 'c464af248b592190a5ffbb5d33f382b0', # 'md5': 'c464af248b592190a5ffbb5d33f382b0',
'info_dict': { 'info_dict': {
'id': '393756517', 'id': '393756517',
'ext': 'mov', # 'ext': 'mov',
'ext': 'mp4',
'timestamp': 1582642091, 'timestamp': 1582642091,
'uploader_id': 'frameworkla', 'uploader_id': 'frameworkla',
'title': 'Straight To Hell - Sabrina: Netflix', 'title': 'Straight To Hell - Sabrina: Netflix',
@ -495,6 +526,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280', 'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
'uploader_url': 'https://vimeo.com/frameworkla', 'uploader_url': 'https://vimeo.com/frameworkla',
}, },
# 'params': {'format': 'source'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
# only available via https://vimeo.com/channels/tributes/6213729 and # only available via https://vimeo.com/channels/tributes/6213729 and
@ -511,16 +544,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
'channel_id': 'tributes', 'channel_id': 'tributes',
'timestamp': 1250886430, 'timestamp': 1250886430,
'upload_date': '20090821', 'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', 'description': str, # FIXME: Dynamic SEO spam description
'duration': 321, 'duration': 321,
'comment_count': int, 'comment_count': int,
'view_count': int, 'view_count': int,
'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280', 'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
'like_count': int, 'like_count': int,
'tags': ['[the shining', 'vimeohq', 'cv', 'vimeo tribute]'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
# redirects to ondemand extractor and should be passed through it # redirects to ondemand extractor and should be passed through it
@ -543,28 +578,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
'skip': 'this page is no longer available.', 'skip': 'this page is no longer available.',
}, },
{ {
'url': 'http://player.vimeo.com/video/68375962', 'url': 'https://player.vimeo.com/video/68375962',
'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
'info_dict': { 'info_dict': {
'id': '68375962', 'id': '68375962',
'ext': 'mp4', 'ext': 'mp4',
'title': 'youtube-dl password protected test video', 'title': 'youtube-dl password protected test video',
'timestamp': 1371200155,
'upload_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128', 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128', 'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz', 'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10, 'duration': 10,
'description': 'md5:6173f270cd0c0119f22817204b3eb86c',
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280', 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_1280',
'view_count': int,
'comment_count': int,
'like_count': int,
}, },
'params': { 'params': {
'format': 'best[protocol=https]', 'format': 'best[protocol=https]',
'videopassword': 'youtube-dl', 'videopassword': 'youtube-dl',
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
@ -592,7 +622,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc", 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
'uploader': 'Philipp Hagemeister', 'uploader': 'Philipp Hagemeister',
'uploader_id': 'user20132939', 'uploader_id': 'user20132939',
'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b', 'description': str, # FIXME: Dynamic SEO spam description
'upload_date': '20150209', 'upload_date': '20150209',
'timestamp': 1423518307, 'timestamp': 1423518307,
'thumbnail': 'https://i.vimeocdn.com/video/default_1280', 'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
@ -606,6 +636,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'format': 'best[protocol=https]', 'format': 'best[protocol=https]',
'videopassword': 'youtube-dl', 'videopassword': 'youtube-dl',
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
# source file returns 403: Forbidden # source file returns 403: Forbidden
@ -633,11 +664,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'release_date': '20160329', 'release_date': '20160329',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
'url': 'https://vimeo.com/138909882', 'url': 'https://vimeo.com/138909882',
'info_dict': { 'info_dict': {
'id': '138909882', 'id': '138909882',
# 'ext': 'm4v',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!', 'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
'description': 'md5:5967e090768a831488f6e74b7821b3c1', 'description': 'md5:5967e090768a831488f6e74b7821b3c1',
@ -645,11 +678,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Firework Champions', 'uploader': 'Firework Champions',
'upload_date': '20150910', 'upload_date': '20150910',
'timestamp': 1441901895, 'timestamp': 1441901895,
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d_1280',
'uploader_url': 'https://vimeo.com/fireworkchampions',
'tags': 'count:6',
'duration': 229,
'view_count': int,
'like_count': int,
'comment_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
'format': 'Original', # 'format': 'source',
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
'url': 'https://vimeo.com/channels/staffpicks/143603739', 'url': 'https://vimeo.com/channels/staffpicks/143603739',
@ -670,8 +711,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': int, 'like_count': int,
'uploader_url': 'https://vimeo.com/karimhd', 'uploader_url': 'https://vimeo.com/karimhd',
'channel_url': 'https://vimeo.com/channels/staffpicks', 'channel_url': 'https://vimeo.com/channels/staffpicks',
'tags': 'count:6',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
# requires passing unlisted_hash(a52724358e) to load_download_config request # requires passing unlisted_hash(a52724358e) to load_download_config request
@ -701,6 +744,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{ {
# chapters must be sorted, see: https://github.com/yt-dlp/yt-dlp/issues/5308 # chapters must be sorted, see: https://github.com/yt-dlp/yt-dlp/issues/5308
@ -735,6 +779,48 @@ class VimeoIE(VimeoBaseInfoExtractor):
}, },
'expected_warnings': ['Failed to parse XML: not well-formed'], 'expected_warnings': ['Failed to parse XML: not well-formed'],
}, },
{
# vimeo.com URL with unlisted hash and Original format
'url': 'https://vimeo.com/144579403/ec02229140',
# 'md5': '6b662c2884e0373183fbde2a0d15cb78',
'info_dict': {
'id': '144579403',
'ext': 'mp4',
'title': 'SALESMANSHIP',
'description': 'md5:4338302f347a1ff8841b4a3aecaa09f0',
'uploader': 'Off the Picture Pictures',
'uploader_id': 'offthepicturepictures',
'uploader_url': 'https://vimeo.com/offthepicturepictures',
'duration': 669,
'upload_date': '20151104',
'timestamp': 1446607180,
'release_date': '20151104',
'release_timestamp': 1446607180,
'like_count': int,
'view_count': int,
'comment_count': int,
'thumbnail': r're:https://i\.vimeocdn\.com/video/1018638656-[\da-f]+-d_1280',
},
# 'params': {'format': 'Original'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{
# player.vimeo.com URL with source format
'url': 'https://player.vimeo.com/video/859028877',
# 'md5': '19ca3d2463441dee2d2f0671ac2916a2',
'info_dict': {
'id': '859028877',
'ext': 'mp4',
'title': 'Ariana Grande - Honeymoon Avenue (Live from London)',
'uploader': 'Raja Virdi',
'uploader_id': 'rajavirdi',
'uploader_url': 'https://vimeo.com/rajavirdi',
'duration': 309,
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d_1280',
},
# 'params': {'format': 'source'},
'expected_warnings': ['Failed to parse XML: not well-formed'],
},
{ {
# user playlist alias -> https://vimeo.com/258705797 # user playlist alias -> https://vimeo.com/258705797
'url': 'https://vimeo.com/user26785108/newspiritualguide', 'url': 'https://vimeo.com/user26785108/newspiritualguide',
@ -768,16 +854,6 @@ def _verify_player_video_password(self, url, video_id, headers):
raise ExtractorError('Wrong video password', expected=True) raise ExtractorError('Wrong video password', expected=True)
return checked return checked
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None):
    """Fetch the api.vimeo.com JSON record for a single video.

    The request URL is built from ``https://api.vimeo.com/videos/{video_id}``
    and ``unlisted_hash`` joined with a ``:`` delimiter (``join_nonempty``
    presumably leaves the hash out when it is empty -- confirm against the
    helper). ``jwt_token`` is sent in the ``Authorization`` header with the
    ``jwt`` scheme, and a ``fields`` query parameter lists the attributes
    asked for.

    Returns whatever ``self._download_json`` returns for that request.
    """
    api_url = join_nonempty(
        f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':')
    request_headers = {
        'Authorization': f'jwt {jwt_token}',
        'Accept': 'application/json',
    }
    # Comma-separated list of the attributes requested from the API
    request_query = {
        'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
    }
    return self._download_json(
        api_url, video_id, 'Downloading API JSON',
        headers=request_headers, query=request_query)
def _extract_from_api(self, video_id, unlisted_hash=None): def _extract_from_api(self, video_id, unlisted_hash=None):
viewer = self._download_json( viewer = self._download_json(
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info') 'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
@ -798,6 +874,11 @@ def _extract_from_api(self, video_id, unlisted_hash=None):
info = self._parse_config(self._download_json( info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id) video['config_url'], video_id), video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
if source_format:
info['formats'].append(source_format)
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
info.update({ info.update({
'description': video.get('description'), 'description': video.get('description'),
@ -899,7 +980,12 @@ def _real_extract(self, url):
if config.get('view') == 4: if config.get('view') == 4:
config = self._verify_player_video_password( config = self._verify_player_video_password(
redirect_url, video_id, headers) redirect_url, video_id, headers)
return self._parse_config(config, video_id) info = self._parse_config(config, video_id)
source_format = self._extract_original_format(
f'https://vimeo.com/{video_id}', video_id, unlisted_hash)
if source_format:
info['formats'].append(source_format)
return info
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config: if vimeo_config:
@ -1269,6 +1355,20 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
IE_DESC = 'Review pages on vimeo' IE_DESC = 'Review pages on vimeo'
_VALID_URL = r'https?://vimeo\.com/(?P<user>[^/?#]+)/review/(?P<id>\d+)/(?P<hash>[\da-f]{10})' _VALID_URL = r'https?://vimeo\.com/(?P<user>[^/?#]+)/review/(?P<id>\d+)/(?P<hash>[\da-f]{10})'
_TESTS = [{ _TESTS = [{
'url': 'https://vimeo.com/user170863801/review/996447483/a316d6ed8d',
'info_dict': {
'id': '996447483',
'ext': 'mp4',
'title': 'Rodeo day 1-_2',
'uploader': 'BROADKAST',
'uploader_id': 'user170863801',
'uploader_url': 'https://vimeo.com/user170863801',
'duration': 30,
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Failed to parse XML'],
}, {
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253', 'md5': 'c507a72f780cacc12b2248bb4006d253',
'info_dict': { 'info_dict': {
@ -1282,6 +1382,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280', 'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280',
'uploader_url': 'https://vimeo.com/user21297594', 'uploader_url': 'https://vimeo.com/user21297594',
}, },
'skip': '404 Not Found',
}, { }, {
'note': 'video player needs Referer', 'note': 'video player needs Referer',
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053', 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
@ -1316,6 +1417,7 @@ def _real_extract(self, url):
user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash') user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}' data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
data = self._download_json(data_url, video_id) data = self._download_json(data_url, video_id)
viewer = {}
if data.get('isLocked') is True: if data.get('isLocked') is True:
video_password = self._get_video_password() video_password = self._get_video_password()
viewer = self._download_json( viewer = self._download_json(
@ -1327,8 +1429,8 @@ def _real_extract(self, url):
config = self._download_json(config_url, video_id) config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id) info_dict = self._parse_config(config, video_id)
source_format = self._extract_original_format( source_format = self._extract_original_format(
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action', video_id, f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
unlisted_hash=traverse_obj(config_url, ({parse_qs}, 'h', -1))) video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
if source_format: if source_format:
info_dict['formats'].append(source_format) info_dict['formats'].append(source_format)
info_dict['description'] = clean_html(clip_data.get('description')) info_dict['description'] = clean_html(clip_data.get('description'))

View file

@ -10,7 +10,7 @@
import urllib.parse import urllib.parse
import urllib.request import urllib.request
from .exceptions import RequestError, UnsupportedRequest from .exceptions import RequestError
from ..dependencies import certifi from ..dependencies import certifi
from ..socks import ProxyType, sockssocket from ..socks import ProxyType, sockssocket
from ..utils import format_field, traverse_obj from ..utils import format_field, traverse_obj
@ -206,7 +206,7 @@ def wrap_request_errors(func):
def wrapper(self, *args, **kwargs): def wrapper(self, *args, **kwargs):
try: try:
return func(self, *args, **kwargs) return func(self, *args, **kwargs)
except UnsupportedRequest as e: except RequestError as e:
if e.handler is None: if e.handler is None:
e.handler = self e.handler = self
raise raise