mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-07 20:30:41 -05:00
[vimeo:channel] Improve playlist extraction
This commit is contained in:
parent
dfc8f46e1c
commit
c8e3e0974b
1 changed file with 21 additions and 4 deletions
|
@ -146,7 +146,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
\.
|
\.
|
||||||
)?
|
)?
|
||||||
vimeo(?P<pro>pro)?\.com/
|
vimeo(?P<pro>pro)?\.com/
|
||||||
(?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
|
(?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||||
(?:.*?/)?
|
(?:.*?/)?
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
|
@ -314,6 +314,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
|
'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://vimeo.com/album/2632481/video/79010983',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# source file returns 403: Forbidden
|
# source file returns 403: Forbidden
|
||||||
'url': 'https://vimeo.com/7809605',
|
'url': 'https://vimeo.com/7809605',
|
||||||
|
@ -651,8 +655,21 @@ def _title_and_entries(self, list_id, base_url):
|
||||||
webpage = self._login_list_password(page_url, list_id, webpage)
|
webpage = self._login_list_password(page_url, list_id, webpage)
|
||||||
yield self._extract_list_title(webpage)
|
yield self._extract_list_title(webpage)
|
||||||
|
|
||||||
for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
|
# Try extracting href first since not all videos are available via
|
||||||
yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo', video_id=video_id)
|
# short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
|
||||||
|
clips = re.findall(
|
||||||
|
r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
|
||||||
|
if clips:
|
||||||
|
for video_id, video_url in clips:
|
||||||
|
yield self.url_result(
|
||||||
|
compat_urlparse.urljoin(base_url, video_url),
|
||||||
|
VimeoIE.ie_key(), video_id=video_id)
|
||||||
|
# More relaxed fallback
|
||||||
|
else:
|
||||||
|
for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
|
||||||
|
yield self.url_result(
|
||||||
|
'https://vimeo.com/%s' % video_id,
|
||||||
|
VimeoIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
|
||||||
break
|
break
|
||||||
|
@ -689,7 +706,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class VimeoAlbumIE(VimeoChannelIE):
|
class VimeoAlbumIE(VimeoChannelIE):
|
||||||
IE_NAME = 'vimeo:album'
|
IE_NAME = 'vimeo:album'
|
||||||
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
|
_VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)/?(?:$|[?#])'
|
||||||
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vimeo.com/album/2632481',
|
'url': 'https://vimeo.com/album/2632481',
|
||||||
|
|
Loading…
Reference in a new issue