mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-14 20:38:11 -05:00
[ie/Dailymotion] Improve embed extraction (#10843)
Closes #8848, Closes #9432 Authored by: pzhlkj6612, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
This commit is contained in:
parent
754940e9a5
commit
a403dcf9be
1 changed files with 101 additions and 14 deletions
|
@ -10,11 +10,14 @@
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
update_url,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -99,11 +102,16 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
_VALID_URL = r'''(?ix)
|
_VALID_URL = r'''(?ix)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)|
|
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
|
||||||
(?:www\.)?lequipe\.fr/video
|
(?:www\.)?lequipe\.fr
|
||||||
|
)/
|
||||||
|
(?:
|
||||||
|
swf/(?!video)|
|
||||||
|
(?:(?:crawler|embed|swf)/)?video/|
|
||||||
|
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
|
||||||
)
|
)
|
||||||
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
|
(?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
|
||||||
'''
|
'''
|
||||||
IE_NAME = 'dailymotion'
|
IE_NAME = 'dailymotion'
|
||||||
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -217,6 +225,63 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, { # playlist-only
|
||||||
|
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_WEBPAGE_TESTS = [{
|
||||||
|
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
|
||||||
|
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x93blhi',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'OnAir - 01/08/24',
|
||||||
|
'description': '',
|
||||||
|
'duration': 217,
|
||||||
|
'timestamp': 1722505658,
|
||||||
|
'upload_date': '20240801',
|
||||||
|
'uploader': 'Financialounge',
|
||||||
|
'uploader_id': 'x2vtgmm',
|
||||||
|
'age_limit': 0,
|
||||||
|
'tags': [],
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||||
|
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x7wdsj',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 50,
|
||||||
|
}, {
|
||||||
|
# https://www.dailymotion.com/crawler/video/x8u4owg
|
||||||
|
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x8u4owg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'like_count': int,
|
||||||
|
'uploader': 'Le Parisien',
|
||||||
|
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
|
||||||
|
'upload_date': '20240309',
|
||||||
|
'view_count': int,
|
||||||
|
'timestamp': 1709997866,
|
||||||
|
'age_limit': 0,
|
||||||
|
'uploader_id': 'x32f7b',
|
||||||
|
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
|
||||||
|
'duration': 428.0,
|
||||||
|
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||||
|
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_COMMON_MEDIA_FIELDS = '''description
|
_COMMON_MEDIA_FIELDS = '''description
|
||||||
|
@ -232,16 +297,35 @@ def _extract_embed_urls(cls, url, webpage):
|
||||||
for mobj in re.finditer(
|
for mobj in re.finditer(
|
||||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||||
|
attrs = extract_attributes(mobj.group(0))
|
||||||
|
player_url = url_or_none(attrs.get('src'))
|
||||||
|
if not player_url:
|
||||||
|
continue
|
||||||
|
player_url = player_url.replace('.js', '.html')
|
||||||
|
if player_url.startswith('//'):
|
||||||
|
player_url = f'https:{player_url}'
|
||||||
|
if video_id := attrs.get('data-video'):
|
||||||
|
query_string = f'video={video_id}'
|
||||||
|
elif playlist_id := attrs.get('data-playlist'):
|
||||||
|
query_string = f'playlist={playlist_id}'
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
yield update_url(player_url, query=query_string)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url)
|
url, smuggled_data = unsmuggle_url(url)
|
||||||
video_id, playlist_id = self._match_valid_url(url).groups()
|
video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
|
||||||
|
|
||||||
if playlist_id:
|
if is_playlist: # We matched the playlist query param as video_id
|
||||||
if self._yes_playlist(playlist_id, video_id):
|
playlist_id = video_id
|
||||||
return self.url_result(
|
video_id = None
|
||||||
'http://www.dailymotion.com/playlist/' + playlist_id,
|
|
||||||
'DailymotionPlaylist', playlist_id)
|
if self._yes_playlist(playlist_id, video_id):
|
||||||
|
return self.url_result(
|
||||||
|
f'http://www.dailymotion.com/playlist/{playlist_id}',
|
||||||
|
'DailymotionPlaylist', playlist_id)
|
||||||
|
|
||||||
password = self.get_param('videopassword')
|
password = self.get_param('videopassword')
|
||||||
media = self._call_api(
|
media = self._call_api(
|
||||||
|
@ -282,6 +366,8 @@ def _real_extract(self, url):
|
||||||
title = metadata['title']
|
title = metadata['title']
|
||||||
is_live = media.get('isOnAir')
|
is_live = media.get('isOnAir')
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
|
|
||||||
for quality, media_list in metadata['qualities'].items():
|
for quality, media_list in metadata['qualities'].items():
|
||||||
for m in media_list:
|
for m in media_list:
|
||||||
media_url = m.get('url')
|
media_url = m.get('url')
|
||||||
|
@ -289,8 +375,10 @@ def _real_extract(self, url):
|
||||||
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
if not media_url or media_type == 'application/vnd.lumberjack.manifest':
|
||||||
continue
|
continue
|
||||||
if media_type == 'application/x-mpegURL':
|
if media_type == 'application/x-mpegURL':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
fmt, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
|
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||||
|
formats.extend(fmt)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
else:
|
else:
|
||||||
f = {
|
f = {
|
||||||
'url': media_url,
|
'url': media_url,
|
||||||
|
@ -310,7 +398,6 @@ def _real_extract(self, url):
|
||||||
if not f.get('fps') and f['format_id'].endswith('@60'):
|
if not f.get('fps') and f['format_id'].endswith('@60'):
|
||||||
f['fps'] = 60
|
f['fps'] = 60
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
|
||||||
for subtitle_lang, subtitle in subtitles_data.items():
|
for subtitle_lang, subtitle in subtitles_data.items():
|
||||||
subtitles[subtitle_lang] = [{
|
subtitles[subtitle_lang] = [{
|
||||||
|
@ -447,7 +534,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||||
IE_NAME = 'dailymotion:user'
|
IE_NAME = 'dailymotion:user'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|
Loading…
Reference in a new issue