[extractor/generic] Decode unicode-escaped embed URLs (#5919)

Authored by: bashonly
Closes #5854
This commit is contained in:
bashonly 2023-01-02 08:06:01 -06:00 committed by GitHub
parent 32a84bcf4e
commit 05997b6e98
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2135,7 +2135,8 @@ class GenericIE(InfoExtractor):
'age_limit': 0, 'age_limit': 0,
'direct': True, 'direct': True,
} }
}, { },
{
'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.', 'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.',
'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', 'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
'info_dict': { 'info_dict': {
@ -2149,7 +2150,23 @@ class GenericIE(InfoExtractor):
'duration': 318.0, 'duration': 318.0,
'direct': True, 'direct': True,
'age_limit': 0, 'age_limit': 0,
} },
},
{
'note': 'JW Player embed with unicode-escape sequences in URL',
'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
'info_dict': {
'id': 'm',
'ext': 'mp4',
'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
'description': 'Mahler\'s ',
'uploader': 'www.medici.tv',
'age_limit': 0,
'thumbnail': r're:^https?://.+\.jpg',
},
'params': {
'skip_download': True,
},
}, },
{ {
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/', 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
@ -2751,6 +2768,7 @@ def filter_video(urls):
entries = [] entries = []
for video_url in orderedSet(found): for video_url in orderedSet(found):
video_url = video_url.encode().decode('unicode-escape')
video_url = unescapeHTML(video_url) video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/') video_url = video_url.replace('\\/', '/')
video_url = urllib.parse.urljoin(url, video_url) video_url = urllib.parse.urljoin(url, video_url)