[extractor/wistia] Match IDs in embed URLs (#4990)

Closes #4985
Authored by: bashonly
This commit is contained in:
bashonly 2022-09-21 20:53:08 +00:00 committed by GitHub
parent 2fa669f759
commit 163281178a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 10 deletions

View file

@ -876,17 +876,19 @@ class GenericIE(InfoExtractor):
# Wistia embed
{
'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
'md5': 'b9676d24bf30945d97060638fbfe77f0',
'info_dict': {
'id': '6e2wtrbdaf',
'ext': 'mov',
'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
'description': 'a Paywall Videos video from Remilon',
'duration': 644.072,
'id': '5vd7p4bct5',
'ext': 'bin',
'title': 'md5:db27290a04ae306319b0b5cce3cdf7bd',
'description': 'md5:e835b7808e11aaef29ccdc28888437af',
'duration': 623.019,
'uploader': 'study.com',
'timestamp': 1459678540,
'upload_date': '20160403',
'filesize': 24687186,
'timestamp': 1663258727,
'upload_date': '20220915',
'filesize': 29798093,
'age_limit': 0,
'thumbnail': r're:^https?://.+\.jpg$',
},
},
# Wistia standard embed (async)
@ -903,7 +905,20 @@ class GenericIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
'skip': 'webpage 404 not found',
},
# Wistia embed with video IDs in query
{
'url': 'https://amplitude.com/amplify-sessions?amp%5Bwmediaid%5D=pz0m0l0if3&amp%5Bwvideo%5D=pz0m0l0if3&wchannelid=emyjmwjf79&wmediaid=i8um783bdt',
'info_dict': {
'id': 'md5:922795280019b3a70ca133330a4b0108',
'title': 'Amplify Sessions - Amplitude',
'description': 'md5:3d271bdee219417bb1c35eeb0937b923',
'age_limit': 0,
'thumbnail': r're:^https?://.+\.jpg$',
},
'playlist_count': 3,
},
# Soundcloud embed
{

View file

@ -131,6 +131,20 @@ class WistiaIE(WistiaBaseIE):
'timestamp': 1463607249,
'duration': 4987.11,
},
'skip': 'webpage 404 not found',
}, {
'url': 'wistia:5vd7p4bct5',
'md5': 'b9676d24bf30945d97060638fbfe77f0',
'info_dict': {
'id': '5vd7p4bct5',
'ext': 'bin',
'title': 'md5:eaa9f64c4efd7b5f098b9b6118597679',
'description': 'md5:a9bea0315f0616aa5df2dc413ddcdd0f',
'upload_date': '20220915',
'timestamp': 1663258727,
'duration': 623.019,
'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.(?:jpg|bin)$',
},
}, {
'url': 'wistia:sh7fpupwlt',
'only_matching': True,
@ -157,6 +171,8 @@ def _extract_embed_urls(cls, url, webpage):
urls.append('wistia:%s' % match.group('id'))
for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
urls.append('wistia:%s' % match.group('id'))
for match in re.finditer(r'(?:wmediaid|wvideo(?:id)?)(?:%5D)?=(?P<id>[a-z0-9]{10})', url):
urls.append('wistia:%s' % match.group('id'))
return urls
@classmethod