mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
[generic] Automatic detection of flow player and age_limit (Fixes #3576)
This commit is contained in:
parent
24e5e24166
commit
4d805e063c
1 changed file with 32 additions and 0 deletions
|
@ -331,6 +331,18 @@ class GenericIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
|
||||
}
|
||||
},
|
||||
# Flowplayer
|
||||
{
|
||||
'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
|
||||
'md5': '9d65602bf31c6e20014319c7d07fba27',
|
||||
'info_dict': {
|
||||
'id': '5123ea6d5e5a7',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 18,
|
||||
'uploader': 'www.handjobhub.com',
|
||||
'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
|
@ -570,6 +582,16 @@ def _real_extract(self, url):
|
|||
r'(?s)<title>(.*?)</title>', webpage, 'video title',
|
||||
default='video')
|
||||
|
||||
# Try to detect age limit automatically
|
||||
age_limit = self._rta_search(webpage)
|
||||
# And then there are the jokers who advertise that they use RTA,
|
||||
# but actually don't.
|
||||
AGE_LIMIT_MARKERS = [
|
||||
r'Proudly Labeled <a href="http://www.rtalabel.org/" title="Restricted to Adults">RTA</a>',
|
||||
]
|
||||
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
|
||||
age_limit = 18
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(
|
||||
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
|
||||
|
@ -833,6 +855,15 @@ def _playlist_from_matches(matches, getter, ie=None):
|
|||
if not found:
|
||||
# Broaden the findall a little bit: JWPlayer JS loader
|
||||
found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
|
||||
if not found:
|
||||
# Flow player
|
||||
found = re.findall(r'''(?xs)
|
||||
flowplayer\("[^"]+",\s*
|
||||
\{[^}]+?\}\s*,
|
||||
\s*{[^}]+? ["']?clip["']?\s*:\s*\{\s*
|
||||
["']?url["']?\s*:\s*["']([^"']+)["']
|
||||
''', webpage)
|
||||
assert found
|
||||
if not found:
|
||||
# Try to find twitter cards info
|
||||
found = re.findall(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)
|
||||
|
@ -884,6 +915,7 @@ def check_video(vurl):
|
|||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
|
|
Loading…
Reference in a new issue