mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-07 20:30:41 -05:00
[pornhub] Extract cast
Closes #406, https://github.com/ytdl-org/youtube-dl/pull/27384
This commit is contained in:
parent
3fd4c2a543
commit
d0fb4bd16f
2 changed files with 5 additions and 1 deletion
|
@@ -290,6 +290,7 @@ class InfoExtractor(object):
|
||||||
categories: A list of categories that the video falls in, for example
|
categories: A list of categories that the video falls in, for example
|
||||||
["Sports", "Berlin"]
|
["Sports", "Berlin"]
|
||||||
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
|
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
|
||||||
|
cast: A list of the video cast
|
||||||
is_live: True, False, or None (=unknown). Whether this video is a
|
is_live: True, False, or None (=unknown). Whether this video is a
|
||||||
live stream that goes on instead of a fixed-length video.
|
live stream that goes on instead of a fixed-length video.
|
||||||
was_live: True, False, or None (=unknown). Whether this video was
|
was_live: True, False, or None (=unknown). Whether this video was
|
||||||
|
|
|
@@ -14,6 +14,7 @@
|
||||||
)
|
)
|
||||||
from .openload import PhantomJSwrapper
|
from .openload import PhantomJSwrapper
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@@ -145,6 +146,7 @@ class PornHubIE(PornHubBaseIE):
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'categories': list,
|
'categories': list,
|
||||||
|
'cast': list,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# non-ASCII title
|
# non-ASCII title
|
||||||
|
@@ -464,7 +466,7 @@ def extract_list(meta_key):
|
||||||
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
|
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
|
||||||
% meta_key, webpage, meta_key, default=None)
|
% meta_key, webpage, meta_key, default=None)
|
||||||
if div:
|
if div:
|
||||||
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
|
return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
# description provided in JSON-LD is irrelevant
|
# description provided in JSON-LD is irrelevant
|
||||||
|
@@ -485,6 +487,7 @@ def extract_list(meta_key):
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': extract_list('tags'),
|
'tags': extract_list('tags'),
|
||||||
'categories': extract_list('categories'),
|
'categories': extract_list('categories'),
|
||||||
|
'cast': extract_list('pornstars'),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}, info)
|
}, info)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue