mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-07 20:30:41 -05:00
[porncom] Extract categories and tags (Closes #10510)
This commit is contained in:
parent
196c6ba067
commit
7a3e849f6e
1 changed file with 12 additions and 1 deletion
|
@@ -26,6 +26,8 @@ class PornComIE(InfoExtractor):
|
|||
'duration': 551,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
|
||||
|
@@ -75,7 +77,14 @@ def _real_extract(self, url):
|
|||
self._sort_formats(formats)
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count'))
|
||||
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage,
|
||||
'view count', fatal=False))
|
||||
|
||||
def extract_list(kind):
|
||||
s = self._search_regex(
|
||||
r'(?s)<p[^>]*>%s:(.+?)</p>' % kind.capitalize(),
|
||||
webpage, kind, fatal=False)
|
||||
return re.findall(r'<a[^>]+>([^<]+)</a>', s or '')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -86,4 +95,6 @@ def _real_extract(self, url):
|
|||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'categories': extract_list('categories'),
|
||||
'tags': extract_list('tags'),
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue