[pornhub] Modernize and fix test definition

This commit is contained in:
Philipp Hagemeister 2014-10-27 00:33:35 +01:00
parent 8d32abff9e
commit 249efaf44b
2 changed files with 7 additions and 8 deletions

View file

@@ -16,13 +16,14 @@
class PornHubIE(InfoExtractor): class PornHubIE(InfoExtractor):
_VALID_URL = r'^(?:https?://)?(?:www\.)?(?P<url>pornhub\.com/view_video\.php\?viewkey=(?P<videoid>[0-9a-f]+))' _VALID_URL = r'^https?://(?:www\.)?pornhub\.com/view_video\.php\?viewkey=(?P<id>[0-9a-f]+)'
_TEST = { _TEST = {
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'file': '648719015.mp4',
'md5': '882f488fa1f0026f023f33576004a2ed', 'md5': '882f488fa1f0026f023f33576004a2ed',
'info_dict': { 'info_dict': {
"uploader": "BABES-COM", 'id': '648719015',
'ext': 'mp4',
"uploader": "Babes",
"title": "Seductive Indian beauty strips down and fingers her pink pussy", "title": "Seductive Indian beauty strips down and fingers her pink pussy",
"age_limit": 18 "age_limit": 18
} }
@@ -35,9 +36,7 @@ def _extract_count(self, pattern, webpage, name):
return count return count
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('videoid')
url = 'http://www.' + mobj.group('url')
req = compat_urllib_request.Request(url) req = compat_urllib_request.Request(url)
req.add_header('Cookie', 'age_verified=1') req.add_header('Cookie', 'age_verified=1')
@@ -45,7 +44,7 @@ def _real_extract(self, url):
video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title')
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a href="/users/|<span class="username)[^>]+>(.+?)<', r'(?s)From:&nbsp;.+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<',
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False)
if thumbnail: if thumbnail:

View file

@@ -81,7 +81,7 @@ class RTLnowIE(InfoExtractor):
'id': '99205', 'id': '99205',
'ext': 'flv', 'ext': 'flv',
'title': 'Medicopter 117 - Angst!', 'title': 'Medicopter 117 - Angst!',
'description': 'md5:895b1df01639b5f61a04fc305a5cb94d', 'description': 're:^Im Therapiezentrum \'Sonnalm\' kommen durch eine Unachtsamkeit die für die B.handlung mit Phobikern gehaltenen Voglespinnen frei\. Eine Ausreißerin',
'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg', 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
'upload_date': '20080928', 'upload_date': '20080928',
'duration': 2691, 'duration': 2691,