[aparat] Add support (Fixes #2012)

This commit is contained in:
Philipp Hagemeister 2013-12-20 17:05:28 +01:00
parent 768df74538
commit aa94a6d315
5 changed files with 73 additions and 6 deletions

View file

@ -1,6 +1,7 @@
from .academicearth import AcademicEarthCourseIE from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .anitube import AnitubeIE from .anitube import AnitubeIE
from .aparat import AparatIE
from .appletrailers import AppleTrailersIE from .appletrailers import AppleTrailersIE
from .archiveorg import ArchiveOrgIE from .archiveorg import ArchiveOrgIE
from .ard import ARDIE from .ard import ARDIE

View file

@ -0,0 +1,56 @@
#coding: utf-8
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
HEADRequest,
)
class AparatIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
_TEST = {
u'url': u'http://www.aparat.com/v/wP8On',
u'file': u'wP8On.mp4',
u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
u'info_dict': {
u"title": u"تیم گلکسی 11 - زومیت",
},
#u'skip': u'Extremely unreliable',
}
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
video_id + u'/vt/frame')
webpage = self._download_webpage(embed_url, video_id)
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
for i, video_url in enumerate(video_urls):
req = HEADRequest(video_url)
res = self._request_webpage(
req, video_id, note=u'Testing video URL %d' % i, errnote=False)
if res:
break
else:
raise ExtractorError(u'No working video URLs found')
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
thumbnail = self._search_regex(
r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
return {
'id': video_id,
'title': title,
'url': video_url,
'ext': 'mp4',
'thumbnail': thumbnail,
}

View file

@ -170,6 +170,8 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
try: try:
return self._downloader.urlopen(url_or_request) return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if errnote is False:
return False
if errnote is None: if errnote is None:
errnote = u'Unable to download webpage' errnote = u'Unable to download webpage'
errmsg = u'%s: %s' % (errnote, compat_str(err)) errmsg = u'%s: %s' % (errnote, compat_str(err))

View file

@ -11,6 +11,7 @@
compat_urlparse, compat_urlparse,
ExtractorError, ExtractorError,
HEADRequest,
smuggle_url, smuggle_url,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@ -109,21 +110,18 @@ def report_following_redirect(self, new_url):
def _send_head(self, url): def _send_head(self, url):
"""Check if it is a redirect, like url shorteners, in case return the new url.""" """Check if it is a redirect, like url shorteners, in case return the new url."""
class HeadRequest(compat_urllib_request.Request):
def get_method(self):
return "HEAD"
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler): class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
""" """
Subclass the HTTPRedirectHandler to make it use our Subclass the HTTPRedirectHandler to make it use our
HeadRequest also on the redirected URL HEADRequest also on the redirected URL
""" """
def redirect_request(self, req, fp, code, msg, headers, newurl): def redirect_request(self, req, fp, code, msg, headers, newurl):
if code in (301, 302, 303, 307): if code in (301, 302, 303, 307):
newurl = newurl.replace(' ', '%20') newurl = newurl.replace(' ', '%20')
newheaders = dict((k,v) for k,v in req.headers.items() newheaders = dict((k,v) for k,v in req.headers.items()
if k.lower() not in ("content-length", "content-type")) if k.lower() not in ("content-length", "content-type"))
return HeadRequest(newurl, return HEADRequest(newurl,
headers=newheaders, headers=newheaders,
origin_req_host=req.get_origin_req_host(), origin_req_host=req.get_origin_req_host(),
unverifiable=True) unverifiable=True)
@ -152,7 +150,7 @@ def http_error_405(self, req, fp, code, msg, headers):
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
opener.add_handler(handler()) opener.add_handler(handler())
response = opener.open(HeadRequest(url)) response = opener.open(HEADRequest(url))
if response is None: if response is None:
raise ExtractorError(u'Invalid URL protocol') raise ExtractorError(u'Invalid URL protocol')
return response return response
@ -296,6 +294,11 @@ def _real_extract(self, url):
if mobj is not None: if mobj is not None:
return OoyalaIE._build_url_result(mobj.group(1)) return OoyalaIE._build_url_result(mobj.group(1))
# Look for Aparat videos
mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage)
if mobj is not None:
return self.url_result(mobj.group(1), 'Aparat')
# Start with something easy: JW Player in SWFObject # Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None: if mobj is None:

View file

@ -1093,3 +1093,8 @@ def remove_start(s, start):
def url_basename(url): def url_basename(url):
path = compat_urlparse.urlparse(url).path path = compat_urlparse.urlparse(url).path
return path.strip(u'/').split(u'/')[-1] return path.strip(u'/').split(u'/')[-1]
class HEADRequest(compat_urllib_request.Request):
def get_method(self):
return "HEAD"