Remove the calls to 'compat_urllib_request.urlopen' in a few extractors

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-12-08 22:24:55 +01:00
parent ac5118bcb9
commit baa7b1978b
5 changed files with 42 additions and 86 deletions

View file

@@ -51,8 +51,7 @@ def _real_extract(self, url):
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id') url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
urlp = compat_urllib_parse_urlparse(url) urlp = compat_urllib_parse_urlparse(url)
if urlp.path.startswith('/play/'): if urlp.path.startswith('/play/'):
request = compat_urllib_request.Request(url) response = self._request_webpage(url, None, False)
response = compat_urllib_request.urlopen(request)
redirecturl = response.geturl() redirecturl = response.geturl()
rurlp = compat_urllib_parse_urlparse(redirecturl) rurlp = compat_urllib_parse_urlparse(redirecturl)
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2] file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
@@ -69,25 +68,23 @@ def _real_extract(self, url):
request.add_header('User-Agent', 'iTunes/10.6.1') request.add_header('User-Agent', 'iTunes/10.6.1')
self.report_extraction(mobj.group(1)) self.report_extraction(mobj.group(1))
info = None info = None
try: urlh = self._request_webpage(request, None, False,
urlh = compat_urllib_request.urlopen(request) u'unable to download video info webpage')
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
basename = url.split('/')[-1] basename = url.split('/')[-1]
title,ext = os.path.splitext(basename) title,ext = os.path.splitext(basename)
title = title.decode('UTF-8') title = title.decode('UTF-8')
ext = ext.replace('.', '') ext = ext.replace('.', '')
self.report_direct_download(title) self.report_direct_download(title)
info = { info = {
'id': title, 'id': title,
'url': url, 'url': url,
'uploader': None, 'uploader': None,
'upload_date': None, 'upload_date': None,
'title': title, 'title': title,
'ext': ext, 'ext': ext,
'urlhandle': urlh 'urlhandle': urlh
} }
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
if info is None: # Regular URL if info is None: # Regular URL
try: try:
json_code_bytes = urlh.read() json_code_bytes = urlh.read()

View file

@@ -1,11 +1,8 @@
import re import re
import socket
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_http_client,
compat_parse_qs, compat_parse_qs,
compat_urllib_error,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
compat_str, compat_str,
@@ -93,12 +90,8 @@ def report_disclaimer(self):
def _real_initialize(self): def _real_initialize(self):
# Retrieve disclaimer # Retrieve disclaimer
request = compat_urllib_request.Request(self._DISCLAIMER) self.report_disclaimer()
try: self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
self.report_disclaimer()
compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
# Confirm age # Confirm age
disclaimer_form = { disclaimer_form = {
@@ -107,11 +100,8 @@ def _real_initialize(self):
} }
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.add_header('Content-Type', 'application/x-www-form-urlencoded')
try: self.report_age_confirmation()
self.report_age_confirmation() self._download_webpage(request, None, False, u'Unable to confirm age')
compat_urllib_request.urlopen(request).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
def _real_extract(self, url): def _real_extract(self, url):
# Extract id and simplified title from URL # Extract id and simplified title from URL

View file

@@ -1,13 +1,10 @@
import json import json
import re import re
import socket
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
unified_strdate, unified_strdate,
ExtractorError,
) )
@@ -31,9 +28,11 @@ def check_urls(self, url_list):
"""Returns 1st active url from list""" """Returns 1st active url from list"""
for url in url_list: for url in url_list:
try: try:
compat_urllib_request.urlopen(url) # We only want to know if the request succeed
# don't download the whole file
self._request_webpage(url, None, False)
return url return url
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error): except ExtractorError:
url = None url = None
return None return None

View file

@@ -1,13 +1,8 @@
import re import re
import socket
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_http_client,
compat_str, compat_str,
compat_urllib_error,
compat_urllib_request,
ExtractorError, ExtractorError,
orderedSet, orderedSet,
@@ -45,11 +40,7 @@ def _real_extract(self, url):
self.report_extraction(info['id']) self.report_extraction(info['id'])
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
xmlUrl = baseUrl + video + '.xml' xmlUrl = baseUrl + video + '.xml'
try: mdoc = self._download_xml(xmlUrl, info['id'])
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
mdoc = xml.etree.ElementTree.fromstring(metaXml)
try: try:
info['title'] = mdoc.findall('./title')[0].text info['title'] = mdoc.findall('./title')[0].text
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
@@ -95,12 +86,9 @@ def _real_extract(self, url):
'upload_date': None, 'upload_date': None,
} }
self.report_download_webpage(info['id'])
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
try: rootpage = self._download_webpage(rootURL, info['id'],
rootpage = compat_urllib_request.urlopen(rootURL).read() errnote=u'Unable to download course info page')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
info['title'] = info['id'] info['title'] = info['id']

View file

@@ -7,7 +7,6 @@
import json import json
import os.path import os.path
import re import re
import socket
import string import string
import struct import struct
import traceback import traceback
@@ -17,9 +16,7 @@
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..utils import ( from ..utils import (
compat_chr, compat_chr,
compat_http_client,
compat_parse_qs, compat_parse_qs,
compat_urllib_error,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
compat_urlparse, compat_urlparse,
@@ -53,9 +50,9 @@ def _set_language(self):
request = compat_urllib_request.Request(self._LANG_URL) request = compat_urllib_request.Request(self._LANG_URL)
try: try:
self.report_lang() self.report_lang()
compat_urllib_request.urlopen(request).read() self._download_webpage(self._LANG_URL, None, False)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except ExtractorError as err:
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err)) self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause))
return False return False
return True return True
@@ -67,12 +64,8 @@ def _login(self):
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return False return False
request = compat_urllib_request.Request(self._LOGIN_URL) login_page = self._download_webpage(self._LOGIN_URL, None, False,
try: u'Unable to fetch login page')
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
return False
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"', galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
login_page, u'Login GALX parameter') login_page, u'Login GALX parameter')
@@ -105,12 +98,12 @@ def _login(self):
request = compat_urllib_request.Request(self._LOGIN_URL, login_data) request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
try: try:
self.report_login() self.report_login()
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8') login_results = self._download_webpage(request, None, False)
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
self._downloader.report_warning(u'unable to log in: bad username or password') self._downloader.report_warning(u'unable to log in: bad username or password')
return False return False
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except ExtractorError as err:
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause))
return False return False
return True return True
@@ -120,11 +113,8 @@ def _confirm_age(self):
'action_confirm': 'Confirm', 'action_confirm': 'Confirm',
} }
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
try: self.report_age_confirmation()
self.report_age_confirmation() self._download_webpage(request, None, False, u'Unable to confirm age')
compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
return True return True
def _real_initialize(self): def _real_initialize(self):
@@ -1737,10 +1727,6 @@ class YoutubeSearchIE(SearchInfoExtractor):
IE_NAME = u'youtube:search' IE_NAME = u'youtube:search'
_SEARCH_KEY = 'ytsearch' _SEARCH_KEY = 'ytsearch'
def report_download_page(self, query, pagenum):
"""Report attempt to download search page with given number."""
self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
def _get_n_results(self, query, n): def _get_n_results(self, query, n):
"""Get a specified number of results for a query""" """Get a specified number of results for a query"""
@@ -1749,13 +1735,9 @@ def _get_n_results(self, query, n):
limit = n limit = n
while (50 * pagenum) < limit: while (50 * pagenum) < limit:
self.report_download_page(query, pagenum+1)
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
request = compat_urllib_request.Request(result_url) data = self._download_webpage(result_url, u'query "%s"' % query,
try: u'Downloading page %s' % pagenum, u'Unable to download API page')
data = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
api_response = json.loads(data)['data'] api_response = json.loads(data)['data']
if not 'items' in api_response: if not 'items' in api_response: