mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
Remove the calls to 'compat_urllib_request.urlopen' in a few extractors
This commit is contained in:
parent
ac5118bcb9
commit
baa7b1978b
5 changed files with 42 additions and 86 deletions
|
@ -51,8 +51,7 @@ def _real_extract(self, url):
|
||||||
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
|
url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
|
||||||
urlp = compat_urllib_parse_urlparse(url)
|
urlp = compat_urllib_parse_urlparse(url)
|
||||||
if urlp.path.startswith('/play/'):
|
if urlp.path.startswith('/play/'):
|
||||||
request = compat_urllib_request.Request(url)
|
response = self._request_webpage(url, None, False)
|
||||||
response = compat_urllib_request.urlopen(request)
|
|
||||||
redirecturl = response.geturl()
|
redirecturl = response.geturl()
|
||||||
rurlp = compat_urllib_parse_urlparse(redirecturl)
|
rurlp = compat_urllib_parse_urlparse(redirecturl)
|
||||||
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
|
file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
|
||||||
|
@ -69,25 +68,23 @@ def _real_extract(self, url):
|
||||||
request.add_header('User-Agent', 'iTunes/10.6.1')
|
request.add_header('User-Agent', 'iTunes/10.6.1')
|
||||||
self.report_extraction(mobj.group(1))
|
self.report_extraction(mobj.group(1))
|
||||||
info = None
|
info = None
|
||||||
try:
|
urlh = self._request_webpage(request, None, False,
|
||||||
urlh = compat_urllib_request.urlopen(request)
|
u'unable to download video info webpage')
|
||||||
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
||||||
basename = url.split('/')[-1]
|
basename = url.split('/')[-1]
|
||||||
title,ext = os.path.splitext(basename)
|
title,ext = os.path.splitext(basename)
|
||||||
title = title.decode('UTF-8')
|
title = title.decode('UTF-8')
|
||||||
ext = ext.replace('.', '')
|
ext = ext.replace('.', '')
|
||||||
self.report_direct_download(title)
|
self.report_direct_download(title)
|
||||||
info = {
|
info = {
|
||||||
'id': title,
|
'id': title,
|
||||||
'url': url,
|
'url': url,
|
||||||
'uploader': None,
|
'uploader': None,
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'urlhandle': urlh
|
'urlhandle': urlh
|
||||||
}
|
}
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
|
|
||||||
if info is None: # Regular URL
|
if info is None: # Regular URL
|
||||||
try:
|
try:
|
||||||
json_code_bytes = urlh.read()
|
json_code_bytes = urlh.read()
|
||||||
|
|
|
@ -1,11 +1,8 @@
|
||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
@ -93,12 +90,8 @@ def report_disclaimer(self):
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
# Retrieve disclaimer
|
# Retrieve disclaimer
|
||||||
request = compat_urllib_request.Request(self._DISCLAIMER)
|
self.report_disclaimer()
|
||||||
try:
|
self._download_webpage(self._DISCLAIMER, None, False, u'Unable to retrieve disclaimer')
|
||||||
self.report_disclaimer()
|
|
||||||
compat_urllib_request.urlopen(request).read()
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to retrieve disclaimer: %s' % compat_str(err))
|
|
||||||
|
|
||||||
# Confirm age
|
# Confirm age
|
||||||
disclaimer_form = {
|
disclaimer_form = {
|
||||||
|
@ -107,11 +100,8 @@ def _real_initialize(self):
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
try:
|
self.report_age_confirmation()
|
||||||
self.report_age_confirmation()
|
self._download_webpage(request, None, False, u'Unable to confirm age')
|
||||||
compat_urllib_request.urlopen(request).read()
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract id and simplified title from URL
|
# Extract id and simplified title from URL
|
||||||
|
|
|
@ -1,13 +1,10 @@
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_request,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,9 +28,11 @@ def check_urls(self, url_list):
|
||||||
"""Returns 1st active url from list"""
|
"""Returns 1st active url from list"""
|
||||||
for url in url_list:
|
for url in url_list:
|
||||||
try:
|
try:
|
||||||
compat_urllib_request.urlopen(url)
|
# We only want to know if the request succeeds
|
||||||
|
# don't download the whole file
|
||||||
|
self._request_webpage(url, None, False)
|
||||||
return url
|
return url
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error):
|
except ExtractorError:
|
||||||
url = None
|
url = None
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -1,13 +1,8 @@
|
||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_request,
|
|
||||||
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
@ -45,11 +40,7 @@ def _real_extract(self, url):
|
||||||
self.report_extraction(info['id'])
|
self.report_extraction(info['id'])
|
||||||
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
|
baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
|
||||||
xmlUrl = baseUrl + video + '.xml'
|
xmlUrl = baseUrl + video + '.xml'
|
||||||
try:
|
mdoc = self._download_xml(xmlUrl, info['id'])
|
||||||
metaXml = compat_urllib_request.urlopen(xmlUrl).read()
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to download video info XML: %s' % compat_str(err))
|
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
|
||||||
try:
|
try:
|
||||||
info['title'] = mdoc.findall('./title')[0].text
|
info['title'] = mdoc.findall('./title')[0].text
|
||||||
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
|
info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
|
||||||
|
@ -95,12 +86,9 @@ def _real_extract(self, url):
|
||||||
'upload_date': None,
|
'upload_date': None,
|
||||||
}
|
}
|
||||||
|
|
||||||
self.report_download_webpage(info['id'])
|
|
||||||
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
|
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
|
||||||
try:
|
rootpage = self._download_webpage(rootURL, info['id'],
|
||||||
rootpage = compat_urllib_request.urlopen(rootURL).read()
|
errnote=u'Unable to download course info page')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to download course info page: ' + compat_str(err))
|
|
||||||
|
|
||||||
info['title'] = info['id']
|
info['title'] = info['id']
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import socket
|
|
||||||
import string
|
import string
|
||||||
import struct
|
import struct
|
||||||
import traceback
|
import traceback
|
||||||
|
@ -17,9 +16,7 @@
|
||||||
from .subtitles import SubtitlesInfoExtractor
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_http_client,
|
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
|
@ -53,9 +50,9 @@ def _set_language(self):
|
||||||
request = compat_urllib_request.Request(self._LANG_URL)
|
request = compat_urllib_request.Request(self._LANG_URL)
|
||||||
try:
|
try:
|
||||||
self.report_lang()
|
self.report_lang()
|
||||||
compat_urllib_request.urlopen(request).read()
|
self._download_webpage(self._LANG_URL, None, False)
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
|
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err.cause))
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -67,12 +64,8 @@ def _login(self):
|
||||||
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL)
|
login_page = self._download_webpage(self._LOGIN_URL, None, False,
|
||||||
try:
|
u'Unable to fetch login page')
|
||||||
login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
|
|
||||||
return False
|
|
||||||
|
|
||||||
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
|
galx = self._search_regex(r'(?s)<input.+?name="GALX".+?value="(.+?)"',
|
||||||
login_page, u'Login GALX parameter')
|
login_page, u'Login GALX parameter')
|
||||||
|
@ -105,12 +98,12 @@ def _login(self):
|
||||||
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
|
||||||
try:
|
try:
|
||||||
self.report_login()
|
self.report_login()
|
||||||
login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
login_results = self._download_webpage(request, None, False)
|
||||||
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
|
||||||
self._downloader.report_warning(u'unable to log in: bad username or password')
|
self._downloader.report_warning(u'unable to log in: bad username or password')
|
||||||
return False
|
return False
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except ExtractorError as err:
|
||||||
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
|
self._downloader.report_warning(u'unable to log in: %s' % compat_str(err.cause))
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -120,11 +113,8 @@ def _confirm_age(self):
|
||||||
'action_confirm': 'Confirm',
|
'action_confirm': 'Confirm',
|
||||||
}
|
}
|
||||||
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
|
request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
|
||||||
try:
|
self.report_age_confirmation()
|
||||||
self.report_age_confirmation()
|
self._download_webpage(request, None, False, u'Unable to confirm age')
|
||||||
compat_urllib_request.urlopen(request).read().decode('utf-8')
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
|
@ -1737,10 +1727,6 @@ class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
IE_NAME = u'youtube:search'
|
IE_NAME = u'youtube:search'
|
||||||
_SEARCH_KEY = 'ytsearch'
|
_SEARCH_KEY = 'ytsearch'
|
||||||
|
|
||||||
def report_download_page(self, query, pagenum):
|
|
||||||
"""Report attempt to download search page with given number."""
|
|
||||||
self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
|
|
||||||
|
|
||||||
def _get_n_results(self, query, n):
|
def _get_n_results(self, query, n):
|
||||||
"""Get a specified number of results for a query"""
|
"""Get a specified number of results for a query"""
|
||||||
|
|
||||||
|
@ -1749,13 +1735,9 @@ def _get_n_results(self, query, n):
|
||||||
limit = n
|
limit = n
|
||||||
|
|
||||||
while (50 * pagenum) < limit:
|
while (50 * pagenum) < limit:
|
||||||
self.report_download_page(query, pagenum+1)
|
|
||||||
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
|
result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
|
||||||
request = compat_urllib_request.Request(result_url)
|
data = self._download_webpage(result_url, u'query "%s"' % query,
|
||||||
try:
|
u'Downloading page %s' % pagenum, u'Unable to download API page')
|
||||||
data = compat_urllib_request.urlopen(request).read().decode('utf-8')
|
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
|
||||||
raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
|
|
||||||
api_response = json.loads(data)['data']
|
api_response = json.loads(data)['data']
|
||||||
|
|
||||||
if not 'items' in api_response:
|
if not 'items' in api_response:
|
||||||
|
|
Loading…
Reference in a new issue