mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-07 20:30:41 -05:00
[googledrive] Fix extraction on Python 3.6
Since Python 3.6, invalid escape sequences are deprecated. The webpage is likely to contain invalid escape sequences somewhere, so instead of unescaping the whole webpage, unescape only the URL. See https://bugs.python.org/issue27364. That change was designed for string literals, but it affects the 'unicode_escape' encoding as well. The code path is: str.decode('unicode_escape') -> codecs.unicode_escape_decode() -> PyUnicode_DecodeUnicodeEscape()
This commit is contained in:
parent
6ef3e65a7b
commit
e4e50f60b1
2 changed files with 11 additions and 4 deletions
|
@ -1,3 +1,9 @@
|
||||||
|
version <unreleased>
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [googledrive] Fix extraction on Python 3.6
|
||||||
|
|
||||||
|
|
||||||
version 2017.02.04.1
|
version 2017.02.04.1
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
lowercase_escape,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
_VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
|
||||||
'md5': '881f7700aec4f538571fa1e0eed4a7b6',
|
'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny.mp4',
|
'title': 'Big Buck Bunny.mp4',
|
||||||
'duration': 46,
|
'duration': 45,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# video id is longer than 28 characters
|
# video id is longer than 28 characters
|
||||||
|
@ -55,7 +56,7 @@ def _extract_url(webpage):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
|
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||||
|
|
||||||
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||||
if reason:
|
if reason:
|
||||||
|
@ -74,7 +75,7 @@ def _real_extract(self, url):
|
||||||
resolution = fmt.split('/')[1]
|
resolution = fmt.split('/')[1]
|
||||||
width, height = resolution.split('x')
|
width, height = resolution.split('x')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': fmt_url,
|
'url': lowercase_escape(fmt_url),
|
||||||
'format_id': fmt_id,
|
'format_id': fmt_id,
|
||||||
'resolution': resolution,
|
'resolution': resolution,
|
||||||
'width': int_or_none(width),
|
'width': int_or_none(width),
|
||||||
|
|
Loading…
Reference in a new issue