[ie] Handle decode errors when reading responses (#10868)

Authored by: bashonly
This commit is contained in:
bashonly 2024-09-13 18:20:17 -05:00 committed by GitHub
parent cc85596d5b
commit 325001317d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -35,6 +35,7 @@
from ..networking.exceptions import ( from ..networking.exceptions import (
HTTPError, HTTPError,
IncompleteRead, IncompleteRead,
TransportError,
network_exceptions, network_exceptions,
) )
from ..networking.impersonate import ImpersonateTarget from ..networking.impersonate import ImpersonateTarget
@@ -965,6 +966,9 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
return False return False
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
encoding=encoding, data=data) encoding=encoding, data=data)
if content is False:
assert not fatal
return False
return (content, urlh) return (content, urlh)
@staticmethod @staticmethod
@@ -1039,7 +1043,15 @@ def __decode_webpage(self, webpage_bytes, encoding, headers):
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
prefix=None, encoding=None, data=None): prefix=None, encoding=None, data=None):
webpage_bytes = urlh.read() try:
webpage_bytes = urlh.read()
except TransportError as err:
errmsg = f'{video_id}: Error reading response: {err.msg}'
if fatal:
raise ExtractorError(errmsg, cause=err)
self.report_warning(errmsg)
return False
if prefix is not None: if prefix is not None:
webpage_bytes = prefix + webpage_bytes webpage_bytes = prefix + webpage_bytes
if self.get_param('dump_intermediate_pages', False): if self.get_param('dump_intermediate_pages', False):