[extractor] Passthrough errnote=False to parsing

This commit is contained in:
pukkandan 2022-07-15 16:05:00 +05:30
parent 5f2da312fa
commit 6edf28081f
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39

View file

@@ -929,39 +929,37 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno
return content return content
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): def __print_error(self, errnote, fatal, video_id, err):
if fatal:
raise ExtractorError(f'{video_id}: {errnote} ', cause=err)
elif errnote:
self.report_warning(f'{video_id}: {errnote} {err}')
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True, errnote=None):
if transform_source: if transform_source:
xml_string = transform_source(xml_string) xml_string = transform_source(xml_string)
try: try:
return compat_etree_fromstring(xml_string.encode('utf-8')) return compat_etree_fromstring(xml_string.encode('utf-8'))
except xml.etree.ElementTree.ParseError as ve: except xml.etree.ElementTree.ParseError as ve:
errmsg = '%s: Failed to parse XML ' % video_id self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve)
if fatal:
raise ExtractorError(errmsg, cause=ve)
else:
self.report_warning(errmsg + str(ve))
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, **parser_kwargs): def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, errnote=None, **parser_kwargs):
try: try:
return json.loads( return json.loads(
json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs) json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs)
except ValueError as ve: except ValueError as ve:
errmsg = f'{video_id}: Failed to parse JSON' self.__print_error('Failed to parse JSON' if errnote is None else errnote, fatal, video_id, ve)
if fatal:
raise ExtractorError(errmsg, cause=ve)
else:
self.report_warning(f'{errmsg}: {ve}')
def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True): def _parse_socket_response_as_json(self, data, *args, **kwargs):
return self._parse_json( return self._parse_json(data[data.find('{'):data.rfind('}') + 1], *args, **kwargs)
data[data.find('{'):data.rfind('}') + 1],
video_id, transform_source, fatal)
def __create_download_methods(name, parser, note, errnote, return_value): def __create_download_methods(name, parser, note, errnote, return_value):
def parse(ie, content, *args, **kwargs): def parse(ie, content, *args, errnote=errnote, **kwargs):
if parser is None: if parser is None:
return content return content
if errnote is False:
kwargs['errnote'] = errnote
# parser is fetched by name so subclasses can override it # parser is fetched by name so subclasses can override it
return getattr(ie, parser)(content, *args, **kwargs) return getattr(ie, parser)(content, *args, **kwargs)
@@ -973,7 +971,7 @@ def download_handle(self, url_or_request, video_id, note=note, errnote=errnote,
if res is False: if res is False:
return res return res
content, urlh = res content, urlh = res
return parse(self, content, video_id, transform_source=transform_source, fatal=fatal), urlh return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None, def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
@@ -988,7 +986,7 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
self.report_warning(f'Unable to load request from disk: {e}') self.report_warning(f'Unable to load request from disk: {e}')
else: else:
content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers) content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers)
return parse(self, content, video_id, transform_source, fatal) return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote)
kwargs = { kwargs = {
'note': note, 'note': note,
'errnote': errnote, 'errnote': errnote,