[utils] LenientJSONDecoder: Parse unclosed objects

This commit is contained in:
pukkandan 2023-02-24 10:39:43 +05:30
parent da8e2912b1
commit cc09083636
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39

View file

@ -593,21 +593,43 @@ def clean_html(html):
class LenientJSONDecoder(json.JSONDecoder):
    """JSON decoder that tolerates some malformed input.

    Keyword-only options (everything else is forwarded to json.JSONDecoder):

    @param transform_source  Optional callable applied to the input string
                             before decoding
    @param ignore_extra      If true, leading whitespace is stripped and any
                             data following the first JSON value is ignored
    @param close_objects     Maximum number of unclosed objects/arrays that
                             may be closed automatically at the point where
                             parsing failed
    """
    # TODO: Write tests

    def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        # Closing one container can take two retries: one to append the
        # comma (see _close_object) and one to swap it for the closing bracket
        self._close_attempts = 2 * close_objects
        super().__init__(*args, **kwargs)

    @staticmethod
    def _close_object(err):
        """Return a repaired copy of the failed document, or None if the
        error is not one that can be fixed by closing a container.

        The document is cut at the error position, so anything after that
        position is discarded.
        """
        doc = err.doc[:err.pos]
        # We need to add comma first to get the correct error message
        if err.msg.startswith('Expecting \',\''):
            return doc + ','
        elif not doc.endswith(','):
            return None

        # The trailing comma is replaced by the closer matching the message
        if err.msg.startswith('Expecting property name'):
            return doc[:-1] + '}'
        elif err.msg.startswith('Expecting value'):
            return doc[:-1] + ']'
        return None

    def decode(self, s):
        """Decode the JSON document `s`, applying the configured leniency.

        Raises json.JSONDecodeError (with a snippet of the text around the
        error position appended to the message) if the document cannot be
        decoded or repaired within the allowed number of attempts.
        """
        if self.transform_source:
            s = self.transform_source(s)
        for attempt in range(self._close_attempts + 1):
            try:
                if self.ignore_extra:
                    return self.raw_decode(s.lstrip())[0]
                return super().decode(s)
            except json.JSONDecodeError as e:
                if e.pos is None:
                    raise
                if attempt < self._close_attempts:
                    repaired = self._close_object(e)
                    if repaired is not None:
                        s = repaired
                        continue
                # Use e.doc rather than s: e.pos is an index into e.doc, which
                # differs from s when ignore_extra stripped leading whitespace.
                # Clamp the start so a position near the beginning does not
                # turn into a negative (wrap-around) slice index.
                doc = e.doc
                snippet = doc[max(e.pos - 10, 0):e.pos + 10]
                raise type(e)(f'{e.msg} in {snippet!r}', doc, e.pos)
        # Unreachable: the final iteration always returns or raises
        raise AssertionError('Too many attempts to decode JSON')
def sanitize_open(filename, open_mode): def sanitize_open(filename, open_mode):