Harden the ytInitialData regexes with a lookbehind so the captured JSON must end in `}` immediately before the terminating `;`

This commit is contained in:
siikamiika 2020-08-11 00:13:43 +03:00
parent eaedbfd97e
commit 15eae44d74
2 changed files with 4 additions and 4 deletions

View file

@ -28,8 +28,8 @@ def dl_fragment(url):
return self._download_fragment(ctx, url, info_dict, headers)
def parse_yt_initial_data(data):
window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?);'
var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?);'
window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
for patt in window_patt, var_patt:
try:
raw_json = re.search(patt, data).group(1)

View file

@ -1495,8 +1495,8 @@ def _get_ytplayer_config(self, video_id, webpage):
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
(r'window\["ytInitialData"\]\s*=\s*(.*);',
r'var\s+ytInitialData\s*=\s*(.*?);'),
(r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(