[bbc] Fix extraction of news articles (#2811)

Closes #1374

Authored by: ajj8
This commit is contained in:
ajj8 2022-02-17 15:54:53 +00:00 committed by GitHub
parent 5c10453827
commit ac184ab742
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -1171,9 +1171,9 @@ def _real_extract(self, url):
return self.playlist_result( return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description) entries, playlist_id, playlist_title, playlist_description)
initial_data = self._parse_json(self._search_regex( initial_data = self._parse_json(self._parse_json(self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage, r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage,
'preload state', default='{}'), playlist_id, fatal=False) 'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False)
if initial_data: if initial_data:
def parse_media(media): def parse_media(media):
if not media: if not media:
@@ -1214,7 +1214,7 @@ def parse_media(media):
if name == 'media-experience': if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict)) parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article': elif name == 'article':
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []): for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []):
if block.get('type') != 'media': if block.get('type') != 'media':
continue continue
parse_media(block.get('model')) parse_media(block.get('model'))