Merge pull request #126 from nao20010128nao/master

[instagram] Fix extractor
This commit is contained in:
Tom-Oliver Heidel 2020-11-30 02:50:39 +01:00 committed by GitHub
commit 929576bb9e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -126,16 +126,23 @@ def _real_extract(self, url):
uploader_id, like_count, comment_count, comments, height,
width) = [None] * 11
shared_data = self._parse_json(
self._search_regex(
r'window\._sharedData\s*=\s*({.+?});',
webpage, 'shared data', default='{}'),
video_id, fatal=False)
shared_data = try_get(webpage,
(lambda x: self._parse_json(
self._search_regex(
r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);',
x, 'additional data', default='{}'),
video_id, fatal=False),
lambda x: self._parse_json(
self._search_regex(
r'window\._sharedData\s*=\s*({.+?});',
x, 'shared data', default='{}'),
video_id, fatal=False)['entry_data']['PostPage'][0]),
None)
if shared_data:
media = try_get(
shared_data,
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
lambda x: x['entry_data']['PostPage'][0]['media']),
(lambda x: x['graphql']['shortcode_media'],
lambda x: x['media']),
dict)
if media:
video_url = media.get('video_url')
@@ -144,7 +151,7 @@ def _real_extract(self, url):
description = try_get(
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
compat_str) or media.get('caption')
thumbnail = media.get('display_src')
thumbnail = media.get('display_src') or media.get('thumbnail_src')
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
uploader = media.get('owner', {}).get('full_name')
uploader_id = media.get('owner', {}).get('username')