[Instagram] Add story/highlight extractor (#2006)

Fixes https://github.com/ytdl-org/youtube-dl/issues/25575
Authored by: u-spec-png
This commit is contained in:
u-spec-png 2021-12-28 18:58:06 +00:00 committed by GitHub
parent e540c56f39
commit dd5e60b15d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 75 additions and 0 deletions

View file

@ -621,6 +621,7 @@
InstagramIOSIE, InstagramIOSIE,
InstagramUserIE, InstagramUserIE,
InstagramTagIE, InstagramTagIE,
InstagramStoryIE,
) )
from .internazionale import InternazionaleIE from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE from .internetvideoarchive import InternetVideoArchiveIE

View file

@ -542,3 +542,77 @@ def _query_vars_for(data):
'tag_name': 'tag_name':
data['entry_data']['TagPage'][0]['graphql']['hashtag']['name'] data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
} }
class InstagramStoryIE(InstagramBaseIE):
_VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)'
IE_NAME = 'instagram:story'
_TESTS = [{
'url': 'https://www.instagram.com/stories/highlights/18090946048123978/',
'info_dict': {
'id': '18090946048123978',
'title': 'Rare',
},
'playlist_mincount': 50
}]
def _real_extract(self, url):
username, story_id = self._match_valid_url(url).groups()
story_info_url = f'{username}/{story_id}/?__a=1' if username == 'highlights' else f'{username}/?__a=1'
story_info = self._download_json(f'https://www.instagram.com/stories/{story_info_url}', story_id, headers={
'X-IG-App-ID': 936619743392459,
'X-ASBD-ID': 198387,
'X-IG-WWW-Claim': 0,
'X-Requested-With': 'XMLHttpRequest',
'Referer': url,
})
user_id = story_info['user']['id']
highlight_title = traverse_obj(story_info, ('highlight', 'title'))
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
videos = self._download_json(f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}', story_id, headers={
'X-IG-App-ID': 936619743392459,
'X-ASBD-ID': 198387,
'X-IG-WWW-Claim': 0,
})['reels']
entites = []
videos = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
for video_info in videos:
formats = []
if isinstance(video_info, list):
video_info = video_info[0]
vcodec = video_info.get('video_codec')
dash_manifest_raw = video_info.get('video_dash_manifest')
videos_list = video_info.get('video_versions')
if not (dash_manifest_raw or videos_list):
continue
for format in videos_list:
formats.append({
'url': format.get('url'),
'width': format.get('width'),
'height': format.get('height'),
'vcodec': vcodec,
})
if dash_manifest_raw:
formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, story_id), mpd_id='dash'))
self._sort_formats(formats)
thumbnails = [{
'url': thumbnail.get('url'),
'width': thumbnail.get('width'),
'height': thumbnail.get('height')
} for thumbnail in traverse_obj(video_info, ('image_versions2', 'candidates')) or []]
entites.append({
'id': video_info.get('id'),
'title': f'Story by {username}',
'timestamp': int_or_none(video_info.get('taken_at')),
'uploader': traverse_obj(videos, ('user', 'full_name')),
'duration': float_or_none(video_info.get('video_duration')),
'uploader_id': user_id,
'thumbnails': thumbnails,
'formats': formats,
})
return self.playlist_result(entites, playlist_id=story_id, playlist_title=highlight_title)