[extractor/livestream] Support videos with account id (#6324)

Authored by: theperfectpunk
Closes #2225
This commit is contained in:
Mohit Tokas 2023-05-29 10:16:32 +05:30 committed by GitHub
parent c6d3f81a40
commit bfdf144c7e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,33 +1,36 @@
import re
import itertools import itertools
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_str, compat_urlparse
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
find_xpath_attr,
xpath_attr,
xpath_with_ns,
xpath_text,
orderedSet,
update_url_query,
int_or_none,
float_or_none,
parse_iso8601,
determine_ext, determine_ext,
find_xpath_attr,
float_or_none,
int_or_none,
orderedSet,
parse_iso8601,
traverse_obj,
update_url_query,
xpath_attr,
xpath_text,
xpath_with_ns,
) )
class LivestreamIE(InfoExtractor): class LivestreamIE(InfoExtractor):
IE_NAME = 'livestream' IE_NAME = 'livestream'
_VALID_URL = r'https?://(?:new\.)?livestream\.com/(?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))/(?:events/(?P<event_id>\d+)|(?P<event_name>[^/]+))(?:/videos/(?P<id>\d+))?' _VALID_URL = r'''(?x)
https?://(?:new\.)?livestream\.com/
(?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))
(?:/events/(?P<event_id>\d+)|/(?P<event_name>[^/]+))?
(?:/videos/(?P<id>\d+))?
'''
_EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"'] _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"']
_TESTS = [{ _TESTS = [{
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
'md5': '53274c76ba7754fb0e8d072716f2292b', 'md5': '7876c5f5dc3e711b6b73acce4aac1527',
'info_dict': { 'info_dict': {
'id': '4719370', 'id': '4719370',
'ext': 'mp4', 'ext': 'mp4',
@ -37,22 +40,37 @@ class LivestreamIE(InfoExtractor):
'duration': 5968.0, 'duration': 5968.0,
'like_count': int, 'like_count': int,
'view_count': int, 'view_count': int,
'comment_count': int,
'thumbnail': r're:^http://.*\.jpg$' 'thumbnail': r're:^http://.*\.jpg$'
} }
}, { }, {
'url': 'http://new.livestream.com/tedx/cityenglish', 'url': 'https://livestream.com/coheedandcambria/websterhall',
'info_dict': { 'info_dict': {
'title': 'TEDCity2.0 (English)', 'id': '1585861',
'id': '2245590', 'title': 'Live From Webster Hall'
},
'playlist_mincount': 1,
}, {
'url': 'https://livestream.com/dayananda/events/7954027',
'info_dict': {
'title': 'Live from Mevo',
'id': '7954027',
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
}, { }, {
'url': 'http://new.livestream.com/chess24/tatasteelchess', 'url': 'https://livestream.com/accounts/82',
'info_dict': { 'info_dict': {
'title': 'Tata Steel Chess', 'id': '253978',
'id': '3705884', 'view_count': int,
}, 'title': 'trsr',
'playlist_mincount': 60, 'comment_count': int,
'like_count': int,
'upload_date': '20120306',
'timestamp': 1331042383,
'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg',
'duration': 15.332,
'ext': 'mp4'
}
}, { }, {
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640', 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
'only_matching': True, 'only_matching': True,
@ -179,7 +197,7 @@ def _extract_stream_info(self, stream_info):
'is_live': is_live, 'is_live': is_live,
} }
def _extract_event(self, event_data): def _generate_event_playlist(self, event_data):
event_id = compat_str(event_data['id']) event_id = compat_str(event_data['id'])
account_id = compat_str(event_data['owner_account_id']) account_id = compat_str(event_data['owner_account_id'])
feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json' feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json'
@ -189,7 +207,6 @@ def _extract_event(self, event_data):
return self._extract_stream_info(stream_info) return self._extract_stream_info(stream_info)
last_video = None last_video = None
entries = []
for i in itertools.count(1): for i in itertools.count(1):
if last_video is None: if last_video is None:
info_url = feed_root_url info_url = feed_root_url
@ -197,31 +214,38 @@ def _extract_event(self, event_data):
info_url = '{root}?&id={id}&newer=-1&type=video'.format( info_url = '{root}?&id={id}&newer=-1&type=video'.format(
root=feed_root_url, id=last_video) root=feed_root_url, id=last_video)
videos_info = self._download_json( videos_info = self._download_json(
info_url, event_id, 'Downloading page {0}'.format(i))['data'] info_url, event_id, f'Downloading page {i}')['data']
videos_info = [v['data'] for v in videos_info if v['type'] == 'video'] videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
if not videos_info: if not videos_info:
break break
for v in videos_info: for v in videos_info:
v_id = compat_str(v['id']) v_id = compat_str(v['id'])
entries.append(self.url_result( yield self.url_result(
'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id), f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}',
'Livestream', v_id, v.get('caption'))) LivestreamIE, v_id, v.get('caption'))
last_video = videos_info[-1]['id'] last_video = videos_info[-1]['id']
return self.playlist_result(entries, event_id, event_data['full_name'])
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
event = mobj.group('event_id') or mobj.group('event_name') event = mobj.group('event_id') or mobj.group('event_name')
account = mobj.group('account_id') or mobj.group('account_name') account = mobj.group('account_id') or mobj.group('account_name')
api_url = self._API_URL_TEMPLATE % (account, event) api_url = f'http://livestream.com/api/accounts/{account}'
if video_id: if video_id:
video_data = self._download_json( video_data = self._download_json(
api_url + '/videos/%s' % video_id, video_id) f'{api_url}/events/{event}/videos/{video_id}', video_id)
return self._extract_video_info(video_data) return self._extract_video_info(video_data)
else: elif event:
event_data = self._download_json(api_url, video_id) event_data = self._download_json(f'{api_url}/events/{event}', None)
return self._extract_event(event_data) return self.playlist_result(
self._generate_event_playlist(event_data), str(event_data['id']), event_data['full_name'])
account_data = self._download_json(api_url, None)
items = traverse_obj(account_data, (('upcoming_events', 'past_events'), 'data', ...))
return self.playlist_result(
itertools.chain.from_iterable(map(self._generate_event_playlist, items)),
account_data.get('id'), account_data.get('full_name'))
# The original version of Livestream uses a different system # The original version of Livestream uses a different system