[youtube_live_chat] Support ongoing live chat (#422)

Authored by: siikamiika
This commit is contained in:
siikamiika 2021-06-23 03:12:39 +03:00 committed by GitHub
parent 8a77e5e6bc
commit c60ee3a218
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 123 additions and 41 deletions

View file

@ -25,7 +25,7 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD
from .websocket import WebSocketFragmentFD
from .youtube_live_chat import YoutubeLiveChatReplayFD
from .youtube_live_chat import YoutubeLiveChatFD
from .external import (
get_external_downloader,
FFmpegFD,
@ -44,7 +44,8 @@ def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
'mhtml': MhtmlFD,
'niconico_dmc': NiconicoDmcFD,
'websocket_frag': WebSocketFragmentFD,
'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
'youtube_live_chat': YoutubeLiveChatFD,
'youtube_live_chat_replay': YoutubeLiveChatFD,
}

View file

@ -1,20 +1,23 @@
from __future__ import division, unicode_literals
import json
import time
from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import (
try_get,
dict_get,
int_or_none,
RegexNotFoundError,
)
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
class YoutubeLiveChatReplayFD(FragmentFD):
""" Downloads YouTube live chat replays fragment by fragment """
class YoutubeLiveChatFD(FragmentFD):
""" Downloads YouTube live chats fragment by fragment """
FD_NAME = 'youtube_live_chat_replay'
FD_NAME = 'youtube_live_chat'
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
@ -31,6 +34,8 @@ def real_download(self, filename, info_dict):
ie = YT_BaseIE(self.ydl)
start_time = int(time.time() * 1000)
def dl_fragment(url, data=None, headers=None):
http_headers = info_dict.get('http_headers', {})
if headers:
@ -38,36 +43,70 @@ def dl_fragment(url, data=None, headers=None):
http_headers.update(headers)
return self._download_fragment(ctx, url, info_dict, http_headers, data)
def download_and_parse_fragment(url, frag_index, request_data):
def parse_actions_replay(live_chat_continuation):
offset = continuation_id = None
processed_fragment = bytearray()
for action in live_chat_continuation.get('actions', []):
if 'replayChatItemAction' in action:
replay_chat_item_action = action['replayChatItemAction']
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
if offset is not None:
continuation_id = try_get(
live_chat_continuation,
lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
self._append_fragment(ctx, processed_fragment)
return continuation_id, offset
live_offset = 0
def parse_actions_live(live_chat_continuation):
nonlocal live_offset
continuation_id = None
processed_fragment = bytearray()
for action in live_chat_continuation.get('actions', []):
timestamp = self.parse_live_timestamp(action)
if timestamp is not None:
live_offset = timestamp - start_time
# compatibility with replay format
pseudo_action = {
'replayChatItemAction': {'actions': [action]},
'videoOffsetTimeMsec': str(live_offset),
'isLive': True,
}
processed_fragment.extend(
json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
continuation_data_getters = [
lambda x: x['continuations'][0]['invalidationContinuationData'],
lambda x: x['continuations'][0]['timedContinuationData'],
]
continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
if continuation_data:
continuation_id = continuation_data.get('continuation')
timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
if timeout_ms is not None:
time.sleep(timeout_ms / 1000)
self._append_fragment(ctx, processed_fragment)
return continuation_id, live_offset
if info_dict['protocol'] == 'youtube_live_chat_replay':
parse_actions = parse_actions_replay
elif info_dict['protocol'] == 'youtube_live_chat':
parse_actions = parse_actions_live
def download_and_parse_fragment(url, frag_index, request_data, headers):
count = 0
while count <= fragment_retries:
try:
success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
success, raw_fragment = dl_fragment(url, request_data, headers)
if not success:
return False, None, None
try:
data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
data = None
if not data:
data = json.loads(raw_fragment)
data = json.loads(raw_fragment)
live_chat_continuation = try_get(
data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
offset = continuation_id = None
processed_fragment = bytearray()
for action in live_chat_continuation.get('actions', []):
if 'replayChatItemAction' in action:
replay_chat_item_action = action['replayChatItemAction']
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
if offset is not None:
continuation_id = try_get(
live_chat_continuation,
lambda x: x['continuations'][0]['liveChatReplayContinuationData']['continuation'])
self._append_fragment(ctx, processed_fragment)
continuation_id, offset = parse_actions(live_chat_continuation)
return True, continuation_id, offset
except compat_urllib_error.HTTPError as err:
count += 1
@ -100,7 +139,11 @@ def download_and_parse_fragment(url, frag_index, request_data):
innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
if not api_key or not innertube_context:
return False
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
if info_dict['protocol'] == 'youtube_live_chat_replay':
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
elif info_dict['protocol'] == 'youtube_live_chat':
url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
frag_index = offset = 0
while continuation_id is not None:
@ -111,8 +154,11 @@ def download_and_parse_fragment(url, frag_index, request_data):
}
if frag_index > 1:
request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
headers.update({'content-type': 'application/json'})
fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
success, continuation_id, offset = download_and_parse_fragment(
url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
url, frag_index, fragment_request_data, headers)
if not success:
return False
if test:
@ -120,3 +166,39 @@ def download_and_parse_fragment(url, frag_index, request_data):
self._finish_frag_download(ctx)
return True
@staticmethod
def parse_live_timestamp(action):
action_content = dict_get(
action,
['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
if not isinstance(action_content, dict):
return None
item = dict_get(action_content, ['item', 'bannerRenderer'])
if not isinstance(item, dict):
return None
renderer = dict_get(item, [
# text
'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
# ticker
'liveChatTickerPaidMessageItemRenderer',
'liveChatTickerSponsorItemRenderer',
# banner
'liveChatBannerRenderer',
])
if not isinstance(renderer, dict):
return None
parent_item_getters = [
lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
lambda x: x['contents'],
]
parent_item = try_get(renderer, parent_item_getters, dict)
if parent_item:
renderer = dict_get(parent_item, [
'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
])
if not isinstance(renderer, dict):
return None
return int_or_none(renderer.get('timestampUsec'), 1000)

View file

@ -2339,18 +2339,17 @@ def process_language(container, base_url, lang_code, sub_name, query):
initial_data = self._call_api(
'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
if not is_live:
try:
# This will error if there is no livechat
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
info['subtitles']['live_chat'] = [{
'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
'video_id': video_id,
'ext': 'json',
'protocol': 'youtube_live_chat_replay',
}]
except (KeyError, IndexError, TypeError):
pass
try:
# This will error if there is no livechat
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
info['subtitles']['live_chat'] = [{
'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
'video_id': video_id,
'ext': 'json',
'protocol': 'youtube_live_chat' if is_live else 'youtube_live_chat_replay',
}]
except (KeyError, IndexError, TypeError):
pass
if initial_data:
chapters = self._extract_chapters_from_json(