mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-07 20:30:41 -05:00
[ie/youtube] Extract all formats from multi-language m3u8s (#9875)
Authored by: clienthax, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
This commit is contained in:
parent
90c3721a32
commit
9bd8501993
2 changed files with 20 additions and 9 deletions
|
@ -2222,6 +2222,11 @@ def build_stream_name():
|
||||||
'quality': quality,
|
'quality': quality,
|
||||||
'has_drm': has_drm,
|
'has_drm': has_drm,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# YouTube-specific
|
||||||
|
if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'):
|
||||||
|
f['language'] = yt_audio_content_id.split('.')[0]
|
||||||
|
|
||||||
resolution = last_stream_inf.get('RESOLUTION')
|
resolution = last_stream_inf.get('RESOLUTION')
|
||||||
if resolution:
|
if resolution:
|
||||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||||
|
|
|
@ -3797,6 +3797,8 @@ def _needs_live_processing(self, live_status, duration):
|
||||||
|
|
||||||
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
||||||
CHUNK_SIZE = 10 << 20
|
CHUNK_SIZE = 10 << 20
|
||||||
|
PREFERRED_LANG_VALUE = 10
|
||||||
|
original_language = None
|
||||||
itags, stream_ids = collections.defaultdict(set), []
|
itags, stream_ids = collections.defaultdict(set), []
|
||||||
itag_qualities, res_qualities = {}, {0: None}
|
itag_qualities, res_qualities = {}, {0: None}
|
||||||
q = qualities([
|
q = qualities([
|
||||||
|
@ -3894,10 +3896,12 @@ def build_fragments(f):
|
||||||
throttled = True
|
throttled = True
|
||||||
|
|
||||||
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
||||||
language_preference = (
|
is_default = audio_track.get('audioIsDefault')
|
||||||
10 if audio_track.get('audioIsDefault') and 10
|
is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
|
||||||
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
|
language_code = audio_track.get('id', '').split('.')[0]
|
||||||
else -1)
|
if language_code and is_default:
|
||||||
|
original_language = language_code
|
||||||
|
|
||||||
format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
|
format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
|
||||||
# Some formats may have much smaller duration than others (possibly damaged during encoding)
|
# Some formats may have much smaller duration than others (possibly damaged during encoding)
|
||||||
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||||||
|
@ -3924,8 +3928,7 @@ def build_fragments(f):
|
||||||
'filesize': int_or_none(fmt.get('contentLength')),
|
'filesize': int_or_none(fmt.get('contentLength')),
|
||||||
'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
|
'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
|
||||||
'format_note': join_nonempty(
|
'format_note': join_nonempty(
|
||||||
join_nonempty(audio_track.get('displayName'),
|
join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
|
||||||
language_preference > 0 and ' (default)', delim=''),
|
|
||||||
name, fmt.get('isDrc') and 'DRC',
|
name, fmt.get('isDrc') and 'DRC',
|
||||||
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||||
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||||
|
@ -3944,9 +3947,8 @@ def build_fragments(f):
|
||||||
'filesize_approx': filesize_from_tbr(tbr, format_duration),
|
'filesize_approx': filesize_from_tbr(tbr, format_duration),
|
||||||
'url': fmt_url,
|
'url': fmt_url,
|
||||||
'width': int_or_none(fmt.get('width')),
|
'width': int_or_none(fmt.get('width')),
|
||||||
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
|
||||||
'desc' if language_preference < -1 else '') or None,
|
'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
|
||||||
'language_preference': language_preference,
|
|
||||||
# Strictly de-prioritize broken, damaged and 3gp formats
|
# Strictly de-prioritize broken, damaged and 3gp formats
|
||||||
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
|
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
|
||||||
}
|
}
|
||||||
|
@ -4007,6 +4009,10 @@ def process_manifest_format(f, proto, client_name, itag):
|
||||||
elif itag:
|
elif itag:
|
||||||
f['format_id'] = itag
|
f['format_id'] = itag
|
||||||
|
|
||||||
|
if original_language and f.get('language') == original_language:
|
||||||
|
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
|
||||||
|
f['language_preference'] = PREFERRED_LANG_VALUE
|
||||||
|
|
||||||
if f.get('source_preference') is None:
|
if f.get('source_preference') is None:
|
||||||
f['source_preference'] = -1
|
f['source_preference'] = -1
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue