From 21cdcf03a237a0c4979c941d5a5385cae44c7906 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 26 Oct 2024 18:02:21 +0000 Subject: [PATCH] [ie] Resolve `language` to ISO639-2 for ISM formats (#11359) Closes #11356 Authored by: bashonly --- yt_dlp/extractor/common.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 795105b7d..ecece85f5 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -47,6 +47,7 @@ FormatSorter, GeoRestrictedError, GeoUtils, + ISO639Utils, LenientJSONDecoder, Popen, RegexNotFoundError, @@ -3071,7 +3072,11 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): url_pattern = stream.attrib['Url'] stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_name = stream.get('Name') - stream_language = stream.get('Language', 'und') + # IsmFD expects ISO 639 Set 2 language codes (3-character length) + # See: https://github.com/yt-dlp/yt-dlp/issues/11356 + stream_language = stream.get('Language') or 'und' + if len(stream_language) != 3: + stream_language = ISO639Utils.short2long(stream_language) or 'und' for track in stream.findall('QualityLevel'): KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'} fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))