From 4afa3ec4b6b693672d297ad615f3137763aaac81 Mon Sep 17 00:00:00 2001 From: Felix S Date: Fri, 31 Dec 2021 20:06:45 +0000 Subject: [PATCH] [extractor] Detect more subtitle codecs in MPD manifests (#2174) Authored by: fstirlitz --- yt_dlp/extractor/common.py | 10 +++++++--- yt_dlp/utils.py | 8 ++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3260399cb..3821b7183 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2712,11 +2712,15 @@ def extract_Initialization(source): mime_type = representation_attrib['mimeType'] content_type = representation_attrib.get('contentType', mime_type.split('/')[0]) - codecs = representation_attrib.get('codecs', '') + codecs = parse_codecs(representation_attrib.get('codecs', '')) if content_type not in ('video', 'audio', 'text'): if mime_type == 'image/jpeg': content_type = mime_type - elif codecs.split('.')[0] == 'stpp': + elif codecs['vcodec'] != 'none': + content_type = 'video' + elif codecs['acodec'] != 'none': + content_type = 'audio' + elif codecs.get('tcodec', 'none') != 'none': content_type = 'text' elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): content_type = 'text' @@ -2762,8 +2766,8 @@ def extract_Initialization(source): 'format_note': 'DASH %s' % content_type, 'filesize': filesize, 'container': mimetype2ext(mime_type) + '_dash', + **codecs } - f.update(parse_codecs(codecs)) elif content_type == 'text': f = { 'ext': mimetype2ext(mime_type), diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index c22aeb464..56b31bc41 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3196,7 +3196,7 @@ def parse_codecs(codecs_str): return {} split_codecs = list(filter(None, map( str.strip, codecs_str.strip().strip(',').split(',')))) - vcodec, acodec, hdr = None, None, None + vcodec, acodec, tcodec, hdr = None, None, None, None for full_codec in split_codecs: parts = full_codec.split('.') codec = parts[0].replace('0', '') @@ -3213,13 +3213,17 @@ def parse_codecs(codecs_str): elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): if not acodec: acodec = full_codec + elif codec in ('stpp', 'wvtt',): + if not tcodec: + tcodec = full_codec else: write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) - if vcodec or acodec: + if vcodec or acodec or tcodec: return { 'vcodec': vcodec or 'none', 'acodec': acodec or 'none', 'dynamic_range': hdr, + **({'tcodec': tcodec} if tcodec is not None else {}), } elif len(split_codecs) == 2: return {