diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 87660bb23..d36f025ab 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1536,10 +1536,10 @@ def extract_chapter_information(e):
info['chapters'] = chapters
def extract_video_object(e):
- assert is_type(e, 'VideoObject')
author = e.get('author')
info.update({
'url': url_or_none(e.get('contentUrl')),
+ 'ext': mimetype2ext(e.get('encodingFormat')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'thumbnails': [{'url': unescapeHTML(url)}
@@ -1552,12 +1552,19 @@ def extract_video_object(e):
# however some websites are using 'Text' type instead.
# 1. https://schema.org/VideoObject
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
+ 'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
'filesize': int_or_none(float_or_none(e.get('contentSize'))),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
'height': int_or_none(e.get('height')),
'view_count': int_or_none(e.get('interactionCount')),
+ 'tags': try_call(lambda: e.get('keywords').split(',')),
})
+ if is_type(e, 'AudioObject'):
+ info.update({
+ 'vcodec': 'none',
+ 'abr': int_or_none(e.get('bitrate')),
+ })
extract_interaction_statistic(e)
extract_chapter_information(e)
@@ -1608,7 +1615,7 @@ def traverse_json_ld(json_ld, at_top_level=True):
extract_video_object(e['video'][0])
elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
extract_video_object(e['subjectOf'][0])
- elif is_type(e, 'VideoObject'):
+ elif is_type(e, 'VideoObject', 'AudioObject'):
extract_video_object(e)
if expected_type is None:
continue
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 672034c6d..73aefc782 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2910,7 +2910,7 @@ def _real_extract(self, url):
if json_ld.get('url') not in (url, None):
self.report_detected('JSON LD')
return merge_dicts({
- '_type': 'url_transparent',
+ '_type': 'video' if json_ld.get('ext') else 'url_transparent',
'url': smuggle_url(json_ld['url'], {
'force_videoid': video_id,
'to_generic': True,
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index d655bfdd0..724e34ef7 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -232,7 +232,7 @@ def random_user_agent():
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)'
+JSON_LD_RE = r'(?is)'
NUMBER_RE = r'\d+(?:\.\d+)?'