mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
parent
1534aba865
commit
0f60ba6e65
3 changed files with 11 additions and 4 deletions
|
@ -1536,10 +1536,10 @@ def extract_chapter_information(e):
|
||||||
info['chapters'] = chapters
|
info['chapters'] = chapters
|
||||||
|
|
||||||
def extract_video_object(e):
|
def extract_video_object(e):
|
||||||
assert is_type(e, 'VideoObject')
|
|
||||||
author = e.get('author')
|
author = e.get('author')
|
||||||
info.update({
|
info.update({
|
||||||
'url': url_or_none(e.get('contentUrl')),
|
'url': url_or_none(e.get('contentUrl')),
|
||||||
|
'ext': mimetype2ext(e.get('encodingFormat')),
|
||||||
'title': unescapeHTML(e.get('name')),
|
'title': unescapeHTML(e.get('name')),
|
||||||
'description': unescapeHTML(e.get('description')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
'thumbnails': [{'url': unescapeHTML(url)}
|
'thumbnails': [{'url': unescapeHTML(url)}
|
||||||
|
@ -1552,11 +1552,18 @@ def extract_video_object(e):
|
||||||
# however some websites are using 'Text' type instead.
|
# however some websites are using 'Text' type instead.
|
||||||
# 1. https://schema.org/VideoObject
|
# 1. https://schema.org/VideoObject
|
||||||
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
|
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
|
||||||
|
'artist': traverse_obj(e, ('byArtist', 'name'), expected_type=str),
|
||||||
'filesize': int_or_none(float_or_none(e.get('contentSize'))),
|
'filesize': int_or_none(float_or_none(e.get('contentSize'))),
|
||||||
'tbr': int_or_none(e.get('bitrate')),
|
'tbr': int_or_none(e.get('bitrate')),
|
||||||
'width': int_or_none(e.get('width')),
|
'width': int_or_none(e.get('width')),
|
||||||
'height': int_or_none(e.get('height')),
|
'height': int_or_none(e.get('height')),
|
||||||
'view_count': int_or_none(e.get('interactionCount')),
|
'view_count': int_or_none(e.get('interactionCount')),
|
||||||
|
'tags': try_call(lambda: e.get('keywords').split(',')),
|
||||||
|
})
|
||||||
|
if is_type(e, 'AudioObject'):
|
||||||
|
info.update({
|
||||||
|
'vcodec': 'none',
|
||||||
|
'abr': int_or_none(e.get('bitrate')),
|
||||||
})
|
})
|
||||||
extract_interaction_statistic(e)
|
extract_interaction_statistic(e)
|
||||||
extract_chapter_information(e)
|
extract_chapter_information(e)
|
||||||
|
@ -1608,7 +1615,7 @@ def traverse_json_ld(json_ld, at_top_level=True):
|
||||||
extract_video_object(e['video'][0])
|
extract_video_object(e['video'][0])
|
||||||
elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
|
elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
|
||||||
extract_video_object(e['subjectOf'][0])
|
extract_video_object(e['subjectOf'][0])
|
||||||
elif is_type(e, 'VideoObject'):
|
elif is_type(e, 'VideoObject', 'AudioObject'):
|
||||||
extract_video_object(e)
|
extract_video_object(e)
|
||||||
if expected_type is None:
|
if expected_type is None:
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -2910,7 +2910,7 @@ def _real_extract(self, url):
|
||||||
if json_ld.get('url') not in (url, None):
|
if json_ld.get('url') not in (url, None):
|
||||||
self.report_detected('JSON LD')
|
self.report_detected('JSON LD')
|
||||||
return merge_dicts({
|
return merge_dicts({
|
||||||
'_type': 'url_transparent',
|
'_type': 'video' if json_ld.get('ext') else 'url_transparent',
|
||||||
'url': smuggle_url(json_ld['url'], {
|
'url': smuggle_url(json_ld['url'], {
|
||||||
'force_videoid': video_id,
|
'force_videoid': video_id,
|
||||||
'to_generic': True,
|
'to_generic': True,
|
||||||
|
|
|
@ -232,7 +232,7 @@ def random_user_agent():
|
||||||
])
|
])
|
||||||
|
|
||||||
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
|
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
|
||||||
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'
|
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
|
||||||
|
|
||||||
NUMBER_RE = r'\d+(?:\.\d+)?'
|
NUMBER_RE = r'\d+(?:\.\d+)?'
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue