mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-07 20:30:41 -05:00
[crunchyroll] Improve extraction failsafeness (closes #17991)
This commit is contained in:
parent
5e733b066a
commit
08c7d3dade
1 changed file with 21 additions and 8 deletions
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import xml.etree.ElementTree as etree
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
|
@ -398,7 +399,7 @@ def _get_subtitles(self, video_id, webpage):
|
||||||
'Downloading subtitles for ' + sub_name, data={
|
'Downloading subtitles for ' + sub_name, data={
|
||||||
'subtitle_script_id': sub_id,
|
'subtitle_script_id': sub_id,
|
||||||
})
|
})
|
||||||
if sub_doc is None:
|
if not isinstance(sub_doc, etree.Element):
|
||||||
continue
|
continue
|
||||||
sid = sub_doc.get('id')
|
sid = sub_doc.get('id')
|
||||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||||
|
@ -515,7 +516,7 @@ def _real_extract(self, url):
|
||||||
'video_quality': stream_quality,
|
'video_quality': stream_quality,
|
||||||
'current_page': url,
|
'current_page': url,
|
||||||
})
|
})
|
||||||
if streamdata is not None:
|
if isinstance(streamdata, etree.Element):
|
||||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||||
if stream_info is not None:
|
if stream_info is not None:
|
||||||
stream_infos.append(stream_info)
|
stream_infos.append(stream_info)
|
||||||
|
@ -526,7 +527,7 @@ def _real_extract(self, url):
|
||||||
'video_format': stream_format,
|
'video_format': stream_format,
|
||||||
'video_encode_quality': stream_quality,
|
'video_encode_quality': stream_quality,
|
||||||
})
|
})
|
||||||
if stream_info is not None:
|
if isinstance(stream_info, etree.Element):
|
||||||
stream_infos.append(stream_info)
|
stream_infos.append(stream_info)
|
||||||
for stream_info in stream_infos:
|
for stream_info in stream_infos:
|
||||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||||
|
@ -598,10 +599,22 @@ def _real_extract(self, url):
|
||||||
series = self._html_search_regex(
|
series = self._html_search_regex(
|
||||||
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||||
webpage, 'series', fatal=False)
|
webpage, 'series', fatal=False)
|
||||||
season = xpath_text(metadata, 'series_title')
|
|
||||||
|
|
||||||
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
|
season = episode = episode_number = duration = thumbnail = None
|
||||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
|
|
||||||
|
if isinstance(metadata, etree.Element):
|
||||||
|
season = xpath_text(metadata, 'series_title')
|
||||||
|
episode = xpath_text(metadata, 'episode_title')
|
||||||
|
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||||
|
duration = float_or_none(media_metadata.get('duration'), 1000)
|
||||||
|
thumbnail = xpath_text(metadata, 'episode_image_url')
|
||||||
|
|
||||||
|
if not episode:
|
||||||
|
episode = media_metadata.get('title')
|
||||||
|
if not episode_number:
|
||||||
|
episode_number = int_or_none(media_metadata.get('episode_number'))
|
||||||
|
if not thumbnail:
|
||||||
|
thumbnail = media_metadata.get('thumbnail', {}).get('url')
|
||||||
|
|
||||||
season_number = int_or_none(self._search_regex(
|
season_number = int_or_none(self._search_regex(
|
||||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||||
|
@ -611,8 +624,8 @@ def _real_extract(self, url):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'duration': float_or_none(media_metadata.get('duration'), 1000),
|
'duration': duration,
|
||||||
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
|
'thumbnail': thumbnail,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'series': series,
|
'series': series,
|
||||||
|
|
Loading…
Reference in a new issue