mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-21 20:46:36 -05:00
[ard] Make more robust against missing thumbnails
I cannot reproduce this error; it comes from Travis.
This commit is contained in:
parent
dc752ff442
commit
bf0ff93277
2 changed files with 15 additions and 2 deletions
|
@ -13,6 +13,7 @@
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -157,8 +158,9 @@ def _real_extract(self, url):
|
||||||
player_url = mobj.group('mainurl') + '~playerXml.xml'
|
player_url = mobj.group('mainurl') + '~playerXml.xml'
|
||||||
doc = self._download_xml(player_url, display_id)
|
doc = self._download_xml(player_url, display_id)
|
||||||
video_node = doc.find('./video')
|
video_node = doc.find('./video')
|
||||||
upload_date = unified_strdate(video_node.find('./broadcastDate').text)
|
upload_date = unified_strdate(xpath_text(
|
||||||
thumbnail = video_node.find('.//teaserImage//variant/url').text
|
video_node, './broadcastDate'))
|
||||||
|
thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for a in video_node.findall('.//asset'):
|
for a in video_node.findall('.//asset'):
|
||||||
|
|
|
@ -304,6 +304,17 @@ def xpath_with_ns(path, ns_map):
|
||||||
return '/'.join(replaced)
|
return '/'.join(replaced)
|
||||||
|
|
||||||
|
|
||||||
|
def xpath_text(node, xpath, name=None, fatal=False):
|
||||||
|
n = node.find(xpath)
|
||||||
|
if n is None:
|
||||||
|
if fatal:
|
||||||
|
name = xpath if name is None else name
|
||||||
|
raise ExtractorError('Could not find XML element %s' % name)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
return n.text
|
||||||
|
|
||||||
|
|
||||||
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
|
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
|
||||||
class BaseHTMLParser(compat_html_parser.HTMLParser):
|
class BaseHTMLParser(compat_html_parser.HTMLParser):
|
||||||
def __init(self):
|
def __init(self):
|
||||||
|
|
Loading…
Reference in a new issue