[utils] Fix TTML conversion

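Tolerate invalid timestamps: paragraphs whose begin time is missing or unparseable, or that have neither a usable end nor a usable dur, are now skipped during conversion (closes #7909)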
This commit is contained in:
Yen Chi Hsuan 2015-12-19 18:21:42 +08:00
parent 4f29fa9906
commit d631d5f9f2
2 changed files with 13 additions and 5 deletions

View file

@ -661,8 +661,8 @@ def test_match_str(self):
{'like_count': 190, 'dislike_count': 10})) {'like_count': 190, 'dislike_count': 10}))
def test_parse_dfxp_time_expr(self): def test_parse_dfxp_time_expr(self):
self.assertEqual(parse_dfxp_time_expr(None), 0.0) self.assertEqual(parse_dfxp_time_expr(None), None)
self.assertEqual(parse_dfxp_time_expr(''), 0.0) self.assertEqual(parse_dfxp_time_expr(''), None)
self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1) self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1) self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0) self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
@ -676,6 +676,9 @@ def test_dfxp2srt(self):
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p> <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
<p begin="1" end="2">第二行<br/></p> <p begin="1" end="2">第二行<br/></p>
<p begin="2" dur="1"><span>Third<br/>Line</span></p> <p begin="2" dur="1"><span>Third<br/>Line</span></p>
<p begin="3" end="-1">Lines with invalid timestamps are ignored</p>
<p begin="-1" end="-1">Ignore, two</p>
<p begin="3" dur="-1">Ignored, three</p>
</div> </div>
</body> </body>
</tt>''' </tt>'''

View file

@ -1976,7 +1976,7 @@ def _match_func(info_dict):
def parse_dfxp_time_expr(time_expr): def parse_dfxp_time_expr(time_expr):
if not time_expr: if not time_expr:
return 0.0 return
mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr) mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
if mobj: if mobj:
@ -2020,10 +2020,15 @@ def parse_node(node):
raise ValueError('Invalid dfxp/TTML subtitle') raise ValueError('Invalid dfxp/TTML subtitle')
for para, index in zip(paras, itertools.count(1)): for para, index in zip(paras, itertools.count(1)):
begin_time = parse_dfxp_time_expr(para.attrib['begin']) begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
end_time = parse_dfxp_time_expr(para.attrib.get('end')) end_time = parse_dfxp_time_expr(para.attrib.get('end'))
dur = parse_dfxp_time_expr(para.attrib.get('dur'))
if begin_time is None:
continue
if not end_time: if not end_time:
end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur']) if not dur:
continue
end_time = begin_time + dur
out.append('%d\n%s --> %s\n%s\n\n' % ( out.append('%d\n%s --> %s\n%s\n\n' % (
index, index,
srt_subtitles_timecode(begin_time), srt_subtitles_timecode(begin_time),