TEDIE: fix the title extraction

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-07-19 16:13:31 +02:00
parent c364f15ff1
commit 177ed935a9

View file

@@ -67,7 +67,7 @@ def _talk_info(self, url, video_id=0):
webpage = self._download_webpage(url, video_id, 'Downloading \"%s\" page' % video_name)
self.report_extraction(video_name)
# If the url includes the language we get the title translated
-title = self._html_search_regex(r'<span id="altHeadline" >(?P<title>.*)</span>',
+title = self._html_search_regex(r'<span .*?id="altHeadline".+?>(?P<title>.*)</span>',
webpage, 'title')
json_data = self._search_regex(r'<script.*?>var talkDetails = ({.*?})</script>',
webpage, 'json data')