[youtube] Fix mix video regex

The order of attributes in <li> is arbitrary and changes every time the playlist
page is fetched, so we can't rely on `data-index` appearing before
`data-video-username`.
This commit is contained in:
Sergey M. 2014-02-24 12:51:06 +07:00
parent 92661c994b
commit a2dafe2887

View file

@ -1492,11 +1492,10 @@ def _extract_mix(self, playlist_id):
title_span = (search_title('playlist-title') or title_span = (search_title('playlist-title') or
search_title('title long-title') or search_title('title')) search_title('title long-title') or search_title('title'))
title = clean_html(title_span) title = clean_html(title_span)
video_re = r'''(?x)data-index="\d+".*? video_re = r'''(?x)data-video-username="(.*?)".*?
data-video-username="(.*?)".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id) href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id)
matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
# Some of the videos may have beend deleted, their username field is empty # Some of the videos may have been deleted, their username field is empty
ids = [video_id for (username, video_id) in matches if username] ids = [video_id for (username, video_id) in matches if username]
url_results = self._ids_to_results(ids) url_results = self._ids_to_results(ids)