From 58ea7ec81ecc1d0166e04a5a94afc8b70e3791db Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 29 Sep 2014 22:23:21 +0200 Subject: [PATCH] [vimeo] Fix description extraction --- youtube_dl/extractor/vimeo.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index a002555a9..d2c36b58a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -91,6 +91,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'uploader_id': 'openstreetmapus', 'uploader': 'OpenStreetMap US', 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', + 'description': 'md5:380943ec71b89736ff4bf27183233d09', 'duration': 1595, }, }, @@ -105,6 +106,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'uploader': 'The BLN & Business of Software', 'uploader_id': 'theblnbusinessofsoftware', 'duration': 3610, + 'description': None, }, }, { @@ -119,6 +121,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'uploader_id': 'user18948128', 'uploader': 'Jaime Marquínez Ferrándiz', 'duration': 10, + 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.', }, 'params': { 'videopassword': 'youtube-dl', @@ -205,6 +208,7 @@ def _real_extract(self, url): # Extract ID from URL mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + orig_url = url if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id @@ -275,9 +279,23 @@ def _real_extract(self, url): _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1] # Extract video description + video_description = self._html_search_regex( - r'(?s)
]*>(.*?)
', - webpage, 'description', fatal=False) + r'(?s)]*>(.*?)', + webpage, 'description', default=None) + if not video_description: + video_description = self._html_search_meta( + 'description', webpage, default=None) + if not video_description and mobj.group('pro'): + orig_webpage = self._download_webpage( + orig_url, video_id, + note='Downloading webpage for description', + fatal=False) + if orig_webpage: + video_description = self._html_search_meta( + 'description', orig_webpage, default=None) + if not video_description and not mobj.group('player'): + self._downloader.report_warning('Cannot find video description') # Extract video duration video_duration = int_or_none(config["video"].get("duration"))