diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 0d9b87a34..b3c3dc0fd 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -9,6 +9,7 @@ compat_urllib_parse, find_xpath_attr, compat_urlparse, + compat_str, ExtractorError, ) @@ -71,6 +72,19 @@ def _build_brighcove_url(cls, object_str): data = compat_urllib_parse.urlencode(params) return cls._FEDERATED_URL_TEMPLATE % data + @classmethod + def _extract_brightcove_url(cls, webpage): + """Try to extract the brightcove url from the webpage, returns None + if it can't be found + """ + m_brightcove = re.search( + r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', + webpage, re.DOTALL) + if m_brightcove is not None: + return cls._build_brighcove_url(m_brightcove.group()) + else: + return None + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) query_str = mobj.group('query') @@ -109,7 +123,7 @@ def _get_playlist_info(self, player_key): def _extract_video_info(self, video_info): info = { - 'id': video_info['id'], + 'id': compat_str(video_info['id']), 'title': video_info['displayName'], 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 76d369273..04b7212f4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -54,6 +54,21 @@ class GenericIE(InfoExtractor): }, u'skip': u'There is a limit of 200 free downloads / month for the test song', }, + # embedded brightcove video + { + u'add_ie': ['Brightcove'], + u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics', + u'info_dict': { + u'id': u'2365799484001', + u'ext': u'mp4', + u'title': u'Bubble Simulation', + u'description': u'A visualization from a new computer model of foam behavior.', + u'uploader': u'Scientific American', + }, + u'params': { + 
u'skip_download': True, + }, + }, ] def report_download_webpage(self, video_id): @@ -146,10 +161,9 @@ def _real_extract(self, url): self.report_extraction(video_id) # Look for BrightCove: - m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) - if m_brightcove is not None: + bc_url = BrightcoveIE._extract_brightcove_url(webpage) + if bc_url is not None: self.to_screen(u'Brightcove video detected.') - bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) return self.url_result(bc_url, 'Brightcove') # Look for embedded Vimeo player