[brightcove] Add the extraction of the url from generic

2024-11-07 20:30:41 -05:00 · 2013-11-06 16:40:24 +01:00 · 2013-11-06 16:40:24 +01:00 · eeb165e674
commit eeb165e674
parent 9ee2b5f6f2
2 changed files with 32 additions and 4 deletions
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@ -9,6 +9,7 @@
    compat_urllib_parse,
    find_xpath_attr,
    compat_urlparse,
    compat_str,
    ExtractorError,
 )
@ -71,6 +72,19 @@ def _build_brighcove_url(cls, object_str):
        data = compat_urllib_parse.urlencode(params)
        return cls._FEDERATED_URL_TEMPLATE % data
    @classmethod
    def _extract_brightcove_url(cls, webpage):
        """Try to extract the brightcove url from the wepbage, returns None
        if it can't be found
        """
        m_brightcove = re.search(
            r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
            webpage, re.DOTALL)
        if m_brightcove is not None:
            return cls._build_brighcove_url(m_brightcove.group())
        else:
            return None
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        query_str = mobj.group('query')
@ -109,7 +123,7 @@ def _get_playlist_info(self, player_key):
    def _extract_video_info(self, video_info):
        info = {
-            'id': video_info['id'],
+            'id': compat_str(video_info['id']),
            'title': video_info['displayName'],
            'description': video_info.get('shortDescription'),
            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -54,6 +54,21 @@ class GenericIE(InfoExtractor):
            },
            u'skip': u'There is a limit of 200 free downloads / month for the test song',
        },
        # embedded brightcove video
        {
            u'add_ie': ['Brightcove'],
            u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics',
            u'info_dict': {
                u'id': u'2365799484001',
                u'ext': u'mp4',
                u'title': u'Bubble Simulation',
                u'description': u'A visualization from a new computer model of foam behavior.',
                u'uploader': u'Scientific American',
            },
            u'params': {
                u'skip_download': True,
            },
        },
    ]
    def report_download_webpage(self, video_id):
@ -146,10 +161,9 @@ def _real_extract(self, url):
        self.report_extraction(video_id)
        # Look for BrightCove:
-        m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
-        if m_brightcove is not None:
+        if bc_url is not None:
            self.to_screen(u'Brightcove video detected.')
-            bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
            return self.url_result(bc_url, 'Brightcove')
        # Look for embedded Vimeo player