[facebook] Improve Facebook embedded detection

Related to #9938.

Another example comes from 9834872bf6.
This commit is contained in:
Yen Chi Hsuan 2016-07-02 21:33:23 +08:00
parent bdafd88da0
commit fd6ca38262
2 changed files with 37 additions and 4 deletions

View file

@ -129,6 +129,21 @@ class FacebookIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_url(webpage):
    """Return the URL of the first Facebook video embedded in *webpage*.

    Two embed styles are recognised, checked in this order:

    1. an ``<iframe>`` whose ``src`` points at
       ``www.facebook.com/video/embed`` (``http`` or ``https``);
    2. a Facebook API ``<div>`` that carries the ``fb-video`` class and a
       ``data-href`` attribute, in either attribute order.

    Returns the matched URL string, or ``None`` when neither style is found.
    """
    iframe_embed = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/video/embed.+?)\1',
        webpage)
    if iframe_embed is not None:
        return iframe_embed.group('url')

    # Facebook API embed
    # see https://developers.facebook.com/docs/plugins/embedded-video-player
    # HTML attribute order is not significant, so the class check is done in
    # a lookahead; data-href may then appear before or after the class.
    api_embed = re.search(r'''(?x)<div
        (?=[^>]+class=(?P<q1>[\'"])[^\'"]*\bfb-video\b[^\'"]*(?P=q1))
        [^>]+data-href=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)''', webpage)
    if api_embed is not None:
        return api_embed.group('url')

    return None
def _login(self): def _login(self):
(useremail, password) = self._get_login_info() (useremail, password) = self._get_login_info()
if useremail is None: if useremail is None:

View file

@ -66,6 +66,7 @@
from .vessel import VesselIE from .vessel import VesselIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -1260,6 +1261,24 @@ class GenericIE(InfoExtractor):
'uploader': 'TheAtlantic', 'uploader': 'TheAtlantic',
}, },
'add_ie': ['BrightcoveLegacy'], 'add_ie': ['BrightcoveLegacy'],
},
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
'info_dict': {
'id': '599637780109885',
'ext': 'mp4',
'title': 'Facebook video #599637780109885',
},
},
# Facebook API embed
{
'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
'info_dict': {
'id': '10153467542406923',
'ext': 'mp4',
'title': 'Facebook video #10153467542406923',
},
} }
] ]
@ -1759,10 +1778,9 @@ def _playlist_from_matches(matches, getter=None, ie=None):
return self.url_result(mobj.group('url')) return self.url_result(mobj.group('url'))
# Look for embedded Facebook player # Look for embedded Facebook player
mobj = re.search( facebook_url = FacebookIE._extract_url(webpage)
r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage) if facebook_url is not None:
if mobj is not None: return self.url_result(facebook_url, 'Facebook')
return self.url_result(mobj.group('url'), 'Facebook')
# Look for embedded VK player # Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage) mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)