Merge pull request #5328 from yan12125/fix_5226

[Yahoo/NBCSports] Fix 5226 and add support for NBC Sports
This commit is contained in:
Sergey M. 2015-03-31 20:00:47 +06:00
commit ed06e9949b
5 changed files with 85 additions and 1 deletions

View file

@ -311,6 +311,8 @@
from .nbc import ( from .nbc import (
NBCIE, NBCIE,
NBCNewsIE, NBCNewsIE,
NBCSportsIE,
NBCSportsVPlayerIE,
) )
from .ndr import NDRIE from .ndr import NDRIE
from .ndtv import NDTVIE from .ndtv import NDTVIE

View file

@ -29,6 +29,7 @@
xpath_text, xpath_text,
) )
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .rutv import RUTVIE from .rutv import RUTVIE
from .smotri import SmotriIE from .smotri import SmotriIE
@ -639,6 +640,15 @@ class GenericIE(InfoExtractor):
'upload_date': '20150228', 'upload_date': '20150228',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
} }
},
# NBC Sports vplayer embeds
{
'url': 'http://bbs.clutchfans.net/showthread.php?t=244180',
'info_dict': {
'id': '_hqLjQ95yx8Z',
'ext': 'flv'
},
'skip': 'This content expired on 9/17/14 12:23 PM',
} }
] ]
@ -1252,6 +1262,11 @@ def _playlist_from_matches(matches, getter=None, ie=None):
if mobj is not None: if mobj is not None:
return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
# Look for NBC Sports VPlayer embeds
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
def check_video(vurl): def check_video(vurl):
if YoutubeIE.suitable(vurl): if YoutubeIE.suitable(vurl):
return True return True

View file

@ -50,6 +50,58 @@ def _real_extract(self, url):
return self.url_result(theplatform_url) return self.url_result(theplatform_url)
class NBCSportsVPlayerIE(InfoExtractor):
    """Extractor for player pages hosted on vplayer.nbcsports.com."""

    # One or more path segments, the last of which is captured as the video id.
    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'

    _TESTS = [
        {
            'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
            'info_dict': {
                'id': '9CsDKds0kvHI',
                'ext': 'flv',
                'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
                'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
            },
        },
        {
            'note': 'This video is already expired. It\'s for testing _VALID_URL',
            'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _extract_url(webpage):
        """Return the src URL of the first vplayer.nbcsports.com iframe.

        Only double-quoted ``src`` attributes are recognised. Returns None
        when ``webpage`` contains no such iframe.
        """
        embed = re.search(
            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
        return embed.group('url') if embed else None

    def _real_extract(self, url):
        """Download the player page and delegate to the ThePlatform extractor.

        The URL handed over is whatever ``_og_search_video_url`` finds in the
        downloaded page.
        """
        page = self._download_webpage(url, self._match_id(url))
        return self.url_result(self._og_search_video_url(page), 'ThePlatform')
class NBCSportsIE(InfoExtractor):
    """Extractor for article pages on www.nbcsports.com that embed the vplayer."""

    # Does not include https because its certificate is invalid
    _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
        'info_dict': {
            'id': 'PHJSaFWbrTY9',
            'ext': 'flv',
            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
        },
    }

    def _real_extract(self, url):
        """Fetch the article page and hand its vplayer iframe URL to NBCSportsVPlayer."""
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)
        # NOTE(review): _extract_url returns None when the page has no vplayer
        # iframe, and that None is passed on unchanged -- confirm whether an
        # explicit error should be raised here instead.
        embed_url = NBCSportsVPlayerIE._extract_url(page)
        return self.url_result(embed_url, 'NBCSportsVPlayer')
class NBCNewsIE(InfoExtractor): class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)| (?:video/.+?/(?P<id>\d+)|

View file

@ -92,7 +92,7 @@ def _real_extract(self, url):
error_msg = next( error_msg = next(
n.attrib['abstract'] n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref')) for n in meta.findall(_x('.//smil:ref'))
if n.attrib.get('title') == 'Geographic Restriction') if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
except StopIteration: except StopIteration:
pass pass
else: else:

View file

@ -17,6 +17,8 @@
int_or_none, int_or_none,
) )
from .nbc import NBCSportsVPlayerIE
class YahooIE(InfoExtractor): class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies' IE_DESC = 'Yahoo screen and movies'
@ -129,6 +131,15 @@ class YahooIE(InfoExtractor):
}, { }, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html', 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True, 'only_matching': True,
}, {
'note': 'NBC Sports embeds',
'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
'info_dict': {
'id': '9CsDKds0kvHI',
'ext': 'flv',
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
}
} }
] ]
@ -151,6 +162,10 @@ def _real_extract(self, url):
items = json.loads(items_json) items = json.loads(items_json)
video_id = items[0]['id'] video_id = items[0]['id']
return self._get_info(video_id, display_id, webpage) return self._get_info(video_id, display_id, webpage)
# Look for NBCSports iframes
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
items_json = self._search_regex( items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,