[nowness] fix video extraction and add support for series and playlist extraction (fixes #6720)

This commit is contained in:
remitamine 2015-09-05 16:38:05 +01:00
parent 4abf617b9c
commit c23c3d7d7d
2 changed files with 88 additions and 27 deletions

View file

@ -403,7 +403,11 @@
from .nosvideo import NosVideoIE from .nosvideo import NosVideoIE
from .nova import NovaIE from .nova import NovaIE
from .novamov import NovaMovIE from .novamov import NovaMovIE
from .nowness import NownessIE from .nowness import (
NownessIE,
NownessPlaylistIE,
NownessSerieIE,
)
from .nowtv import NowTVIE from .nowtv import NowTVIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .npo import ( from .npo import (

View file

@ -1,19 +1,42 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .brightcove import BrightcoveIE from .brightcove import BrightcoveIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import ExtractorError
from ..compat import compat_urllib_request
class NownessIE(InfoExtractor): class NownessBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/[^?#]*?/(?P<id>[0-9]+)/(?P<slug>[^/]+?)(?:$|[?#])' def extract_url_result(self, post):
if post['type'] == 'video':
for media in post['media']:
if media['type'] == 'video':
video_id = media['content']
source = media['source']
if source == 'brightcove':
player_code = self._download_webpage(
'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
note='Downloading player JavaScript',
errnote='Player download failed')
bc_url = BrightcoveIE._extract_brightcove_url(player_code)
if bc_url is None:
raise ExtractorError('Could not find player definition')
return self.url_result(bc_url, 'Brightcove')
elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube':
return self.url_result(video_id, 'Youtube')
elif source == 'cinematique':
return self.url_result('http://cinematique.com/embed/%s' % video_id, 'Cinematique')
class NownessIE(NownessBaseIE):
IE_NAME = 'nowness'
_VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(story|series/[^/])/(?P<id>[0-9a-z-]+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.nowness.com/day/2013/6/27/3131/candor--the-art-of-gesticulation', 'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation',
'md5': '068bc0202558c2e391924cb8cc470676', 'md5': '068bc0202558c2e391924cb8cc470676',
'info_dict': { 'info_dict': {
'id': '2520295746001', 'id': '2520295746001',
@ -25,7 +48,7 @@ class NownessIE(InfoExtractor):
} }
}, },
{ {
'url': 'http://cn.nowness.com/day/2014/8/7/4069/kasper-bj-rke-ft-jaakko-eino-kalevi--tnr', 'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
'md5': 'e79cf125e387216f86b2e0a5b5c63aa3', 'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
'info_dict': { 'info_dict': {
'id': '3716354522001', 'id': '3716354522001',
@ -39,26 +62,60 @@ class NownessIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) display_id = self._match_id(url)
video_id = mobj.group('slug')
webpage = self._download_webpage(url, video_id) lang = 'zh-cn' if 'cn.nowness.com' in url else 'en-us'
player_url = self._search_regex( request = compat_urllib_request.Request('http://api.nowness.com/api/post/getBySlug/%s' % display_id, headers={
r'"([^"]+/content/issue-[0-9.]+.js)"', webpage, 'player URL') 'X-Nowness-Language': lang,
real_id = self._search_regex( })
r'\sdata-videoId="([0-9]+)"', webpage, 'internal video ID') post = self._download_json(request, display_id)
return self.extract_url_result(post)
player_code = self._download_webpage(
player_url, video_id,
note='Downloading player JavaScript',
errnote='Player download failed')
player_code = player_code.replace("'+d+'", real_id)
bc_url = BrightcoveIE._extract_brightcove_url(player_code) class NownessPlaylistIE(NownessBaseIE):
if bc_url is None: IE_NAME = 'nowness:playlist'
raise ExtractorError('Could not find player definition') _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)/[0-9a-z-]+'
return { _TEST = {
'_type': 'url', 'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues',
'url': bc_url, 'info_dict':
'ie_key': 'Brightcove', {
'id': '3286',
},
'playlist_mincount': 8,
} }
def _real_extract(self, url):
    """Build a playlist result for a Nowness playlist page.

    The numeric playlist id is taken from ``url`` and used to query the
    Nowness API for the posts in that playlist. Each post is resolved
    through ``extract_url_result``.

    Returns the value of ``self.playlist_result`` for the resolved
    entries and the playlist id.
    """
    playlist_id = self._match_id(url)
    # The language header follows the host name: cn.nowness.com asks for
    # zh-cn, every other host asks for en-us.
    lang = 'zh-cn' if 'cn.nowness.com' in url else 'en-us'
    request = compat_urllib_request.Request(
        'http://api.nowness.com/api/post?PlaylistId=%s' % playlist_id,
        headers={
            'X-Nowness-Language': lang,
        })
    playlist = self._download_json(request, playlist_id)
    # extract_url_result() returns None (implicit fall-through) for posts
    # that are not videos or whose media source it does not recognise.
    # Such posts are skipped so the playlist never contains None entries.
    entries = []
    for item in playlist['items']:
        entry = self.extract_url_result(item)
        if entry is not None:
            entries.append(entry)
    return self.playlist_result(entries, playlist_id)
class NownessSerieIE(NownessBaseIE):
    """Extractor for Nowness series pages (``/series/<slug>``).

    The series is looked up by slug through the Nowness API and returned
    as a playlist whose id is the series' ``id`` field from the API
    response.
    """
    IE_NAME = 'nowness:serie'
    _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[0-9a-z-]+)'
    _TEST = {
        'url': 'https://www.nowness.com/series/60-seconds',
        'info_dict':
        {
            'id': '60',
        },
        'playlist_mincount': 4,
    }

    def _real_extract(self, url):
        """Resolve a series URL into a playlist of its posts.

        Returns the value of ``self.playlist_result`` for the resolved
        entries, keyed by the stringified ``id`` of the API response.
        """
        display_id = self._match_id(url)
        # The language header follows the host name: cn.nowness.com asks
        # for zh-cn, every other host asks for en-us.
        lang = 'zh-cn' if 'cn.nowness.com' in url else 'en-us'
        request = compat_urllib_request.Request(
            'https://api.nowness.com/api/series/getBySlug/%s' % display_id,
            headers={
                'X-Nowness-Language': lang,
            })
        serie = self._download_json(request, display_id)
        serie_id = str(serie['id'])
        # extract_url_result() returns None (implicit fall-through) for
        # posts that are not videos or whose media source it does not
        # recognise. Such posts are skipped so the playlist never
        # contains None entries.
        entries = []
        for post in serie['posts']:
            entry = self.extract_url_result(post)
            if entry is not None:
                entries.append(entry)
        return self.playlist_result(entries, serie_id)