[safari:api] Separate extractor (Closes #8871)

This commit is contained in:
Sergey M․ 2016-03-19 22:30:48 +06:00
parent 782b1b5bd1
commit 3aec71766d
2 changed files with 32 additions and 21 deletions

View file

@ -628,6 +628,7 @@
from .sandia import SandiaIE from .sandia import SandiaIE
from .safari import ( from .safari import (
SafariIE, SafariIE,
SafariApiIE,
SafariCourseIE, SafariCourseIE,
) )
from .sapo import SapoIE from .sapo import SapoIE

View file

@ -75,16 +75,7 @@ def _login(self):
class SafariIE(SafariBaseIE): class SafariIE(SafariBaseIE):
IE_NAME = 'safari' IE_NAME = 'safari'
IE_DESC = 'safaribooksonline.com online video' IE_DESC = 'safaribooksonline.com online video'
_VALID_URL = r'''(?x)https?:// _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'
(?:www\.)?safaribooksonline\.com/
(?:
library/view/[^/]+|
api/v1/book
)/
(?P<course_id>[^/]+)/
(?:chapter(?:-content)?/)?
(?P<part>part\d+)\.html
'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
@ -97,9 +88,6 @@ class SafariIE(SafariBaseIE):
'upload_date': '20150724', 'upload_date': '20150724',
'uploader_id': 'stork', 'uploader_id': 'stork',
}, },
}, {
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
'only_matching': True,
}, { }, {
# non-digits in course id # non-digits in course id
'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
@ -108,13 +96,18 @@ class SafariIE(SafariBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
course_id = mobj.group('course_id') video_id = '%s/%s' % (mobj.group('course_id'), mobj.group('part'))
part = mobj.group('part')
webpage = self._download_webpage(url, '%s/%s' % (course_id, part)) webpage = self._download_webpage(url, video_id)
reference_id = self._search_regex(r'data-reference-id="([^"]+)"', webpage, 'kaltura reference id') reference_id = self._search_regex(
partner_id = self._search_regex(r'data-partner-id="([^"]+)"', webpage, 'kaltura widget id') r'data-reference-id=(["\'])(?P<id>.+?)\1',
ui_id = self._search_regex(r'data-ui-id="([^"]+)"', webpage, 'kaltura uiconf id') webpage, 'kaltura reference id', group='id')
partner_id = self._search_regex(
r'data-partner-id=(["\'])(?P<id>.+?)\1',
webpage, 'kaltura widget id', group='id')
ui_id = self._search_regex(
r'data-ui-id=(["\'])(?P<id>.+?)\1',
webpage, 'kaltura uiconf id', group='id')
query = { query = {
'wid': '_%s' % partner_id, 'wid': '_%s' % partner_id,
@ -125,7 +118,7 @@ def _real_extract(self, url):
if self.LOGGED_IN: if self.LOGGED_IN:
kaltura_session = self._download_json( kaltura_session = self._download_json(
'%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
course_id, 'Downloading kaltura session JSON', video_id, 'Downloading kaltura session JSON',
'Unable to download kaltura session JSON', fatal=False) 'Unable to download kaltura session JSON', fatal=False)
if kaltura_session: if kaltura_session:
session = kaltura_session.get('session') session = kaltura_session.get('session')
@ -137,6 +130,23 @@ def _real_extract(self, url):
'Kaltura') 'Kaltura')
class SafariApiIE(SafariBaseIE):
    # Extractor for the JSON API form of a course part URL. It downloads the
    # part's JSON document and delegates to SafariIE via the 'web_url' field.
    IE_NAME = 'safari:api'
    # Captures the course id and the "partNN" name from
    # /api/v1/book/<course_id>/chapter[-content]/<part>.html
    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'

    _TEST = {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Fetch the part JSON at *url* and hand its ``web_url`` to SafariIE."""
        match = re.match(self._VALID_URL, url)
        # "<course_id>/<part>" is used as the id passed to the JSON download.
        video_id = '%s/%s' % (match.group('course_id'), match.group('part'))
        part_json = self._download_json(
            url, video_id, 'Downloading part JSON')
        web_url = part_json['web_url']
        return self.url_result(web_url, SafariIE.ie_key())
class SafariCourseIE(SafariBaseIE): class SafariCourseIE(SafariBaseIE):
IE_NAME = 'safari:course' IE_NAME = 'safari:course'
IE_DESC = 'safaribooksonline.com online courses' IE_DESC = 'safaribooksonline.com online courses'
@ -168,7 +178,7 @@ def _real_extract(self, url):
'No chapters found for course %s' % course_id, expected=True) 'No chapters found for course %s' % course_id, expected=True)
entries = [ entries = [
self.url_result(chapter, 'Safari') self.url_result(chapter, SafariApiIE.ie_key())
for chapter in course_json['chapters']] for chapter in course_json['chapters']]
course_title = course_json['title'] course_title = course_json['title']