[life:embed] Improve extraction

This commit is contained in:
Sergey M․ 2016-08-14 20:49:11 +07:00
parent 9771b1f901
commit 884cdb6cd9
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -4,7 +4,10 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
@ -96,7 +99,7 @@ def _real_extract(self, url):
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage) r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
iframe_links = re.findall( iframe_links = re.findall(
r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/embed/.+?)["\']', r'<iframe[^>]+src=["\']((?:https?:)?//embed\.life\.ru/(?:embed|video)/.+?)["\']',
webpage) webpage)
if not video_urls and not iframe_links: if not video_urls and not iframe_links:
@ -164,9 +167,9 @@ def make_iframe_entry(video_id, video_url, index=None):
class LifeEmbedIE(InfoExtractor): class LifeEmbedIE(InfoExtractor):
IE_NAME = 'life:embed' IE_NAME = 'life:embed'
_VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})' _VALID_URL = r'https?://embed\.life\.ru/(?:embed|video)/(?P<id>[\da-f]{32})'
_TEST = { _TESTS = [{
'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291', 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',
'md5': 'b889715c9e49cb1981281d0e5458fbbe', 'md5': 'b889715c9e49cb1981281d0e5458fbbe',
'info_dict': { 'info_dict': {
@ -175,30 +178,57 @@ class LifeEmbedIE(InfoExtractor):
'title': 'e50c2dec2867350528e2574c899b8291', 'title': 'e50c2dec2867350528e2574c899b8291',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
} }
} }, {
# with 1080p
'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
thumbnail = None
formats = [] formats = []
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
video_url = compat_urlparse.urljoin(url, video_url) def extract_m3u8(manifest_url):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', manifest_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='m3u8')) entry_protocol='m3u8_native', m3u8_id='m3u8'))
else:
def extract_original(original_url):
formats.append({ formats.append({
'url': video_url, 'url': original_url,
'format_id': ext, 'format_id': determine_ext(original_url, None),
'preference': 1, 'preference': 1,
}) })
playlist = self._parse_json(
self._search_regex(
r'options\s*=\s*({.+?});', webpage, 'options', default='{}'),
video_id).get('playlist', {})
if playlist:
master = playlist.get('master')
if isinstance(master, compat_str) and determine_ext(master) == 'm3u8':
extract_m3u8(compat_urlparse.urljoin(url, master))
original = playlist.get('original')
if isinstance(original, compat_str):
extract_original(original)
thumbnail = playlist.get('image')
# Old rendition fallback
if not formats:
for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage):
video_url = compat_urlparse.urljoin(url, video_url)
if determine_ext(video_url) == 'm3u8':
extract_m3u8(video_url)
else:
extract_original(video_url)
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = self._search_regex( thumbnail = thumbnail or self._search_regex(
r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None) r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
return { return {