From d570746e45cff3c0f89654bf748e44a5da75a924 Mon Sep 17 00:00:00 2001 From: "Andrew \"Akari\" Alexeyew" Date: Wed, 2 Dec 2015 06:00:47 +0200 Subject: [PATCH] [nuevo] Generalize nuevo extractor and add support for trollvids Supports only the nuevo player for now (most common). [trollvids] convert duration to an int [trollvids] added a test [trollvids] made flake8 shut up Generalized the Nuevo extractor Affects: anitube, trollvids, trutube [nuevo] Complied with the code comments. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/anitube.py | 34 +++------------------ youtube_dl/extractor/nuevo.py | 37 +++++++++++++++++++++++ youtube_dl/extractor/trollvids.py | 49 +++++++++++++++++++++++++++++++ youtube_dl/extractor/trutube.py | 23 +++++---------- 5 files changed, 98 insertions(+), 46 deletions(-) create mode 100644 youtube_dl/extractor/nuevo.py create mode 100644 youtube_dl/extractor/trollvids.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index cee5cfe7c..6f2b35cf1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -726,6 +726,7 @@ from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .trollvids import TrollvidsIE from .trutube import TruTubeIE from .tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 23f942ae2..73690df82 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -2,10 +2,10 @@ import re -from .common import InfoExtractor +from .nuevo import NuevoBaseIE -class AnitubeIE(InfoExtractor): +class AnitubeIE(NuevoBaseIE): IE_NAME = 'anitube.se' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' @@ -29,31 +29,5 @@ def _real_extract(self, url): key = self._search_regex( r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') - config_xml = self._download_xml( - 
'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) - - video_title = config_xml.find('title').text - thumbnail = config_xml.find('image').text - duration = float(config_xml.find('duration').text) - - formats = [] - video_url = config_xml.find('file') - if video_url is not None: - formats.append({ - 'format_id': 'sd', - 'url': video_url.text, - }) - video_url = config_xml.find('filehd') - if video_url is not None: - formats.append({ - 'format_id': 'hd', - 'url': video_url.text, - }) - - return { - 'id': video_id, - 'title': video_title, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats - } + config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key + return self._extract_nuevo(config_url, video_id) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py new file mode 100644 index 000000000..ccc697e4f --- /dev/null +++ b/youtube_dl/extractor/nuevo.py @@ -0,0 +1,37 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + float_or_none, + xpath_text +) + + +class NuevoBaseIE(InfoExtractor): + def _extract_nuevo(self, config_url, video_id): + tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip()) + + title = xpath_text(tree, './title') + if title: + title = title.strip() + + thumbnail = xpath_text(tree, './image') + duration = float_or_none(xpath_text(tree, './duration')) + + formats = [] + for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')): + video_url = tree.find(element_name) + video_url is None or formats.append({ + 'format_id': format_id, + 'url': video_url.text + }) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats + } diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py new file mode 100644 index 000000000..e4fe620f7 --- /dev/null +++ b/youtube_dl/extractor/trollvids.py @@ -0,0 +1,49 @@ +# 
encoding: utf-8 +from __future__ import unicode_literals + +from .nuevo import NuevoBaseIE + +from ..compat import ( + compat_urllib_parse_unquote +) + +import re + + +class TrollvidsIE(NuevoBaseIE): + _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)' + IE_NAME = 'trollvids' + + def _real_extract(self, url): + match = re.match(self._VALID_URL, url) + + video_id = match.group('id') + raw_video_title = match.group('title') + url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title) + config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id + + info = self._extract_nuevo(config_url, video_id) + + info.update({ + 'webpage_url': url, + 'age_limit': 18 + }) + + if 'title' not in info: + info['title'] = compat_urllib_parse_unquote(raw_video_title) + + return info + + _TESTS = [ + { + 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 'info_dict': { + 'id': '2349002', + 'ext': 'mp4', + 'title': "【MMD R-18】ガールフレンド carry_me_off", + 'age_limit': 18, + 'duration': 216.78, + }, + }, + ] diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index e7b79243a..d7ec2ec26 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals -from .common import InfoExtractor -from ..utils import xpath_text +from .nuevo import NuevoBaseIE -class TruTubeIE(InfoExtractor): +class TruTubeIE(NuevoBaseIE): _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', @@ -22,19 +21,11 @@ class TruTubeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id - config = 
self._download_xml( - 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, - video_id, transform_source=lambda s: s.strip()) + info = self._extract_nuevo(config_url, video_id) - # filehd is always 404 - video_url = xpath_text(config, './file', 'video URL', fatal=True) - title = xpath_text(config, './title', 'title').strip() - thumbnail = xpath_text(config, './image', ' thumbnail') + # filehd always 404s + info['formats'] = info['formats'][:1] - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': thumbnail, - } + return info