From cb3ff6fb01fd099ab4102d0f1212abb50e06e5ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 27 Jul 2014 02:38:05 +0700 Subject: [PATCH] [godtube] Add extractor (Closes #3367) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/godtube.py | 58 ++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 youtube_dl/extractor/godtube.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a8e593002..b8b341afd 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -115,6 +115,7 @@ from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE from .generic import GenericIE +from .godtube import GodTubeIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE diff --git a/youtube_dl/extractor/godtube.py b/youtube_dl/extractor/godtube.py new file mode 100644 index 000000000..73bd6d890 --- /dev/null +++ b/youtube_dl/extractor/godtube.py @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, +) + + +class GodTubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P[\da-zA-Z]+)' + _TESTS = [ + { + 'url': 'https://www.godtube.com/watch/?v=0C0CNNNU', + 'md5': '77108c1e4ab58f48031101a1a2119789', + 'info_dict': { + 'id': '0C0CNNNU', + 'ext': 'mp4', + 'title': 'Woman at the well.', + 'duration': 159, + 'timestamp': 1205712000, + 'uploader': 'beverlybmusic', + 'upload_date': '20080317', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + config = self._download_xml( + 'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(), + video_id, 'Downloading player config XML') + + video_url = config.find('.//file').text + uploader = config.find('.//author').text + timestamp = parse_iso8601(config.find('.//date').text) + duration = parse_duration(config.find('.//duration').text) + thumbnail = config.find('.//image').text + + media = self._download_xml( + 'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML') + + title = media.find('.//title').text + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'uploader': uploader, + 'duration': duration, + }