diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9898b9803..3b0bff0d7 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1064,10 +1064,7 @@ from .viceland import VicelandIE
 from .vidbit import VidbitIE
 from .viddler import ViddlerIE
-from .videa import (
-    VideaIE,
-    VideaEmbedIE,
-)
+from .videa import VideaIE
 from .videodetective import VideoDetectiveIE
 from .videofyme import VideofyMeIE
 from .videomega import VideoMegaIE
diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py
index 3d2e7e31c..039add86b 100644
--- a/youtube_dl/extractor/videa.py
+++ b/youtube_dl/extractor/videa.py
@@ -1,28 +1,32 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
-    parse_duration,
+    mimetype2ext,
+    parse_codecs,
     xpath_element,
     xpath_text,
-    xpath_attr,
-    urlencode_postdata,
-    unescapeHTML,
 )
 
 
 class VideaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?videa\.hu/videok/(?P<id>[^#?]+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        videa\.hu/
+                        (?:
+                            videok/(?:[^/]+/)*[^?#&]+-|
+                            player\?.*?\bv=|
+                            player/v/
+                        )
+                        (?P<id>[^?#&]+)
+                    '''
     _TESTS = [{
         'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
         'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
         'info_dict': {
             'id': '8YfIAjxwWGwT8HVQ',
-            'display_id': '8YfIAjxwWGwT8HVQ',
             'ext': 'mp4',
             'title': 'Az őrült kígyász 285 kígyót enged szabadon',
             'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
@@ -31,68 +35,55 @@ class VideaIE(InfoExtractor):
     }, {
         'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
         'only_matching': True,
+    }, {
+        'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
+        'only_matching': True,
+    }, {
+        'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        video_data = self._download_json("http://videa.hu/oembed/?" + urlencode_postdata({"url": url.split('?')[0], "format": "json"}), video_id)
-        video_url = self._search_regex(
-            r'src="(.+?)"', video_data.get('html'), 'embed url')
+        info = self._download_xml(
+            'http://videa.hu/videaplayer_get_xml.php', video_id,
+            query={'v': video_id})
+
+        video = xpath_element(info, './/video', 'video', fatal=True)
+        sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
+
+        title = xpath_text(video, './title', fatal=True)
+
+        formats = []
+        for source in sources.findall('./video_source'):
+            source_url = source.text
+            if not source_url:
+                continue
+            f = parse_codecs(source.get('codecs'))
+            f.update({
+                'url': source_url,
+                'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
+                'format_id': source.get('name'),
+                'width': int_or_none(source.get('width')),
+                'height': int_or_none(source.get('height')),
+            })
+            formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnail = xpath_text(video, './poster_src')
+        duration = int_or_none(xpath_text(video, './duration'))
+
+        age_limit = None
+        is_adult = xpath_text(video, './is_adult_content', default=None)
+        if is_adult:
+            age_limit = 18 if is_adult == '1' else 0
 
         return {
-            '_type': 'url_transparent',
-            'url': video_url,
-            'ie_key': 'VideaEmbed'
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': age_limit,
+            'formats': formats,
         }
-
-class VideaEmbedIE(InfoExtractor):
-    _VALID_URL = r'(?P<protocol>https?:)(?P<base_url>//(?:.+?\.)?videa\.hu)/player(?:\?v=|/v/)(?P<id>[^/#?]+)';
-    _TESTS = [{
-        'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
-        'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
-        'info_dict': {
-            'id': '8YfIAjxwWGwT8HVQ',
-            'ext': 'mp4',
-            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
-            'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
-            'duration': 21
-        },
-    }, {
-        'url': 'http://videa.hu/player?v=jAHDWfWSJH5XuFhH',
-        'only_matching': True,
-    }];
-
-    @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:.+?\.)?videa\.hu/player(?:\?v=|/v/)[^/#?]+)\1',
-            webpage)
-        if mobj:
-            return mobj.group('url')
-
-    def _real_extract(self, url):
-        protocol, base_url, display_id = re.search(self._VALID_URL, url).groups()
-        xml = self._download_xml(protocol + base_url + "/flvplayer_get_video_xml.php?v=" + display_id, display_id)
-
-        medias = []
-
-        for xml_media in xml.findall('video') + xml.findall('audio'):
-            media_url = protocol + xpath_attr(xml_media, 'versions/version', 'video_url')
-            media = {
-                'id': display_id,
-                'ext': 'mp4',
-                'title': xpath_text(xml_media, 'title', 'title', True),
-                'duration': parse_duration(xpath_text(xml_media, 'duration')),
-                'thumbnail': protocol + xpath_text(xml_media, 'still', 'still', True),
-                'url': media_url,
-            }
-            medias.append(media)
-
-        if len(medias) > 1:
-            self._downloader.report_warning(
-                'found multiple medias; please '
-                'report this with the video URL to http://yt-dl.org/bug')
-        if not medias:
-            raise ExtractorError('No media entries found')
-        return medias[0]