From 9766538124384b75c6b6cdfd8cb03ddce30136dc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 8 Feb 2014 19:20:11 +0100 Subject: [PATCH] [jadorecettepub] Add extractor (Fixes #2148) --- test/test_all_urls.py | 2 +- test/test_youtube_lists.py | 10 +++--- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/jadorecettepub.py | 49 ++++++++++++++++++++++++++ youtube_dl/extractor/youtube.py | 7 ++-- 5 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 youtube_dl/extractor/jadorecettepub.py diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 50f392088..aa8e4e4bd 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -85,7 +85,7 @@ def test_justin_tv_chapterid_matching(self): self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) def test_youtube_extract(self): - assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id) + assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index c9632ddf6..38ac989ce 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -30,7 +30,7 @@ def test_youtube_playlist(self): result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'ytdl test PL') - ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] + ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) def test_youtube_playlist_noplaylist(self): @@ -39,7 +39,7 @@ def test_youtube_playlist_noplaylist(self): ie = YoutubePlaylistIE(dl) result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') self.assertEqual(result['_type'], 'url') - self.assertEqual(YoutubeIE()._extract_id(result['url']), 'FXxLjLQi3Fg') + self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg') def test_issue_673(self): dl = FakeYDL() @@ -59,7 +59,7 @@ def test_youtube_playlist_with_deleted(self): dl = FakeYDL() ie = YoutubePlaylistIE(dl) result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') - ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']] + ytie_results = [YoutubeIE().extract_id(url['url']) for url in result['entries']] self.assertFalse('pElCt5oNDuI' in ytie_results) self.assertFalse('KdPEApIVdWM' in ytie_results) @@ -76,9 +76,9 @@ def test_youtube_course(self): # TODO find a > 100 (paginating?) videos course result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') entries = result['entries'] - self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') + self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs') self.assertEqual(len(entries), 25) - self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0') + self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') def test_youtube_channel(self): dl = FakeYDL() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index bc1e57aff..4678cf06c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -105,6 +105,7 @@ IviIE, IviCompilationIE ) +from .jadorecettepub import JadoreCettePubIE from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE diff --git a/youtube_dl/extractor/jadorecettepub.py b/youtube_dl/extractor/jadorecettepub.py new file mode 100644 index 000000000..d918fff81 --- /dev/null +++ b/youtube_dl/extractor/jadorecettepub.py @@ -0,0 +1,49 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from .youtube import YoutubeIE + + +class JadoreCettePubIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P.*?)\.html' + + _TEST = { + 'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html', + 'md5': '401286a06067c70b44076044b66515de', + 'info_dict': { + 'id': 'jLMja3tr7a4', + 'ext': 'mp4', + 'title': 'La pire utilisation de Star Wars', + 'description': "Jadorecettepub.com vous a gratifié de plusieurs pubs géniales utilisant Star Wars et Dark Vador plus particulièrement... Mais l'heure est venue de vous proposer une version totalement massacrée, venue du Japon. Quand les Japonais détruisent l'image de Star Wars pour vendre du thon en boite, ça promet...", + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + title = self._html_search_regex( + r'(.*?)', + webpage, 'title') + description = self._html_search_regex( + r'(?s)
(.*?)