[vimeo] add an extractor for channels

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-07-29 13:12:09 +02:00
parent a3c736def2
commit caeefc29eb
3 changed files with 39 additions and 2 deletions

View file

@ -8,7 +8,7 @@
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.extractor import DailymotionPlaylistIE
from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
from youtube_dl.utils import *
from helper import FakeYDL
@ -26,5 +26,13 @@ def test_dailymotion_playlist(self):
self.assertEqual(result['title'], u'SPORT')
self.assertTrue(len(result['entries']) > 20)
def test_vimeo_channel(self):
dl = FakeYDL()
ie = VimeoChannelIE(dl)
result = ie.extract('http://vimeo.com/channels/tributes')
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'Vimeo Tributes')
self.assertTrue(len(result['entries']) > 24)
if __name__ == '__main__':
unittest.main()

View file

@ -71,7 +71,7 @@
from .vbox7 import Vbox7IE
from .veoh import VeohIE
from .vevo import VevoIE
from .vimeo import VimeoIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
from .c56 import C56IE
from .wat import WatIE

View file

@ -1,5 +1,6 @@
import json
import re
import itertools
from .common import InfoExtractor
from ..utils import (
@ -171,3 +172,31 @@ def _real_extract(self, url, new_video=True):
'thumbnail': video_thumbnail,
'description': video_description,
}]
class VimeoChannelIE(InfoExtractor):
IE_NAME = u'vimeo:channel'
_VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
video_ids = []
for pagenum in itertools.count(1):
webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
channel_id, u'Downloading page %s' % pagenum)
video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids]
channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
webpage, u'channel title')
return {'_type': 'playlist',
'id': channel_id,
'title': channel_title,
'entries': entries,
}