The Daily Show Podcast support

This commit is contained in:
felix 2015-03-16 20:05:02 +01:00
parent 90183a46d8
commit 2e90dff2c2
3 changed files with 64 additions and 1 deletions

View file

@ -84,7 +84,7 @@
) )
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .collegerama import CollegeRamaIE from .collegerama import CollegeRamaIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE, TheDailyShowPodcastIE
from .comcarcoff import ComCarCoffIE from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
@ -250,6 +250,7 @@
LetvPlaylistIE LetvPlaylistIE
) )
from .lifenews import LifeNewsIE from .lifenews import LifeNewsIE
from .libsyn import LibsynIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .livestream import ( from .livestream import (
LivestreamIE, LivestreamIE,

View file

@ -2,6 +2,7 @@
import re import re
from .common import InfoExtractor
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
@ -272,3 +273,23 @@ def _real_extract(self, url):
'title': show_name + ' ' + title, 'title': show_name + ' ' + title,
'description': description, 'description': description,
} }
class TheDailyShowPodcastIE(InfoExtractor):
    """Resolve a thedailyshow.cc.com podcast page to its embedded Libsyn player.

    This extractor reads no media from the page itself; it only locates the
    html5-player.libsyn.com iframe and returns that player URL.
    """
    _VALID_URL = r'(?P<scheme>https?:)?//thedailyshow\.cc\.com/podcast/(?P<id>[a-z\-]+)'

    def _real_extract(self, url):
        """Return a 'url_transparent' result pointing at the embedded player.

        url -- podcast page URL matching _VALID_URL.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        embed_url = self._search_regex(
            r'<iframe(?:\s+[^>]+)?\s*src="((?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/[0-9]+)',
            page, 'player URL')

        if embed_url.startswith('//'):
            # Scheme-relative iframe src: borrow the scheme of the page URL,
            # falling back to https when the page URL carried none either.
            page_scheme = re.match(self._VALID_URL, url).group('scheme')
            embed_url = (page_scheme or 'https:') + embed_url

        return {
            '_type': 'url_transparent',
            'url': embed_url,
        }

View file

@ -0,0 +1,41 @@
# encoding: utf-8
from .common import InfoExtractor
from ..utils import (
unified_strdate,
)
class LibsynIE(InfoExtractor):
    """Extractor for episodes shown by the Libsyn HTML5 embed player.

    Matches html5-player.libsyn.com/embed/episode/id/<digits> URLs, with or
    without a scheme, and scrapes the player page for the episode metadata
    and the media URL(s).
    """
    _VALID_URL = r'(?:https?:)?//html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)(?:/.*)?'

    def _real_extract(self, url):
        """Return the info dict for one Libsyn player page.

        url -- player URL matching _VALID_URL; a scheme-relative URL
               ('//html5-player...') is fetched over https.

        The result carries 'id', 'title', 'description', 'upload_date' and
        'formats'. The episode title and both media URL variables are
        required. Description and release date are requested with
        fatal=False so a page lacking them can still be extracted
        (NOTE(review): relies on _search_regex returning None for a failed
        non-fatal lookup -- confirm against common.py).
        """
        if url.startswith('//'):
            url = 'https:' + url
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)

        # The show title in <h2> is not part of the returned info, so it is
        # not looked up: a page without it must not abort the extraction.
        episode_title = self._search_regex(
            r'<h3>(.*?)</h3>', webpage, 'episode title')

        # Optional metadata: missing values become None instead of errors.
        description = self._search_regex(
            r'<div id="info_text_body">(.*?)</div>', webpage,
            'description', fatal=False)
        release_date = self._search_regex(
            r'<div class="release_date">Released: (.*?)</div>', webpage,
            'release date', fatal=False)
        upload_date = unified_strdate(release_date) if release_date else None

        # [^"]+ stops at the closing quote of the string literal. A greedy
        # .* would run to the last '";' on the line and capture trailing
        # JavaScript when several statements share one line.
        primary_url = self._search_regex(
            r'var mediaURLLibsyn = "(?P<url0>https?://[^"]+)";',
            webpage, 'first media URL')
        secondary_url = self._search_regex(
            r'var mediaURL = "(?P<url1>https?://[^"]+)";',
            webpage, 'second media URL')

        # The two variables may hold the same URL; list it only once.
        formats = [{'url': primary_url}]
        if secondary_url != primary_url:
            formats.append({'url': secondary_url})

        return {
            'id': episode_id,
            'title': episode_title,
            'description': description,
            'upload_date': upload_date,
            'formats': formats,
        }