democracynow: correct syntax

This commit is contained in:
fnord 2015-07-17 02:57:08 -05:00
parent f870544302
commit eb08081330

View file

@@ -1,19 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import time
import hmac
import hashlib
import itertools
import re import re
from ..utils import (
ExtractorError,
int_or_none,
parse_age_limit,
parse_iso8601,
)
from ..compat import compat_urllib_request
from .common import InfoExtractor from .common import InfoExtractor
@@ -30,7 +18,7 @@ class DemocracynowIE(InfoExtractor):
'uploader': 'Democracy Now', 'uploader': 'Democracy Now',
'upload_date': None, 'upload_date': None,
}, },
},{ }, {
'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree', 'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
'info_dict': { 'info_dict': {
'id': '2015-0703-001', 'id': '2015-0703-001',
@@ -40,7 +28,6 @@ class DemocracynowIE(InfoExtractor):
'uploader': 'Democracy Now', 'uploader': 'Democracy Now',
'upload_date': None, 'upload_date': None,
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -49,7 +36,7 @@ def _real_extract(self, url):
if display_id == '': if display_id == '':
display_id = 'home' display_id = 'home'
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
re_desc = re.search(r'<meta property=.og:description. content=(["\'])(.+?)\1',webpage,re.DOTALL) re_desc = re.search(r'<meta property=.og:description. content=(["\'])(.+?)\1', webpage, re.DOTALL)
description = re_desc.group(2) if re_desc else '' description = re_desc.group(2) if re_desc else ''
jstr = self._search_regex(r'({.+?"related_video_xml".+?})', webpage, 'json', default=None) jstr = self._search_regex(r'({.+?"related_video_xml".+?})', webpage, 'json', default=None)
@@ -57,30 +44,30 @@ def _real_extract(self, url):
video_id = None video_id = None
formats = [] formats = []
subtitles = {} subtitles = {}
for key in ('caption_file','.......'): for key in ('caption_file', '.......'):
# ....... = pending vtt support that doesn't clobber srt 'chapter_file': # ....... = pending vtt support that doesn't clobber srt 'chapter_file':
url = js.get(key,'') url = js.get(key, '')
if url == '' or url == None: if url == '' or url is None:
continue continue
if not re.match(r'^https?://',url): if not re.match(r'^https?://', url):
url = base_host + url url = base_host + url
ext = re.search(r'\.([^\.]+)$',url).group(1) ext = re.search(r'\.([^\.]+)$', url).group(1)
subtitles['eng'] = [{ subtitles['eng'] = [{
'ext': ext, 'ext': ext,
'url': url, 'url': url,
}] }]
for key in ('file', 'audio'): for key in ('file', 'audio'):
url = js.get(key,'') url = js.get(key, '')
if url == '' or url == None: if url == '' or url is None:
continue continue
if not re.match(r'^https?://',url): if not re.match(r'^https?://', url):
url = base_host + url url = base_host + url
purl = re.search(r'/(?P<dir>[^/]+)/(?:dn)?(?P<fn>[^/]+?)\.(?P<ext>[^\.\?]+)(?P<hasparams>\?|$)',url) purl = re.search(r'/(?P<dir>[^/]+)/(?:dn)?(?P<fn>[^/]+?)\.(?P<ext>[^\.\?]+)(?P<hasparams>\?|$)', url)
if video_id == None: if video_id is None:
video_id = purl.group('fn') video_id = purl.group('fn')
if js.get('start') != None: if js.get('start') is not None:
url += '&' if purl.group('hasparams') == '?' else '?' url += '&' if purl.group('hasparams') == '?' else '?'
url = url + 'start='+str(js.get('start')) url = url + 'start=' + str(js.get('start'))
formats.append({ formats.append({
'format_id': purl.group('dir'), 'format_id': purl.group('dir'),
'ext': purl.group('ext'), 'ext': purl.group('ext'),
@@ -92,9 +79,7 @@ def _real_extract(self, url):
'title': js.get('title'), 'title': js.get('title'),
'description': description, 'description': description,
'uploader': 'Democracy Now', 'uploader': 'Democracy Now',
# 'thumbnails': thumbnails,
'subtitles': subtitles, 'subtitles': subtitles,
'formats': formats, 'formats': formats,
} }
return ret return ret
#