mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-18 00:44:03 -05:00
Merge branch 'master' into opener-to-ydl
This commit is contained in:
commit
e03db0a077
20 changed files with 464 additions and 117 deletions
|
@ -100,6 +100,7 @@ def test_no_duplicates(self):
|
||||||
def test_keywords(self):
|
def test_keywords(self):
|
||||||
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubs', ['youtube:subscriptions'])
|
||||||
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
self.assertMatch(':ytsubscriptions', ['youtube:subscriptions'])
|
||||||
|
self.assertMatch(':ythistory', ['youtube:history'])
|
||||||
self.assertMatch(':thedailyshow', ['ComedyCentral'])
|
self.assertMatch(':thedailyshow', ['ComedyCentral'])
|
||||||
self.assertMatch(':tds', ['ComedyCentral'])
|
self.assertMatch(':tds', ['ComedyCentral'])
|
||||||
self.assertMatch(':colbertreport', ['ComedyCentral'])
|
self.assertMatch(':colbertreport', ['ComedyCentral'])
|
||||||
|
|
|
@ -102,7 +102,7 @@ def test_bambuser_channel(self):
|
||||||
result = ie.extract('http://bambuser.com/channel/pixelversity')
|
result = ie.extract('http://bambuser.com/channel/pixelversity')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], u'pixelversity')
|
self.assertEqual(result['title'], u'pixelversity')
|
||||||
self.assertTrue(len(result['entries']) >= 66)
|
self.assertTrue(len(result['entries']) >= 60)
|
||||||
|
|
||||||
def test_bandcamp_album(self):
|
def test_bandcamp_album(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
|
|
|
@ -27,7 +27,7 @@ def assertIsPlaylist(self, info):
|
||||||
def test_youtube_playlist(self):
|
def test_youtube_playlist(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(result['title'], 'ytdl test PL')
|
self.assertEqual(result['title'], 'ytdl test PL')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||||
|
@ -44,13 +44,13 @@ def test_youtube_playlist_noplaylist(self):
|
||||||
def test_issue_673(self):
|
def test_issue_673(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('PLBB231211A4F62143')[0]
|
result = ie.extract('PLBB231211A4F62143')
|
||||||
self.assertTrue(len(result['entries']) > 25)
|
self.assertTrue(len(result['entries']) > 25)
|
||||||
|
|
||||||
def test_youtube_playlist_long(self):
|
def test_youtube_playlist_long(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertTrue(len(result['entries']) >= 799)
|
self.assertTrue(len(result['entries']) >= 799)
|
||||||
|
|
||||||
|
@ -58,7 +58,7 @@ def test_youtube_playlist_with_deleted(self):
|
||||||
#651
|
#651
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||||
|
@ -66,7 +66,7 @@ def test_youtube_playlist_with_deleted(self):
|
||||||
def test_youtube_playlist_empty(self):
|
def test_youtube_playlist_empty(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
|
result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
|
||||||
self.assertIsPlaylist(result)
|
self.assertIsPlaylist(result)
|
||||||
self.assertEqual(len(result['entries']), 0)
|
self.assertEqual(len(result['entries']), 0)
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ def test_youtube_course(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
# TODO find a > 100 (paginating?) videos course
|
# TODO find a > 100 (paginating?) videos course
|
||||||
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
|
result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
entries = result['entries']
|
entries = result['entries']
|
||||||
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
|
||||||
self.assertEqual(len(entries), 25)
|
self.assertEqual(len(entries), 25)
|
||||||
|
@ -84,22 +84,22 @@ def test_youtube_channel(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeChannelIE(dl)
|
ie = YoutubeChannelIE(dl)
|
||||||
#test paginated channel
|
#test paginated channel
|
||||||
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
|
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
|
||||||
self.assertTrue(len(result['entries']) > 90)
|
self.assertTrue(len(result['entries']) > 90)
|
||||||
#test autogenerated channel
|
#test autogenerated channel
|
||||||
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
|
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
self.assertTrue(len(result['entries']) >= 18)
|
self.assertTrue(len(result['entries']) >= 18)
|
||||||
|
|
||||||
def test_youtube_user(self):
|
def test_youtube_user(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubeUserIE(dl)
|
ie = YoutubeUserIE(dl)
|
||||||
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
|
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
|
||||||
self.assertTrue(len(result['entries']) >= 320)
|
self.assertTrue(len(result['entries']) >= 320)
|
||||||
|
|
||||||
def test_youtube_safe_search(self):
|
def test_youtube_safe_search(self):
|
||||||
dl = FakeYDL()
|
dl = FakeYDL()
|
||||||
ie = YoutubePlaylistIE(dl)
|
ie = YoutubePlaylistIE(dl)
|
||||||
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
|
result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
|
||||||
self.assertEqual(len(result['entries']), 2)
|
self.assertEqual(len(result['entries']), 2)
|
||||||
|
|
||||||
def test_youtube_show(self):
|
def test_youtube_show(self):
|
||||||
|
|
|
@ -104,6 +104,7 @@ class YoutubeDL(object):
|
||||||
playlistend: Playlist item to end at.
|
playlistend: Playlist item to end at.
|
||||||
matchtitle: Download only matching titles.
|
matchtitle: Download only matching titles.
|
||||||
rejecttitle: Reject downloads for matching titles.
|
rejecttitle: Reject downloads for matching titles.
|
||||||
|
logger: Log messages to a logging.Logger instance.
|
||||||
logtostderr: Log messages to stderr instead of stdout.
|
logtostderr: Log messages to stderr instead of stdout.
|
||||||
writedescription: Write the video description to a .description file
|
writedescription: Write the video description to a .description file
|
||||||
writeinfojson: Write the video description to a .info.json file
|
writeinfojson: Write the video description to a .info.json file
|
||||||
|
@ -204,7 +205,9 @@ def add_post_processor(self, pp):
|
||||||
|
|
||||||
def to_screen(self, message, skip_eol=False):
|
def to_screen(self, message, skip_eol=False):
|
||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
if not self.params.get('quiet', False):
|
if self.params.get('logger'):
|
||||||
|
self.params['logger'].debug(message)
|
||||||
|
elif not self.params.get('quiet', False):
|
||||||
terminator = [u'\n', u''][skip_eol]
|
terminator = [u'\n', u''][skip_eol]
|
||||||
output = message + terminator
|
output = message + terminator
|
||||||
write_string(output, self._screen_file)
|
write_string(output, self._screen_file)
|
||||||
|
@ -212,10 +215,13 @@ def to_screen(self, message, skip_eol=False):
|
||||||
def to_stderr(self, message):
|
def to_stderr(self, message):
|
||||||
"""Print message to stderr."""
|
"""Print message to stderr."""
|
||||||
assert type(message) == type(u'')
|
assert type(message) == type(u'')
|
||||||
output = message + u'\n'
|
if self.params.get('logger'):
|
||||||
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
self.params['logger'].error(message)
|
||||||
output = output.encode(preferredencoding())
|
else:
|
||||||
sys.stderr.write(output)
|
output = message + u'\n'
|
||||||
|
if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
|
||||||
|
output = output.encode(preferredencoding())
|
||||||
|
sys.stderr.write(output)
|
||||||
|
|
||||||
def to_console_title(self, message):
|
def to_console_title(self, message):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
|
@ -370,15 +376,17 @@ def prepare_filename(self, info_dict):
|
||||||
def _match_entry(self, info_dict):
|
def _match_entry(self, info_dict):
|
||||||
""" Returns None iff the file should be downloaded """
|
""" Returns None iff the file should be downloaded """
|
||||||
|
|
||||||
title = info_dict['title']
|
if 'title' in info_dict:
|
||||||
matchtitle = self.params.get('matchtitle', False)
|
# This can happen when we're just evaluating the playlist
|
||||||
if matchtitle:
|
title = info_dict['title']
|
||||||
if not re.search(matchtitle, title, re.IGNORECASE):
|
matchtitle = self.params.get('matchtitle', False)
|
||||||
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
if matchtitle:
|
||||||
rejecttitle = self.params.get('rejecttitle', False)
|
if not re.search(matchtitle, title, re.IGNORECASE):
|
||||||
if rejecttitle:
|
return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
|
||||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
rejecttitle = self.params.get('rejecttitle', False)
|
||||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
if rejecttitle:
|
||||||
|
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||||
|
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||||
date = info_dict.get('upload_date', None)
|
date = info_dict.get('upload_date', None)
|
||||||
if date is not None:
|
if date is not None:
|
||||||
dateRange = self.params.get('daterange', DateRange())
|
dateRange = self.params.get('daterange', DateRange())
|
||||||
|
@ -389,8 +397,8 @@ def _match_entry(self, info_dict):
|
||||||
if age_limit < info_dict.get('age_limit', 0):
|
if age_limit < info_dict.get('age_limit', 0):
|
||||||
return u'Skipping "' + title + '" because it is age restricted'
|
return u'Skipping "' + title + '" because it is age restricted'
|
||||||
if self.in_download_archive(info_dict):
|
if self.in_download_archive(info_dict):
|
||||||
return (u'%(title)s has already been recorded in archive'
|
return (u'%s has already been recorded in archive'
|
||||||
% info_dict)
|
% info_dict.get('title', info_dict.get('id', u'video')))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -469,7 +477,7 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||||
ie_key=ie_result.get('ie_key'),
|
ie_key=ie_result.get('ie_key'),
|
||||||
extra_info=extra_info)
|
extra_info=extra_info)
|
||||||
elif result_type == 'playlist':
|
elif result_type == 'playlist':
|
||||||
self.add_extra_info(ie_result, extra_info)
|
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
playlist = ie_result.get('title', None) or ie_result.get('id', None)
|
||||||
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
|
||||||
|
@ -499,6 +507,12 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
'extractor_key': ie_result['extractor_key'],
|
'extractor_key': ie_result['extractor_key'],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reason = self._match_entry(entry)
|
||||||
|
if reason is not None:
|
||||||
|
self.to_screen(u'[download] ' + reason)
|
||||||
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
entry_result = self.process_ie_result(entry,
|
||||||
download=download,
|
download=download,
|
||||||
extra_info=extra)
|
extra_info=extra)
|
||||||
|
@ -654,7 +668,7 @@ def process_info(self, info_dict):
|
||||||
|
|
||||||
# Forced printings
|
# Forced printings
|
||||||
if self.params.get('forcetitle', False):
|
if self.params.get('forcetitle', False):
|
||||||
compat_print(info_dict['title'])
|
compat_print(info_dict['fulltitle'])
|
||||||
if self.params.get('forceid', False):
|
if self.params.get('forceid', False):
|
||||||
compat_print(info_dict['id'])
|
compat_print(info_dict['id'])
|
||||||
if self.params.get('forceurl', False):
|
if self.params.get('forceurl', False):
|
||||||
|
@ -825,7 +839,16 @@ def in_download_archive(self, info_dict):
|
||||||
fn = self.params.get('download_archive')
|
fn = self.params.get('download_archive')
|
||||||
if fn is None:
|
if fn is None:
|
||||||
return False
|
return False
|
||||||
vid_id = info_dict['extractor'] + u' ' + info_dict['id']
|
extractor = info_dict.get('extractor_id')
|
||||||
|
if extractor is None:
|
||||||
|
if 'id' in info_dict:
|
||||||
|
extractor = info_dict.get('ie_key') # key in a playlist
|
||||||
|
if extractor is None:
|
||||||
|
return False # Incomplete video information
|
||||||
|
# Future-proof against any change in case
|
||||||
|
# and backwards compatibility with prior versions
|
||||||
|
extractor = extractor.lower()
|
||||||
|
vid_id = extractor + u' ' + info_dict['id']
|
||||||
try:
|
try:
|
||||||
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
|
||||||
for line in archive_file:
|
for line in archive_file:
|
||||||
|
|
|
@ -35,6 +35,7 @@
|
||||||
'Jelle van der Waa',
|
'Jelle van der Waa',
|
||||||
'Marcin Cieślak',
|
'Marcin Cieślak',
|
||||||
'Anton Larionov',
|
'Anton Larionov',
|
||||||
|
'Takuya Tsuchida',
|
||||||
)
|
)
|
||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
from .canalplus import CanalplusIE
|
from .canalplus import CanalplusIE
|
||||||
from .canalc2 import Canalc2IE
|
from .canalc2 import Canalc2IE
|
||||||
from .cinemassacre import CinemassacreIE
|
from .cinemassacre import CinemassacreIE
|
||||||
|
from .clipfish import ClipfishIE
|
||||||
from .cnn import CNNIE
|
from .cnn import CNNIE
|
||||||
from .collegehumor import CollegeHumorIE
|
from .collegehumor import CollegeHumorIE
|
||||||
from .comedycentral import ComedyCentralIE
|
from .comedycentral import ComedyCentralIE
|
||||||
|
@ -98,6 +99,7 @@
|
||||||
from .nbc import NBCNewsIE
|
from .nbc import NBCNewsIE
|
||||||
from .newgrounds import NewgroundsIE
|
from .newgrounds import NewgroundsIE
|
||||||
from .nhl import NHLIE, NHLVideocenterIE
|
from .nhl import NHLIE, NHLVideocenterIE
|
||||||
|
from .niconico import NiconicoIE
|
||||||
from .nowvideo import NowVideoIE
|
from .nowvideo import NowVideoIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .orf import ORFIE
|
from .orf import ORFIE
|
||||||
|
@ -156,6 +158,7 @@
|
||||||
from .videopremium import VideoPremiumIE
|
from .videopremium import VideoPremiumIE
|
||||||
from .vimeo import VimeoIE, VimeoChannelIE
|
from .vimeo import VimeoIE, VimeoChannelIE
|
||||||
from .vine import VineIE
|
from .vine import VineIE
|
||||||
|
from .viki import VikiIE
|
||||||
from .vk import VKIE
|
from .vk import VKIE
|
||||||
from .wat import WatIE
|
from .wat import WatIE
|
||||||
from .websurg import WeBSurgIE
|
from .websurg import WeBSurgIE
|
||||||
|
@ -183,6 +186,7 @@
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
YoutubeFavouritesIE,
|
YoutubeFavouritesIE,
|
||||||
|
YoutubeHistoryIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
|
|
|
@ -20,28 +20,6 @@ class BandcampIE(InfoExtractor):
|
||||||
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
u"title": u"youtube-dl test song \"'/\\\u00e4\u21ad"
|
||||||
},
|
},
|
||||||
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
u'skip': u'There is a limit of 200 free downloads / month for the test song'
|
||||||
}, {
|
|
||||||
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
|
||||||
u'playlist': [
|
|
||||||
{
|
|
||||||
u'file': u'1353101989.mp3',
|
|
||||||
u'md5': u'39bc1eded3476e927c724321ddf116cf',
|
|
||||||
u'info_dict': {
|
|
||||||
u'title': u'Intro',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
u'file': u'38097443.mp3',
|
|
||||||
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
|
|
||||||
u'info_dict': {
|
|
||||||
u'title': u'Kero One - Keep It Alive (Blazo remix)',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
],
|
|
||||||
u'params': {
|
|
||||||
u'playlistend': 2
|
|
||||||
},
|
|
||||||
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -56,20 +34,17 @@ def _real_extract(self, url):
|
||||||
json_code = m_trackinfo.group(1)
|
json_code = m_trackinfo.group(1)
|
||||||
data = json.loads(json_code)
|
data = json.loads(json_code)
|
||||||
|
|
||||||
entries = []
|
|
||||||
for d in data:
|
for d in data:
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': 'format_id',
|
'format_id': 'format_id',
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'ext': format_id.partition('-')[0]
|
'ext': format_id.partition('-')[0]
|
||||||
} for format_id, format_url in sorted(d['file'].items())]
|
} for format_id, format_url in sorted(d['file'].items())]
|
||||||
entries.append({
|
return {
|
||||||
'id': compat_str(d['id']),
|
'id': compat_str(d['id']),
|
||||||
'title': d['title'],
|
'title': d['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
}
|
||||||
|
|
||||||
return self.playlist_result(entries, title, title)
|
|
||||||
else:
|
else:
|
||||||
raise ExtractorError(u'No free songs found')
|
raise ExtractorError(u'No free songs found')
|
||||||
|
|
||||||
|
@ -112,6 +87,30 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
IE_NAME = u'Bandcamp:album'
|
IE_NAME = u'Bandcamp:album'
|
||||||
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
_VALID_URL = r'http://.*?\.bandcamp\.com/album/(?P<title>.*)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
u'url': u'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
|
u'playlist': [
|
||||||
|
{
|
||||||
|
u'file': u'1353101989.mp3',
|
||||||
|
u'md5': u'39bc1eded3476e927c724321ddf116cf',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Intro',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
u'file': u'38097443.mp3',
|
||||||
|
u'md5': u'1a2c32e2691474643e912cc6cd4bffaa',
|
||||||
|
u'info_dict': {
|
||||||
|
u'title': u'Kero One - Keep It Alive (Blazo remix)',
|
||||||
|
}
|
||||||
|
},
|
||||||
|
],
|
||||||
|
u'params': {
|
||||||
|
u'playlistend': 2
|
||||||
|
},
|
||||||
|
u'skip': u'Bancamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
title = mobj.group('title')
|
title = mobj.group('title')
|
||||||
|
|
|
@ -75,16 +75,22 @@ def _build_brighcove_url(cls, object_str):
|
||||||
params = {'flashID': object_doc.attrib['id'],
|
params = {'flashID': object_doc.attrib['id'],
|
||||||
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
|
||||||
}
|
}
|
||||||
playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
|
def find_param(name):
|
||||||
|
node = find_xpath_attr(object_doc, './param', 'name', name)
|
||||||
|
if node is not None:
|
||||||
|
return node.attrib['value']
|
||||||
|
return None
|
||||||
|
playerKey = find_param('playerKey')
|
||||||
# Not all pages define this value
|
# Not all pages define this value
|
||||||
if playerKey is not None:
|
if playerKey is not None:
|
||||||
params['playerKey'] = playerKey.attrib['value']
|
params['playerKey'] = playerKey
|
||||||
videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
|
# The three fields hold the id of the video
|
||||||
|
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
|
||||||
if videoPlayer is not None:
|
if videoPlayer is not None:
|
||||||
params['@videoPlayer'] = videoPlayer.attrib['value']
|
params['@videoPlayer'] = videoPlayer
|
||||||
linkBase = find_xpath_attr(object_doc, './param', 'name', 'linkBaseURL')
|
linkBase = find_param('linkBaseURL')
|
||||||
if linkBase is not None:
|
if linkBase is not None:
|
||||||
params['linkBaseURL'] = linkBase.attrib['value']
|
params['linkBaseURL'] = linkBase
|
||||||
data = compat_urllib_parse.urlencode(params)
|
data = compat_urllib_parse.urlencode(params)
|
||||||
return cls._FEDERATED_URL_TEMPLATE % data
|
return cls._FEDERATED_URL_TEMPLATE % data
|
||||||
|
|
||||||
|
|
53
youtube_dl/extractor/clipfish.py
Normal file
53
youtube_dl/extractor/clipfish.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ClipfishIE(InfoExtractor):
    """Extractor for single videos hosted on clipfish.de.

    The numeric video id is taken from the URL, and the remaining
    metadata is read from the site's ``devxml/videoinfo`` XML document.
    """
    IE_NAME = u'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        u'url': u'http://www.clipfish.de/special/supertalent/video/4028320/supertalent-2013-ivana-opacak-singt-nobodys-perfect/',
        u'file': u'4028320.f4v',
        u'md5': u'5e38bda8c329fbfb42be0386a3f5a382',
        u'info_dict': {
            u'title': u'Supertalent 2013: Ivana Opacak singt Nobody\'s Perfect',
            u'duration': 399,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, url, thumbnail, duration) for *url*."""
        video_id = re.match(self._VALID_URL, url).group(1)

        # The current timestamp is appended as the ``ts`` query parameter.
        timestamp = int(time.time())
        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                    (video_id, timestamp))
        info_xml = self._download_webpage(
            info_url, video_id, note=u'Downloading info page')
        doc = xml.etree.ElementTree.fromstring(info_xml)

        title = doc.find('title').text
        video_url = doc.find('filename').text
        thumbnail = doc.find('imageurl').text
        duration_str = doc.find('duration').text

        # Duration is given as "H:MM:SS:ms"; the trailing field is matched
        # but not used. An unrecognised format leaves the duration as None.
        duration = None
        parsed = re.match(
            r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
            duration_str)
        if parsed:
            hours = int(parsed.group('hours'))
            minutes = int(parsed.group('minutes'))
            seconds = int(parsed.group('seconds'))
            duration = (hours * 60 * 60) + (minutes * 60) + seconds

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
        }
|
|
@ -1,5 +1,4 @@
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -46,11 +45,10 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id
|
||||||
metaXml = self._download_webpage(xmlUrl, video_id,
|
mdoc = self._download_xml(xmlUrl, video_id,
|
||||||
u'Downloading info XML',
|
u'Downloading info XML',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
|
||||||
mdoc = xml.etree.ElementTree.fromstring(metaXml)
|
|
||||||
try:
|
try:
|
||||||
videoNode = mdoc.findall('./video')[0]
|
videoNode = mdoc.findall('./video')[0]
|
||||||
youtubeIdNode = videoNode.find('./youtubeID')
|
youtubeIdNode = videoNode.find('./youtubeID')
|
||||||
|
@ -65,11 +63,10 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
if next_url.endswith(u'manifest.f4m'):
|
if next_url.endswith(u'manifest.f4m'):
|
||||||
manifest_url = next_url + '?hdcore=2.10.3'
|
manifest_url = next_url + '?hdcore=2.10.3'
|
||||||
manifestXml = self._download_webpage(manifest_url, video_id,
|
adoc = self._download_xml(manifest_url, video_id,
|
||||||
u'Downloading XML manifest',
|
u'Downloading XML manifest',
|
||||||
u'Unable to download video info XML')
|
u'Unable to download video info XML')
|
||||||
|
|
||||||
adoc = xml.etree.ElementTree.fromstring(manifestXml)
|
|
||||||
try:
|
try:
|
||||||
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
|
||||||
except IndexError:
|
except IndexError:
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import netrc
|
import netrc
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
|
@ -208,6 +209,11 @@ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
|
||||||
""" Returns the data of the page as a string """
|
""" Returns the data of the page as a string """
|
||||||
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
|
||||||
|
|
||||||
|
def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'):
|
||||||
|
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||||
|
xml_string = self._download_webpage(url_or_request, video_id, note, errnote)
|
||||||
|
return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
|
||||||
|
|
||||||
def to_screen(self, msg):
|
def to_screen(self, msg):
|
||||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||||
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
|
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
|
||||||
|
@ -229,12 +235,14 @@ def report_login(self):
|
||||||
self.to_screen(u'Logging in')
|
self.to_screen(u'Logging in')
|
||||||
|
|
||||||
#Methods for following #608
|
#Methods for following #608
|
||||||
def url_result(self, url, ie=None):
|
def url_result(self, url, ie=None, video_id=None):
|
||||||
"""Returns a url that points to a page that should be processed"""
|
"""Returns a url that points to a page that should be processed"""
|
||||||
#TODO: ie should be the class used for getting the info
|
#TODO: ie should be the class used for getting the info
|
||||||
video_info = {'_type': 'url',
|
video_info = {'_type': 'url',
|
||||||
'url': url,
|
'url': url,
|
||||||
'ie_key': ie}
|
'ie_key': ie}
|
||||||
|
if video_id is not None:
|
||||||
|
video_info['id'] = video_id
|
||||||
return video_info
|
return video_info
|
||||||
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
||||||
"""Returns a playlist"""
|
"""Returns a playlist"""
|
||||||
|
|
|
@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
u'url': u'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||||
u'file': u'390161.mp4',
|
u'file': u'390161.mp4',
|
||||||
u'md5': u'1d7ba54e2c9d7dc6935ef39e00529138',
|
u'md5': u'8b743df908c42f60cf6496586c7f12c3',
|
||||||
u'info_dict': {
|
u'info_dict': {
|
||||||
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
u"description": u"The square knot, also known as the reef knot, is one of the oldest, most basic knots to tie, and can be used in many different ways. Here's the proper way to tie a square knot.",
|
||||||
u"title": u"How to Tie a Square Knot Properly"
|
u"title": u"How to Tie a Square Knot Properly"
|
||||||
|
|
|
@ -60,7 +60,7 @@ def _real_extract(self, url):
|
||||||
'title': info['name'],
|
'title': info['name'],
|
||||||
'url': final_song_url,
|
'url': final_song_url,
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'description': info['description'],
|
'description': info.get('description'),
|
||||||
'thumbnail': info['pictures'].get('extra_large'),
|
'thumbnail': info['pictures'].get('extra_large'),
|
||||||
'uploader': info['user']['name'],
|
'uploader': info['user']['name'],
|
||||||
'uploader_id': info['user']['username'],
|
'uploader_id': info['user']['username'],
|
||||||
|
|
131
youtube_dl/extractor/niconico.py
Normal file
131
youtube_dl/extractor/niconico.py
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_http_client,
|
||||||
|
compat_urllib_error,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
compat_urlparse,
|
||||||
|
compat_str,
|
||||||
|
|
||||||
|
ExtractorError,
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoIE(InfoExtractor):
    """Information extractor for nicovideo.jp watch pages.

    Login is performed during initialization; with ``_LOGIN_REQUIRED`` set,
    extraction is refused when no credentials are available.
    """
    IE_NAME = u'niconico'
    IE_DESC = u'ニコニコ動画'

    _TEST = {
        u'url': u'http://www.nicovideo.jp/watch/sm22312215',
        u'file': u'sm22312215.mp4',
        u'md5': u'd1a75c0823e2f629128c43e1212760f9',
        u'info_dict': {
            u'title': u'Big Buck Bunny',
            u'uploader': u'takuya0301',
            u'uploader_id': u'2698420',
            u'upload_date': u'20131123',
            u'description': u'(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
        u'params': {
            u'username': u'ydl.niconico@gmail.com',
            u'password': u'youtube-dl',
        },
    }

    # Group 1 captures the video id: two lowercase letters followed by digits.
    _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$'
    _NETRC_MACHINE = 'niconico'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _login(self):
        """Submit the stored credentials to the niconico login endpoint.

        Returns True when the response does not contain the login-error
        heading, False when no credentials are configured (and login is
        optional) or when the error heading is present.

        Raises ExtractorError when ``_LOGIN_REQUIRED`` is set and no
        username is available.
        """
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        # Log in
        login_form_strs = {
            u'mail': username,
            u'password': password,
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            u'https://secure.nicovideo.jp/secure/login', login_data)
        login_results = self._download_webpage(
            request, u'', note=u'Logging in', errnote=u'Unable to log in')
        if re.search(r'(?i)<h1 class="mb8p4">Log in error</h1>', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
            return False
        return True

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Fetches the watch page, the ``getthumbinfo`` XML and the ``getflv``
        response, then tries to resolve the uploader's nickname. The
        nickname lookup is best-effort: on failure the uploader id is
        reported as the uploader name.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        # Get video webpage. We are not actually interested in it, but need
        # the cookies in order to be able to download the info webpage
        self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)

        video_info_webpage = self._download_webpage(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
            note=u'Downloading video info page')

        # Get flv info; the response is parsed as a query string whose
        # 'url' field is the media URL.
        flv_info_webpage = self._download_webpage(
            u'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
            video_id, u'Downloading flv info')
        video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]

        # Start extracting information
        video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
        video_title = video_info.find('.//title').text
        video_extension = video_info.find('.//movie_type').text
        video_format = video_extension.upper()
        video_thumbnail = video_info.find('.//thumbnail_url').text
        video_description = video_info.find('.//description').text
        video_uploader_id = video_info.find('.//user_id').text
        # Only the part before '+' (i.e. without the UTC offset) is handed
        # to unified_strdate.
        video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
        video_view_count = video_info.find('.//view_counter').text
        video_webpage_url = video_info.find('.//watch_url').text

        # uploader: fall back to the id unless a nickname can be fetched
        video_uploader = video_uploader_id
        user_info_url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
        try:
            user_info_webpage = self._download_webpage(
                user_info_url, video_id, note=u'Downloading user information')
        # NOTE(review): ExtractorError is listed because the download helper
        # presumably wraps network failures in it -- confirm against
        # InfoExtractor._download_webpage.
        except (ExtractorError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
        else:
            user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
            nickname = user_info.find('.//nickname')
            # find() returns None when the element is absent; keep the
            # uploader id as the name in that case instead of crashing.
            if nickname is not None and nickname.text:
                video_uploader = nickname.text

        return {
            'id': video_id,
            'url': video_real_url,
            'title': video_title,
            'ext': video_extension,
            'format': video_format,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'uploader': video_uploader,
            'upload_date': video_upload_date,
            'uploader_id': video_uploader_id,
            'view_count': video_view_count,
            'webpage_url': video_webpage_url,
        }
|
|
@ -21,6 +21,7 @@ class StreamcloudIE(InfoExtractor):
|
||||||
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
u'title': u'youtube-dl test video \'/\\ ä ↭',
|
||||||
u'duration': 9,
|
u'duration': 9,
|
||||||
},
|
},
|
||||||
|
u'skip': u'Only available from the EU'
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
91
youtube_dl/extractor/viki.py
Normal file
91
youtube_dl/extractor/viki.py
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
unified_strdate,
|
||||||
|
)
|
||||||
|
from .subtitles import SubtitlesInfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class VikiIE(SubtitlesInfoExtractor):
    """Information extractor for viki.com video pages, including subtitles."""
    IE_NAME = u'viki'

    # The named group 'id' (also group 1) captures the numeric id with its
    # trailing 'v'.
    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Metadata comes from the video page; the media URL, upload date and
        subtitle tracks come from the ``player5_fragment`` page.

        Returns None (after listing the subtitles) when the
        ``listsubtitles`` downloader parameter is set, otherwise the info
        dict.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader = self._html_search_regex(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
            u'uploader')
        if uploader is not None:
            uploader = uploader.strip()

        # A missing rating yields '' which maps to no age limit (None).
        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        RATINGS = {
            'G': 0,
            'PG': 10,
            'PG-13': 13,
            'R': 16,
            'NC': 18,
        }
        age_limit = RATINGS.get(rating_str)

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(info_url, video_id)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Map language codes to subtitle URLs found in *info_webpage*.

        Every ``<track src="..."/>`` whose URL ends in ``/<lang>.vtt`` (with
        a lowercase alphabetic ``<lang>``) contributes one entry; other
        tracks are ignored. Returns an empty dict when none match.
        """
        res = {}
        # The page text must be passed as the string to search; calling
        # re.findall with the pattern alone raises TypeError.
        for sturl in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res
|
|
@ -1510,7 +1510,7 @@ def _real_extract(self, url):
|
||||||
})
|
})
|
||||||
return results
|
return results
|
||||||
|
|
||||||
class YoutubePlaylistIE(InfoExtractor):
|
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com playlists'
|
IE_DESC = u'YouTube.com playlists'
|
||||||
_VALID_URL = r"""(?:
|
_VALID_URL = r"""(?:
|
||||||
(?:https?://)?
|
(?:https?://)?
|
||||||
|
@ -1526,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||||
|
|
|
|
||||||
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
|
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
|
||||||
)"""
|
)"""
|
||||||
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
|
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
|
||||||
_MAX_RESULTS = 50
|
_MORE_PAGES_INDICATOR = r'data-link-type="next"'
|
||||||
|
_VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
|
||||||
IE_NAME = u'youtube:playlist'
|
IE_NAME = u'youtube:playlist'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -1535,6 +1536,9 @@ def suitable(cls, url):
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
"""Receives a URL and returns True if suitable for this IE."""
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._login()
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# Extract playlist id
|
# Extract playlist id
|
||||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||||
|
@ -1548,45 +1552,28 @@ def _real_extract(self, url):
|
||||||
video_id = query_dict['v'][0]
|
video_id = query_dict['v'][0]
|
||||||
if self._downloader.params.get('noplaylist'):
|
if self._downloader.params.get('noplaylist'):
|
||||||
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
|
||||||
return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
|
return self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
else:
|
else:
|
||||||
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||||
|
|
||||||
# Download playlist videos from API
|
# Extract the video ids from the playlist pages
|
||||||
videos = []
|
ids = []
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
start_index = self._MAX_RESULTS * (page_num - 1) + 1
|
url = self._TEMPLATE_URL % (playlist_id, page_num)
|
||||||
if start_index >= 1000:
|
|
||||||
self._downloader.report_warning(u'Max number of results reached')
|
|
||||||
break
|
|
||||||
url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
|
|
||||||
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
|
||||||
|
# The ids are duplicated
|
||||||
|
new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
|
||||||
|
ids.extend(new_ids)
|
||||||
|
|
||||||
try:
|
if re.search(self._MORE_PAGES_INDICATOR, page) is None:
|
||||||
response = json.loads(page)
|
|
||||||
except ValueError as err:
|
|
||||||
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
|
|
||||||
|
|
||||||
if 'feed' not in response:
|
|
||||||
raise ExtractorError(u'Got a malformed response from YouTube API')
|
|
||||||
playlist_title = response['feed']['title']['$t']
|
|
||||||
if 'entry' not in response['feed']:
|
|
||||||
# Number of videos is a multiple of self._MAX_RESULTS
|
|
||||||
break
|
break
|
||||||
|
|
||||||
for entry in response['feed']['entry']:
|
playlist_title = self._og_search_title(page)
|
||||||
index = entry['yt$position']['$t']
|
|
||||||
if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
|
|
||||||
videos.append((
|
|
||||||
index,
|
|
||||||
'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
|
|
||||||
))
|
|
||||||
|
|
||||||
videos = [v[1] for v in sorted(videos)]
|
url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
||||||
|
for vid_id in ids]
|
||||||
url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
|
return self.playlist_result(url_results, playlist_id, playlist_title)
|
||||||
return [self.playlist_result(url_results, playlist_id, playlist_title)]
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeChannelIE(InfoExtractor):
|
class YoutubeChannelIE(InfoExtractor):
|
||||||
|
@ -1640,9 +1627,9 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
|
||||||
|
|
||||||
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
|
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
|
for video_id in video_ids]
|
||||||
return [self.playlist_result(url_entries, channel_id)]
|
return self.playlist_result(url_entries, channel_id)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(InfoExtractor):
|
||||||
|
@ -1706,9 +1693,11 @@ def _real_extract(self, url):
|
||||||
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
||||||
break
|
break
|
||||||
|
|
||||||
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
|
url_results = [
|
||||||
url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
|
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
return [self.playlist_result(url_results, playlist_title = username)]
|
for video_id in video_ids]
|
||||||
|
return self.playlist_result(url_results, playlist_title=username)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor):
|
class YoutubeSearchIE(SearchInfoExtractor):
|
||||||
IE_DESC = u'YouTube.com searches'
|
IE_DESC = u'YouTube.com searches'
|
||||||
|
@ -1749,7 +1738,8 @@ def _get_n_results(self, query, n):
|
||||||
|
|
||||||
if len(video_ids) > n:
|
if len(video_ids) > n:
|
||||||
video_ids = video_ids[:n]
|
video_ids = video_ids[:n]
|
||||||
videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
|
videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
|
for video_id in video_ids]
|
||||||
return self.playlist_result(videos, query)
|
return self.playlist_result(videos, query)
|
||||||
|
|
||||||
class YoutubeSearchDateIE(YoutubeSearchIE):
|
class YoutubeSearchDateIE(YoutubeSearchIE):
|
||||||
|
@ -1809,7 +1799,9 @@ def _real_extract(self, url):
|
||||||
feed_html = info['feed_html']
|
feed_html = info['feed_html']
|
||||||
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
|
||||||
ids = orderedSet(m.group(1) for m in m_ids)
|
ids = orderedSet(m.group(1) for m in m_ids)
|
||||||
feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
|
feed_entries.extend(
|
||||||
|
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||||
|
for video_id in ids)
|
||||||
if info['paging'] is None:
|
if info['paging'] is None:
|
||||||
break
|
break
|
||||||
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
|
||||||
|
@ -1834,6 +1826,20 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
|
||||||
_PAGING_STEP = 100
|
_PAGING_STEP = 100
|
||||||
_PERSONAL_FEED = True
|
_PERSONAL_FEED = True
|
||||||
|
|
||||||
|
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the logged-in user's YouTube watch history."""
    IE_DESC = u'Youtube watch history, "ythistory" keyword (requires authentication)'
    # Raw string so that '\.' reaches the regex engine untouched instead of
    # relying on an unrecognized string escape.
    _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PERSONAL_FEED = True
    _PLAYLIST_TITLE = u'Youtube Watch History'

    def _real_extract(self, url):
        """Read the paging step from the history page, then run the base extraction.

        The ``data-paging`` value found on the history page is stored on
        this instance as ``_PAGING_STEP`` before delegating to the parent
        class.
        """
        webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
        data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging')
        # The step is actually a ridiculously big number (like 1374343569725646)
        self._PAGING_STEP = int(data_paging)
        return super(YoutubeHistoryIE, self)._real_extract(url)
|
||||||
|
|
||||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||||
IE_NAME = u'youtube:favorites'
|
IE_NAME = u'youtube:favorites'
|
||||||
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
|
||||||
|
|
|
@ -41,6 +41,7 @@ def b(x):
|
||||||
if signature != sha256(message).digest(): return False
|
if signature != sha256(message).digest(): return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def update_self(to_screen, verbose):
|
def update_self(to_screen, verbose):
|
||||||
"""Update the program file with the latest version from the repository"""
|
"""Update the program file with the latest version from the repository"""
|
||||||
|
|
||||||
|
@ -82,6 +83,13 @@ def update_self(to_screen, verbose):
|
||||||
return
|
return
|
||||||
|
|
||||||
version_id = versions_info['latest']
|
version_id = versions_info['latest']
|
||||||
|
|
||||||
|
def version_tuple(version_str):
|
||||||
|
return tuple(map(int, version_str.split('.')))
|
||||||
|
if version_tuple(__version__) >= version_tuple(version_id):
|
||||||
|
to_screen(u'youtube-dl is up to date (%s)' % __version__)
|
||||||
|
return
|
||||||
|
|
||||||
to_screen(u'Updating to version ' + version_id + '...')
|
to_screen(u'Updating to version ' + version_id + '...')
|
||||||
version = versions_info['versions'][version_id]
|
version = versions_info['versions'][version_id]
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
import pipes
|
import pipes
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
|
import ssl
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
@ -536,12 +537,29 @@ def formatSeconds(secs):
|
||||||
return '%d' % secs
|
return '%d' % secs
|
||||||
|
|
||||||
def make_HTTPS_handler(opts_no_check_certificate):
|
def make_HTTPS_handler(opts_no_check_certificate):
|
||||||
if sys.version_info < (3,2):
|
if sys.version_info < (3, 2):
|
||||||
# Python's 2.x handler is very simplistic
|
import httplib
|
||||||
return compat_urllib_request.HTTPSHandler()
|
|
||||||
|
class HTTPSConnectionV3(httplib.HTTPSConnection):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
httplib.HTTPSConnection.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
|
def connect(self):
|
||||||
|
sock = socket.create_connection((self.host, self.port), self.timeout)
|
||||||
|
if self._tunnel_host:
|
||||||
|
self.sock = sock
|
||||||
|
self._tunnel()
|
||||||
|
try:
|
||||||
|
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
|
||||||
|
except ssl.SSLError as e:
|
||||||
|
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
|
||||||
|
|
||||||
|
class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
|
||||||
|
def https_open(self, req):
|
||||||
|
return self.do_open(HTTPSConnectionV3, req)
|
||||||
|
return HTTPSHandlerV3()
|
||||||
else:
|
else:
|
||||||
import ssl
|
context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
|
||||||
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
|
||||||
context.set_default_verify_paths()
|
context.set_default_verify_paths()
|
||||||
|
|
||||||
context.verify_mode = (ssl.CERT_NONE
|
context.verify_mode = (ssl.CERT_NONE
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
|
|
||||||
__version__ = '2013.11.22'
|
__version__ = '2013.11.24.1'
|
||||||
|
|
Loading…
Reference in a new issue