[YoutubeDL] format spec: allow grouping specifiers with parentheses

This commit is contained in:
Jaime Marquínez Ferrándiz 2015-06-29 12:42:02 +02:00
parent 5acfa126c8
commit 0130afb76e
2 changed files with 61 additions and 2 deletions

View file

@ -245,6 +245,30 @@ def format_info(f_id):
self.assertEqual(downloaded['format_id'], '137+141') self.assertEqual(downloaded['format_id'], '137+141')
self.assertEqual(downloaded['ext'], 'mp4') self.assertEqual(downloaded['ext'], 'mp4')
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['137+141', '248+141'])
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['136+141', '247+141'])
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
yie = YoutubeIE(ydl)
yie._sort_formats(info_dict['formats'])
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['248+141'])
for f1, f2 in zip(formats_order, formats_order[1:]): for f1, f2 in zip(formats_order, formats_order[1:]):
info_dict = _make_result([f1, f2], extractor='youtube') info_dict = _make_result([f1, f2], extractor='youtube')
ydl = YDL({'format': 'best/bestvideo'}) ydl = YDL({'format': 'best/bestvideo'})

View file

@ -920,6 +920,7 @@ def syntax_error(note, start):
PICKFIRST = 'PICKFIRST' PICKFIRST = 'PICKFIRST'
MERGE = 'MERGE' MERGE = 'MERGE'
SINGLE = 'SINGLE' SINGLE = 'SINGLE'
GROUP = 'GROUP'
FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
def _parse_filter(tokens): def _parse_filter(tokens):
@ -942,6 +943,10 @@ def _parse_format_selection(tokens, endwith=[]):
elif type == tokenize.OP: elif type == tokenize.OP:
if string in endwith: if string in endwith:
break break
elif string == ')':
# ')' will be handled by the parentheses group
tokens.restore_last_token()
break
if string == ',': if string == ',':
selectors.append(current_selector) selectors.append(current_selector)
current_selector = None current_selector = None
@ -955,6 +960,10 @@ def _parse_format_selection(tokens, endwith=[]):
current_selector = FormatSelector(SINGLE, 'best', []) current_selector = FormatSelector(SINGLE, 'best', [])
format_filter = _parse_filter(tokens) format_filter = _parse_filter(tokens)
current_selector.filters.append(format_filter) current_selector.filters.append(format_filter)
elif string == '(':
if current_selector:
raise syntax_error('Unexpected "("', start)
current_selector = FormatSelector(GROUP, _parse_format_selection(tokens, [')']), [])
elif string == '+': elif string == '+':
video_selector = current_selector video_selector = current_selector
audio_selector = _parse_format_selection(tokens, [',']) audio_selector = _parse_format_selection(tokens, [','])
@ -977,6 +986,8 @@ def selector_function(formats):
for format in f(formats): for format in f(formats):
yield format yield format
return selector_function return selector_function
elif selector.type == GROUP:
selector_function = _build_selector_function(selector.selector)
elif selector.type == PICKFIRST: elif selector.type == PICKFIRST:
fs = [_build_selector_function(s) for s in selector.selector] fs = [_build_selector_function(s) for s in selector.selector]
@ -1084,8 +1095,32 @@ def final_selector(formats):
return final_selector return final_selector
stream = io.BytesIO(format_spec.encode('utf-8')) stream = io.BytesIO(format_spec.encode('utf-8'))
tokens = compat_tokenize_tokenize(stream.readline) try:
parsed_selector = _parse_format_selection(tokens) tokens = list(compat_tokenize_tokenize(stream.readline))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
class TokenIterator(object):
    """Iterator over an already-materialized token sequence with push-back.

    The parser needs to "un-read" a token (for example a closing
    parenthesis that belongs to an enclosing group), which a plain
    generator cannot do.  This wrapper keeps an index into the sequence
    so the most recently returned token can be handed out again.

    Args:
        tokens: a sequence supporting ``len()`` and integer indexing.
    """

    def __init__(self, tokens):
        self.tokens = tokens
        # Index of the next token to be returned by __next__.
        self.counter = 0

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next token, raising StopIteration when exhausted."""
        if self.counter >= len(self.tokens):
            raise StopIteration()
        value = self.tokens[self.counter]
        self.counter += 1
        return value

    # The Python 2 iterator protocol calls ``next`` rather than ``__next__``.
    next = __next__

    def restore_last_token(self):
        """Step back one token so the next read returns it again.

        Does nothing when no token has been consumed yet: letting the
        index go negative would make the following read silently return
        the *last* token of the sequence through negative indexing.
        """
        if self.counter > 0:
            self.counter -= 1
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
return _build_selector_function(parsed_selector) return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict): def _calc_headers(self, info_dict):