Remove _sort_formats from _extract_*_formats methods

_sort_formats must now be called explicitly by the caller.
Explicit _sort_formats calls have been added in all the necessary places in the code.

Closes #8051
This commit is contained in:
Sergey M․ 2016-03-27 07:03:08 +06:00
parent 395fd4b08a
commit 19dbaeece3
29 changed files with 56 additions and 21 deletions

View file

@ -44,6 +44,7 @@ def _real_extract(self, url):
'contentURL', webpage, 'm3u8 url', fatal=True) 'contentURL', webpage, 'm3u8 url', fatal=True)
formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4') formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
self._sort_formats(formats)
title = self._og_search_title(webpage).strip() title = self._og_search_title(webpage).strip()
description = self._og_search_description(webpage).strip() description = self._og_search_description(webpage).strip()

View file

@ -120,6 +120,7 @@ def _real_extract(self, url):
bc_info = self._download_json(req, user) bc_info = self._download_json(req, user)
m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS') m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4') formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
self._sort_formats(formats)
return { return {
'id': info['id'], 'id': info['id'],

View file

@ -94,6 +94,7 @@ def _real_extract(self, url):
xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')
formats = self._extract_smil_formats(smil_url, display_id) formats = self._extract_smil_formats(smil_url, display_id)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -122,6 +122,7 @@ def _real_extract(self, url):
for entry in f4m_formats: for entry in f4m_formats:
# URLs without the extra param induce an 404 error # URLs without the extra param induce an 404 error
entry.update({'extra_param_to_segment_url': hdcore_sign}) entry.update({'extra_param_to_segment_url': hdcore_sign})
self._sort_formats(f4m_formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -48,6 +48,7 @@ def _real_extract(self, url):
raise ExtractorError('Unable to find stream URL') raise ExtractorError('Unable to find stream URL')
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -1021,8 +1021,6 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
'height': int_or_none(media_el.attrib.get('height')), 'height': int_or_none(media_el.attrib.get('height')),
'preference': preference, 'preference': preference,
}) })
self._sort_formats(formats)
return formats return formats
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
@ -1143,7 +1141,6 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
last_media = None last_media = None
formats.append(f) formats.append(f)
last_info = {} last_info = {}
self._sort_formats(formats)
return formats return formats
@staticmethod @staticmethod
@ -1317,8 +1314,6 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
}) })
continue continue
self._sort_formats(formats)
return formats return formats
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
@ -1536,7 +1531,6 @@ def extract_multisegment_info(element, ms_parent_info):
existing_format.update(f) existing_format.update(f)
else: else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
self._sort_formats(formats)
return formats return formats
def _live_title(self, name): def _live_title(self, name):

View file

@ -57,6 +57,7 @@ def _real_extract(self, url):
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
video_data['videos']['variantplaylist']['uri'], video_id, 'mp4') video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
self._sort_formats(formats)
thumbnails = [{ thumbnails = [{
'url': image['uri'], 'url': image['uri'],

View file

@ -38,6 +38,7 @@ def _real_extract(self, url):
token_el = f4m_info.find('token') token_el = f4m_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
formats = self._extract_f4m_formats(manifest_url, display_id) formats = self._extract_f4m_formats(manifest_url, display_id)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -63,18 +63,23 @@ def _real_extract(self, url):
video_title = info.get('playlist_title') or info.get('video_title') video_title = info.get('playlist_title') or info.get('video_title')
entries = [{ entries = []
'id': compat_str(video_info['id']),
'formats': self._extract_m3u8_formats( for idx, video_info in enumerate(info['playlist']):
formats = self._extract_m3u8_formats(
video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls', video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
note='Download m3u8 information for video %d' % (idx + 1)), note='Download m3u8 information for video %d' % (idx + 1))
'title': video_info['title'], self._sort_formats(formats)
'description': video_info.get('description'), entries.append({
'duration': parse_duration(video_info.get('video_length')), 'id': compat_str(video_info['id']),
'webpage_url': video_info.get('href') or video_info.get('url'), 'formats': formats,
'thumbnail': video_info.get('thumbnailURL'), 'title': video_info['title'],
'alt_title': video_info.get('secondary_title'), 'description': video_info.get('description'),
'timestamp': parse_iso8601(video_info.get('publishedDate')), 'duration': parse_duration(video_info.get('video_length')),
} for idx, video_info in enumerate(info['playlist'])] 'webpage_url': video_info.get('href') or video_info.get('url'),
'thumbnail': video_info.get('thumbnailURL'),
'alt_title': video_info.get('secondary_title'),
'timestamp': parse_iso8601(video_info.get('publishedDate')),
})
return self.playlist_result(entries, display_id, video_title) return self.playlist_result(entries, display_id, video_title)

View file

@ -118,6 +118,8 @@ def extract_formats(protocol, manifest_url):
if info.get(protocol): if info.get(protocol):
extract_formats(protocol, info[protocol]) extract_formats(protocol, info[protocol])
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,

View file

@ -39,13 +39,13 @@ def _real_extract(self, url):
hidden_inputs = self._hidden_inputs(webpage) hidden_inputs = self._hidden_inputs(webpage)
title = hidden_inputs['media_title'] title = hidden_inputs['media_title']
formats = []
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
formats = self._extract_smil_formats( formats = self._extract_smil_formats(
'http://www.dw.com/smil/v-%s' % media_id, media_id, 'http://www.dw.com/smil/v-%s' % media_id, media_id,
transform_source=lambda s: s.replace( transform_source=lambda s: s.replace(
'rtmp://tv-od.dw.de/flash/', 'rtmp://tv-od.dw.de/flash/',
'http://tv-download.dw.de/dwtv_video/flv/')) 'http://tv-download.dw.de/dwtv_video/flv/'))
self._sort_formats(formats)
else: else:
formats = [{'url': hidden_inputs['file_name']}] formats = [{'url': hidden_inputs['file_name']}]

View file

@ -1310,6 +1310,7 @@ def _real_extract(self, url):
'vcodec': 'none' if m.group('type') == 'audio' else None 'vcodec': 'none' if m.group('type') == 'audio' else None
}] }]
info_dict['direct'] = True info_dict['direct'] = True
self._sort_formats(formats)
info_dict['formats'] = formats info_dict['formats'] = formats
return info_dict return info_dict
@ -1336,6 +1337,7 @@ def _real_extract(self, url):
# Is it an M3U playlist? # Is it an M3U playlist?
if first_bytes.startswith(b'#EXTM3U'): if first_bytes.startswith(b'#EXTM3U'):
info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4') info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
self._sort_formats(info_dict['formats'])
return info_dict return info_dict
# Maybe it's a direct link to a video? # Maybe it's a direct link to a video?
@ -1360,15 +1362,19 @@ def _real_extract(self, url):
if doc.tag == 'rss': if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc) return self._extract_rss(url, video_id, doc)
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
return self._parse_smil(doc, url, video_id) smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats'])
return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist': elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(self._parse_xspf(doc, video_id), video_id) return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats( info_dict['formats'] = self._parse_mpd_formats(
doc, video_id, mpd_base_url=url.rpartition('/')[0]) doc, video_id, mpd_base_url=url.rpartition('/')[0])
self._sort_formats(info_dict['formats'])
return info_dict return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id) info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
self._sort_formats(info_dict['formats'])
return info_dict return info_dict
except compat_xml_parse_error: except compat_xml_parse_error:
pass pass
@ -2053,6 +2059,9 @@ def filter_video(urls):
else: else:
entry_info_dict['url'] = video_url entry_info_dict['url'] = video_url
if entry_info_dict.get('formats'):
self._sort_formats(entry_info_dict['formats'])
entries.append(entry_info_dict) entries.append(entry_info_dict)
if len(entries) == 1: if len(entries) == 1:

View file

@ -130,6 +130,7 @@ def _real_extract(self, url):
formats = self._extract_f4m_formats( formats = self._extract_f4m_formats(
'%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth), '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
video_id, f4m_id='hds') video_id, f4m_id='hds')
self._sort_formats(formats)
categories_str = _v('meta_sports') categories_str = _v('meta_sports')
categories = categories_str.split(',') if categories_str else [] categories = categories_str.split(',') if categories_str else []

View file

@ -37,6 +37,7 @@ def _real_extract(self, url):
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)', r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
webpage, 'm3u8 url', group='url') webpage, 'm3u8 url', group='url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage) thumbnail = self._og_search_thumbnail(webpage)
description = self._og_search_description(webpage) description = self._og_search_description(webpage)

View file

@ -47,6 +47,7 @@ def _real_extract(self, url):
video_url = self._download_json(request, video_id)['data']['videoUrl'] video_url = self._download_json(request, video_id)['data']['videoUrl']
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
formats = self._extract_f4m_formats(f4m_url, video_id) formats = self._extract_f4m_formats(f4m_url, video_id)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': self._live_title('Матч ТВ - Прямой эфир'), 'title': self._live_title('Матч ТВ - Прямой эфир'),

View file

@ -67,6 +67,7 @@ def _real_extract(self, url):
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
display_id, f4m_id=loc)) display_id, f4m_id=loc))
self._sort_formats(formats)
title = self._search_regex( title = self._search_regex(
r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title') r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')

View file

@ -63,6 +63,7 @@ def _real_extract(self, url):
if determine_ext(media_url) == 'f4m': if determine_ext(media_url) == 'f4m':
formats = self._extract_f4m_formats( formats = self._extract_f4m_formats(
media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds') media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
self._sort_formats(formats)
else: else:
formats = [{ formats = [{
'url': media_url, 'url': media_url,

View file

@ -31,6 +31,7 @@ def _real_extract(self, url):
formats = self._extract_smil_formats( formats = self._extract_smil_formats(
'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id, 'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
video_id) video_id)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -49,6 +49,7 @@ def _real_extract(self, url):
# f4m_url = server + relative_url # f4m_url = server + relative_url
f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url'] f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
f4m_formats = self._extract_f4m_formats(f4m_url, video_id) f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
self._sort_formats(f4m_formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -209,6 +209,7 @@ def _real_extract(self, url):
png = self._download_webpage(png_url, video_id, 'Downloading url information') png = self._download_webpage(png_url, video_id, 'Downloading url information')
m3u8_url = _decrypt_url(png) m3u8_url = _decrypt_url(png)
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -38,6 +38,7 @@ def _real_extract(self, url):
item['file'], video_id, ext='mp4', entry_protocol='m3u8_native')) item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
elif item.get('type') == '': elif item.get('type') == '':
formats.append({'url': item['file']}) formats.append({'url': item['file']})
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -77,6 +77,7 @@ def _real_extract(self, url):
raise ExtractorError('This video is DRM protected.', expected=True) raise ExtractorError('This video is DRM protected.', expected=True)
formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4') formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
self._sort_formats(formats)
video = self._download_json( video = self._download_json(
'%s/%s/%s?%s' % ( '%s/%s/%s?%s' % (

View file

@ -99,6 +99,7 @@ def _real_extract(self, url):
webpage, 'hls file') webpage, 'hls file')
formats = self._extract_m3u8_formats(hls, video_id, 'mp4') formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
self._sort_formats(formats)
title = self._search_regex( title = self._search_regex(
r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title') r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')

View file

@ -82,6 +82,7 @@ def _real_extract(self, url):
) )
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native') token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
self._sort_formats(formats)
return { return {
'id': embed_data['videoId'], 'id': embed_data['videoId'],

View file

@ -69,6 +69,7 @@ def _real_extract(self, url):
apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu') apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
m3u8_url = codecs.decode(apu, 'rot_13')[::-1] m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,

View file

@ -111,6 +111,7 @@ def _real_extract(self, url):
video_url = xpath_text(video, './/video_url', 'video url', fatal=True) video_url = xpath_text(video, './/video_url', 'video url', fatal=True)
formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds') formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
self._sort_formats(formats)
data = self._download_json( data = self._download_json(
'http://videomore.ru/video/tracks/%s.json' % video_id, 'http://videomore.ru/video/tracks/%s.json' % video_id,

View file

@ -50,6 +50,7 @@ def _real_extract(self, url):
playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
self._sort_formats(formats)
title = self._og_search_title(webpage, default=display_id) title = self._og_search_title(webpage, default=display_id)
description = self._og_search_description(webpage, default=None) description = self._og_search_description(webpage, default=None)

View file

@ -151,6 +151,7 @@ def extract_part(part_id):
smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id) smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
smil = self._download_smil(smil_url, lecture_id) smil = self._download_smil(smil_url, lecture_id)
info = self._parse_smil(smil, smil_url, lecture_id) info = self._parse_smil(smil, smil_url, lecture_id)
self._sort_formats(info['formats'])
info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id) info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id) info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
if multipart: if multipart:

View file

@ -41,10 +41,12 @@ def _real_extract(self, url):
m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title) m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
if m: if m:
title = m.group('title') title = m.group('title')
formats = self._extract_f4m_formats(f4m_url, video_id)
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': self._extract_f4m_formats(f4m_url, video_id), 'formats': formats,
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
} }