[utils] Add join_nonempty

This commit is contained in:
pukkandan 2021-11-06 06:35:24 +05:30
parent a331949df3
commit 34921b4345
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
24 changed files with 82 additions and 131 deletions

View file

@ -74,6 +74,7 @@
int_or_none,
iri_to_uri,
ISO3166Utils,
join_nonempty,
LazyList,
LINK_TEMPLATES,
locked_file,
@ -1169,7 +1170,7 @@ def _prepare_filename(self, info_dict, tmpl_type='default'):
sub_ext = ''
if len(fn_groups) > 2:
sub_ext = fn_groups[-2]
filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
return filename
except ValueError as err:
@ -3221,12 +3222,12 @@ def list_formats(self, info_dict):
format_field(f, 'acodec', default='unknown').replace('none', ''),
format_field(f, 'abr', f'%{abr_digits}dk'),
format_field(f, 'asr', '%5dHz'),
', '.join(filter(None, (
self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else '',
join_nonempty(
self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
format_field(f, 'language', '[%s]'),
format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))),
))),
delim=', '),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
header_line = self._list_format_headers(
'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO',

View file

@ -9,6 +9,7 @@
float_or_none,
int_or_none,
ISO639Utils,
join_nonempty,
OnDemandPagedList,
parse_duration,
str_or_none,
@ -263,7 +264,7 @@ def _real_extract(self, url):
continue
formats.append({
'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
'format_id': join_nonempty(source.get('format'), source.get('label')),
'height': int_or_none(source.get('height') or None),
'tbr': int_or_none(source.get('bitrate') or None),
'width': int_or_none(source.get('width') or None),

View file

@ -8,6 +8,7 @@
determine_ext,
extract_attributes,
ExtractorError,
join_nonempty,
url_or_none,
urlencode_postdata,
urljoin,
@ -140,15 +141,8 @@ def extract_info(html, video_id, num=None):
kind = self._search_regex(
r'videomaterialurl/\d+/([^/]+)/',
playlist_url, 'media kind', default=None)
format_id_list = []
if lang:
format_id_list.append(lang)
if kind:
format_id_list.append(kind)
if not format_id_list and num is not None:
format_id_list.append(compat_str(num))
format_id = '-'.join(format_id_list)
format_note = ', '.join(filter(None, (kind, lang_note)))
format_id = join_nonempty(lang, kind) if lang or kind else str(num)
format_note = join_nonempty(kind, lang_note, delim=', ')
item_id_list = []
if format_id:
item_id_list.append(format_id)
@ -195,12 +189,10 @@ def extract_info(html, video_id, num=None):
if not file_:
continue
ext = determine_ext(file_)
format_id_list = [lang, kind]
if ext == 'm3u8':
format_id_list.append('hls')
elif source.get('type') == 'video/dash' or ext == 'mpd':
format_id_list.append('dash')
format_id = '-'.join(filter(None, format_id_list))
format_id = join_nonempty(
lang, kind,
'hls' if ext == 'm3u8' else None,
'dash' if source.get('type') == 'video/dash' or ext == 'mpd' else None)
if ext == 'm3u8':
file_formats = self._extract_m3u8_formats(
file_, video_id, 'mp4',

View file

@ -16,6 +16,7 @@
determine_ext,
intlist_to_bytes,
int_or_none,
join_nonempty,
strip_jsonp,
unescapeHTML,
unsmuggle_url,
@ -303,13 +304,13 @@ def _get_anvato_videos(self, access_key, video_id):
tbr = int_or_none(published_url.get('kbps'))
a_format = {
'url': video_url,
'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(),
'tbr': tbr if tbr != 0 else None,
'format_id': join_nonempty('http', published_url.get('cdn_name')).lower(),
'tbr': tbr or None,
}
if media_format == 'm3u8' and tbr is not None:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'format_id': join_nonempty('hls', tbr),
'ext': 'mp4',
})
elif media_format == 'm3u8-variant' or ext == 'm3u8':

View file

@ -54,6 +54,7 @@
GeoRestrictedError,
GeoUtils,
int_or_none,
join_nonempty,
js_to_json,
JSON_LD_RE,
mimetype2ext,
@ -1911,7 +1912,7 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
tbr = int_or_none(media_el.attrib.get('bitrate'))
width = int_or_none(media_el.attrib.get('width'))
height = int_or_none(media_el.attrib.get('height'))
format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
format_id = join_nonempty(f4m_id, tbr or i)
# If <bootstrapInfo> is present, the specified f4m is a
# stream-level manifest, and only set-level manifests may refer to
# external resources. See section 11.4 and section 4 of F4M spec
@ -1973,7 +1974,7 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, quality=None, m3u8_id=None):
return {
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
'format_id': join_nonempty(m3u8_id, 'meta'),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
@ -2068,7 +2069,7 @@ def _extract_m3u8_playlist_indices(*args, **kwargs):
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
formats = [{
'format_id': '-'.join(map(str, filter(None, [m3u8_id, idx]))),
'format_id': join_nonempty(m3u8_id, idx),
'format_index': idx,
'url': m3u8_url,
'ext': ext,
@ -2117,7 +2118,7 @@ def extract_media(x_media_line):
if media_url:
manifest_url = format_url(media_url)
formats.extend({
'format_id': '-'.join(map(str, filter(None, (m3u8_id, group_id, name, idx)))),
'format_id': join_nonempty(m3u8_id, group_id, name, idx),
'format_note': name,
'format_index': idx,
'url': manifest_url,
@ -2174,9 +2175,9 @@ def build_stream_name():
# format_id intact.
if not live:
stream_name = build_stream_name()
format_id[1] = stream_name if stream_name else '%d' % (tbr if tbr else len(formats))
format_id[1] = stream_name or '%d' % (tbr or len(formats))
f = {
'format_id': '-'.join(map(str, filter(None, format_id))),
'format_id': join_nonempty(*format_id),
'format_index': idx,
'url': manifest_url,
'manifest_url': m3u8_url,
@ -2965,13 +2966,6 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
})
fragment_ctx['time'] += fragment_ctx['duration']
format_id = []
if ism_id:
format_id.append(ism_id)
if stream_name:
format_id.append(stream_name)
format_id.append(compat_str(tbr))
if stream_type == 'text':
subtitles.setdefault(stream_language, []).append({
'ext': 'ismt',
@ -2990,7 +2984,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
})
elif stream_type in ('video', 'audio'):
formats.append({
'format_id': '-'.join(format_id),
'format_id': join_nonempty(ism_id, stream_name, tbr),
'url': ism_url,
'manifest_url': ism_url,
'ext': 'ismv' if stream_type == 'video' else 'isma',

View file

@ -7,8 +7,8 @@
from ..utils import (
int_or_none,
unified_strdate,
compat_str,
determine_ext,
join_nonempty,
update_url_query,
)
@ -119,18 +119,13 @@ def _real_extract(self, url):
continue
formats.append(f)
continue
format_id = []
if flavor_format:
format_id.append(flavor_format)
if tbr:
format_id.append(compat_str(tbr))
ext = determine_ext(flavor_url)
if flavor_format == 'applehttp' or ext == 'm3u8':
ext = 'mp4'
width = int_or_none(flavor.get('width'))
height = int_or_none(flavor.get('height'))
formats.append({
'format_id': '-'.join(format_id),
'format_id': join_nonempty(flavor_format, tbr),
'url': flavor_url,
'width': width,
'height': height,

View file

@ -8,6 +8,7 @@
determine_ext,
ExtractorError,
int_or_none,
join_nonempty,
js_to_json,
mimetype2ext,
try_get,
@ -139,13 +140,9 @@ def _parse_video_metadata(self, js, video_id, timestamp):
label = video.get('label')
height = self._search_regex(
r'^(\d+)[pP]', label or '', 'height', default=None)
format_id = ['http']
for f in (ext, label):
if f:
format_id.append(f)
formats.append({
'url': video_url,
'format_id': '-'.join(format_id),
'format_id': join_nonempty('http', ext, label),
'height': int_or_none(height),
})
self._sort_formats(formats)

View file

@ -10,6 +10,7 @@
from ..utils import (
determine_ext,
int_or_none,
join_nonempty,
js_to_json,
orderedSet,
qualities,
@ -288,10 +289,11 @@ def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_n
sub_type = sub_type if sub_type != 'FULL' else None
current_sub = {
'url': text_track['src'],
'name': ' '.join(filter(None, (version, text_track.get('label'), sub_type)))
'name': join_nonempty(version, text_track.get('label'), sub_type, delim=' ')
}
lang = '_'.join(filter(None, (
text_track.get('language', 'und'), version if version != 'Simulcast' else None, sub_type)))
lang = join_nonempty(text_track.get('language', 'und'),
version if version != 'Simulcast' else None,
sub_type, delim='_')
if current_sub not in subtitles.get(lang, []):
subtitles.setdefault(lang, []).append(current_sub)
return subtitles

View file

@ -8,6 +8,7 @@
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
qualities,
)
@ -102,12 +103,8 @@ def _real_extract(self, url):
m3u8_id=video_source_format, fatal=False))
else:
video_source_quality = video_source.get('Quality')
format_id = []
for v in (video_source_format, video_source_quality):
if v:
format_id.append(v)
f = {
'format_id': '-'.join(format_id),
'format_id': join_nonempty(video_source_format, video_source_quality),
'quality': q(video_source_quality),
'url': video_source_url,
}

View file

@ -2,13 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urlparse,
)
from ..compat import compat_urlparse
from ..utils import (
determine_ext,
int_or_none,
join_nonempty,
parse_duration,
parse_iso8601,
url_or_none,
@ -148,13 +146,9 @@ def _real_extract(self, url):
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
format_id = [media_type]
if vbr or abr:
format_id.append(compat_str(vbr or abr))
f = {
'url': video_url,
'format_id': '-'.join(format_id),
'format_id': join_nonempty(media_type, vbr or abr),
'filesize': filesize,
'abr': abr,
'vbr': vbr,

View file

@ -15,6 +15,7 @@
float_or_none,
HEADRequest,
int_or_none,
join_nonempty,
RegexNotFoundError,
sanitized_Request,
strip_or_none,
@ -99,9 +100,9 @@ def _extract_video_formats(self, mdoc, mtvn_id, video_id):
formats.extend([{
'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext,
'url': rtmp_video_url,
'format_id': '-'.join(filter(None, [
'format_id': join_nonempty(
'rtmp' if rtmp_video_url.startswith('rtmp') else None,
rendition.get('bitrate')])),
rendition.get('bitrate')),
'width': int(rendition.get('width')),
'height': int(rendition.get('height')),
}])

View file

@ -11,6 +11,7 @@
float_or_none,
HEADRequest,
int_or_none,
join_nonempty,
orderedSet,
remove_end,
str_or_none,
@ -82,12 +83,7 @@ def _real_extract(self, url):
src = url_or_none(fd.get('src'))
if not src:
continue
format_id_list = []
for key in ('delivery', 'quality', 'quality_string'):
value = fd.get(key)
if value:
format_id_list.append(value)
format_id = '-'.join(format_id_list)
format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd)
ext = determine_ext(src)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(

View file

@ -4,11 +4,11 @@
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
dict_get,
ExtractorError,
int_or_none,
join_nonempty,
parse_iso8601,
try_get,
unescapeHTML,
@ -116,12 +116,8 @@ def process_asset_file(asset_file):
elif asset_type == 'audio':
tbr = abr
format_id = ['http']
if tbr:
format_id.append(compat_str(tbr))
formats.append({
'format_id': '-'.join(format_id),
'format_id': join_nonempty('http', tbr),
'url': unescapeHTML(http_url),
'vbr': vbr,
'abr': abr,

View file

@ -7,6 +7,7 @@
ExtractorError,
float_or_none,
int_or_none,
join_nonempty,
parse_iso8601,
qualities,
try_get,
@ -94,11 +95,7 @@ def _real_extract(self, url):
continue
protocol = source.get('protocol')
quality = source.get('quality')
format_id = []
for e in (protocol, source.get('encoding'), quality):
if e:
format_id.append(e)
format_id = '-'.join(format_id)
format_id = join_nonempty(protocol, source.get('encoding'), quality)
if protocol in ('HDS', 'HLS'):
if source.get('tokenType') == 'AKAMAI':

View file

@ -9,6 +9,7 @@
ExtractorError,
float_or_none,
int_or_none,
join_nonempty,
parse_iso8601,
)
@ -119,24 +120,16 @@ def _real_extract(self, url):
src = s.get('src')
if not (src and self._is_valid_url(src, video_id)):
continue
width = None
format_id = ['http']
ext = determine_ext(src)
if ext:
format_id.append(ext)
height = int_or_none(s.get('height'))
if height:
format_id.append('%dp' % height)
if aspect:
width = int(height * aspect)
formats.append({
'ext': ext,
'format_id': '-'.join(format_id),
'format_id': join_nonempty('http', ext, height and '%dp' % height),
'height': height,
'source_preference': 0,
'url': src,
'vcodec': 'none' if height == 0 else None,
'width': width,
'width': int(height * aspect) if height and aspect else None,
})
# It seems like this would be correctly handled by default
# However, unless someone can confirm this, the old

View file

@ -12,6 +12,7 @@
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
str_or_none,
traverse_obj,
try_get,
@ -107,8 +108,8 @@ def extract_addr(addr, add_meta={}):
'acodec': 'aac',
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
**add_meta, **parsed_meta,
'format_note': ' '.join(filter(None, (
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else '')))
'format_note': join_nonempty(
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' ')
} for url in addr.get('url_list') or []]
# Hack: Add direct video links first to prioritize them when removing duplicate formats

View file

@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
from ..utils import int_or_none, join_nonempty
class TOnlineIE(InfoExtractor):
@ -30,13 +30,8 @@ def _real_extract(self, url):
asset_source = asset.get('source') or asset.get('source2')
if not asset_source:
continue
formats_id = []
for field_key in ('type', 'profile'):
field_value = asset.get(field_key)
if field_value:
formats_id.append(field_value)
formats.append({
'format_id': '-'.join(formats_id),
'format_id': join_nonempty('type', 'profile', from_dict=asset),
'url': asset_source,
})

View file

@ -13,6 +13,7 @@
ExtractorError,
int_or_none,
float_or_none,
join_nonempty,
mimetype2ext,
str_or_none,
)
@ -139,8 +140,8 @@ def resolve_dash_template(template, idx, chunk_hash):
content_type = stream['contentType']
kind = content_type.split('/')[0]
f = {
'format_id': '-'.join(filter(None, [
'dash', kind, str_or_none(stream.get('bitrate'))])),
'format_id': join_nonempty(
'dash', kind, str_or_none(stream.get('bitrate'))),
'protocol': 'http_dash_segments',
# TODO: generate a MPD doc for external players?
'url': encode_data_uri(b'<MPD/>', 'text/xml'),

View file

@ -19,6 +19,7 @@
ExtractorError,
float_or_none,
int_or_none,
join_nonempty,
traverse_obj,
)
@ -141,14 +142,10 @@ def _real_initialize(self):
def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
return []
stream_id_list = []
if audio_lang:
stream_id_list.append('audio-%s' % audio_lang)
if hardsub_lang:
stream_id_list.append('hardsub-%s' % hardsub_lang)
format_id = stream_format
if stream_id_list:
format_id += '-' + '-'.join(stream_id_list)
format_id = join_nonempty(
stream_format,
audio_lang and 'audio-%s' % audio_lang,
hardsub_lang and 'hardsub-%s' % hardsub_lang)
if 'hls' in stream_format:
adaptive_formats = self._extract_m3u8_formats(
url, video_id, 'mp4', m3u8_id=format_id,

View file

@ -6,6 +6,7 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
join_nonempty,
xpath_text,
)
@ -34,12 +35,9 @@ def _real_extract(self, url):
title = xpath_text(video, './/event_name', 'event name', fatal=True)
def make_id(parts, separator):
return separator.join(filter(None, parts))
formats = []
for format_id in (None, 'noise'):
track_tag = make_id(('track', format_id), '_')
track_tag = join_nonempty('track', format_id, delim='_')
for track in video.findall('.//iphone/%s' % track_tag):
track_url = track.text
if not track_url:
@ -48,7 +46,7 @@ def make_id(parts, separator):
m3u8_formats = self._extract_m3u8_formats(
track_url, video_id, 'mp4',
entry_protocol='m3u8_native',
m3u8_id=make_id(('hls', format_id), '-'), fatal=False)
m3u8_id=join_nonempty('hls', format_id, delim='-'), fatal=False)
for f in m3u8_formats:
f.update({
'source_preference': 0 if format_id == 'noise' else 1,

View file

@ -39,6 +39,7 @@
int_or_none,
intlist_to_bytes,
is_html,
join_nonempty,
mimetype2ext,
network_exceptions,
orderedSet,
@ -2507,11 +2508,11 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live):
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': itag,
'format_note': ', '.join(filter(None, (
'format_note': join_nonempty(
'%s%s' % (audio_track.get('displayName') or '',
' (default)' if audio_track.get('audioIsDefault') else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
throttled and 'THROTTLED'))),
throttled and 'THROTTLED', delim=', '),
'source_preference': -10 if throttled else -1,
'fps': int_or_none(fmt.get('fps')) or None,
'height': height,

View file

@ -12,6 +12,7 @@
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
try_get,
url_or_none,
urlencode_postdata,
@ -156,15 +157,9 @@ def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
watch_url = url_or_none(watch.get('url'))
if not watch_url:
continue
format_id_list = [stream_type]
maxrate = watch.get('maxrate')
if maxrate:
format_id_list.append(compat_str(maxrate))
audio_channel = watch.get('audio_channel')
if audio_channel:
format_id_list.append(compat_str(audio_channel))
preference = 1 if audio_channel == 'A' else None
format_id = '-'.join(format_id_list)
format_id = join_nonempty(stream_type, watch.get('maxrate'), audio_channel)
if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
this_formats = self._extract_mpd_formats(
watch_url, video_id, mpd_id=format_id, fatal=False)

View file

@ -9,12 +9,12 @@
determine_ext,
float_or_none,
int_or_none,
join_nonempty,
merge_dicts,
NO_DEFAULT,
orderedSet,
parse_codecs,
qualities,
str_or_none,
try_get,
unified_timestamp,
update_url_query,
@ -70,11 +70,11 @@ def _extract_format(self, video_id, formats, format_urls, meta):
f = {'vcodec': data[0], 'acodec': data[1]}
f.update({
'url': format_url,
'format_id': '-'.join(filter(str_or_none, ('http', meta.get('type'), meta.get('quality')))),
'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')),
})
new_formats = [f]
formats.extend(merge_dicts(f, {
'format_note': ', '.join(filter(None, (meta.get('quality'), meta.get('class')))),
'format_note': join_nonempty('quality', 'class', from_dict=meta, delim=', '),
'language': meta.get('language'),
'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
'quality': qualities(self._QUALITIES)(meta.get('quality')),

View file

@ -6570,3 +6570,9 @@ def remove_terminal_sequences(string):
def number_of_digits(number):
    """Return how many characters *number* occupies when formatted with %d.

    The value is rendered as a decimal integer first, so a leading minus
    sign is counted and any fractional part is discarded.
    """
    rendered = '%d' % number
    return len(rendered)
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the truthy items of *values* into one string separated by *delim*.

    Every surviving item is converted with str(), so non-string values such
    as ints can be passed directly. Falsy items (None, '', 0, ...) are
    dropped before joining.

    @param values     Items to join. When from_dict is given, these are
                      instead the keys to look up in from_dict.
    @param delim      Separator placed between the joined items.
    @param from_dict  Optional dict to read the items from. Keys that are
                      missing are skipped just like any other empty value.
    @returns          The joined string ('' when nothing is left to join).
    """
    if from_dict is not None:
        # Look each key up individually. operator.itemgetter(values) treats
        # the whole tuple as ONE key, and even itemgetter(*values) raises
        # KeyError on a missing key and returns a bare item (not a tuple)
        # when only a single key is requested.
        values = map(from_dict.get, values)
    return delim.join(map(str, filter(None, values)))