2021-08-09 15:52:55 -04:00
|
|
|
import re
|
|
|
|
|
|
|
|
from .common import PostProcessor
|
2023-02-17 07:22:22 -05:00
|
|
|
from ..utils import Namespace, filter_dict, function_with_repr
|
2021-08-09 15:52:55 -04:00
|
|
|
|
|
|
|
|
|
|
|
class MetadataParserPP(PostProcessor):
    """Post-processor that rewrites entries of the info dict.

    It is configured with a list of actions; each action is a tuple whose
    first item is a member of ``Actions`` and whose remaining items are the
    arguments for that action:

        (Actions.INTERPRET, from, to) OR
        (Actions.REPLACE, field, search, replace)
    """

    def __init__(self, downloader, actions):
        """Build the per-action callables that ``run`` will later apply.

        @param downloader  Passed through to the base post-processor.
        @param actions     Iterable of ``(action, *args)`` tuples.
        @raises ValueError if an action is not a member of ``Actions``.
        """
        super().__init__(downloader)
        self._actions = []
        for f in actions:
            action, *args = f
            # Raise explicitly instead of using `assert`: assertions are
            # stripped under `python -O`, and validate_action() reports the
            # very same condition as a ValueError.
            if action not in self.Actions:
                raise ValueError(f'{action!r} is not a valid action')
            self._actions.append(action(self, *args))

    @classmethod
    def validate_action(cls, action, *data):
        """Each action can be:
                (Actions.INTERPRET, from, to) OR
                (Actions.REPLACE, field, search, replace)

        @raises ValueError if `action` is not a member of ``Actions``.
                Any error raised while building the action from `data`
                (e.g. an invalid regex) is propagated as well.
        """
        if action not in cls.Actions:
            raise ValueError(f'{action!r} is not a valid action')
        # The class stands in for an instance here; building the action
        # compiles its template/regex, so invalid data raises right away
        action(cls, *data)  # So this can raise error to validate

    @staticmethod
    def field_to_template(tmpl):
        """Return `tmpl` as an output template.

        A string made up only of ASCII letters and underscores is taken to be
        a bare field name and is wrapped as '%(name)s'. Anything else is
        checked with YoutubeDL.validate_outtmpl and returned unchanged; the
        error object returned by that check, if any, is raised.
        """
        if re.match(r'[a-zA-Z_]+$', tmpl):
            return f'%({tmpl})s'

        # Imported inside the function rather than at module level
        # NOTE(review): presumably to avoid a circular import - confirm
        from ..YoutubeDL import YoutubeDL
        err = YoutubeDL.validate_outtmpl(tmpl)
        if err:
            raise err
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        A string without any '%(name)s' field is returned unchanged
        (i.e. it is used as a regex as-is).
        """
        parts = []
        lastpos = 0
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            parts.append(re.escape(fmt[lastpos:match.start()]))
            parts.append(rf'(?P<{match.group(1)}>.+)')
            lastpos = match.end()
        if not parts:
            # No template fields at all
            return fmt
        # Literal text after the last field (empty string if there is none)
        parts.append(re.escape(fmt[lastpos:]))
        return ''.join(parts)

    def run(self, info):
        """Apply every configured action to `info` in order.

        The actions mutate `info` in place.
        @returns  A tuple of an empty list and `info`.
        """
        for f in self._actions:
            f(info)
        return [], info

    @function_with_repr
    def interpretter(self, inp, out):
        """Build an action that parses fields out of an evaluated template.

        @param inp  Field name or output template to read from
                    (see field_to_template).
        @param out  Format string or regex whose named groups are written
                    to the info dict (see format_to_regex).
        @returns    A function taking the info dict and updating it in place.
        """
        def f(info):
            data_to_parse = self._downloader.evaluate_outtmpl(template, info)
            self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
            match = out_re.search(data_to_parse)
            if match is None:
                self.to_screen(f'Could not interpret {inp!r} as {out!r}')
                return
            # NOTE(review): filter_dict presumably drops groups that did not
            # participate in the match (None values) - confirm in ..utils
            for attribute, value in filter_dict(match.groupdict()).items():
                info[attribute] = value
                self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}')

        # Computed once when the action is built, so that invalid input
        # raises here rather than when the action is run
        template = self.field_to_template(inp)
        out_re = re.compile(self.format_to_regex(out))
        return f

    @function_with_repr
    def replacer(self, field, search, replace):
        """Build an action that does a regex substitution within one field.

        @param field    Key of the info dict whose value is modified.
        @param search   Regex to search for.
        @param replace  Replacement, as accepted by re.Pattern.subn.
        @returns        A function taking the info dict and updating it in place.
        """
        def f(info):
            val = info.get(field)
            if val is None:
                self.to_screen(f'Video does not have a {field}')
                return
            if not isinstance(val, str):
                # Only string values can be substituted in
                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
                return
            self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
            info[field], n = search_re.subn(replace, val)
            if n:
                self.to_screen(f'Changed {field} to: {info[field]}')
            else:
                self.to_screen(f'Did not find {search!r} in {field}')

        # Compiled once when the action is built, so that an invalid regex
        # raises here rather than when the action is run
        search_re = re.compile(search)
        return f

    # The actions accepted by __init__ and validate_action
    Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer)
|
|
|
|
|
2021-08-09 15:52:55 -04:00
|
|
|
|
|
|
|
class MetadataFromFieldPP(MetadataParserPP):
    """Metadata parser configured with 'FROM:TO' strings.

    Each string is turned into an INTERPRET action of the parent class.
    """

    @classmethod
    def to_action(cls, f):
        """Convert a 'FROM:TO' string into an INTERPRET action tuple.

        The string is split at the first ':' that is not preceded by a
        backslash; any '\\:' within the FROM part is then unescaped to ':'.

        @raises ValueError if `f` does not have the form FROM:TO.
        """
        parsed = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        if not parsed:
            raise ValueError(f'it should be FROM:TO, not {f!r}')

        source, target = parsed.group('in', 'out')
        return (cls.Actions.INTERPRET, source.replace('\\:', ':'), target)

    def __init__(self, downloader, formats):
        """Translate every entry of `formats` and hand the actions to the parent."""
        actions = list(map(self.to_action, formats))
        super().__init__(downloader, actions)
|
2021-08-09 15:52:55 -04:00
|
|
|
|
|
|
|
|
2021-11-29 12:46:06 -05:00
|
|
|
# Deprecated
|
|
|
|
class MetadataFromTitlePP(MetadataParserPP):
    """Deprecated metadata parser that only interprets the 'title' field."""

    def __init__(self, downloader, titleformat):
        """Set up a single INTERPRET action on 'title', then warn about the deprecation.

        @param titleformat  Format string or regex the title is interpreted as.
        """
        title_action = (self.Actions.INTERPRET, 'title', titleformat)
        super().__init__(downloader, [title_action])

        message = (
            'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated '
            'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead'
        )
        self.deprecation_warning(message)
|