2015-03-14 14:55:42 -04:00
|
|
|
from __future__ import unicode_literals
|
2015-03-04 16:33:56 -05:00
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
from .common import PostProcessor
|
|
|
|
|
|
|
|
|
|
|
|
class MetadataFromTitlePP(PostProcessor):
    """Post-processor that extracts metadata fields from a video's title.

    The title is matched against a pattern derived from ``titleformat`` and
    every named group of the match is written into the info dict.
    """

    def __init__(self, downloader, titleformat):
        """
        Store the title format and derive the regex used for matching.

        downloader  -- forwarded unchanged to the PostProcessor constructor
        titleformat -- either a template containing '%(name)s' fields, which
                       is converted with format_to_regex(), or any other
                       string, which is used as the regex verbatim
        """
        super(MetadataFromTitlePP, self).__init__(downloader)
        self._titleformat = titleformat
        # Only a value with at least one %(name)s placeholder is treated as
        # a template; everything else is assumed to already be a regex.
        if re.search(r'%\(\w+\)s', titleformat):
            self._titleregex = self.format_to_regex(titleformat)
        else:
            self._titleregex = titleformat

    def format_to_regex(self, fmt):
        r"""
        Build a regex from a template string. For example
            '%(title)s - %(artist)s'
        becomes
            '(?P<title>.+)\ \-\ (?P<artist>.+)'

        Each '%(name)s' field turns into a named group matching one or more
        characters; all text between the fields is escaped with re.escape()
        so that it only matches literally.
        """
        pieces = []
        cursor = 0
        for field in re.finditer(r'%\((\w+)\)s', fmt):
            # literal text between the previous field and this one
            pieces.append(re.escape(fmt[cursor:field.start()]))
            # the field itself becomes a named capture group
            pieces.append(r'(?P<' + field.group(1) + '>.+)')
            cursor = field.end()
        # literal text after the last field (an empty string escapes to '')
        pieces.append(re.escape(fmt[cursor:]))
        return ''.join(pieces)

    def run(self, info):
        """
        Match info['title'] against the stored regex and copy the named
        groups into info.

        The match is anchored at the start of the title (re.match). On
        success every named group is stored under its group name, including
        groups that did not participate in the match (stored as None and
        reported as 'NA'). On failure info is left untouched and a message
        is reported through self.to_screen().

        Returns a tuple of an empty list and the info dict.
        """
        parsed = re.match(self._titleregex, info['title'])
        if parsed is not None:
            for attribute, value in parsed.groupdict().items():
                info[attribute] = value
                shown = 'NA' if value is None else value
                self.to_screen('parsed %s: %s' % (attribute, shown))
        else:
            self.to_screen('Could not interpret title of video as "%s"' % self._titleformat)
        return [], info
|