comictagger/comictaggerlib/filerenamer.py
2022-04-20 13:13:03 -07:00

296 lines
12 KiB
Python

"""Functions for renaming files based on metadata"""
# Copyright 2012-2014 Anthony Beville
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import calendar
import datetime
import logging
import os
import re
import string
import sys
from pathvalidate import sanitize_filename
from comicapi.genericmetadata import GenericMetadata
from comicapi.issuestring import IssueString
logger = logging.getLogger(__name__)
class FileRenamer:
    """Build a new filename from comic metadata using a ``%token%`` template.

    Tokens such as ``%series%`` or ``%issue%`` in ``template`` are replaced
    with the matching attribute of ``metadata``. When ``smart_cleanup`` is
    enabled, words attached to tokens whose value is ``None`` are dropped and
    leftover punctuation is tidied up.
    """

    def __init__(self, metadata):
        self.template = "%series% v%volume% #%issue% (of %issuecount%) (%year%)"
        self.smart_cleanup = True
        self.issue_zero_padding = 3
        self.metadata = metadata

    def set_metadata(self, metadata: GenericMetadata):
        """Replace the metadata object the next name is built from."""
        self.metadata = metadata

    def set_issue_zero_padding(self, count):
        """Set the ``pad`` value handed to ``IssueString.as_string``."""
        self.issue_zero_padding = count

    def set_smart_cleanup(self, on):
        """Enable or disable removal of leftovers around empty tokens."""
        self.smart_cleanup = on

    def set_template(self, template: str):
        """Set the ``%token%`` template used by ``determine_name``."""
        self.template = template

    def replace_token(self, text, value, token):
        """Return ``text`` with ``token`` substituted by ``value``.

        * ``value`` is not ``None``: every occurrence of ``token`` is replaced
          by ``str(value)``.
        * ``value`` is ``None`` and smart cleanup is off: the token is simply
          deleted.
        * ``value`` is ``None`` and smart cleanup is on: ``text`` is split on
          whitespace and every word containing the token is dropped, which
          also removes anything glued to it (e.g. ``#%issue%``). Note that
          this collapses runs of whitespace even if the token is absent.
        """
        if value is not None:
            return text.replace(token, str(value))
        if not self.smart_cleanup:
            return text.replace(token, "")

        def is_token(word):
            # slices instead of indexing so an empty word is simply "not a token"
            return word[:1] == "%" and word[-1:] == "%"

        # (TODO: This could fail if there is more than one token appended together)
        words = text.split()

        # special case for issuecount: blank the preceding non-token word,
        # as in "...(of %issuecount%)..."
        if token == "%issuecount%":
            for idx, word in enumerate(words):
                # idx > 0: without this guard, idx - 1 == -1 wraps around and
                # blanks the *last* word when the template starts with the token
                if idx > 0 and token in word and not is_token(words[idx - 1]):
                    words[idx - 1] = ""

        return " ".join(word for word in words if token not in word)

    def determine_name(self, ext):
        """Expand the template with the current metadata and append ``ext``.

        Returns the new filename. ``/``, ``:`` and ``?`` are replaced or
        removed at the end to keep various filesystems happy.
        """
        md = self.metadata

        issue_str = None
        if md.issue is not None:
            issue_str = IssueString(md.issue).as_string(pad=self.issue_zero_padding)

        # month name is only available for an int / digit-string month in 1..12
        month_name = None
        if isinstance(md.month, int) or (isinstance(md.month, str) and md.month.isdigit()):
            month_number = int(md.month)
            if 1 <= month_number <= 12:
                month_name = datetime.datetime(1970, month_number, 1, 0, 0).strftime("%B")

        # applied strictly in this order
        substitutions = (
            ("%series%", md.series),
            ("%volume%", md.volume),
            ("%issue%", issue_str),
            ("%issuecount%", md.issue_count),
            ("%year%", md.year),
            ("%publisher%", md.publisher),
            ("%title%", md.title),
            ("%month%", md.month),
            ("%month_name%", month_name),
            ("%genre%", md.genre),
            ("%language_code%", md.language),
            ("%criticalrating%", md.critical_rating),
            ("%alternateseries%", md.alternate_series),
            ("%alternatenumber%", md.alternate_number),
            ("%alternatecount%", md.alternate_count),
            ("%imprint%", md.imprint),
            ("%format%", md.format),
            ("%maturityrating%", md.maturity_rating),
            ("%storyarc%", md.story_arc),
            ("%seriesgroup%", md.series_group),
            ("%scaninfo%", md.scan_info),
        )

        new_name = self.template
        for token, value in substitutions:
            new_name = self.replace_token(new_name, value, token)

        if self.smart_cleanup:
            # remove empty parentheses, brackets, braces
            for empty_pair in (r"\(\s*[-:]*\s*\)", r"\[\s*[-:]*\s*]", r"{\s*[-:]*\s*}"):
                new_name = re.sub(empty_pair, "", new_name)

            # remove duplicate spaces
            new_name = " ".join(new_name.split())

            # remove duplicate -, _
            new_name = re.sub(r"[-_]{2,}\s+", "-- ", new_name)
            new_name = re.sub(r"(\s--)+", " --", new_name)
            new_name = re.sub(r"(\s-)+", " -", new_name)

            # remove dash or double dash at end of line
            new_name = re.sub(r"[-]{1,2}\s*$", "", new_name)

            # remove duplicate spaces (again!)
            new_name = " ".join(new_name.split())

        new_name += ext

        # some tweaks to keep various filesystems happy
        new_name = (
            new_name.replace("/", "-")
            .replace(" :", " -")
            .replace(": ", " - ")
            .replace(":", "-")
            .replace("?", "")
        )

        return new_name
class MetadataFormatter(string.Formatter):
    """``string.Formatter`` variant used to expand ``{field}`` filename templates.

    Differences from the base class visible in this implementation:

    * ``None`` and ``""`` values always format to the empty string.
    * Field names are lower-cased before lookup.
    * With ``smart_cleanup`` enabled, whitespace is collapsed, the literal text
      directly in front of an empty field is dropped, closing punctuation that
      follows an empty field is stripped, and field values are passed through
      ``sanitize_filename`` for ``platform``.
    """

    def __init__(self, smart_cleanup=False, platform="auto"):
        super().__init__()
        self.smart_cleanup = smart_cleanup
        self.platform = platform

    def format_field(self, value, format_spec):
        """Format ``value``; ``None`` and ``""`` become ``""`` whatever the spec."""
        if value is None or value == "":
            return ""
        return super().format_field(value, format_spec)

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, auto_arg_index=0):
        """Expand ``format_string`` and return ``(text, auto_arg_index)``.

        Raises ``ValueError`` when the recursion budget is exhausted or when
        automatic and manual field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError("Max string recursion exceeded")
        result = []
        lstrip = False
        for literal_text, field_name, format_spec, conversion in self.parse(format_string):
            # True only when this iteration put a literal on `result`
            literal_appended = False

            # output the literal text
            if literal_text:
                if lstrip:
                    # previous field was empty: drop punctuation that closed it
                    literal_text = literal_text.lstrip("-_)}]#")
                if self.smart_cleanup:
                    # slices, not [0] / [-1]: the lstrip above may leave an
                    # empty string (e.g. a lone ")"), which used to raise IndexError
                    lspace = literal_text[:1].isspace()
                    rspace = literal_text[-1:].isspace()
                    literal_text = " ".join(literal_text.split())
                    if literal_text == "":
                        literal_text = " "
                    else:
                        if lspace:
                            literal_text = " " + literal_text
                        if rspace:
                            literal_text += " "
                result.append(literal_text)
                literal_appended = True

            lstrip = False
            # if there's a field, output it
            if field_name is not None:
                field_name = field_name.lower()
                # this is some markup, find the object and do the formatting

                # handle arg indexing when empty field_names are given.
                if field_name == "":
                    if auto_arg_index is False:
                        raise ValueError("cannot switch from manual field specification to automatic field numbering")
                    field_name = str(auto_arg_index)
                    auto_arg_index += 1
                elif field_name.isdigit():
                    if auto_arg_index:
                        raise ValueError("cannot switch from manual field specification to automatic field numbering")
                    # disable auto arg incrementing, if it gets used later on,
                    # then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                # and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs, used_args, recursion_depth - 1, auto_arg_index=auto_arg_index
                )

                # format the object and append to the result
                fmt_obj = self.format_field(obj, format_spec)
                # Only discard a literal that this iteration appended. Popping
                # unconditionally removed the previous field's value when two
                # fields were adjacent, e.g. "{series}{volume}".
                if fmt_obj == "" and literal_appended and self.smart_cleanup:
                    lstrip = True
                    result.pop()
                if self.smart_cleanup:
                    fmt_obj = " ".join(fmt_obj.split())
                    fmt_obj = sanitize_filename(fmt_obj, platform=self.platform)
                result.append(fmt_obj)

        return "".join(result), auto_arg_index
class FileRenamer2:
    """Build a new file name (optionally with directories) from a ``{field}`` template.

    The template is split on ``os.sep``; each component is expanded with
    ``MetadataFormatter`` against the metadata's attributes and then passed
    through ``sanitize_filename`` for ``platform``.
    """

    def __init__(self, metadata, platform="auto"):
        self.template = "{publisher}/{series}/{series} v{volume} #{issue} (of {issue_count}) ({year})"
        self.smart_cleanup = True
        self.issue_zero_padding = 3
        self.metadata = metadata
        # when True, determine_name returns the joined path instead of the last component
        self.move = False
        self.platform = platform

    def set_metadata(self, metadata: GenericMetadata):
        """Replace the metadata object the next name is built from."""
        self.metadata = metadata

    def set_issue_zero_padding(self, count):
        """Set the ``pad`` value handed to ``IssueString.as_string``."""
        self.issue_zero_padding = count

    def set_smart_cleanup(self, on):
        """Enable or disable the formatter's smart cleanup."""
        self.smart_cleanup = on

    def set_template(self, template: str):
        """Set the ``{field}`` template used by ``determine_name``."""
        self.template = template

    def determine_name(self, ext):
        """Expand the template with the current metadata and append ``ext``.

        Returns the full joined path when ``self.move`` is true, otherwise
        only the last path component. The metadata object is not modified.
        """

        class Default(dict):
            # unknown field names are echoed back as "{name}" instead of raising
            def __missing__(self, key):
                return "{" + key + "}"

        md = self.metadata

        # Work on a copy: writing into vars(md) directly would add the role and
        # month keys below as attributes on the caller's metadata object.
        md_dict = dict(vars(md))

        # Zero-padded issue for the template. Stored only in the copy, so
        # md.issue keeps its original value even if formatting raises.
        # presumably `issue` is a plain instance attribute — verify against GenericMetadata
        md_dict["issue"] = IssueString(md.issue).as_string(pad=self.issue_zero_padding)

        for role in ["writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor"]:
            md_dict[role] = md.get_primary_credit(role)

        month = md.month
        if (isinstance(month, int) or isinstance(month, str) and month.isdigit()) and 0 < int(month) < 13:
            md_dict["month_name"] = calendar.month_name[int(month)]
            md_dict["month_abbr"] = calendar.month_abbr[int(month)]
        else:
            md_dict["month_name"] = ""
            md_dict["month_abbr"] = ""

        fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform)
        values = Default(md_dict)

        # NOTE(review): Python documents sys.platform as 'win32' on Windows, so
        # the sys.platform clause appears never to match. Left unchanged because
        # the colon rewrite runs on the raw template and would also hit format
        # specs such as "{issue:03}" — confirm the intended behavior first.
        replace_colons = (
            self.platform.lower() in ["universal", "windows"] or sys.platform.lower() in ["windows"]
        ) and self.smart_cleanup

        new_name = ""
        new_basename = ""
        for component in self.template.split(os.sep):
            if replace_colons:
                # colons get special treatment
                component = component.replace(": ", " - ")
                component = component.replace(":", "-")

            new_basename = sanitize_filename(
                fmt.vformat(component, args=[], kwargs=values), platform=self.platform
            ).strip()
            new_name = os.path.join(new_name, new_basename)

        new_name += ext
        new_basename += ext

        if self.move:
            return new_name.strip()
        return new_basename.strip()