comictagger/comictaggerlib/filerenamer.py
Timmy Welch 7037877a77 Add a strict mode to file renaming
Strict renaming removes all reserved names and characters regardless
 of operating system, with out strict mode only for the current
 Operating System
Add more edge cases to smart cleanup
Add more tests for file renaming
2022-04-18 22:55:13 -07:00

299 lines
12 KiB
Python

"""Functions for renaming files based on metadata"""
# Copyright 2012-2014 Anthony Beville
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import calendar
import datetime
import logging
import os
import re
import string
import sys
from pathvalidate import sanitize_filename
from comicapi.genericmetadata import GenericMetadata
from comicapi.issuestring import IssueString
logger = logging.getLogger(__name__)
class FileRenamer:
def __init__(self, metadata):
self.template = "%series% v%volume% #%issue% (of %issuecount%) (%year%)"
self.smart_cleanup = True
self.issue_zero_padding = 3
self.metadata = metadata
def set_metadata(self, metadata: GenericMetadata):
self.metadata = metadata
def set_issue_zero_padding(self, count):
self.issue_zero_padding = count
def set_smart_cleanup(self, on):
self.smart_cleanup = on
def set_template(self, template: str):
self.template = template
def replace_token(self, text, value, token):
# helper func
def is_token(word):
return word[0] == "%" and word[-1:] == "%"
if value is not None:
return text.replace(token, str(value))
if self.smart_cleanup:
# smart cleanup means we want to remove anything appended to token if it's empty (e.g "#%issue%" or "v%volume%")
# (TODO: This could fail if there is more than one token appended together, I guess)
text_list = text.split()
# special case for issuecount, remove preceding non-token word,
# as in "...(of %issuecount%)..."
if token == "%issuecount%":
for idx, word in enumerate(text_list):
if token in word and not is_token(text_list[idx - 1]):
text_list[idx - 1] = ""
text_list = [x for x in text_list if token not in x]
return " ".join(text_list)
return text.replace(token, "")
def determine_name(self, ext):
md = self.metadata
new_name = self.template
new_name = self.replace_token(new_name, md.series, "%series%")
new_name = self.replace_token(new_name, md.volume, "%volume%")
if md.issue is not None:
issue_str = IssueString(md.issue).as_string(pad=self.issue_zero_padding)
else:
issue_str = None
new_name = self.replace_token(new_name, issue_str, "%issue%")
new_name = self.replace_token(new_name, md.issue_count, "%issuecount%")
new_name = self.replace_token(new_name, md.year, "%year%")
new_name = self.replace_token(new_name, md.publisher, "%publisher%")
new_name = self.replace_token(new_name, md.title, "%title%")
new_name = self.replace_token(new_name, md.month, "%month%")
month_name = None
if md.month is not None:
if (isinstance(md.month, str) and md.month.isdigit()) or isinstance(md.month, int):
if int(md.month) in range(1, 13):
dt = datetime.datetime(1970, int(md.month), 1, 0, 0)
month_name = dt.strftime("%B")
new_name = self.replace_token(new_name, month_name, "%month_name%")
new_name = self.replace_token(new_name, md.genre, "%genre%")
new_name = self.replace_token(new_name, md.language, "%language_code%")
new_name = self.replace_token(new_name, md.critical_rating, "%criticalrating%")
new_name = self.replace_token(new_name, md.alternate_series, "%alternateseries%")
new_name = self.replace_token(new_name, md.alternate_number, "%alternatenumber%")
new_name = self.replace_token(new_name, md.alternate_count, "%alternatecount%")
new_name = self.replace_token(new_name, md.imprint, "%imprint%")
new_name = self.replace_token(new_name, md.format, "%format%")
new_name = self.replace_token(new_name, md.maturity_rating, "%maturityrating%")
new_name = self.replace_token(new_name, md.story_arc, "%storyarc%")
new_name = self.replace_token(new_name, md.series_group, "%seriesgroup%")
new_name = self.replace_token(new_name, md.scan_info, "%scaninfo%")
if self.smart_cleanup:
# remove empty braces,brackets, parentheses
new_name = re.sub(r"\(\s*[-:]*\s*\)", "", new_name)
new_name = re.sub(r"\[\s*[-:]*\s*]", "", new_name)
new_name = re.sub(r"{\s*[-:]*\s*}", "", new_name)
# remove duplicate spaces
new_name = " ".join(new_name.split())
# remove remove duplicate -, _,
new_name = re.sub(r"[-_]{2,}\s+", "-- ", new_name)
new_name = re.sub(r"(\s--)+", " --", new_name)
new_name = re.sub(r"(\s-)+", " -", new_name)
# remove dash or double dash at end of line
new_name = re.sub(r"[-]{1,2}\s*$", "", new_name)
# remove duplicate spaces (again!)
new_name = " ".join(new_name.split())
new_name += ext
# some tweaks to keep various filesystems happy
new_name = new_name.replace("/", "-")
new_name = new_name.replace(" :", " -")
new_name = new_name.replace(": ", " - ")
new_name = new_name.replace(":", "-")
new_name = new_name.replace("?", "")
return new_name
class MetadataFormatter(string.Formatter):
def __init__(self, smart_cleanup=False, platform="auto"):
super().__init__()
self.smart_cleanup = smart_cleanup
self.platform = platform
def format_field(self, value, format_spec):
if value is None or value == "":
return ""
return super().format_field(value, format_spec)
def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, auto_arg_index=0):
if recursion_depth < 0:
raise ValueError("Max string recursion exceeded")
result = []
lstrip = False
for literal_text, field_name, format_spec, conversion in self.parse(format_string):
# output the literal text
if literal_text:
if lstrip:
literal_text = literal_text.lstrip("-_)}]#")
if self.smart_cleanup:
lspace = literal_text[0].isspace()
rspace = literal_text[-1].isspace()
literal_text = " ".join(literal_text.split())
if literal_text == "":
literal_text = " "
else:
if lspace:
literal_text = " " + literal_text
if rspace:
literal_text += " "
result.append(literal_text)
lstrip = False
# if there's a field, output it
if field_name is not None:
field_name = field_name.lower()
# this is some markup, find the object and do the formatting
# handle arg indexing when empty field_names are given.
if field_name == "":
if auto_arg_index is False:
raise ValueError("cannot switch from manual field specification to automatic field numbering")
field_name = str(auto_arg_index)
auto_arg_index += 1
elif field_name.isdigit():
if auto_arg_index:
raise ValueError("cannot switch from manual field specification to automatic field numbering")
# disable auto arg incrementing, if it gets used later on, then an exception will be raised
auto_arg_index = False
# given the field_name, find the object it references
# and the argument it came from
obj, arg_used = self.get_field(field_name, args, kwargs)
used_args.add(arg_used)
# do any conversion on the resulting object
obj = self.convert_field(obj, conversion)
# expand the format spec, if needed
format_spec, auto_arg_index = self._vformat(
format_spec, args, kwargs, used_args, recursion_depth - 1, auto_arg_index=auto_arg_index
)
# format the object and append to the result
fmt_obj = self.format_field(obj, format_spec)
if fmt_obj == "" and len(result) > 0 and self.smart_cleanup:
lstrip = True
result.pop()
if self.smart_cleanup:
fmt_obj = " ".join(fmt_obj.split())
fmt_obj = sanitize_filename(fmt_obj, platform=self.platform)
result.append(fmt_obj)
return "".join(result), auto_arg_index
class FileRenamer2:
def __init__(self, metadata, platform="auto"):
self.template = "{publisher}/{series}/{series} v{volume} #{issue} (of {issue_count}) ({year})"
self.smart_cleanup = True
self.issue_zero_padding = 3
self.metadata = metadata
self.move = False
self.platform = platform
def set_metadata(self, metadata: GenericMetadata):
self.metadata = metadata
def set_issue_zero_padding(self, count):
self.issue_zero_padding = count
def set_smart_cleanup(self, on):
self.smart_cleanup = on
def set_template(self, template: str):
self.template = template
def determine_name(self, ext):
class Default(dict):
def __missing__(self, key):
return "{" + key + "}"
md = self.metadata
# padding for issue
md.issue = IssueString(md.issue).as_string(pad=self.issue_zero_padding)
template = self.template
path_components = template.split(os.sep)
new_name = ""
fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform)
md_dict = vars(md)
for role in ["writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor"]:
md_dict[role] = md.get_primary_credit(role)
if (isinstance(md.month, int) or isinstance(md.month, str) and md.month.isdigit()) and 0 < int(md.month) < 13:
md_dict["month_name"] = calendar.month_name[int(md.month)]
md_dict["month_abbr"] = calendar.month_abbr[int(md.month)]
else:
print(md.month)
md_dict["month_name"] = ""
md_dict["month_abbr"] = ""
for Component in path_components:
if (
self.platform.lower() in ["universal", "windows"] or sys.platform.lower() in ["windows"]
) and self.smart_cleanup:
# colons get special treatment
Component = Component.replace(": ", " - ")
Component = Component.replace(":", "-")
new_basename = sanitize_filename(
fmt.vformat(Component, args=[], kwargs=Default(md_dict)), platform=self.platform
).strip()
new_name = os.path.join(new_name, new_basename)
new_name += ext
new_basename += ext
# remove padding
md.issue = IssueString(md.issue).as_string()
if self.move:
return new_name.strip()
else:
return new_basename.strip()