Files
comictagger/comictaggerlib/filerenamer.py
Timmy Welch 9137cee3cd Fix edgecases with filerenaming
Add warnings in gui for sub-optimal rename templates
Fix using lists when renaming
Explicitly disallow fields starting with `_`
Mark credit roles with prefix `credit_` so that values will properly
  return None for unknown roles
Add `credit_item_` prefix to allow direct usage of the credit class

Make filename tests more readable
2025-08-10 19:42:40 -07:00

417 lines
15 KiB
Python

"""Functions for renaming files based on metadata"""
#
# Copyright 2012-2014 ComicTagger Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import calendar
import datetime
import logging
import os
import pathlib
import string
import sys
import unicodedata
from collections.abc import Collection, Iterable, Mapping, Sequence, Sized
from typing import Any, cast
from pathvalidate import Platform, normalize_platform, sanitize_filename
from comicapi.comicarchive import ComicArchive
from comicapi.genericmetadata import GenericMetadata
from comicapi.issuestring import IssueString
from comictaggerlib.defaults import DEFAULT_REPLACEMENTS, Replacement, Replacements
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Credit roles that are exposed to rename templates under their bare role name in addition to the
# `credit_<role>` / `credit_item_<role>` keys. Using the bare name in a template makes the formatter
# record a "Please use {credit_<role>} instead of {<role>}" warning.
STANDARD_CREDIT_ROLES = ("writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor", "translator")
def get_rename_dir(ca: ComicArchive, rename_dir: str | pathlib.Path | None) -> pathlib.Path:
    """Return the absolute folder that a renamed archive should be placed in.

    Args:
        ca: The archive being renamed; only ``ca.path`` is read.
        rename_dir: Optional destination folder. A string is stripped of
            surrounding whitespace before being turned into a path.

    Returns:
        ``rename_dir`` made absolute when it is given, otherwise the absolute
        folder that currently contains the archive.
    """
    # Evaluated unconditionally so a broken ``ca`` fails the same way whether or not a target is given.
    current_dir = ca.path.parent.absolute()
    if rename_dir is None:
        return current_dir

    target = pathlib.Path(rename_dir.strip()) if isinstance(rename_dir, str) else rename_dir
    return target.absolute()
def _isnamedtupleinstance(x: Any) -> bool:  # pragma: no cover
    """Report whether ``x`` looks like an instance of a ``namedtuple`` class.

    The check is structural: the type of ``x`` must derive directly (and only)
    from ``tuple`` and carry a ``_fields`` tuple made up solely of strings.
    """
    cls = type(x)
    bases = cls.__bases__
    if len(bases) != 1 or bases[0] != tuple:
        return False

    fields = getattr(cls, "_fields", None)
    return isinstance(fields, tuple) and all(isinstance(name, str) for name in fields)
class MetadataFormatter(string.Formatter):
    """``string.Formatter`` variant used to expand rename templates.

    Differences from the stock formatter that are implemented in this class:

    * ``None`` and ``""`` always format to an empty string.
    * Field names are case-folded, and a field may carry a replacement suffix:
      ``{field-text}`` (use ``text`` when the field is empty) or
      ``{field+text}`` (use ``text`` when the field is set).
    * Extra conversions: ``!u`` ``!l`` ``!c`` ``!S`` ``!t`` for text and, for
      iterables, ``!C`` (item count), ``!<n>`` (n-th item), ``!R`` (reverse
      sorted join) and ``!j`` (sorted join).
    * Fields that cannot be resolved are written back to the output as
      ``{field}`` instead of raising.
    * With ``smart_cleanup`` enabled, the configured replacements are applied
      and whitespace/punctuation left dangling next to empty fields is removed.

    Attributes:
        smart_cleanup: Whether the clean-up pass described above is active.
        platform: Result of ``normalize_platform(platform)``.
        replacements: Object providing ``literal_text`` and ``format_value``
            lists of ``(find, replace, strict_only)`` entries.
        warnings: Template warnings collected while formatting. The list is
            only ever appended to by this class; it is never cleared here.
    """

    def __init__(
        self, smart_cleanup: bool = False, platform: str = "auto", replacements: Replacements = DEFAULT_REPLACEMENTS
    ) -> None:
        super().__init__()
        self.smart_cleanup = smart_cleanup
        self.platform = normalize_platform(platform)
        self.replacements = replacements
        self.warnings: list[str] = []

    def format_field(self, value: Any, format_spec: str) -> str:
        """Format ``value`` with ``format_spec``; ``None`` and ``""`` become ``""``."""
        if value is None or value == "":
            return ""
        return cast(str, super().format_field(value, format_spec))

    def convert_field(self, value: Any, conversion: str | None) -> str:
        """Apply a ``!x`` conversion to ``value``.

        ``None`` converts to ``""``. Iterables other than ``str`` and ``tuple``
        are handled by :meth:`_convert_iterable`. For everything else the
        text conversions ``u``/``l``/``c``/``S``/``t`` are applied to
        ``str(value)``, a numeric conversion yields ``""`` and any other
        conversion is delegated to ``string.Formatter``.
        """
        if value is None:
            return ""

        if isinstance(value, Iterable) and not isinstance(value, (str, tuple)):
            return cast(str, self._convert_iterable(value, conversion))

        if not conversion:
            return cast(str, super().convert_field(value, conversion))

        text_conversions = {
            "u": str.upper,
            "l": str.casefold,
            "c": str.capitalize,
            "S": str.swapcase,
            "t": str.title,
        }
        if conversion in text_conversions:
            return text_conversions[conversion](str(value))
        if conversion.isdecimal():
            # Indexing only makes sense for the iterable case handled above.
            return ""
        return cast(str, super().convert_field(value, conversion))

    def _convert_iterable(self, value: Iterable[Any], conversion: str | None) -> Any:
        """Apply ``conversion`` to an iterable value.

        * ``C``: number of items as a string (``""`` if the value has no length).
        * decimal ``n``: the n-th item of the sorted value, falling back to
          iteration order when the items cannot be sorted; ``""`` when the
          value is not a collection or ``n`` is out of range.
        * anything else: the non-``None`` items, sorted when possible
          (reversed for ``R``), each converted individually and joined with
          ``", "``. ``R`` and ``j`` convert the individual items with ``s``.
        """
        if conversion == "C":
            return str(len(value)) if isinstance(value, Sized) else ""

        if conversion and conversion.isdecimal():
            if not isinstance(value, Collection):
                return ""
            index = int(conversion)
            if index >= len(value):
                return ""
            try:
                return sorted(value)[index]
            except Exception:
                # Best effort: items that cannot be ordered are indexed in iteration order instead.
                pass
            return list(value)[index]

        reverse = conversion == "R"
        if conversion in ("R", "j"):
            conversion = "s"

        # Materialise first: sorting a generator over ``value`` would exhaust a one-shot
        # iterator, so a failed sort would leave nothing to join afterwards.
        items = [item for item in value if item is not None]
        try:
            items = sorted(items, reverse=reverse)
        except Exception:
            # Best effort: keep the original order when the items cannot be compared.
            pass
        return ", ".join(str(self.convert_field(item, conversion)) for item in items)

    def handle_replacements(self, string: str, replacements: list[Replacement]) -> str:
        """Apply each ``(find, replace, strict_only)`` entry to ``string``.

        Entries flagged ``strict_only`` are applied only when :meth:`is_strict` is true.
        """
        strict = self.is_strict()
        for find, replace, strict_only in replacements:
            if strict or not strict_only:
                string = string.replace(find, replace)
        return string

    def __get_object(self, original: str, field_name: str, args: Sequence[Any], kwargs: Mapping[str, Any]) -> Any:
        """Resolve the replacement part of ``{field-replacement}`` / ``{field+replacement}``.

        A leading ``_`` forces literal text (the underscore is dropped). A
        replacement that is not a key of ``kwargs``, or that names the field
        it is replacing (``original``), is literal text as well. Otherwise the
        replacement is looked up as a field; if that lookup fails its name is
        used as literal text.
        """
        if field_name.startswith("_"):
            return field_name[1:]
        if field_name not in kwargs or field_name == original:
            return field_name
        try:
            obj, _ = self.get_field(field_name, args, kwargs)
        except Exception:
            obj = field_name
        return obj

    def none_replacement(
        self,
        value: Any,
        field_name: str,
        replacement: str,
        r: str,
        args: Sequence[Any],
        kwargs: Mapping[str, Any],
    ) -> Any:
        """Substitute ``replacement`` for ``value`` according to the operator ``r``.

        Args:
            value: The resolved field value.
            field_name: Name of the field ``value`` came from.
            replacement: Text (or field name) following the operator.
            r: ``"-"``, ``"+"`` or ``""`` as returned by :meth:`split_replacement`.
            args: Positional arguments of the running ``vformat`` call.
            kwargs: Keyword arguments of the running ``vformat`` call.

        Returns:
            The resolved replacement when ``value`` is ``""`` (for any ``r``),
            when ``r`` is ``"-"`` and ``value`` is ``None``, or when ``r`` is
            ``"+"`` and ``value`` is not ``None``; otherwise ``value`` itself.
        """
        # Parenthesised to spell out how ``and``/``or`` bind here: an empty string
        # takes this branch regardless of the operator.
        if (r == "-" and value is None) or value == "":
            return self.__get_object(field_name, replacement, args, kwargs)
        if r == "+" and value is not None:
            return self.__get_object(field_name, replacement, args, kwargs)
        return value

    def split_replacement(self, field_name: str) -> tuple[str, str, str]:
        """Split ``field_name`` at its first ``+`` or ``-``.

        Returns:
            ``(name, operator, replacement)``; operator and replacement are
            empty strings when the field contains neither character.
        """
        for char in field_name:
            if char in "+-":
                return field_name.partition(char)
        return field_name, "", ""

    def is_strict(self) -> bool:
        """Return True when the target platform is ``UNIVERSAL`` or ``WINDOWS``."""
        return self.platform in (Platform.UNIVERSAL, Platform.WINDOWS)

    def _re_format(self, field_name: str, format_spec: str | None, conversion: str | None) -> str:
        """Rebuild the ``{field!conversion:spec}`` placeholder text for an unresolved field."""
        parts = ["{", field_name]
        if conversion:
            parts.append("!" + conversion)
        if format_spec:
            parts.append(":" + format_spec)
        parts.append("}")
        return "".join(parts)

    def _vformat(
        self,
        format_string: str,
        args: Sequence[Any],
        kwargs: Mapping[str, Any],
        used_args: set[Any],
        recursion_depth: int,
        auto_arg_index: int = 0,
    ) -> tuple[str, int]:
        """Expand ``format_string``; replaces ``string.Formatter._vformat``.

        Returns:
            The formatted text and a constant ``False`` in the slot of the
            automatic argument index.

        Raises:
            ValueError: When the recursion limit is exceeded or a field name
                consists only of digits.
        """
        if recursion_depth < 0:
            raise ValueError("Max string recursion exceeded")
        result = []
        # Set when an empty field followed an opening bracket; the next literal then
        # loses its leading closing bracket / separator characters.
        lstrip = False
        for literal_text, field_name, format_spec, conversion in self.parse(format_string):
            # output the literal text
            if literal_text:
                if lstrip:
                    literal_text = literal_text.lstrip("-_)}]#")
                if self.smart_cleanup:
                    literal_text = self.handle_replacements(literal_text, self.replacements.literal_text)
                    # Collapse whitespace runs while remembering whether the literal
                    # started/ended with whitespace so one space is kept on that side.
                    lspace = literal_text[0].isspace() if literal_text else False
                    rspace = literal_text[-1].isspace() if literal_text else False
                    literal_text = " ".join(literal_text.split())
                    if literal_text == "":
                        literal_text = " "
                    else:
                        if lspace:
                            literal_text = " " + literal_text
                        if rspace:
                            literal_text += " "
                result.append(literal_text)
                lstrip = False

            # if there's not a field, skip to the next item
            if not field_name:
                continue

            field_name, r, replacement = self.split_replacement(field_name)
            field_name = field_name.casefold()

            # Needs to happen before self.get_field. Otherwise errors will swallow this warning
            if field_name.endswith("]"):
                self.warnings.append(
                    "You appear to be trying to get an item from a list instead of {story_arc[2]} use {story_arc!2}"
                )

            # Disallow index based fields
            if field_name.isdigit():
                raise ValueError("cannot use a number as a field name")

            # given the field_name, find the object it references
            # and the argument it came from
            try:
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)
                if arg_used in STANDARD_CREDIT_ROLES:
                    self.warnings.append(f"Please use {{credit_{arg_used}}} instead of {{{arg_used}}}")
                # this is an error specifically so that missing fields show an obvious error.
                if arg_used not in kwargs:
                    result.append(self._re_format(f"{field_name}{r}{replacement}", format_spec, conversion))
                    continue
            except Exception:
                # Any lookup failure echoes the placeholder back into the output.
                result.append(self._re_format(f"{field_name}{r}{replacement}", format_spec, conversion))
                continue

            obj = self.none_replacement(obj, field_name, replacement, r, args, kwargs)

            # do any conversion on the resulting object
            obj = self.convert_field(obj, conversion)
            if r == "-":
                # The conversion may have produced an empty string (e.g. an empty list).
                obj = self.none_replacement(obj, field_name, replacement, r, args, kwargs)

            # expand the format spec, if needed
            format_spec, _ = self._vformat(
                cast(str, format_spec), args, kwargs, used_args, recursion_depth - 1, auto_arg_index=False
            )

            # format the object and append to the result
            fmt_obj = self.format_field(obj, format_spec)
            if fmt_obj == "" and result and self.smart_cleanup:
                if self.str_contains(result[-1], "({["):
                    lstrip = True  # trailing braces are handled above
                if result[-1].startswith(" "):
                    result[-1] = ""  # handles `v{volume}` where volume is None
                result[-1] = self.rstrip(result[-1])  # cleans up opening punctuation, spaces, dashes
            if self.smart_cleanup:
                # colons and slashes get special treatment
                fmt_obj = self.handle_replacements(fmt_obj, self.replacements.format_value)
                fmt_obj = self.strip_internal(fmt_obj)
            result.append(fmt_obj)

        return "".join(result), False

    def str_contains(self, chars: str, string: str) -> bool:
        """Return True when any character of ``chars`` occurs in ``string``."""
        return any(char in string for char in chars)

    def rstrip(self, string: str) -> str:
        """Strip trailing characters whose Unicode category is Po, Ps, Pd, Zl, Zp or Zs."""
        strippable = ("Po", "Ps", "Pd", "Zl", "Zp", "Zs")
        end = len(string)
        while end and unicodedata.category(string[end - 1]) in strippable:
            end -= 1
        return string[:end]

    def strip_internal(self, string: str) -> str:
        """Collapse every run of whitespace to its last character."""
        last = len(string) - 1
        return "".join(
            char
            for index, char in enumerate(string)
            # a whitespace character is dropped when the character after it is whitespace too
            if not (char.isspace() and index < last and string[index + 1].isspace())
        )
class FileRenamer:
    """Compute the new file name (and optional folder path) of a comic from its metadata.

    Attributes:
        template: Rename template; path components are split with Windows path rules.
        smart_cleanup: Passed to :class:`MetadataFormatter`.
        issue_zero_padding: Padding used when rendering the issue number.
        metadata: Metadata the template is filled from.
        move: When true, the result contains the templated folders as well as the file name.
        move_only: When true, the original file name is kept and placed under the templated path.
        platform: Target platform for the formatter and ``sanitize_filename``.
        replacements: Replacement tables handed to the formatter.
        original_name: Current file name; only used with ``move_only``.
        warnings: Template warnings produced by the latest :meth:`determine_name` call.
    """

    def __init__(
        self,
        metadata: GenericMetadata | None,
        platform: str = "auto",
        replacements: Replacements = DEFAULT_REPLACEMENTS,
    ) -> None:
        self.template = "{publisher}/{series}/{series} v{volume} #{issue} (of {issue_count}) ({year})"
        self.smart_cleanup = True
        self.issue_zero_padding = 3
        self.metadata = metadata or GenericMetadata()
        self.move = False
        self.platform = platform
        self.replacements = replacements
        self.original_name = ""
        self.move_only = False
        self.warnings: list[str] = []

    def set_metadata(self, metadata: GenericMetadata, original_name: str) -> None:
        """Replace the metadata and remember the file's current name."""
        self.metadata = metadata
        self.original_name = original_name

    def set_issue_zero_padding(self, count: int) -> None:
        """Set the padding used when rendering the issue number."""
        self.issue_zero_padding = count

    def set_smart_cleanup(self, on: bool) -> None:
        """Enable or disable the formatter's smart clean-up."""
        self.smart_cleanup = on

    def set_template(self, template: str) -> None:
        """Set the rename template."""
        self.template = template

    def determine_name(self, ext: str) -> str:
        """Expand the template with the current metadata.

        ``self.warnings`` is cleared and then filled with the warnings the
        formatter produced. The metadata object itself is not modified.

        Args:
            ext: Extension appended verbatim to the result.

        Returns:
            * ``move_only``: all formatted template components joined as a
              path, followed by the original file name without its extension,
              plus ``ext``.
            * ``move``: all formatted template components joined as a path, plus ``ext``.
            * otherwise: only the last formatted component, plus ``ext``.
        """

        class Default(dict[str, Any]):
            """Template values; decides what keys missing from the metadata resolve to."""

            def __missing__(self, key: str) -> str | None:
                if key.startswith("credit_"):
                    # Stored so the formatter sees the key as present with an empty value.
                    self[key] = None
                    return None
                # Not stored: the formatter notices the key is absent and echoes the placeholder.
                return "{" + key + "}"

        self.warnings.clear()
        md = self.metadata
        template = self.template
        new_name = ""

        fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform, replacements=self.replacements)

        # ``vars(md)`` is the live attribute dict of the metadata object. Work on a shallow copy so
        # the template-only keys and the padded issue string below do not leak into ``md``.
        md_dict = dict(vars(md))
        md_dict.update(
            dict(
                month_name=None,
                month_abbr=None,
                date=None,
                genre=None,
                story_arc=None,
                series_group=None,
                web_link=None,
                character=None,
                team=None,
                location=None,
            )
        )

        md_dict["issue"] = IssueString(md.issue).as_string(pad=self.issue_zero_padding)

        month_is_number = isinstance(md.month, int) or (isinstance(md.month, str) and md.month.isdigit())
        if month_is_number and 0 < int(md.month) < 13:
            md_dict["month_name"] = calendar.month_name[int(md.month)]
            md_dict["month_abbr"] = calendar.month_abbr[int(md.month)]

        if md.year is not None and datetime.MINYEAR <= md.year <= datetime.MAXYEAR:
            try:
                md_dict["date"] = datetime.datetime(year=md.year, month=md.month or 1, day=md.day or 1)
            except (TypeError, ValueError):
                # An impossible day/month must not abort the rename; `date` simply stays empty.
                logger.warning(
                    "Ignoring invalid date in metadata: year=%r month=%r day=%r", md.year, md.month, md.day
                )

        # Singular convenience keys holding one item of the corresponding list.
        if md.genres:
            md_dict["genre"] = sorted(md.genres)[0]
        if md.story_arcs:
            md_dict["story_arc"] = md.story_arcs[0]
        if md.series_groups:
            md_dict["series_group"] = md.series_groups[0]
        if md.web_links:
            md_dict["web_link"] = md.web_links[0]
        if md.characters:
            md_dict["character"] = sorted(md.characters)[0]
        if md.teams:
            md_dict["team"] = sorted(md.teams)[0]
        if md.locations:
            md_dict["location"] = sorted(md.locations)[0]

        for role in {c.role.casefold() for c in md.credits}:
            if f"credit_{role}" in md_dict:
                continue
            credit = md.get_primary_credit(role)
            if credit is None:
                continue
            if role in STANDARD_CREDIT_ROLES:
                md_dict[role] = credit.person
            md_dict[f"credit_{role}"] = credit.person
            md_dict[f"credit_item_{role}"] = credit

        # Ensure standard credit roles are always defined
        for role in STANDARD_CREDIT_ROLES:
            if role not in md_dict:
                md_dict[role] = None
                md_dict[f"credit_{role}"] = None
                md_dict[f"credit_item_{role}"] = None

        new_basename = ""
        try:
            for component in pathlib.PureWindowsPath(template).parts:
                new_component = fmt.vformat(component, args=[], kwargs=Default(md_dict))
                new_basename = str(sanitize_filename(new_component, platform=self.platform)).strip()
                new_name = os.path.join(new_name, new_basename)
        finally:
            # The formatter accumulates warnings across components, so copy them exactly once;
            # doing it here also keeps them when formatting raises.
            self.warnings.extend(fmt.warnings)

        if self.move_only:
            new_folder = os.path.join(new_name, os.path.splitext(self.original_name)[0])
            return new_folder + ext
        if self.move:
            return new_name.strip() + ext
        return new_basename.strip() + ext