7037877a77
Strict renaming removes all reserved names and characters regardless of operating system; without strict mode, only those reserved on the current operating system are removed. Add more edge cases to smart cleanup. Add more tests for file renaming.
299 lines
12 KiB
Python
299 lines
12 KiB
Python
"""Functions for renaming files based on metadata"""
|
|
|
|
# Copyright 2012-2014 Anthony Beville
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import calendar
|
|
import datetime
|
|
import logging
|
|
import os
|
|
import re
|
|
import string
|
|
import sys
|
|
|
|
from pathvalidate import sanitize_filename
|
|
|
|
from comicapi.genericmetadata import GenericMetadata
|
|
from comicapi.issuestring import IssueString
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class FileRenamer:
    """Build a file name from comic metadata using a ``%token%`` template.

    Each supported ``%token%`` in the template is replaced with the matching
    attribute of the metadata object. With ``smart_cleanup`` enabled, tokens
    whose value is ``None`` are removed together with the text glued to them,
    and leftover empty brackets, repeated dashes and repeated spaces are
    tidied up afterwards.
    """

    def __init__(self, metadata):
        """Store *metadata* and set the default template and options."""
        self.template = "%series% v%volume% #%issue% (of %issuecount%) (%year%)"
        self.smart_cleanup = True
        self.issue_zero_padding = 3
        self.metadata = metadata

    def set_metadata(self, metadata: GenericMetadata):
        """Replace the metadata object used by :meth:`determine_name`."""
        self.metadata = metadata

    def set_issue_zero_padding(self, count):
        """Set the ``pad`` value handed to ``IssueString.as_string``."""
        self.issue_zero_padding = count

    def set_smart_cleanup(self, on):
        """Enable or disable removal of text surrounding empty tokens."""
        self.smart_cleanup = on

    def set_template(self, template: str):
        """Set the ``%token%`` template used by :meth:`determine_name`."""
        self.template = template

    def replace_token(self, text, value, token):
        """Return *text* with *token* substituted by *value*.

        Args:
            text: Template text, possibly already partly substituted.
            value: Replacement value; converted with ``str``. ``None`` means
                the token has no value.
            token: The literal token to look for, e.g. ``"%volume%"``.

        Returns:
            The substituted text. When *value* is ``None``:

            * with smart cleanup, every whitespace-separated word containing
              the token is dropped (so ``"v%volume%"`` disappears entirely)
              and the remaining words are re-joined with single spaces; for
              ``%issuecount%`` the word just before it is dropped as well
              unless that word is itself a bare ``%token%``
              (handles ``"(of %issuecount%)"``);
            * without smart cleanup, only the token text itself is removed.
        """

        def is_token(word):
            return word[0] == "%" and word[-1:] == "%"

        if value is not None:
            return text.replace(token, str(value))

        if not self.smart_cleanup:
            return text.replace(token, "")

        # TODO: this could misbehave if several tokens are glued together in one word
        words = text.split()
        dropped = set()
        for idx, word in enumerate(words):
            if token not in word:
                continue
            dropped.add(idx)
            # idx > 0: the first word has no predecessor; without the guard
            # idx - 1 would wrap around and remove the *last* word instead.
            if token == "%issuecount%" and idx > 0 and not is_token(words[idx - 1]):
                dropped.add(idx - 1)

        return " ".join(word for idx, word in enumerate(words) if idx not in dropped)

    def determine_name(self, ext):
        """Render the template with the current metadata and append *ext*.

        Args:
            ext: Text appended verbatim to the rendered name, before the
                final character substitutions.

        Returns:
            The generated name. After *ext* is appended, ``/`` becomes ``-``,
            colons are turned into dashes and ``?`` is removed.
        """
        md = self.metadata

        issue_str = None
        if md.issue is not None:
            issue_str = IssueString(md.issue).as_string(pad=self.issue_zero_padding)

        # %month_name% only gets a value for a month number between 1 and 12
        month_name = None
        if md.month is not None:
            if (isinstance(md.month, str) and md.month.isdigit()) or isinstance(md.month, int):
                if int(md.month) in range(1, 13):
                    dt = datetime.datetime(1970, int(md.month), 1, 0, 0)
                    month_name = dt.strftime("%B")

        # applied strictly in this order
        replacements = [
            (md.series, "%series%"),
            (md.volume, "%volume%"),
            (issue_str, "%issue%"),
            (md.issue_count, "%issuecount%"),
            (md.year, "%year%"),
            (md.publisher, "%publisher%"),
            (md.title, "%title%"),
            (md.month, "%month%"),
            (month_name, "%month_name%"),
            (md.genre, "%genre%"),
            (md.language, "%language_code%"),
            (md.critical_rating, "%criticalrating%"),
            (md.alternate_series, "%alternateseries%"),
            (md.alternate_number, "%alternatenumber%"),
            (md.alternate_count, "%alternatecount%"),
            (md.imprint, "%imprint%"),
            (md.format, "%format%"),
            (md.maturity_rating, "%maturityrating%"),
            (md.story_arc, "%storyarc%"),
            (md.series_group, "%seriesgroup%"),
            (md.scan_info, "%scaninfo%"),
        ]

        new_name = self.template
        for value, token in replacements:
            new_name = self.replace_token(new_name, value, token)

        if self.smart_cleanup:
            # remove empty braces, brackets, parentheses
            new_name = re.sub(r"\(\s*[-:]*\s*\)", "", new_name)
            new_name = re.sub(r"\[\s*[-:]*\s*]", "", new_name)
            new_name = re.sub(r"{\s*[-:]*\s*}", "", new_name)

            # remove duplicate spaces
            new_name = " ".join(new_name.split())

            # collapse runs of dashes/underscores and repeated " -" / " --"
            new_name = re.sub(r"[-_]{2,}\s+", "-- ", new_name)
            new_name = re.sub(r"(\s--)+", " --", new_name)
            new_name = re.sub(r"(\s-)+", " -", new_name)

            # remove dash or double dash at end of line
            new_name = re.sub(r"[-]{1,2}\s*$", "", new_name)

            # remove duplicate spaces (again!)
            new_name = " ".join(new_name.split())

        new_name += ext

        # some tweaks to keep various filesystems happy
        new_name = new_name.replace("/", "-")
        new_name = new_name.replace(" :", " -")
        new_name = new_name.replace(": ", " - ")
        new_name = new_name.replace(":", "-")
        new_name = new_name.replace("?", "")

        return new_name
|
|
|
|
|
|
class MetadataFormatter(string.Formatter):
    """``string.Formatter`` variant used to render file-name templates.

    Differences from the base formatter, all visible in this class:

    * field names are lower-cased before lookup;
    * ``None`` and ``""`` format to an empty string;
    * with ``smart_cleanup`` enabled, whitespace in literals and values is
      collapsed, every formatted value is passed through
      ``sanitize_filename``, and an empty field also removes the literal text
      directly in front of it plus any leading ``-_)}]#`` characters of the
      literal that follows.
    """

    def __init__(self, smart_cleanup=False, platform="auto"):
        """Store the cleanup flag and the ``platform`` given to ``sanitize_filename``."""
        super().__init__()
        self.smart_cleanup = smart_cleanup
        self.platform = platform

    def format_field(self, value, format_spec):
        """Format *value*, returning ``""`` for ``None`` or an empty string."""
        if value is None or value == "":
            return ""
        return super().format_field(value, format_spec)

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth, auto_arg_index=0):
        """Render *format_string*; returns ``(text, auto_arg_index)``.

        Mirrors the signature and return shape that the base class's
        ``vformat`` relies on.

        Raises:
            ValueError: when the recursion depth is exhausted or when manual
                and automatic field numbering are mixed.
        """
        if recursion_depth < 0:
            raise ValueError("Max string recursion exceeded")
        result = []
        lstrip = False
        for literal_text, field_name, format_spec, conversion in self.parse(format_string):
            # Tracks whether this iteration pushed a literal, so an empty field
            # only ever removes its own prefix and never a previous field's value.
            literal_appended = False

            # output the literal text
            if literal_text:
                if lstrip:
                    # previous field was empty: drop closing punctuation that belonged to it
                    literal_text = literal_text.lstrip("-_)}]#")
                if self.smart_cleanup:
                    # slices instead of [0] / [-1]: the lstrip above can leave an
                    # empty string, which must not raise IndexError
                    lspace = literal_text[:1].isspace()
                    rspace = literal_text[-1:].isspace()
                    literal_text = " ".join(literal_text.split())
                    if literal_text == "":
                        literal_text = " "
                    else:
                        if lspace:
                            literal_text = " " + literal_text
                        if rspace:
                            literal_text += " "
                result.append(literal_text)
                literal_appended = True

            lstrip = False
            # if there's a field, output it
            if field_name is not None:
                field_name = field_name.lower()
                # this is some markup, find the object and do the formatting

                # handle arg indexing when empty field_names are given.
                if field_name == "":
                    if auto_arg_index is False:
                        raise ValueError("cannot switch from manual field specification to automatic field numbering")
                    field_name = str(auto_arg_index)
                    auto_arg_index += 1
                elif field_name.isdigit():
                    if auto_arg_index:
                        raise ValueError("cannot switch from manual field specification to automatic field numbering")
                    # disable auto arg incrementing, if it gets used later on, then an exception will be raised
                    auto_arg_index = False

                # given the field_name, find the object it references
                # and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec, auto_arg_index = self._vformat(
                    format_spec, args, kwargs, used_args, recursion_depth - 1, auto_arg_index=auto_arg_index
                )

                # format the object and append to the result
                fmt_obj = self.format_field(obj, format_spec)
                if fmt_obj == "" and len(result) > 0 and self.smart_cleanup:
                    lstrip = True
                    if literal_appended:
                        # remove the prefix literal of this empty field (e.g. " v" or " (")
                        result.pop()
                if self.smart_cleanup:
                    fmt_obj = " ".join(fmt_obj.split())
                    fmt_obj = sanitize_filename(fmt_obj, platform=self.platform)
                result.append(fmt_obj)

        return "".join(result), auto_arg_index
|
|
|
|
|
|
class FileRenamer2:
    """Build a file name (optionally with folders) from a ``{field}`` template.

    The template is split on ``os.sep``; each path component is rendered with
    ``MetadataFormatter`` and passed through ``sanitize_filename``.
    """

    def __init__(self, metadata, platform="auto"):
        """Store *metadata* and *platform* and set the default template and options.

        ``platform`` is forwarded to ``MetadataFormatter`` and
        ``sanitize_filename``.
        """
        self.template = "{publisher}/{series}/{series} v{volume} #{issue} (of {issue_count}) ({year})"
        self.smart_cleanup = True
        self.issue_zero_padding = 3
        self.metadata = metadata
        self.move = False
        self.platform = platform

    def set_metadata(self, metadata: GenericMetadata):
        """Replace the metadata object used by :meth:`determine_name`."""
        self.metadata = metadata

    def set_issue_zero_padding(self, count):
        """Set the ``pad`` value handed to ``IssueString.as_string``."""
        self.issue_zero_padding = count

    def set_smart_cleanup(self, on):
        """Enable or disable smart cleanup in the formatter."""
        self.smart_cleanup = on

    def set_template(self, template: str):
        """Set the ``{field}`` template used by :meth:`determine_name`."""
        self.template = template

    def determine_name(self, ext):
        """Render the template with the current metadata.

        Args:
            ext: Text appended verbatim to the rendered name.

        Returns:
            The joined path of all rendered components plus *ext* when
            ``self.move`` is true, otherwise only the last component plus
            *ext*. The result is stripped of surrounding whitespace.

        The metadata object itself is left unmodified: the padded issue, the
        per-role credits and the month names are written to a copy of its
        attribute dictionary.
        """

        class Default(dict):
            # unknown field names are rendered back as "{name}" instead of raising KeyError
            def __missing__(self, key):
                return "{" + key + "}"

        md = self.metadata

        # Work on a copy: vars(md) is the object's own __dict__, so writing the
        # extra keys into it would add attributes to the caller's metadata.
        md_dict = dict(vars(md))

        # padding for issue
        md_dict["issue"] = IssueString(md.issue).as_string(pad=self.issue_zero_padding)

        for role in ["writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor"]:
            md_dict[role] = md.get_primary_credit(role)

        month = md.month
        if (isinstance(month, int) or isinstance(month, str) and month.isdigit()) and 0 < int(month) < 13:
            md_dict["month_name"] = calendar.month_name[int(month)]
            md_dict["month_abbr"] = calendar.month_abbr[int(month)]
        else:
            md_dict["month_name"] = ""
            md_dict["month_abbr"] = ""

        # sys.platform is "win32" on Windows, never "windows"
        windows_rules = self.platform.lower() in ["universal", "windows"] or sys.platform.startswith("win")

        fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform)
        values = Default(md_dict)

        new_name = ""
        for component in self.template.split(os.sep):
            if windows_rules and self.smart_cleanup:
                # colons get special treatment
                # NOTE(review): this rewrites colons in the template text itself,
                # which includes the ":" that introduces a format spec such as
                # "{issue:>3}" - confirm whether that is intended.
                component = component.replace(": ", " - ")
                component = component.replace(":", "-")

            new_basename = sanitize_filename(
                fmt.vformat(component, args=[], kwargs=values), platform=self.platform
            ).strip()
            new_name = os.path.join(new_name, new_basename)

        new_name += ext
        new_basename += ext

        if self.move:
            return new_name.strip()
        return new_basename.strip()
|