Add comicfn2dict as an alternative filename parser

This commit is contained in:
Timmy Welch 2024-03-03 21:47:31 -08:00
parent ea43eccd78
commit 5155762711
12 changed files with 171 additions and 69 deletions

View File

@ -364,7 +364,7 @@ class ComicArchive:
def metadata_from_filename(
self,
complicated_parser: bool = False,
parser: utils.Parser = utils.Parser.ORIGINAL,
remove_c2c: bool = False,
remove_fcbd: bool = False,
remove_publisher: bool = False,
@ -376,7 +376,7 @@ class ComicArchive:
filename_info = utils.parse_filename(
self.path.name,
complicated_parser=complicated_parser,
parser=parser,
remove_c2c=remove_c2c,
remove_fcbd=remove_fcbd,
remove_publisher=remove_publisher,

View File

@ -20,12 +20,16 @@ import logging
import os
import pathlib
import platform
import sys
import unicodedata
from collections import defaultdict
from collections.abc import Iterable, Mapping
from enum import Enum, auto
from shutil import which # noqa: F401
from typing import Any, TypeVar, cast
from comicfn2dict import comicfn2dict
import comicapi.data
from comicapi import filenamelexer, filenameparser
@ -37,9 +41,55 @@ try:
except ImportError:
icu_available = False
if sys.version_info < (3, 11):

    class StrEnum(str, Enum):
        """
        Enum where members are also (and must be) strings.

        Used only on Python < 3.11; newer interpreters import ``enum.StrEnum``
        in the ``else`` branch below.
        """

        def __new__(cls, *values: Any) -> Any:
            """
            Build a member whose value is ``str(*values)``.

            ``values`` are forwarded to ``str()``: the object itself and,
            optionally, an ``encoding`` and an ``errors`` argument.

            Raises:
                TypeError: if more than three values are given, if a single
                    value is not already a ``str``, or if ``encoding`` /
                    ``errors`` is not a ``str``.
            """
            if len(values) > 3:
                raise TypeError(f"too many arguments for str(): {values!r}")
            if len(values) == 1:
                # it must be a string
                if not isinstance(values[0], str):
                    raise TypeError(f"{values[0]!r} is not a string")
            if len(values) >= 2:
                # check that encoding argument is a string
                if not isinstance(values[1], str):
                    raise TypeError(f"encoding must be a string, not {values[1]!r}")
            if len(values) == 3:
                # check that errors argument is a string
                if not isinstance(values[2], str):
                    # f-string instead of `"...%r" % (values[2])`: the parenthesised
                    # value is not a tuple, so a tuple argument would be unpacked by
                    # `%` and produce the wrong error.
                    raise TypeError(f"errors must be a string, not {values[2]!r}")
            value = str(*values)
            member = str.__new__(cls, value)
            member._value_ = value
            return member

        @staticmethod
        def _generate_next_value_(name: str, start: int, count: int, last_values: Any) -> str:
            """
            Return the lower-cased version of the member name.
            """
            return name.lower()

else:
    from enum import StrEnum
logger = logging.getLogger(__name__)
class Parser(StrEnum):
ORIGINAL = auto()
COMPLICATED = auto()
COMICFN2DICT = auto()
def _custom_key(tup: Any) -> Any:
import natsort
@ -67,7 +117,7 @@ def os_sorted(lst: Iterable[T]) -> Iterable[T]:
def parse_filename(
filename: str,
complicated_parser: bool = False,
parser: Parser = Parser.ORIGINAL,
remove_c2c: bool = False,
remove_fcbd: bool = False,
remove_publisher: bool = False,
@ -99,7 +149,25 @@ def parse_filename(
filename, ext = os.path.splitext(filename)
filename = " ".join(wordninja.split(filename)) + ext
if complicated_parser:
fni = filenameparser.FilenameInfo(
alternate="",
annual=False,
archive="",
c2c=False,
fcbd=False,
format="",
issue="",
issue_count="",
publisher="",
remainder="",
series="",
title="",
volume="",
volume_count="",
year="",
)
if parser == Parser.COMPLICATED:
lex = filenamelexer.Lex(filename, allow_issue_start_with_letter)
p = filenameparser.Parse(
lex.items,
@ -108,7 +176,26 @@ def parse_filename(
remove_publisher=remove_publisher,
protofolius_issue_number_scheme=protofolius_issue_number_scheme,
)
return p.filename_info
fni = p.filename_info
elif parser == Parser.COMICFN2DICT:
fn2d = comicfn2dict(filename)
fni = filenameparser.FilenameInfo(
alternate="",
annual=False,
archive=fn2d.get("ext", ""),
c2c=False,
fcbd=False,
issue=fn2d.get("issue", ""),
issue_count=fn2d.get("issue_count", ""),
publisher=fn2d.get("publisher", ""),
remainder=fn2d.get("scan_info", ""),
series=fn2d.get("series", ""),
title=fn2d.get("title", ""),
volume=fn2d.get("volume", ""),
volume_count=fn2d.get("volume_count", ""),
year=fn2d.get("year", ""),
format=fn2d.get("original_format", ""),
)
else:
fnp = filenameparser.FileNameParser()
fnp.parse_filename(filename)
@ -129,7 +216,7 @@ def parse_filename(
year=fnp.year,
format="",
)
return fni
return fni
def combine_notes(existing_notes: str | None, new_notes: str | None, split: str) -> str:

View File

@ -233,7 +233,7 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
md = ca.read_metadata(self.config.internal__load_data_style)
if md.is_empty:
md = ca.metadata_from_filename(
self.config.Filename_Parsing__complicated_parser,
self.config.Filename_Parsing__filename_parser,
self.config.Filename_Parsing__remove_c2c,
self.config.Filename_Parsing__remove_fcbd,
self.config.Filename_Parsing__remove_publisher,

View File

@ -250,7 +250,7 @@ class CLI:
# now, overlay the parsed filename info
if self.config.Runtime_Options__parse_filename:
f_md = ca.metadata_from_filename(
self.config.Filename_Parsing__complicated_parser,
self.config.Filename_Parsing__filename_parser,
self.config.Filename_Parsing__remove_c2c,
self.config.Filename_Parsing__remove_fcbd,
self.config.Filename_Parsing__remove_publisher,

View File

@ -5,6 +5,7 @@ import uuid
import settngs
from comicapi import utils
from comictaggerlib.ctsettings.settngs_namespace import SettngsNS as ct_ns
from comictaggerlib.defaults import DEFAULT_REPLACEMENTS, Replacement, Replacements
@ -102,10 +103,12 @@ def dialog(parser: settngs.Manager) -> None:
def filename(parser: settngs.Manager) -> None:
# filename parsing settings
parser.add_setting(
"--complicated-parser",
default=False,
action=argparse.BooleanOptionalAction,
help="Enables the new parser which tries to extract more information from filenames",
"--filename-parser",
default=utils.Parser.ORIGINAL,
metavar=f"{{{','.join(utils.Parser)}}}",
type=utils.Parser,
choices=[p.value for p in utils.Parser],
help="Select the filename parser, defaults to original",
)
parser.add_setting(
"--remove-c2c",

View File

@ -5,6 +5,7 @@ import typing
import settngs
import comicapi.genericmetadata
import comicapi.utils
import comictaggerlib.ctsettings.types
import comictaggerlib.defaults
import comictaggerlib.resulttypes
@ -61,7 +62,7 @@ class SettngsNS(settngs.TypedNS):
Issue_Identifier__exact_series_matches_first: bool
Issue_Identifier__always_use_publisher_filter: bool
Filename_Parsing__complicated_parser: bool
Filename_Parsing__filename_parser: comicapi.utils.Parser
Filename_Parsing__remove_c2c: bool
Filename_Parsing__remove_fcbd: bool
Filename_Parsing__remove_publisher: bool
@ -172,7 +173,7 @@ class Issue_Identifier(typing.TypedDict):
class Filename_Parsing(typing.TypedDict):
complicated_parser: bool
filename_parser: comicapi.utils.Parser
remove_c2c: bool
remove_fcbd: bool
remove_publisher: bool

View File

@ -84,7 +84,7 @@ class RenameWindow(QtWidgets.QDialog):
md = ca.read_metadata(self.data_style)
if md.is_empty:
md = ca.metadata_from_filename(
self.config[0].Filename_Parsing__complicated_parser,
self.config[0].Filename_Parsing__filename_parser,
self.config[0].Filename_Parsing__remove_c2c,
self.config[0].Filename_Parsing__remove_fcbd,
self.config[0].Filename_Parsing__remove_publisher,

View File

@ -2,51 +2,11 @@ from __future__ import annotations
import dataclasses
import pathlib
import sys
from enum import Enum, auto
from typing import Any
from enum import auto
from comicapi import utils
from comicapi.genericmetadata import GenericMetadata
if sys.version_info < (3, 11):

    class StrEnum(str, Enum):
        """
        Enum where members are also (and must be) strings.

        Used only on Python < 3.11; newer interpreters import ``enum.StrEnum``
        in the ``else`` branch below.
        """

        def __new__(cls, *values: Any) -> Any:
            """
            Build a member whose value is ``str(*values)``.

            ``values`` are forwarded to ``str()``: the object itself and,
            optionally, an ``encoding`` and an ``errors`` argument.

            Raises:
                TypeError: if more than three values are given, if a single
                    value is not already a ``str``, or if ``encoding`` /
                    ``errors`` is not a ``str``.
            """
            if len(values) > 3:
                raise TypeError(f"too many arguments for str(): {values!r}")
            if len(values) == 1:
                # it must be a string
                if not isinstance(values[0], str):
                    raise TypeError(f"{values[0]!r} is not a string")
            if len(values) >= 2:
                # check that encoding argument is a string
                if not isinstance(values[1], str):
                    raise TypeError(f"encoding must be a string, not {values[1]!r}")
            if len(values) == 3:
                # check that errors argument is a string
                if not isinstance(values[2], str):
                    # f-string instead of `"...%r" % (values[2])`: the parenthesised
                    # value is not a tuple, so a tuple argument would be unpacked by
                    # `%` and produce the wrong error.
                    raise TypeError(f"errors must be a string, not {values[2]!r}")
            value = str(*values)
            member = str.__new__(cls, value)
            member._value_ = value
            return member

        @staticmethod
        def _generate_next_value_(name: str, start: int, count: int, last_values: Any) -> str:
            """
            Return the lower-cased version of the member name.
            """
            return name.lower()

else:
    from enum import StrEnum
@dataclasses.dataclass
class IssueResult:
@ -69,7 +29,7 @@ class IssueResult:
return f"series: {self.series}; series id: {self.series_id}; issue number: {self.issue_number}; issue id: {self.issue_id}; published: {self.month} {self.year}"
class Action(StrEnum):
class Action(utils.StrEnum):
print = auto()
delete = auto()
copy = auto()
@ -80,14 +40,14 @@ class Action(StrEnum):
list_plugins = auto()
class MatchStatus(StrEnum):
class MatchStatus(utils.StrEnum):
good_match = auto()
no_match = auto()
multiple_match = auto()
low_confidence_match = auto()
class Status(StrEnum):
class Status(utils.StrEnum):
success = auto()
match_failure = auto()
write_failure = auto()

View File

@ -192,6 +192,8 @@ class SettingsWindow(QtWidgets.QDialog):
self.sources = comictaggerlib.ui.talkeruigenerator.generate_source_option_tabs(
self.tComicTalkers, self.config, self.talkers
)
self.cbFilenameParser.clear()
self.cbFilenameParser.addItems(utils.Parser)
self.connect_signals()
self.settings_to_form()
self.rename_test()
@ -209,7 +211,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.btnTemplateHelp.clicked.connect(self.show_template_help)
self.cbxMoveFiles.clicked.connect(self.dir_test)
self.leDirectory.textEdited.connect(self.dir_test)
self.cbxComplicatedParser.clicked.connect(self.switch_parser)
self.cbFilenameParser.currentIndexChanged.connect(self.switch_parser)
self.btnAddLiteralReplacement.clicked.connect(self.addLiteralReplacement)
self.btnAddValueReplacement.clicked.connect(self.addValueReplacement)
@ -244,7 +246,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.btnResetSettings.clicked.disconnect()
self.btnTemplateHelp.clicked.disconnect()
self.cbxChangeExtension.clicked.disconnect()
self.cbxComplicatedParser.clicked.disconnect()
self.cbFilenameParser.currentIndexChanged.disconnect()
self.cbxMoveFiles.clicked.disconnect()
self.cbxRenameStrict.clicked.disconnect()
self.cbxSmartCleanup.clicked.disconnect()
@ -273,9 +275,10 @@ class SettingsWindow(QtWidgets.QDialog):
self._filename_parser_test(self.leFilenameParserTest.text())
def _filename_parser_test(self, filename: str) -> None:
self.cbFilenameParser: QtWidgets.QComboBox
filename_info = utils.parse_filename(
filename=filename,
complicated_parser=self.cbxComplicatedParser.isChecked(),
parser=utils.Parser(self.cbFilenameParser.currentText()),
remove_c2c=self.cbxRemoveC2C.isChecked(),
remove_fcbd=self.cbxRemoveFCBD.isChecked(),
remove_publisher=self.cbxRemovePublisher.isChecked(),
@ -358,8 +361,9 @@ class SettingsWindow(QtWidgets.QDialog):
self.lblRenameTest.setText(str(e))
def switch_parser(self) -> None:
complicated = self.cbxComplicatedParser.isChecked()
currentParser = utils.Parser(self.cbFilenameParser.currentText())
complicated = currentParser == utils.Parser.COMPLICATED
self.cbxRemoveC2C.setEnabled(complicated)
self.cbxRemoveFCBD.setEnabled(complicated)
self.cbxRemovePublisher.setEnabled(complicated)
@ -380,7 +384,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.cbxCheckForNewVersion.setChecked(self.config[0].General__check_for_new_version)
self.cbxShortMetadataNames.setChecked(self.config[0].General__use_short_metadata_names)
self.cbxComplicatedParser.setChecked(self.config[0].Filename_Parsing__complicated_parser)
self.cbFilenameParser.setCurrentText(self.config[0].Filename_Parsing__filename_parser)
self.cbxRemoveC2C.setChecked(self.config[0].Filename_Parsing__remove_c2c)
self.cbxRemoveFCBD.setChecked(self.config[0].Filename_Parsing__remove_fcbd)
self.cbxRemovePublisher.setChecked(self.config[0].Filename_Parsing__remove_publisher)
@ -507,7 +511,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.config[0].Issue_Identifier__series_match_search_thresh = self.sbNameMatchSearchThresh.value()
self.config[0].Issue_Identifier__publisher_filter = utils.split(self.tePublisherFilter.toPlainText(), "\n")
self.config[0].Filename_Parsing__complicated_parser = self.cbxComplicatedParser.isChecked()
self.config[0].Filename_Parsing__filename_parser = utils.Parser(self.cbFilenameParser.currentText())
self.config[0].Filename_Parsing__remove_c2c = self.cbxRemoveC2C.isChecked()
self.config[0].Filename_Parsing__remove_fcbd = self.cbxRemoveFCBD.isChecked()
self.config[0].Filename_Parsing__remove_publisher = self.cbxRemovePublisher.isChecked()

View File

@ -1004,7 +1004,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
# copy the form onto metadata object
self.form_to_metadata()
new_metadata = self.comic_archive.metadata_from_filename(
self.config[0].Filename_Parsing__complicated_parser,
self.config[0].Filename_Parsing__filename_parser,
self.config[0].Filename_Parsing__remove_c2c,
self.config[0].Filename_Parsing__remove_fcbd,
self.config[0].Filename_Parsing__remove_publisher,
@ -1732,7 +1732,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
logger.error("Failed to load metadata for %s: %s", ca.path, e)
if md.is_empty:
md = ca.metadata_from_filename(
self.config[0].Filename_Parsing__complicated_parser,
self.config[0].Filename_Parsing__filename_parser,
self.config[0].Filename_Parsing__remove_c2c,
self.config[0].Filename_Parsing__remove_fcbd,
self.config[0].Filename_Parsing__remove_publisher,

View File

@ -98,6 +98,9 @@
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="buddy">
<cstring>btnResetSettings</cstring>
</property>
</widget>
</item>
<item row="4" column="1">
@ -114,6 +117,9 @@
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="buddy">
<cstring>btnClearCache</cstring>
</property>
</widget>
</item>
<item row="2" column="0">
@ -173,6 +179,9 @@
<property name="text">
<string>Default Name Match Ratio Threshold: Search:</string>
</property>
<property name="buddy">
<cstring>sbNameMatchSearchThresh</cstring>
</property>
</widget>
</item>
<item row="1" column="0">
@ -183,6 +192,9 @@
<property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
</property>
<property name="buddy">
<cstring>sbNameMatchIdentifyThresh</cstring>
</property>
</widget>
</item>
<item row="2" column="0">
@ -190,6 +202,9 @@
<property name="text">
<string>Always use Publisher Filter on &quot;manual&quot; searches:</string>
</property>
<property name="buddy">
<cstring>cbxUseFilter</cstring>
</property>
</widget>
</item>
<item row="2" column="1">
@ -207,6 +222,9 @@
<property name="text">
<string>Publisher Filter:</string>
</property>
<property name="buddy">
<cstring>tePublisherFilter</cstring>
</property>
</widget>
</item>
<item row="3" column="1">
@ -308,9 +326,19 @@
<widget class="QGroupBox" name="groupBox_2">
<layout class="QVBoxLayout" name="verticalLayout_7">
<item>
<widget class="QCheckBox" name="cbxComplicatedParser">
<widget class="QLabel" name="lblFilenameParser">
<property name="text">
<string>Use &quot;Complicated&quot; Parser</string>
<string>Select the filename parser</string>
</property>
<property name="buddy">
<cstring>cbFilenameParser</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="cbFilenameParser">
<property name="insertPolicy">
<enum>QComboBox::NoInsert</enum>
</property>
</widget>
</item>
@ -546,6 +574,9 @@
<property name="text">
<string>Template:</string>
</property>
<property name="buddy">
<cstring>leRenameTemplate</cstring>
</property>
</widget>
</item>
<item row="1" column="1">
@ -576,6 +607,9 @@
<property name="text">
<string>Issue # Zero Padding</string>
</property>
<property name="buddy">
<cstring>leIssueNumPadding</cstring>
</property>
</widget>
</item>
<item row="3" column="1">
@ -629,6 +663,9 @@
<property name="text">
<string>Destination Directory:</string>
</property>
<property name="buddy">
<cstring>leDirectory</cstring>
</property>
</widget>
</item>
<item row="9" column="1">
@ -734,6 +771,9 @@
<property name="text">
<string>Value Text Replacements</string>
</property>
<property name="buddy">
<cstring>twValueReplacements</cstring>
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
@ -741,6 +781,9 @@
<property name="text">
<string>Literal Text Replacements</string>
</property>
<property name="buddy">
<cstring>twLiteralReplacements</cstring>
</property>
</widget>
</item>
</layout>
@ -776,6 +819,9 @@
<property name="text">
<string>RAR program</string>
</property>
<property name="buddy">
<cstring>leRarExePath</cstring>
</property>
</widget>
</item>
<item row="1" column="1">

View File

@ -37,6 +37,7 @@ install_requires =
appdirs==1.4.4
beautifulsoup4>=4.1
chardet>=5.1.0,<6
comicfn2dict>=0.2.0
importlib-metadata>=3.3.0
isocodes>=2023.11.26
natsort>=8.1.0