From 51557627119d9b7c6f49632b9f4f50df2c155562 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sun, 3 Mar 2024 21:47:31 -0800 Subject: [PATCH 1/3] Add comicfn2dict as an alternative filename parser --- comicapi/comicarchive.py | 4 +- comicapi/utils.py | 95 ++++++++++++++++++- comictaggerlib/autotagmatchwindow.py | 2 +- comictaggerlib/cli.py | 2 +- comictaggerlib/ctsettings/file.py | 11 ++- .../ctsettings/settngs_namespace.py | 5 +- comictaggerlib/renamewindow.py | 2 +- comictaggerlib/resulttypes.py | 48 +--------- comictaggerlib/settingswindow.py | 16 ++-- comictaggerlib/taggerwindow.py | 4 +- comictaggerlib/ui/settingswindow.ui | 50 +++++++++- setup.cfg | 1 + 12 files changed, 171 insertions(+), 69 deletions(-) diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py index 032e867..d5e505e 100644 --- a/comicapi/comicarchive.py +++ b/comicapi/comicarchive.py @@ -364,7 +364,7 @@ class ComicArchive: def metadata_from_filename( self, - complicated_parser: bool = False, + parser: utils.Parser = utils.Parser.ORIGINAL, remove_c2c: bool = False, remove_fcbd: bool = False, remove_publisher: bool = False, @@ -376,7 +376,7 @@ class ComicArchive: filename_info = utils.parse_filename( self.path.name, - complicated_parser=complicated_parser, + parser=parser, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher, diff --git a/comicapi/utils.py b/comicapi/utils.py index 795e7f9..4af4f48 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -20,12 +20,16 @@ import logging import os import pathlib import platform +import sys import unicodedata from collections import defaultdict from collections.abc import Iterable, Mapping +from enum import Enum, auto from shutil import which # noqa: F401 from typing import Any, TypeVar, cast +from comicfn2dict import comicfn2dict + import comicapi.data from comicapi import filenamelexer, filenameparser @@ -37,9 +41,55 @@ try: except ImportError: icu_available = False + +if sys.version_info < (3, 11): + + class StrEnum(str, Enum): + """ + Enum where members are also (and must be) strings + """ + + def __new__(cls, *values: Any) -> Any: + "values must already be of type `str`" + if len(values) > 3: + raise TypeError(f"too many arguments for str(): {values!r}") + if len(values) == 1: + # it must be a string + if not isinstance(values[0], str): + raise TypeError(f"{values[0]!r} is not a string") + if len(values) >= 2: + # check that encoding argument is a string + if not isinstance(values[1], str): + raise TypeError(f"encoding must be a string, not {values[1]!r}") + if len(values) == 3: + # check that errors argument is a string + if not isinstance(values[2], str): + raise TypeError("errors must be a string, not %r" % (values[2])) + value = str(*values) + member = str.__new__(cls, value) + member._value_ = value + return member + + @staticmethod + def _generate_next_value_(name: str, start: int, count: int, last_values: Any) -> str: + """ + Return the lower-cased version of the member name. + """ + return name.lower() + +else: + from enum import StrEnum + + logger = logging.getLogger(__name__) +class Parser(StrEnum): + ORIGINAL = auto() + COMPLICATED = auto() + COMICFN2DICT = auto() + + def _custom_key(tup: Any) -> Any: import natsort @@ -67,7 +117,7 @@ def os_sorted(lst: Iterable[T]) -> Iterable[T]: def parse_filename( filename: str, - complicated_parser: bool = False, + parser: Parser = Parser.ORIGINAL, remove_c2c: bool = False, remove_fcbd: bool = False, remove_publisher: bool = False, @@ -99,7 +149,25 @@ def parse_filename( filename, ext = os.path.splitext(filename) filename = " ".join(wordninja.split(filename)) + ext - if complicated_parser: + fni = filenameparser.FilenameInfo( + alternate="", + annual=False, + archive="", + c2c=False, + fcbd=False, + format="", + issue="", + issue_count="", + publisher="", + remainder="", + series="", + title="", + volume="", + volume_count="", + year="", + ) + + if parser == Parser.COMPLICATED: lex = filenamelexer.Lex(filename, allow_issue_start_with_letter) p = filenameparser.Parse( lex.items, @@ -108,7 +176,26 @@ def parse_filename( remove_publisher=remove_publisher, protofolius_issue_number_scheme=protofolius_issue_number_scheme, ) - return p.filename_info + fni = p.filename_info + elif parser == Parser.COMICFN2DICT: + fn2d = comicfn2dict(filename) + fni = filenameparser.FilenameInfo( + alternate="", + annual=False, + archive=fn2d.get("ext", ""), + c2c=False, + fcbd=False, + issue=fn2d.get("issue", ""), + issue_count=fn2d.get("issue_count", ""), + publisher=fn2d.get("publisher", ""), + remainder=fn2d.get("scan_info", ""), + series=fn2d.get("series", ""), + title=fn2d.get("title", ""), + volume=fn2d.get("volume", ""), + volume_count=fn2d.get("volume_count", ""), + year=fn2d.get("year", ""), + format=fn2d.get("original_format", ""), + ) else: fnp = filenameparser.FileNameParser() fnp.parse_filename(filename) @@ -129,7 +216,7 @@ def parse_filename( year=fnp.year, format="", ) - return fni + return fni def combine_notes(existing_notes: str | None, new_notes: str | None, split: str) -> str: diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 3d3ba78..fc49c20 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -233,7 +233,7 @@ class AutoTagMatchWindow(QtWidgets.QDialog): md = ca.read_metadata(self.config.internal__load_data_style) if md.is_empty: md = ca.metadata_from_filename( - self.config.Filename_Parsing__complicated_parser, + self.config.Filename_Parsing__filename_parser, self.config.Filename_Parsing__remove_c2c, self.config.Filename_Parsing__remove_fcbd, self.config.Filename_Parsing__remove_publisher, diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index f718159..891f74b 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -250,7 +250,7 @@ class CLI: # now, overlay the parsed filename info if self.config.Runtime_Options__parse_filename: f_md = ca.metadata_from_filename( - self.config.Filename_Parsing__complicated_parser, + self.config.Filename_Parsing__filename_parser, self.config.Filename_Parsing__remove_c2c, self.config.Filename_Parsing__remove_fcbd, self.config.Filename_Parsing__remove_publisher, diff --git a/comictaggerlib/ctsettings/file.py b/comictaggerlib/ctsettings/file.py index a6901f7..8dbadab 100644 --- a/comictaggerlib/ctsettings/file.py +++ b/comictaggerlib/ctsettings/file.py @@ -5,6 +5,7 @@ import uuid import settngs +from comicapi import utils from comictaggerlib.ctsettings.settngs_namespace import SettngsNS as ct_ns from comictaggerlib.defaults import DEFAULT_REPLACEMENTS, Replacement, Replacements @@ -102,10 +103,12 @@ def dialog(parser: settngs.Manager) -> None: def filename(parser: settngs.Manager) -> None: # filename parsing settings parser.add_setting( - "--complicated-parser", - default=False, - action=argparse.BooleanOptionalAction, - help="Enables the new parser which tries to extract more information from filenames", + "--filename-parser", + default=utils.Parser.ORIGINAL, + metavar=f"{{{','.join(utils.Parser)}}}", + type=utils.Parser, + choices=[p.value for p in utils.Parser], + help="Select the filename parser, defaults to original", ) parser.add_setting( "--remove-c2c", diff --git a/comictaggerlib/ctsettings/settngs_namespace.py b/comictaggerlib/ctsettings/settngs_namespace.py index faafdea..c0b3a8c 100644 --- a/comictaggerlib/ctsettings/settngs_namespace.py +++ b/comictaggerlib/ctsettings/settngs_namespace.py @@ -5,6 +5,7 @@ import typing import settngs import comicapi.genericmetadata +import comicapi.utils import comictaggerlib.ctsettings.types import comictaggerlib.defaults import comictaggerlib.resulttypes @@ -61,7 +62,7 @@ class SettngsNS(settngs.TypedNS): Issue_Identifier__exact_series_matches_first: bool Issue_Identifier__always_use_publisher_filter: bool - Filename_Parsing__complicated_parser: bool + Filename_Parsing__filename_parser: comicapi.utils.Parser Filename_Parsing__remove_c2c: bool Filename_Parsing__remove_fcbd: bool Filename_Parsing__remove_publisher: bool @@ -172,7 +173,7 @@ class Issue_Identifier(typing.TypedDict): class Filename_Parsing(typing.TypedDict): - complicated_parser: bool + filename_parser: comicapi.utils.Parser remove_c2c: bool remove_fcbd: bool remove_publisher: bool diff --git a/comictaggerlib/renamewindow.py b/comictaggerlib/renamewindow.py index 0ade134..a7353d7 100644 --- a/comictaggerlib/renamewindow.py +++ b/comictaggerlib/renamewindow.py @@ -84,7 +84,7 @@ class RenameWindow(QtWidgets.QDialog): md = ca.read_metadata(self.data_style) if md.is_empty: md = ca.metadata_from_filename( - self.config[0].Filename_Parsing__complicated_parser, + self.config[0].Filename_Parsing__filename_parser, self.config[0].Filename_Parsing__remove_c2c, self.config[0].Filename_Parsing__remove_fcbd, self.config[0].Filename_Parsing__remove_publisher, diff --git a/comictaggerlib/resulttypes.py b/comictaggerlib/resulttypes.py index cdc0d8b..3b436e5 100644 --- a/comictaggerlib/resulttypes.py +++ b/comictaggerlib/resulttypes.py @@ -2,51 +2,11 @@ from __future__ import annotations import dataclasses import pathlib -import sys -from enum import Enum, auto -from typing import Any +from enum import auto from comicapi import utils from comicapi.genericmetadata import GenericMetadata -if sys.version_info < (3, 11): - - class StrEnum(str, Enum): - """ - Enum where members are also (and must be) strings - """ - - def __new__(cls, *values: Any) -> Any: - "values must already be of type `str`" - if len(values) > 3: - raise TypeError(f"too many arguments for str(): {values!r}") - if len(values) == 1: - # it must be a string - if not isinstance(values[0], str): - raise TypeError(f"{values[0]!r} is not a string") - if len(values) >= 2: - # check that encoding argument is a string - if not isinstance(values[1], str): - raise TypeError(f"encoding must be a string, not {values[1]!r}") - if len(values) == 3: - # check that errors argument is a string - if not isinstance(values[2], str): - raise TypeError("errors must be a string, not %r" % (values[2])) - value = str(*values) - member = str.__new__(cls, value) - member._value_ = value - return member - - @staticmethod - def _generate_next_value_(name: str, start: int, count: int, last_values: Any) -> str: - """ - Return the lower-cased version of the member name. - """ - return name.lower() - -else: - from enum import StrEnum - @dataclasses.dataclass class IssueResult: @@ -69,7 +29,7 @@ class IssueResult: return f"series: {self.series}; series id: {self.series_id}; issue number: {self.issue_number}; issue id: {self.issue_id}; published: {self.month} {self.year}" -class Action(StrEnum): +class Action(utils.StrEnum): print = auto() delete = auto() copy = auto() @@ -80,14 +40,14 @@ class Action(StrEnum): list_plugins = auto() -class MatchStatus(StrEnum): +class MatchStatus(utils.StrEnum): good_match = auto() no_match = auto() multiple_match = auto() low_confidence_match = auto() -class Status(StrEnum): +class Status(utils.StrEnum): success = auto() match_failure = auto() write_failure = auto() diff --git a/comictaggerlib/settingswindow.py b/comictaggerlib/settingswindow.py index 726ae6c..e1b79c2 100644 --- a/comictaggerlib/settingswindow.py +++ b/comictaggerlib/settingswindow.py @@ -192,6 +192,8 @@ class SettingsWindow(QtWidgets.QDialog): self.sources = comictaggerlib.ui.talkeruigenerator.generate_source_option_tabs( self.tComicTalkers, self.config, self.talkers ) + self.cbFilenameParser.clear() + self.cbFilenameParser.addItems(utils.Parser) self.connect_signals() self.settings_to_form() self.rename_test() @@ -209,7 +211,7 @@ class SettingsWindow(QtWidgets.QDialog): self.btnTemplateHelp.clicked.connect(self.show_template_help) self.cbxMoveFiles.clicked.connect(self.dir_test) self.leDirectory.textEdited.connect(self.dir_test) - self.cbxComplicatedParser.clicked.connect(self.switch_parser) + self.cbFilenameParser.currentIndexChanged.connect(self.switch_parser) self.btnAddLiteralReplacement.clicked.connect(self.addLiteralReplacement) self.btnAddValueReplacement.clicked.connect(self.addValueReplacement) @@ -244,7 +246,7 @@ class SettingsWindow(QtWidgets.QDialog): self.btnResetSettings.clicked.disconnect() self.btnTemplateHelp.clicked.disconnect() self.cbxChangeExtension.clicked.disconnect() - self.cbxComplicatedParser.clicked.disconnect() + self.cbFilenameParser.currentIndexChanged.disconnect() self.cbxMoveFiles.clicked.disconnect() self.cbxRenameStrict.clicked.disconnect() self.cbxSmartCleanup.clicked.disconnect() @@ -273,9 +275,10 @@ class SettingsWindow(QtWidgets.QDialog): self._filename_parser_test(self.leFilenameParserTest.text()) def _filename_parser_test(self, filename: str) -> None: + self.cbFilenameParser: QtWidgets.QComboBox filename_info = utils.parse_filename( filename=filename, - complicated_parser=self.cbxComplicatedParser.isChecked(), + parser=utils.Parser(self.cbFilenameParser.currentText()), remove_c2c=self.cbxRemoveC2C.isChecked(), remove_fcbd=self.cbxRemoveFCBD.isChecked(), remove_publisher=self.cbxRemovePublisher.isChecked(), @@ -358,8 +361,9 @@ class SettingsWindow(QtWidgets.QDialog): self.lblRenameTest.setText(str(e)) def switch_parser(self) -> None: - complicated = self.cbxComplicatedParser.isChecked() + currentParser = utils.Parser(self.cbFilenameParser.currentText()) + complicated = currentParser == utils.Parser.COMPLICATED self.cbxRemoveC2C.setEnabled(complicated) self.cbxRemoveFCBD.setEnabled(complicated) self.cbxRemovePublisher.setEnabled(complicated) @@ -380,7 +384,7 @@ class SettingsWindow(QtWidgets.QDialog): self.cbxCheckForNewVersion.setChecked(self.config[0].General__check_for_new_version) self.cbxShortMetadataNames.setChecked(self.config[0].General__use_short_metadata_names) - self.cbxComplicatedParser.setChecked(self.config[0].Filename_Parsing__complicated_parser) + self.cbFilenameParser.setCurrentText(self.config[0].Filename_Parsing__filename_parser) self.cbxRemoveC2C.setChecked(self.config[0].Filename_Parsing__remove_c2c) self.cbxRemoveFCBD.setChecked(self.config[0].Filename_Parsing__remove_fcbd) self.cbxRemovePublisher.setChecked(self.config[0].Filename_Parsing__remove_publisher) @@ -507,7 +511,7 @@ class SettingsWindow(QtWidgets.QDialog): self.config[0].Issue_Identifier__series_match_search_thresh = self.sbNameMatchSearchThresh.value() self.config[0].Issue_Identifier__publisher_filter = utils.split(self.tePublisherFilter.toPlainText(), "\n") - self.config[0].Filename_Parsing__complicated_parser = self.cbxComplicatedParser.isChecked() + self.config[0].Filename_Parsing__filename_parser = utils.Parser(self.cbFilenameParser.currentText()) self.config[0].Filename_Parsing__remove_c2c = self.cbxRemoveC2C.isChecked() self.config[0].Filename_Parsing__remove_fcbd = self.cbxRemoveFCBD.isChecked() self.config[0].Filename_Parsing__remove_publisher = self.cbxRemovePublisher.isChecked() diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index e92efae..ca968c4 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -1004,7 +1004,7 @@ class TaggerWindow(QtWidgets.QMainWindow): # copy the form onto metadata object self.form_to_metadata() new_metadata = self.comic_archive.metadata_from_filename( - self.config[0].Filename_Parsing__complicated_parser, + self.config[0].Filename_Parsing__filename_parser, self.config[0].Filename_Parsing__remove_c2c, self.config[0].Filename_Parsing__remove_fcbd, self.config[0].Filename_Parsing__remove_publisher, @@ -1732,7 +1732,7 @@ class TaggerWindow(QtWidgets.QMainWindow): logger.error("Failed to load metadata for %s: %s", ca.path, e) if md.is_empty: md = ca.metadata_from_filename( - self.config[0].Filename_Parsing__complicated_parser, + self.config[0].Filename_Parsing__filename_parser, self.config[0].Filename_Parsing__remove_c2c, self.config[0].Filename_Parsing__remove_fcbd, self.config[0].Filename_Parsing__remove_publisher, diff --git a/comictaggerlib/ui/settingswindow.ui b/comictaggerlib/ui/settingswindow.ui index fa8e864..4a2d206 100644 --- a/comictaggerlib/ui/settingswindow.ui +++ b/comictaggerlib/ui/settingswindow.ui @@ -98,6 +98,9 @@ true + + btnResetSettings + @@ -114,6 +117,9 @@ true + + btnClearCache + @@ -173,6 +179,9 @@ Default Name Match Ratio Threshold: Search: + + sbNameMatchSearchThresh + @@ -183,6 +192,9 @@ Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter + + sbNameMatchIdentifyThresh + @@ -190,6 +202,9 @@ Always use Publisher Filter on "manual" searches: + + cbxUseFilter + @@ -207,6 +222,9 @@ Publisher Filter: + + tePublisherFilter + @@ -308,9 +326,19 @@ - + - Use "Complicated" Parser + Select the filename parser + + + cbFilenameParser + + + + + + + QComboBox::NoInsert @@ -546,6 +574,9 @@ Template: + + leRenameTemplate + @@ -576,6 +607,9 @@ Issue # Zero Padding + + leIssueNumPadding + @@ -629,6 +663,9 @@ Destination Directory: + + leDirectory + @@ -734,6 +771,9 @@ Value Text Replacements + + twValueReplacements + @@ -741,6 +781,9 @@ Literal Text Replacements + + twLiteralReplacements + @@ -776,6 +819,9 @@ RAR program + + leRarExePath + diff --git a/setup.cfg b/setup.cfg index 0681639..3770f87 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,7 @@ install_requires = appdirs==1.4.4 beautifulsoup4>=4.1 chardet>=5.1.0,<6 + comicfn2dict>=0.2.0 importlib-metadata>=3.3.0 isocodes>=2023.11.26 natsort>=8.1.0 From 9a95adf47d5baf8be1ce1c5aa9ae1664de7aaf1b Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sat, 9 Mar 2024 13:02:02 -0800 Subject: [PATCH 2/3] Bump comicfn2dict --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 3770f87..8b30167 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ install_requires = appdirs==1.4.4 beautifulsoup4>=4.1 chardet>=5.1.0,<6 - comicfn2dict>=0.2.0 + comicfn2dict>=0.2.1 importlib-metadata>=3.3.0 isocodes>=2023.11.26 natsort>=8.1.0 From 9eae71fb621bc78a7c06c9ab1bd5eed7c750a986 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sat, 9 Mar 2024 13:07:49 -0800 Subject: [PATCH 3/3] Disable checkboxes when the complicated parser is not used --- comictaggerlib/settingswindow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/comictaggerlib/settingswindow.py b/comictaggerlib/settingswindow.py index e1b79c2..1f2cb08 100644 --- a/comictaggerlib/settingswindow.py +++ b/comictaggerlib/settingswindow.py @@ -367,6 +367,8 @@ class SettingsWindow(QtWidgets.QDialog): self.cbxRemoveC2C.setEnabled(complicated) self.cbxRemoveFCBD.setEnabled(complicated) self.cbxRemovePublisher.setEnabled(complicated) + self.cbxProtofoliusIssueNumberScheme.setEnabled(complicated) + self.cbxAllowIssueStartWithLetter.setEnabled(complicated) self.filename_parser_test() def settings_to_form(self) -> None: