From f6be7919d725e06f9da7a9793784e4832e1a5971 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Wed, 6 Sep 2023 04:50:05 -0400 Subject: [PATCH] Implement support for protofolius's permission scheme --- comicapi/comicarchive.py | 70 ++++++++----------- comicapi/filenamelexer.py | 39 +++++++++-- comicapi/filenameparser.py | 31 +++++++- comicapi/issuestring.py | 52 +++++++++----- comicapi/utils.py | 46 ++++++++++++ comictaggerlib/ctsettings/file.py | 21 +++++- .../ctsettings/settngs_namespace.py | 3 +- comictaggerlib/settingswindow.py | 67 ++++++++++++++++++ comictaggerlib/ui/settingswindow.ui | 40 +++++++++++ testing/filenames.py | 51 +++++++++++--- tests/filenameparser_test.py | 15 ++-- tests/issuestring_test.py | 3 + 12 files changed, 352 insertions(+), 86 deletions(-) diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py index 87a234d..936150f 100644 --- a/comicapi/comicarchive.py +++ b/comicapi/comicarchive.py @@ -22,7 +22,7 @@ import shutil import sys from typing import cast -from comicapi import filenamelexer, filenameparser, utils +from comicapi import utils from comicapi.archivers import Archiver, UnknownArchiver, ZipArchiver from comicapi.comet import CoMet from comicapi.comicbookinfo import ComicBookInfo @@ -541,53 +541,39 @@ class ComicArchive: remove_fcbd: bool = False, remove_publisher: bool = False, split_words: bool = False, + allow_issue_start_with_letter: bool = False, + protofolius_issue_number_scheme: bool = False, ) -> GenericMetadata: metadata = GenericMetadata() - filename = self.path.name - if split_words: - import wordninja + filename_info = utils.parse_filename( + self.path.name, + complicated_parser=complicated_parser, + remove_c2c=remove_c2c, + remove_fcbd=remove_fcbd, + remove_publisher=remove_publisher, + split_words=split_words, + allow_issue_start_with_letter=allow_issue_start_with_letter, + protofolius_issue_number_scheme=protofolius_issue_number_scheme, + ) + metadata.alternate_number = utils.xlate(filename_info.get("alternate", None)) + metadata.issue = utils.xlate(filename_info.get("issue", None)) + metadata.issue_count = utils.xlate_int(filename_info.get("issue_count", None)) + metadata.publisher = utils.xlate(filename_info.get("publisher", None)) + metadata.series = utils.xlate(filename_info.get("series", None)) + metadata.title = utils.xlate(filename_info.get("title", None)) + metadata.volume = utils.xlate_int(filename_info.get("volume", None)) + metadata.volume_count = utils.xlate_int(filename_info.get("volume_count", None)) + metadata.year = utils.xlate_int(filename_info.get("year", None)) - filename = " ".join(wordninja.split(self.path.stem)) + self.path.suffix - - if complicated_parser: - lex = filenamelexer.Lex(filename) - p = filenameparser.Parse( - lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher - ) - metadata.alternate_number = utils.xlate(p.filename_info["alternate"]) - metadata.issue = utils.xlate(p.filename_info["issue"]) - metadata.issue_count = utils.xlate_int(p.filename_info["issue_count"]) - metadata.publisher = utils.xlate(p.filename_info["publisher"]) - metadata.series = utils.xlate(p.filename_info["series"]) - metadata.title = utils.xlate(p.filename_info["title"]) - metadata.volume = utils.xlate_int(p.filename_info["volume"]) - metadata.volume_count = utils.xlate_int(p.filename_info["volume_count"]) - metadata.year = utils.xlate_int(p.filename_info["year"]) - - metadata.scan_info = utils.xlate(p.filename_info["remainder"]) - metadata.format = "FCBD" if p.filename_info["fcbd"] else None - if p.filename_info["annual"]: - metadata.format = "Annual" - else: - fnp = filenameparser.FileNameParser() - fnp.parse_filename(filename) - - if fnp.issue: - metadata.issue = fnp.issue - if fnp.series: - metadata.series = fnp.series - if fnp.volume: - metadata.volume = utils.xlate_int(fnp.volume) - if fnp.year: - metadata.year = utils.xlate_int(fnp.year) - if fnp.issue_count: - metadata.issue_count = utils.xlate_int(fnp.issue_count) - if fnp.remainder: - metadata.scan_info = fnp.remainder + metadata.scan_info = utils.xlate(filename_info.get("remainder", None)) + metadata.format = "FCBD" if filename_info.get("fcbd", None) else None + if filename_info.get("annual", None): + metadata.format = "Annual" + if filename_info.get("format", None): + metadata.format = filename_info["format"] metadata.is_empty = False - return metadata def export_as_zip(self, zip_filename: pathlib.Path) -> bool: diff --git a/comicapi/filenamelexer.py b/comicapi/filenamelexer.py index 0b40954..868fcb2 100644 --- a/comicapi/filenamelexer.py +++ b/comicapi/filenamelexer.py @@ -87,7 +87,7 @@ class Item: class Lexer: - def __init__(self, string: str) -> None: + def __init__(self, string: str, allow_issue_start_with_letter: bool = False) -> None: self.input: str = string # The string being scanned # The next lexing function to enter self.state: Callable[[Lexer], Callable | None] | None = None # type: ignore[type-arg] @@ -98,6 +98,7 @@ class Lexer: self.brace_depth: int = 0 # Nesting depth of { } self.sbrace_depth: int = 0 # Nesting depth of [ ] self.items: list[Item] = [] + self.allow_issue_start_with_letter = allow_issue_start_with_letter # Next returns the next rune in the input. def get(self) -> str: @@ -196,7 +197,7 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty return lex_space elif r == ".": r = lex.peek() - if r < "0" or "9" < r: + if not r.isdigit(): lex.emit(ItemType.Dot) return lex_filename @@ -204,15 +205,17 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty return lex_number elif r == "'": r = lex.peek() - if r in "0123456789": + if r.isdigit(): return lex_number lex.emit(ItemType.Text) # TODO: Change to Text elif "0" <= r <= "9": lex.backup() return lex_number elif r == "#": - if "0" <= lex.peek() <= "9": - return lex_number + if lex.allow_issue_start_with_letter and is_alpha_numeric(lex.peek()): + return lex_issue_number + elif lex.peek().isdigit() or lex.peek() in "-+.": + return lex_issue_number lex.emit(ItemType.Symbol) elif is_operator(r): if r == "-" and lex.peek() == "-": @@ -329,6 +332,28 @@ def lex_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # type return lex_filename +def lex_issue_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # type: ignore[type-arg] + # Only called when lex.input[lex.start] == "#" + original_start = lex.pos + found_number = False + while True: + r = lex.get() + if is_alpha_numeric(r): + if r.isnumeric(): + found_number = True + else: + lex.backup() + break + + if not found_number: + lex.pos = original_start + lex.emit(ItemType.Symbol) + else: + lex.emit(ItemType.IssueNumber) + + return lex_filename + + def is_space(character: str) -> bool: return character in "_ \t" @@ -346,7 +371,7 @@ def is_symbol(character: str) -> bool: return unicodedata.category(character)[0] in "PS" -def Lex(filename: str) -> Lexer: - lex = Lexer(string=os.path.basename(filename)) +def Lex(filename: str, allow_issue_start_with_letter: bool = False) -> Lexer: + lex = Lexer(os.path.basename(filename), allow_issue_start_with_letter) lex.run() return lex diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py index 7c6681e..85d6262 100644 --- a/comicapi/filenameparser.py +++ b/comicapi/filenameparser.py @@ -324,6 +324,21 @@ class FilenameInfo(TypedDict, total=False): volume: str volume_count: str year: str + format: str + + +protofolius_issue_number_scheme = { + "B": "biography/best of", + "C": "compact edition", + "E": "entrtainment/puzzle edition", + "F": "familiy book edition", + "J": "jubileum (anniversary) edition", + "P": "pocket edition", + "N": "newly brought out/restyled edition", + "O": "old editions (or oblong format)", + "S": "special edition", + "X": "X-rated edition", +} eof = filenamelexer.Item(filenamelexer.ItemType.EOF, -1, "") @@ -341,6 +356,7 @@ class Parser: remove_c2c: bool = False, remove_fcbd: bool = False, remove_publisher: bool = False, + protofolius_issue_number_scheme: bool = False, ) -> None: self.state: Callable[[Parser], Callable | None] | None = None # type: ignore[type-arg] self.pos = -1 @@ -366,6 +382,7 @@ class Parser: self.remove_c2c = remove_c2c self.remove_fcbd = remove_fcbd self.remove_publisher = remove_publisher + self.protofolius_issue_number_scheme = protofolius_issue_number_scheme self.remove_from_remainder = [] if remove_c2c: @@ -923,6 +940,16 @@ def resolve_issue(p: Parser) -> None: if "volume" in p.filename_info: p.filename_info["issue"] = p.filename_info["volume"] + if ( + "issue" in p.filename_info + and p.protofolius_issue_number_scheme + and len(p.filename_info["issue"]) > 1 + and p.filename_info["issue"][0].isalpha() + and p.filename_info["issue"][0].upper() in protofolius_issue_number_scheme + and p.filename_info["issue"][1].isnumeric() + ): + p.filename_info["format"] = protofolius_issue_number_scheme[p.filename_info["issue"][0].upper()] + def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ignore[type-arg] resolve_year(p) @@ -941,7 +968,7 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty p.filename_info["series"] = join_title(p.series_parts) p.used_items.extend(p.series_parts) else: - p.filename_info["series"] = p.filename_info["issue"] + p.filename_info["series"] = p.filename_info.get("issue", "") if "free comic book" in p.filename_info["series"].casefold(): p.filename_info["fcbd"] = True @@ -1137,6 +1164,7 @@ def Parse( remove_c2c: bool = False, remove_fcbd: bool = False, remove_publisher: bool = False, + protofolius_issue_number_scheme: bool = False, ) -> Parser: p = Parser( lexer_result=lexer_result, @@ -1144,6 +1172,7 @@ def Parse( remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher, + protofolius_issue_number_scheme=protofolius_issue_number_scheme, ) p.run() return p diff --git a/comicapi/issuestring.py b/comicapi/issuestring.py index 149e5b1..b2cda1d 100644 --- a/comicapi/issuestring.py +++ b/comicapi/issuestring.py @@ -32,6 +32,7 @@ class IssueString: self.num = None self.suffix = "" + self.prefix = "" if text is None: return @@ -41,18 +42,25 @@ class IssueString: if len(text) == 0: return + for idx, r in enumerate(text): + if not r.isalpha(): + break + self.prefix = text[:idx] + self.num, self.suffix = self.get_number(text[idx:]) + + def get_number(self, text: str) -> tuple[float | None, str]: + num, suffix = None, "" + start = 0 # skip the minus sign if it's first - if text[0] == "-": + if text[0] in ("-", "+"): start = 1 - else: - start = 0 # if it's still not numeric at start skip it if text[start].isdigit() or text[start] == ".": # walk through the string, look for split point (the first non-numeric) decimal_count = 0 for idx in range(start, len(text)): - if text[idx] not in "0123456789.": + if not (text[idx].isdigit() or text[idx] in "."): break # special case: also split on second "." if text[idx] == ".": @@ -71,42 +79,48 @@ class IssueString: if idx == 1 and start == 1: idx = 0 - part1 = text[0:idx] - part2 = text[idx : len(text)] - - if part1 != "": - self.num = float(part1) - self.suffix = part2 + if text[0:idx]: + num = float(text[0:idx]) + suffix = text[idx : len(text)] else: - self.suffix = text + suffix = text + return num, suffix def as_string(self, pad: int = 0) -> str: - # return the float, left side zero-padded, with suffix attached + """return the number, left side zero-padded, with suffix attached""" + + # if there is no number return the text if self.num is None: - return self.suffix + return self.prefix + self.suffix + # negative is added back in last negative = self.num < 0 - num_f = abs(self.num) + # used for padding num_int = int(num_f) - num_s = str(num_int) - if float(num_int) != num_f: - num_s = str(num_f) - num_s += self.suffix + if num_f.is_integer(): + num_s = str(num_int) + else: + num_s = str(num_f) # create padding padding = "" + # we only pad the whole number part, we don't care about the decimal length = len(str(num_int)) if length < pad: padding = "0" * (pad - length) + # add the padding to the front num_s = padding + num_s + + # finally add the negative back in if negative: num_s = "-" + num_s - return num_s + # return the prefix + formatted number + suffix + return self.prefix + num_s + self.suffix def as_float(self) -> float | None: # return the float, with no suffix diff --git a/comicapi/utils.py b/comicapi/utils.py index f2c362e..b7d4075 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -26,6 +26,7 @@ from shutil import which # noqa: F401 from typing import Any import comicapi.data +from comicapi import filenamelexer, filenameparser try: import icu @@ -60,6 +61,51 @@ def os_sorted(lst: Iterable) -> Iterable: return sorted(lst, key=key) +def parse_filename( + filename: str, + complicated_parser: bool = False, + remove_c2c: bool = False, + remove_fcbd: bool = False, + remove_publisher: bool = False, + split_words: bool = False, + allow_issue_start_with_letter: bool = False, + protofolius_issue_number_scheme: bool = False, +) -> filenameparser.FilenameInfo: + if split_words: + import wordninja + + filename, ext = os.path.splitext(filename) + filename = " ".join(wordninja.split(filename)) + ext + + if complicated_parser: + lex = filenamelexer.Lex(filename, allow_issue_start_with_letter) + p = filenameparser.Parse( + lex.items, + remove_c2c=remove_c2c, + remove_fcbd=remove_fcbd, + remove_publisher=remove_publisher, + protofolius_issue_number_scheme=protofolius_issue_number_scheme, + ) + return p.filename_info + else: + fnp = filenameparser.FileNameParser() + fnp.parse_filename(filename) + fni = filenameparser.FilenameInfo() + if fnp.issue: + fni["issue"] = fnp.issue + if fnp.series: + fni["series"] = fnp.series + if fnp.volume: + fni["volume"] = fnp.volume + if fnp.year: + fni["year"] = fnp.year + if fnp.issue_count: + fni["issue_count"] = fnp.issue_count + if fnp.remainder: + fni["remainder"] = fnp.remainder + return fni + + def combine_notes(existing_notes: str | None, new_notes: str | None, split: str) -> str: split_notes, split_str, untouched_notes = (existing_notes or "").rpartition(split) if split_notes or split_str: diff --git a/comictaggerlib/ctsettings/file.py b/comictaggerlib/ctsettings/file.py index 5a3e0db..860fcc4 100644 --- a/comictaggerlib/ctsettings/file.py +++ b/comictaggerlib/ctsettings/file.py @@ -119,6 +119,18 @@ def filename(parser: settngs.Manager) -> None: action=argparse.BooleanOptionalAction, help="Attempts to remove publisher names from filenames, currently limited to Marvel and DC. Requires --complicated-parser", ) + parser.add_setting( + "--protofolius-issue-number-scheme", + default=False, + action=argparse.BooleanOptionalAction, + help="Use an issue number scheme devised by protofolius for encoding format informatino as a letter in front of an issue number. Implies --allow-issue-start-with-letter. Requires --complicated-parser", + ) + parser.add_setting( + "--allow-issue-start-with-letter", + default=False, + action=argparse.BooleanOptionalAction, + help="Allows an issue number to start with a single letter (e.g. '#X01'). Requires --complicated-parser", + ) def talker(parser: settngs.Manager) -> None: @@ -220,7 +232,7 @@ def autotag(parser: settngs.Manager) -> None: parser.add_setting("remove_archive_after_successful_match", default=False, cmdline=False) -def validate_file_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]: +def parse_filter(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]: new_filter = [] remove = [] for x in config[0].Issue_Identifier_publisher_filter: @@ -235,6 +247,13 @@ def validate_file_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_n if x in new_filter: new_filter.remove(x) config[0].Issue_Identifier_publisher_filter = new_filter + return config + + +def validate_file_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]: + config = parse_filter(config) + if config[0].Filename_Parsing_protofolius_issue_number_scheme: + config[0].Filename_Parsing_allow_issue_start_with_letter = True config[0].File_Rename_replacements = Replacements( [Replacement(x[0], x[1], x[2]) for x in config[0].File_Rename_replacements[0]], diff --git a/comictaggerlib/ctsettings/settngs_namespace.py b/comictaggerlib/ctsettings/settngs_namespace.py index 9d55fe6..64e432c 100644 --- a/comictaggerlib/ctsettings/settngs_namespace.py +++ b/comictaggerlib/ctsettings/settngs_namespace.py @@ -31,7 +31,6 @@ class settngs_namespace(settngs.TypedNS): Runtime_Options_summary: bool Runtime_Options_raw: bool Runtime_Options_recursive: bool - Runtime_Options_script: str Runtime_Options_split_words: bool Runtime_Options_dryrun: bool Runtime_Options_darkmode: bool @@ -70,6 +69,8 @@ class settngs_namespace(settngs.TypedNS): Filename_Parsing_remove_c2c: bool Filename_Parsing_remove_fcbd: bool Filename_Parsing_remove_publisher: bool + Filename_Parsing_protofolius_issue_number_scheme: bool + Filename_Parsing_allow_issue_start_with_letter: bool Sources_source: str Sources_remove_html_tables: bool diff --git a/comictaggerlib/settingswindow.py b/comictaggerlib/settingswindow.py index e8a4516..aaffe75 100644 --- a/comictaggerlib/settingswindow.py +++ b/comictaggerlib/settingswindow.py @@ -195,6 +195,8 @@ class SettingsWindow(QtWidgets.QDialog): self.settings_to_form() self.rename_test() self.dir_test() + self.leFilenameParserTest.setText(self.lblRenameTest.text()) + self.filename_parser_test() # Set General as start tab self.tabWidget.setCurrentIndex(0) @@ -222,6 +224,15 @@ class SettingsWindow(QtWidgets.QDialog): self.twLiteralReplacements.cellChanged.connect(self.rename_test) self.twValueReplacements.cellChanged.connect(self.rename_test) + self.leFilenameParserTest.textEdited.connect(self.filename_parser_test) + self.cbxRemoveC2C.clicked.connect(self.filename_parser_test) + self.cbxRemoveFCBD.clicked.connect(self.filename_parser_test) + self.cbxRemovePublisher.clicked.connect(self.filename_parser_test) + self.cbxProtofoliusIssueNumberScheme.clicked.connect(self.filename_parser_test) + self.cbxProtofoliusIssueNumberScheme.clicked.connect(self.protofolius_clicked) + self.cbxAllowIssueStartWithLetter.clicked.connect(self.filename_parser_test) + self.cbxSplitWords.clicked.connect(self.filename_parser_test) + def disconnect_signals(self) -> None: self.btnAddLiteralReplacement.clicked.disconnect() self.btnAddValueReplacement.clicked.disconnect() @@ -241,6 +252,55 @@ class SettingsWindow(QtWidgets.QDialog): self.leRenameTemplate.textEdited.disconnect() self.twLiteralReplacements.cellChanged.disconnect() self.twValueReplacements.cellChanged.disconnect() + self.leFilenameParserTest.textEdited.disconnect() + self.cbxRemoveC2C.clicked.disconnect() + self.cbxRemoveFCBD.clicked.disconnect() + self.cbxRemovePublisher.clicked.disconnect() + self.cbxProtofoliusIssueNumberScheme.clicked.disconnect() + self.cbxAllowIssueStartWithLetter.clicked.disconnect() + self.cbxSplitWords.clicked.disconnect() + + def protofolius_clicked(self, *args: Any, **kwargs: Any) -> None: + if self.cbxProtofoliusIssueNumberScheme.isChecked(): + self.cbxAllowIssueStartWithLetter.setEnabled(False) + self.cbxAllowIssueStartWithLetter.setChecked(True) + else: + self.cbxAllowIssueStartWithLetter.setEnabled(True) + self.filename_parser_test() + + def filename_parser_test(self, *args: Any, **kwargs: Any) -> None: + self._filename_parser_test(self.leFilenameParserTest.text()) + + def _filename_parser_test(self, filename: str) -> None: + filename_info = utils.parse_filename( + filename=filename, + complicated_parser=self.cbxComplicatedParser.isChecked(), + remove_c2c=self.cbxRemoveC2C.isChecked(), + remove_fcbd=self.cbxRemoveFCBD.isChecked(), + remove_publisher=self.cbxRemovePublisher.isChecked(), + split_words=self.cbxSplitWords.isChecked(), + allow_issue_start_with_letter=self.cbxAllowIssueStartWithLetter.isChecked(), + protofolius_issue_number_scheme=self.cbxProtofoliusIssueNumberScheme.isChecked(), + ) + report = "" + for item in ( + "series", + "issue", + "issue_count", + "title", + "volume", + "volume_count", + "year", + "alternate", + "publisher", + "archive", + "remainder", + "annual", + "c2c", + "fcbd", + ): + report += f"{item.title().replace('_', ' ')}: {dict(filename_info)[item]}\n" + self.lblFilenameParserTest.setText(report) def addLiteralReplacement(self) -> None: self.insertRow(self.twLiteralReplacements, self.twLiteralReplacements.rowCount(), Replacement("", "", False)) @@ -319,6 +379,9 @@ class SettingsWindow(QtWidgets.QDialog): self.cbxRemoveC2C.setChecked(self.config[0].Filename_Parsing_remove_c2c) self.cbxRemoveFCBD.setChecked(self.config[0].Filename_Parsing_remove_fcbd) self.cbxRemovePublisher.setChecked(self.config[0].Filename_Parsing_remove_publisher) + self.cbxProtofoliusIssueNumberScheme.setChecked(self.config[0].Filename_Parsing_protofolius_issue_number_scheme) + self.cbxAllowIssueStartWithLetter.setChecked(self.config[0].Filename_Parsing_allow_issue_start_with_letter) + self.switch_parser() self.cbxClearFormBeforePopulating.setChecked(self.config[0].Issue_Identifier_clear_form_before_populating) @@ -434,6 +497,10 @@ class SettingsWindow(QtWidgets.QDialog): self.config[0].Filename_Parsing_remove_c2c = self.cbxRemoveC2C.isChecked() self.config[0].Filename_Parsing_remove_fcbd = self.cbxRemoveFCBD.isChecked() self.config[0].Filename_Parsing_remove_publisher = self.cbxRemovePublisher.isChecked() + self.config[0].Filename_Parsing_allow_issue_start_with_letter = self.cbxAllowIssueStartWithLetter.isChecked() + self.config.values.Filename_Parsing_protofolius_issue_number_scheme = ( + self.cbxProtofoliusIssueNumberScheme.isChecked() + ) self.config[0].Issue_Identifier_clear_form_before_populating = self.cbxClearFormBeforePopulating.isChecked() self.config[0].Issue_Identifier_always_use_publisher_filter = self.cbxUseFilter.isChecked() diff --git a/comictaggerlib/ui/settingswindow.ui b/comictaggerlib/ui/settingswindow.ui index 43a1561..615b6a5 100644 --- a/comictaggerlib/ui/settingswindow.ui +++ b/comictaggerlib/ui/settingswindow.ui @@ -318,6 +318,46 @@ + + + + Use protofolius's issue number scheme + + + + + + + Allow issue numbers to start with a letter + + + + + + + + + + + + + !Preview only! Attempts to split words before parsing the filename. e.g. 'judgedredd' to 'judge dredd' + + + + + + + + + + Qt::PlainText + + + Qt::LinksAccessibleByMouse|Qt::TextSelectableByKeyboard|Qt::TextSelectableByMouse + + + diff --git a/testing/filenames.py b/testing/filenames.py index ce62d84..bf6f89a 100644 --- a/testing/filenames.py +++ b/testing/filenames.py @@ -23,6 +23,23 @@ datadir = pathlib.Path(__file__).parent / "data" cbz_path = datadir / "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz" names = [ + ( + "batman #B01 title (DC).cbz", + "protofolius_issue_number_scheme", + { + "issue": "B1", + "series": "batman", + "title": "title", + "publisher": "DC", + "volume": "", + "year": "", + "remainder": "", + "issue_count": "", + "alternate": "", + "format": "biography/best of", + }, + (False, True), + ), ( "batman #3 title (DC).cbz", "honorific and publisher in series", @@ -724,15 +741,33 @@ names = [ ), ] -fnames = [] +oldfnames = [] +newfnames = [] for p in names: - pp = list(p) - pp[3] = p[3][0] - fnames.append(tuple(pp)) - if "#" in p[0]: - pp[0] = p[0].replace("#", "") - pp[3] = p[3][1] - fnames.append(tuple(pp)) + filename, reason, info, xfail = p + nxfail = xfail[0] + newfnames.append(pytest.param(filename, reason, info, nxfail)) + oldfnames.append( + pytest.param(filename, reason, info, nxfail, marks=pytest.mark.xfail(condition=nxfail, reason="old parser")) + ) + if "#" in filename: + filename = filename.replace("#", "") + nxfail = xfail[1] + if reason == "protofolius_issue_number_scheme": + newfnames.append( + pytest.param( + filename, + reason, + info, + nxfail, + marks=pytest.mark.xfail(condition=nxfail, reason="protofolius_issue_number_scheme"), + ) + ) + else: + newfnames.append(pytest.param(filename, reason, info, nxfail)) + oldfnames.append( + pytest.param(filename, reason, info, nxfail, marks=pytest.mark.xfail(condition=nxfail, reason="old parser")) + ) rnames = [ ( diff --git a/tests/filenameparser_test.py b/tests/filenameparser_test.py index 06f9305..f457cef 100644 --- a/tests/filenameparser_test.py +++ b/tests/filenameparser_test.py @@ -2,18 +2,21 @@ from __future__ import annotations import pytest +import comicapi.filenamelexer import comicapi.filenameparser -from testing.filenames import fnames +from testing.filenames import newfnames, oldfnames -@pytest.mark.parametrize("filename, reason, expected, xfail", fnames) +@pytest.mark.parametrize("filename, reason, expected, xfail", newfnames) def test_file_name_parser_new(filename, reason, expected, xfail): + lex = comicapi.filenamelexer.Lex(filename, "protofolius_issue_number_scheme" == reason) p = comicapi.filenameparser.Parse( - comicapi.filenamelexer.Lex(filename).items, + lex.items, first_is_alt=True, remove_c2c=True, remove_fcbd=True, remove_publisher=True, + protofolius_issue_number_scheme="protofolius_issue_number_scheme" == reason, ) fp = p.filename_info @@ -30,13 +33,13 @@ def test_file_name_parser_new(filename, reason, expected, xfail): assert fp == expected -@pytest.mark.parametrize("filename, reason, expected, xfail", fnames) +@pytest.mark.parametrize("filename, reason, expected, xfail", oldfnames) def test_file_name_parser(filename, reason, expected, xfail): p = comicapi.filenameparser.FileNameParser() p.parse_filename(filename) fp = p.__dict__ # These are currently not tracked in this parser - for s in ["title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count", "remainder"]: + for s in ["title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count", "remainder", "format"]: if s in expected: del expected[s] @@ -44,6 +47,4 @@ def test_file_name_parser(filename, reason, expected, xfail): if "remainder" in fp: del fp["remainder"] - if xfail and fp != expected: - pytest.xfail("old parser") assert fp == expected diff --git a/tests/issuestring_test.py b/tests/issuestring_test.py index 3597686..d657736 100644 --- a/tests/issuestring_test.py +++ b/tests/issuestring_test.py @@ -12,6 +12,9 @@ issues = [ ("1", 1.0, "001"), ("22.BEY", 22.0, "022.BEY"), ("22A", 22.0, "022A"), + ("A22A", 22.0, "A022A"), + ("A22", 22.0, "A022"), + ("A22½", 22.5, "A022½"), ("22-A", 22.0, "022-A"), ("", None, ""), ]