From 29ddc3779a88a9ca943f26b3d750d34e8592230b Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Mon, 23 Oct 2023 21:08:55 -0700 Subject: [PATCH] Ensure FilenameInfo is always filled out fixes #556 --- comicapi/filenameparser.py | 61 +++++++++---------- comicapi/utils.py | 48 +++++++++++---- comictaggerlib/settingswindow.py | 1 + testing/filenames.py | 100 +++++++++++++++++++++++++++---- tests/filenameparser_test.py | 36 +++++------ 5 files changed, 171 insertions(+), 75 deletions(-) diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py index 7741019..35249b7 100644 --- a/comicapi/filenameparser.py +++ b/comicapi/filenameparser.py @@ -309,7 +309,7 @@ class FileNameParser: self.issue = issuestring.IssueString(self.issue).as_string() -class FilenameInfo(TypedDict, total=False): +class FilenameInfo(TypedDict): alternate: str annual: bool archive: str @@ -364,7 +364,23 @@ class Parser: self.firstItem = True self.skip = False self.alt = False - self.filename_info: FilenameInfo = {"series": ""} + self.filename_info = FilenameInfo( + alternate="", + annual=False, + archive="", + c2c=False, + fcbd=False, + issue="", + issue_count="", + publisher="", + remainder="", + series="", + title="", + volume="", + volume_count="", + year="", + format="", + ) self.issue_number_at = None self.issue_number_marked = False self.issue_number_passed = False @@ -700,8 +716,8 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ign def parse_issue_number(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ignore[type-arg] item = p.input[p.pos] - if "issue" in p.filename_info: - if "alternate" in p.filename_info: + if p.filename_info["issue"]: + if p.filename_info["alternate"]: p.filename_info["alternate"] += "," + item.val p.filename_info["alternate"] = item.val else: @@ -908,7 +924,7 @@ def resolve_year(p: Parser) -> None: # Sort by likely_year boolean p.year_candidates.sort(key=itemgetter(0)) - if "issue" not in p.filename_info: + if not p.filename_info["issue"]: year = p.year_candidates.pop(0) if year[1]: p.filename_info["issue"] = year[2].val @@ -930,7 +946,7 @@ def resolve_year(p: Parser) -> None: p.used_items.append(selected_year[2]) # (2008) Title (2009) is many times used to denote the series year if we don't have a volume we use it - if "volume" not in p.filename_info and p.year_candidates and p.year_candidates[-1][0]: + if not p.filename_info["volume"] and p.year_candidates and p.year_candidates[-1][0]: year = p.year_candidates[-1] if year[2] not in p.series_parts and year[2] not in p.title_parts: vol = p.year_candidates.pop()[2] @@ -953,7 +969,7 @@ def resolve_year(p: Parser) -> None: def resolve_issue(p: Parser) -> None: # If we don't have an issue try to find it in the series - if "issue" not in p.filename_info and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number: + if not p.filename_info["issue"] and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number: issue_num = p.series_parts.pop() # If the number we just popped is a year put it back on it's probably part of the series e.g. Spider-Man 2099 @@ -975,23 +991,23 @@ def resolve_issue(p: Parser) -> None: p.used_items.append(issue_num) p.issue_number_at = issue_num.pos - if "issue" in p.filename_info: + if p.filename_info["issue"]: p.filename_info["issue"] = issuestring.IssueString(p.filename_info["issue"].lstrip("#")).as_string() - if "volume" in p.filename_info: + if p.filename_info["volume"]: p.filename_info["volume"] = p.filename_info["volume"].lstrip("#").lstrip("0") - if "issue" not in p.filename_info: + if not p.filename_info["issue"]: # We have an alternate move it to the issue - if "alternate" in p.filename_info: + if p.filename_info["alternate"]: p.filename_info["issue"] = p.filename_info["alternate"] p.filename_info["alternate"] = "" - if "volume" in p.filename_info: + if p.filename_info["volume"]: p.filename_info["issue"] = p.filename_info["volume"] if ( - "issue" in p.filename_info + p.filename_info["issue"] and p.protofolius_issue_number_scheme and len(p.filename_info["issue"]) > 1 and p.filename_info["issue"][0].isalpha() @@ -1030,25 +1046,6 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty p.filename_info["remainder"] = get_remainder(p) - # Ensure keys always exist - for s in [ - "alternate", - "issue", - "archive", - "series", - "title", - "volume", - "year", - "remainder", - "issue_count", - "volume_count", - "publisher", - ]: - if s not in p.filename_info: - p.filename_info[s] = "" # type: ignore - for s in ["fcbd", "c2c", "annual"]: - if s not in p.filename_info: - p.filename_info[s] = False # type: ignore return None diff --git a/comicapi/utils.py b/comicapi/utils.py index b7d4075..e53d41c 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -71,6 +71,24 @@ def parse_filename( allow_issue_start_with_letter: bool = False, protofolius_issue_number_scheme: bool = False, ) -> filenameparser.FilenameInfo: + if not filename: + return filenameparser.FilenameInfo( + alternate="", + annual=False, + archive="", + c2c=False, + fcbd=False, + issue="", + issue_count="", + publisher="", + remainder="", + series="", + title="", + volume="", + volume_count="", + year="", + format="", + ) if split_words: import wordninja @@ -90,19 +108,23 @@ def parse_filename( else: fnp = filenameparser.FileNameParser() fnp.parse_filename(filename) - fni = filenameparser.FilenameInfo() - if fnp.issue: - fni["issue"] = fnp.issue - if fnp.series: - fni["series"] = fnp.series - if fnp.volume: - fni["volume"] = fnp.volume - if fnp.year: - fni["year"] = fnp.year - if fnp.issue_count: - fni["issue_count"] = fnp.issue_count - if fnp.remainder: - fni["remainder"] = fnp.remainder + fni = filenameparser.FilenameInfo( + alternate="", + annual=False, + archive="", + c2c=False, + fcbd=False, + issue=fnp.issue, + issue_count=fnp.issue_count, + publisher="", + remainder=fnp.remainder, + series=fnp.series, + title="", + volume=fnp.volume, + volume_count="", + year=fnp.year, + format="", + ) return fni diff --git a/comictaggerlib/settingswindow.py b/comictaggerlib/settingswindow.py index aaffe75..7bf6cc0 100644 --- a/comictaggerlib/settingswindow.py +++ b/comictaggerlib/settingswindow.py @@ -361,6 +361,7 @@ class SettingsWindow(QtWidgets.QDialog): self.cbxRemoveC2C.setEnabled(complicated) self.cbxRemoveFCBD.setEnabled(complicated) self.cbxRemovePublisher.setEnabled(complicated) + self.filename_parser_test() def settings_to_form(self) -> None: self.disconnect_signals() diff --git a/testing/filenames.py b/testing/filenames.py index 3c1812f..a2325bc 100644 --- a/testing/filenames.py +++ b/testing/filenames.py @@ -22,10 +22,10 @@ import pytest datadir = pathlib.Path(__file__).parent / "data" cbz_path = datadir / "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz" -names = [ +names: list[tuple[str, str, dict[str, str | bool], tuple[bool, bool]]] = [ ( "Drystan & Esyllt #3", - "Shortened word followed by a number eg No. 13, Mr. 13", + "&", { "issue": "3", "series": "Drystan & Esyllt", @@ -57,6 +57,7 @@ names = [ "Karl May #001 Old Shatterhand.cbr", "Month in series", { + "archive": "cbr", "issue": "1", "series": "Karl May", "title": "Old Shatterhand", @@ -193,6 +194,7 @@ names = [ "batman #B01 title (DC).cbz", "protofolius_issue_number_scheme", { + "archive": "cbz", "issue": "B1", "series": "batman", "title": "title", @@ -210,6 +212,7 @@ names = [ "batman #3 title (DC).cbz", "publisher in parenthesis", { + "archive": "cbz", "issue": "3", "series": "batman", "title": "title", @@ -226,6 +229,7 @@ names = [ "batman #3 title DC.cbz", "publisher in title", { + "archive": "cbz", "issue": "3", "series": "batman", "title": "title DC", @@ -242,6 +246,7 @@ names = [ "ms. Marvel #3.cbz", "honorific and publisher in series", { + "archive": "cbz", "issue": "3", "series": "ms. Marvel", "title": "", @@ -258,6 +263,7 @@ names = [ f"action comics #{datetime.datetime.now().year}.cbz", "issue number is current year (digits == 4)", { + "archive": "cbz", "issue": f"{datetime.datetime.now().year}", "series": "action comics", "title": "", @@ -274,6 +280,7 @@ names = [ "action comics 1024.cbz", "issue number is current year (digits == 4)", { + "archive": "cbz", "issue": "1024", "series": "action comics", "title": "", @@ -290,6 +297,7 @@ names = [ "Action Comics 1001 (2018).cbz", "issue number is current year (digits == 4)", { + "archive": "cbz", "issue": "1001", "series": "Action Comics", "title": "", @@ -306,6 +314,7 @@ names = [ "january jones #2.cbz", "month in series", { + "archive": "cbz", "issue": "2", "series": "january jones", "title": "", @@ -321,6 +330,7 @@ names = [ "#52.cbz", "issue number only", { + "archive": "cbz", "issue": "52", "series": "52", "title": "", @@ -336,6 +346,7 @@ names = [ "52 Monster_Island_v1_#2__repaired__c2c.cbz", "leading alternate", { + "archive": "cbz", "issue": "2", "series": "Monster Island", "title": "", @@ -352,6 +363,7 @@ names = [ "Monster_Island_v1_#2__repaired__c2c.cbz", "Example from userguide", { + "archive": "cbz", "issue": "2", "series": "Monster Island", "title": "", @@ -367,6 +379,7 @@ names = [ "Monster Island v1 #3 (1957) -- The Revenge Of King Klong (noads).cbz", "Example from userguide", { + "archive": "cbz", "issue": "3", "series": "Monster Island", "title": "", @@ -381,6 +394,7 @@ names = [ "Foobar-Man Annual #121 - The Wrath of Foobar-Man, Part 1 of 2.cbz", "Example from userguide", { + "archive": "cbz", "issue": "121", "series": "Foobar-Man Annual", "title": "The Wrath of Foobar-Man, Part 1 of 2", @@ -396,6 +410,7 @@ names = [ "Plastic Man v1 #002 (1942).cbz", "Example from userguide", { + "archive": "cbz", "issue": "2", "series": "Plastic Man", "title": "", @@ -410,6 +425,7 @@ names = [ "Blue Beetle #02.cbr", "Example from userguide", { + "archive": "cbr", "issue": "2", "series": "Blue Beetle", "title": "", @@ -424,6 +440,7 @@ names = [ "Monster Island vol. 2 #2.cbz", "Example from userguide", { + "archive": "cbz", "issue": "2", "series": "Monster Island", "title": "", @@ -438,6 +455,7 @@ names = [ "Crazy Weird Comics #2 (of 2) (1969).rar", "Example from userguide", { + "archive": "rar", "issue": "2", "series": "Crazy Weird Comics", "title": "", @@ -452,6 +470,7 @@ names = [ "Super Strange Yarns (1957) #92 (1969).cbz", "Example from userguide", { + "archive": "cbz", "issue": "92", "series": "Super Strange Yarns", "title": "", @@ -466,6 +485,7 @@ names = [ "Action Spy Tales v1965 #3.cbr", "Example from userguide", { + "archive": "cbr", "issue": "3", "series": "Action Spy Tales", "title": "", @@ -480,6 +500,7 @@ names = [ " X-Men-V1-#067.cbr", "hyphen separated with hyphen in series", # only parses correctly because v1 designates the volume { + "archive": "cbr", "issue": "67", "series": "X-Men", "title": "", @@ -494,6 +515,7 @@ names = [ "Amazing Spider-Man #078.BEY (2022) (Digital) (Zone-Empire).cbr", "number issue with extra", { + "archive": "cbr", "issue": "78.BEY", "series": "Amazing Spider-Man", "title": "", @@ -508,6 +530,7 @@ names = [ "Angel Wings #02 - Black Widow (2015) (Scanlation) (phillywilly).cbr", "title after #issue", { + "archive": "cbr", "issue": "2", "series": "Angel Wings", "title": "Black Widow", @@ -522,6 +545,7 @@ names = [ "Aquaman - Green Arrow - Deep Target #01 (of 07) (2021) (digital) (Son of Ultron-Empire).cbr", "issue count", { + "archive": "cbr", "issue": "1", "series": "Aquaman - Green Arrow - Deep Target", "title": "", @@ -536,6 +560,7 @@ names = [ "Aquaman 80th Anniversary 100-Page Super Spectacular (2021) #001 (2021) (Digital) (BlackManta-Empire).cbz", "numbers in series", { + "archive": "cbz", "issue": "1", "series": "Aquaman 80th Anniversary 100-Page Super Spectacular", "title": "", @@ -550,6 +575,7 @@ names = [ "Avatar - The Last Airbender - The Legend of Korra (FCBD 2021) (Digital) (mv-DCP).cbr", "FCBD date", { + "archive": "cbr", "issue": "", "series": "Avatar - The Last Airbender - The Legend of Korra", "title": "", @@ -565,6 +591,7 @@ names = [ "Avengers By Brian Michael Bendis volume 03 (2013) (Digital) (F2) (Kileko-Empire).cbz", "volume without issue", { + "archive": "cbz", "issue": "3", "series": "Avengers By Brian Michael Bendis", "title": "", @@ -579,6 +606,7 @@ names = [ "Avengers By Brian Michael Bendis v03 (2013) (Digital) (F2) (Kileko-Empire).cbz", "volume without issue", { + "archive": "cbz", "issue": "3", "series": "Avengers By Brian Michael Bendis", "title": "", @@ -593,6 +621,7 @@ names = [ "Batman '89 (2021) (Webrip) (The Last Kryptonian-DCP).cbr", "year in title without issue", { + "archive": "cbr", "issue": "", "series": "Batman '89", "title": "", @@ -607,6 +636,7 @@ names = [ "Batman_-_Superman_#020_(2021)_(digital)_(NeverAngel-Empire).cbr", "underscores", { + "archive": "cbr", "issue": "20", "series": "Batman - Superman", "title": "", @@ -621,6 +651,7 @@ names = [ "Black Widow #009 (2021) (Digital) (Zone-Empire).cbr", "standard", { + "archive": "cbr", "issue": "9", "series": "Black Widow", "title": "", @@ -635,6 +666,7 @@ names = [ "Blade Runner 2029 #006 (2021) (3 covers) (digital) (Son of Ultron-Empire).cbr", "year before issue", { + "archive": "cbr", "issue": "6", "series": "Blade Runner 2029", "title": "", @@ -649,6 +681,7 @@ names = [ "Blade Runner Free Comic Book Day 2021 (2021) (digital-Empire).cbr", "FCBD year and (year)", { + "archive": "cbr", "issue": "", "series": "Blade Runner Free Comic Book Day 2021", "title": "", @@ -664,6 +697,7 @@ names = [ "Bloodshot Book 03 (2020) (digital) (Son of Ultron-Empire).cbr", "book", { + "archive": "cbr", "issue": "3", "series": "Bloodshot", "title": "Book 03", @@ -678,6 +712,7 @@ names = [ "book of eli #1 (2020) (digital) (Son of Ultron-Empire).cbr", "book", { + "archive": "cbr", "issue": "1", "series": "book of eli", "title": "", @@ -692,6 +727,7 @@ names = [ "Cyberpunk 2077 - You Have My Word #02 (2021) (digital) (Son of Ultron-Empire).cbr", "title", { + "archive": "cbr", "issue": "2", "series": "Cyberpunk 2077", "title": "You Have My Word", @@ -706,6 +742,7 @@ names = [ "Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr", "volume count", { + "archive": "cbr", "issue": "8", "series": "Elephantmen 2259", "title": "Simple Truth", @@ -721,6 +758,7 @@ names = [ "Free Comic Book Day - Avengers.Hulk (2021) (2048px) (db).cbz", "'.' in name", { + "archive": "cbz", "issue": "", "series": "Free Comic Book Day - Avengers Hulk", "title": "", @@ -730,12 +768,13 @@ names = [ "issue_count": "", "fcbd": True, }, - (True,), + (True, False), ), ( "Goblin (2021) (digital) (Son of Ultron-Empire).cbr", "no-issue", { + "archive": "cbr", "issue": "", "series": "Goblin", "title": "", @@ -744,12 +783,13 @@ names = [ "remainder": "(digital) (Son of Ultron-Empire)", "issue_count": "", }, - (False,), + (False, False), ), ( "Marvel Previews #002 (January 2022) (Digital-Empire).cbr", "(month year)", { + "archive": "cbr", "issue": "2", "series": "Marvel Previews", "title": "", @@ -765,6 +805,7 @@ names = [ "Marvel Two In One V1 #090 c2c (Comixbear-DCP).cbr", "volume then issue", { + "archive": "cbr", "issue": "90", "series": "Marvel Two In One", "title": "", @@ -781,6 +822,7 @@ names = [ "Star Wars - War of the Bounty Hunters - IG-88 (2021) (Digital) (Kileko-Empire).cbz", "number ends series, no-issue", { + "archive": "cbz", "issue": "", "series": "Star Wars - War of the Bounty Hunters - IG-88", "title": "", @@ -789,12 +831,13 @@ names = [ "remainder": "(Digital) (Kileko-Empire)", "issue_count": "", }, - (True,), + (True, False), ), ( "Star Wars - War of the Bounty Hunters - IG-88 #1 (2021) (Digital) (Kileko-Empire).cbz", "number ends series", { + "archive": "cbz", "issue": "1", "series": "Star Wars - War of the Bounty Hunters - IG-88", "title": "", @@ -809,6 +852,7 @@ names = [ "The Defenders v1 #058 (1978) (digital).cbz", "", { + "archive": "cbz", "issue": "58", "series": "The Defenders", "title": "", @@ -823,6 +867,7 @@ names = [ "The Defenders v1 Annual #01 (1976) (Digital) (Minutemen-Slayer).cbr", " v in series", { + "archive": "cbr", "issue": "1", "series": "The Defenders Annual", "title": "", @@ -838,6 +883,7 @@ names = [ "The Magic Order 2 #06 (2022) (Digital) (Zone-Empire)[__913302__].cbz", "ending id", { + "archive": "cbz", "issue": "6", "series": "The Magic Order 2", "title": "", @@ -852,6 +898,7 @@ names = [ "Wonder Woman #001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr", "issue separates title", { + "archive": "cbr", "issue": "1", "series": "Wonder Woman", "title": "Wonder Woman Day Special Edition", @@ -866,6 +913,7 @@ names = [ "Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz", "date-range, no paren, braces", { + "archive": "cbz", "issue": "49", "series": "Wonder Woman", "title": "digital", # Don't have a way to get rid of this @@ -881,6 +929,7 @@ names = [ "X-Men, 2021-08-04 (#02) (digital) (Glorith-HD).cbz", "full-date, issue in parenthesis", { + "archive": "cbz", "issue": "2", "series": "X-Men", "title": "", @@ -895,6 +944,7 @@ names = [ "Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz", "title", { + "archive": "cbz", "issue": "1", "series": "Cory Doctorow's Futuristic Tales of the Here and Now", "title": "Anda's Game", @@ -911,10 +961,34 @@ oldfnames = [] newfnames = [] for p in names: filename, reason, info, xfail = p + filenameinfo = dict( + alternate="", + annual=False, + archive="", + c2c=False, + fcbd=False, + issue="", + issue_count="", + publisher="", + remainder="", + series="", + title="", + volume="", + volume_count="", + year="", + format="", + ) + filenameinfo.update(info) nxfail = xfail[0] - newfnames.append(pytest.param(filename, reason, info, nxfail)) + newfnames.append(pytest.param(filename, reason, filenameinfo.copy(), nxfail)) oldfnames.append( - pytest.param(filename, reason, info, nxfail, marks=pytest.mark.xfail(condition=nxfail, reason="old parser")) + pytest.param( + filename, + reason, + filenameinfo.copy(), + nxfail, + marks=pytest.mark.xfail(condition=nxfail, reason="old parser"), + ) ) if "#" in filename: filename = filename.replace("#", "") @@ -924,15 +998,21 @@ for p in names: pytest.param( filename, reason, - info, + filenameinfo.copy(), nxfail, marks=pytest.mark.xfail(condition=nxfail, reason=reason), ) ) else: - newfnames.append(pytest.param(filename, reason, info, nxfail)) + newfnames.append(pytest.param(filename, reason, filenameinfo.copy(), nxfail)) oldfnames.append( - pytest.param(filename, reason, info, nxfail, marks=pytest.mark.xfail(condition=nxfail, reason="old parser")) + pytest.param( + filename, + reason, + filenameinfo.copy(), + nxfail, + marks=pytest.mark.xfail(condition=nxfail, reason="old parser"), + ) ) rnames = [ diff --git a/tests/filenameparser_test.py b/tests/filenameparser_test.py index f457cef..e7cfec8 100644 --- a/tests/filenameparser_test.py +++ b/tests/filenameparser_test.py @@ -4,6 +4,7 @@ import pytest import comicapi.filenamelexer import comicapi.filenameparser +import comicapi.utils from testing.filenames import newfnames, oldfnames @@ -20,31 +21,26 @@ def test_file_name_parser_new(filename, reason, expected, xfail): ) fp = p.filename_info - for s in ["archive"]: - if s in fp: - del fp[s] - for s in ["alternate", "publisher", "volume_count"]: - if s not in expected: - expected[s] = "" - for s in ["fcbd", "c2c", "annual"]: - if s not in expected: - expected[s] = False - assert fp == expected @pytest.mark.parametrize("filename, reason, expected, xfail", oldfnames) def test_file_name_parser(filename, reason, expected, xfail): - p = comicapi.filenameparser.FileNameParser() - p.parse_filename(filename) - fp = p.__dict__ + fp = comicapi.utils.parse_filename(filename) # These are currently not tracked in this parser - for s in ["title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count", "remainder", "format"]: - if s in expected: - del expected[s] - - # The remainder is not considered compatible between parsers - if "remainder" in fp: - del fp["remainder"] + for s in [ + "title", + "alternate", + "publisher", + "fcbd", + "c2c", + "annual", + "volume_count", + "remainder", + "format", + "archive", + ]: + del expected[s] + del fp[s] assert fp == expected