Ensure FilenameInfo is always filled out fixes #556

This commit is contained in:
Timmy Welch 2023-10-23 21:08:55 -07:00
parent 7842109ca2
commit 29ddc3779a
5 changed files with 171 additions and 75 deletions

View File

@ -309,7 +309,7 @@ class FileNameParser:
self.issue = issuestring.IssueString(self.issue).as_string()
class FilenameInfo(TypedDict, total=False):
class FilenameInfo(TypedDict):
alternate: str
annual: bool
archive: str
@ -364,7 +364,23 @@ class Parser:
self.firstItem = True
self.skip = False
self.alt = False
self.filename_info: FilenameInfo = {"series": ""}
self.filename_info = FilenameInfo(
alternate="",
annual=False,
archive="",
c2c=False,
fcbd=False,
issue="",
issue_count="",
publisher="",
remainder="",
series="",
title="",
volume="",
volume_count="",
year="",
format="",
)
self.issue_number_at = None
self.issue_number_marked = False
self.issue_number_passed = False
@ -700,8 +716,8 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ign
def parse_issue_number(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ignore[type-arg]
item = p.input[p.pos]
if "issue" in p.filename_info:
if "alternate" in p.filename_info:
if p.filename_info["issue"]:
if p.filename_info["alternate"]:
p.filename_info["alternate"] += "," + item.val
p.filename_info["alternate"] = item.val
else:
@ -908,7 +924,7 @@ def resolve_year(p: Parser) -> None:
# Sort by likely_year boolean
p.year_candidates.sort(key=itemgetter(0))
if "issue" not in p.filename_info:
if not p.filename_info["issue"]:
year = p.year_candidates.pop(0)
if year[1]:
p.filename_info["issue"] = year[2].val
@ -930,7 +946,7 @@ def resolve_year(p: Parser) -> None:
p.used_items.append(selected_year[2])
# (2008) Title (2009) is many times used to denote the series year if we don't have a volume we use it
if "volume" not in p.filename_info and p.year_candidates and p.year_candidates[-1][0]:
if not p.filename_info["volume"] and p.year_candidates and p.year_candidates[-1][0]:
year = p.year_candidates[-1]
if year[2] not in p.series_parts and year[2] not in p.title_parts:
vol = p.year_candidates.pop()[2]
@ -953,7 +969,7 @@ def resolve_year(p: Parser) -> None:
def resolve_issue(p: Parser) -> None:
# If we don't have an issue try to find it in the series
if "issue" not in p.filename_info and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number:
if not p.filename_info["issue"] and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number:
issue_num = p.series_parts.pop()
# If the number we just popped is a year put it back on it's probably part of the series e.g. Spider-Man 2099
@ -975,23 +991,23 @@ def resolve_issue(p: Parser) -> None:
p.used_items.append(issue_num)
p.issue_number_at = issue_num.pos
if "issue" in p.filename_info:
if p.filename_info["issue"]:
p.filename_info["issue"] = issuestring.IssueString(p.filename_info["issue"].lstrip("#")).as_string()
if "volume" in p.filename_info:
if p.filename_info["volume"]:
p.filename_info["volume"] = p.filename_info["volume"].lstrip("#").lstrip("0")
if "issue" not in p.filename_info:
if not p.filename_info["issue"]:
# We have an alternate move it to the issue
if "alternate" in p.filename_info:
if p.filename_info["alternate"]:
p.filename_info["issue"] = p.filename_info["alternate"]
p.filename_info["alternate"] = ""
if "volume" in p.filename_info:
if p.filename_info["volume"]:
p.filename_info["issue"] = p.filename_info["volume"]
if (
"issue" in p.filename_info
p.filename_info["issue"]
and p.protofolius_issue_number_scheme
and len(p.filename_info["issue"]) > 1
and p.filename_info["issue"][0].isalpha()
@ -1030,25 +1046,6 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty
p.filename_info["remainder"] = get_remainder(p)
# Ensure keys always exist
for s in [
"alternate",
"issue",
"archive",
"series",
"title",
"volume",
"year",
"remainder",
"issue_count",
"volume_count",
"publisher",
]:
if s not in p.filename_info:
p.filename_info[s] = "" # type: ignore
for s in ["fcbd", "c2c", "annual"]:
if s not in p.filename_info:
p.filename_info[s] = False # type: ignore
return None

View File

@ -71,6 +71,24 @@ def parse_filename(
allow_issue_start_with_letter: bool = False,
protofolius_issue_number_scheme: bool = False,
) -> filenameparser.FilenameInfo:
if not filename:
return filenameparser.FilenameInfo(
alternate="",
annual=False,
archive="",
c2c=False,
fcbd=False,
issue="",
issue_count="",
publisher="",
remainder="",
series="",
title="",
volume="",
volume_count="",
year="",
format="",
)
if split_words:
import wordninja
@ -90,19 +108,23 @@ def parse_filename(
else:
fnp = filenameparser.FileNameParser()
fnp.parse_filename(filename)
fni = filenameparser.FilenameInfo()
if fnp.issue:
fni["issue"] = fnp.issue
if fnp.series:
fni["series"] = fnp.series
if fnp.volume:
fni["volume"] = fnp.volume
if fnp.year:
fni["year"] = fnp.year
if fnp.issue_count:
fni["issue_count"] = fnp.issue_count
if fnp.remainder:
fni["remainder"] = fnp.remainder
fni = filenameparser.FilenameInfo(
alternate="",
annual=False,
archive="",
c2c=False,
fcbd=False,
issue=fnp.issue,
issue_count=fnp.issue_count,
publisher="",
remainder=fnp.remainder,
series=fnp.series,
title="",
volume=fnp.volume,
volume_count="",
year=fnp.year,
format="",
)
return fni

View File

@ -361,6 +361,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.cbxRemoveC2C.setEnabled(complicated)
self.cbxRemoveFCBD.setEnabled(complicated)
self.cbxRemovePublisher.setEnabled(complicated)
self.filename_parser_test()
def settings_to_form(self) -> None:
self.disconnect_signals()

View File

@ -22,10 +22,10 @@ import pytest
datadir = pathlib.Path(__file__).parent / "data"
cbz_path = datadir / "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz"
names = [
names: list[tuple[str, str, dict[str, str | bool], tuple[bool, bool]]] = [
(
"Drystan & Esyllt #3",
"Shortened word followed by a number eg No. 13, Mr. 13",
"&",
{
"issue": "3",
"series": "Drystan & Esyllt",
@ -57,6 +57,7 @@ names = [
"Karl May #001 Old Shatterhand.cbr",
"Month in series",
{
"archive": "cbr",
"issue": "1",
"series": "Karl May",
"title": "Old Shatterhand",
@ -193,6 +194,7 @@ names = [
"batman #B01 title (DC).cbz",
"protofolius_issue_number_scheme",
{
"archive": "cbz",
"issue": "B1",
"series": "batman",
"title": "title",
@ -210,6 +212,7 @@ names = [
"batman #3 title (DC).cbz",
"publisher in parenthesis",
{
"archive": "cbz",
"issue": "3",
"series": "batman",
"title": "title",
@ -226,6 +229,7 @@ names = [
"batman #3 title DC.cbz",
"publisher in title",
{
"archive": "cbz",
"issue": "3",
"series": "batman",
"title": "title DC",
@ -242,6 +246,7 @@ names = [
"ms. Marvel #3.cbz",
"honorific and publisher in series",
{
"archive": "cbz",
"issue": "3",
"series": "ms. Marvel",
"title": "",
@ -258,6 +263,7 @@ names = [
f"action comics #{datetime.datetime.now().year}.cbz",
"issue number is current year (digits == 4)",
{
"archive": "cbz",
"issue": f"{datetime.datetime.now().year}",
"series": "action comics",
"title": "",
@ -274,6 +280,7 @@ names = [
"action comics 1024.cbz",
"issue number is current year (digits == 4)",
{
"archive": "cbz",
"issue": "1024",
"series": "action comics",
"title": "",
@ -290,6 +297,7 @@ names = [
"Action Comics 1001 (2018).cbz",
"issue number is current year (digits == 4)",
{
"archive": "cbz",
"issue": "1001",
"series": "Action Comics",
"title": "",
@ -306,6 +314,7 @@ names = [
"january jones #2.cbz",
"month in series",
{
"archive": "cbz",
"issue": "2",
"series": "january jones",
"title": "",
@ -321,6 +330,7 @@ names = [
"#52.cbz",
"issue number only",
{
"archive": "cbz",
"issue": "52",
"series": "52",
"title": "",
@ -336,6 +346,7 @@ names = [
"52 Monster_Island_v1_#2__repaired__c2c.cbz",
"leading alternate",
{
"archive": "cbz",
"issue": "2",
"series": "Monster Island",
"title": "",
@ -352,6 +363,7 @@ names = [
"Monster_Island_v1_#2__repaired__c2c.cbz",
"Example from userguide",
{
"archive": "cbz",
"issue": "2",
"series": "Monster Island",
"title": "",
@ -367,6 +379,7 @@ names = [
"Monster Island v1 #3 (1957) -- The Revenge Of King Klong (noads).cbz",
"Example from userguide",
{
"archive": "cbz",
"issue": "3",
"series": "Monster Island",
"title": "",
@ -381,6 +394,7 @@ names = [
"Foobar-Man Annual #121 - The Wrath of Foobar-Man, Part 1 of 2.cbz",
"Example from userguide",
{
"archive": "cbz",
"issue": "121",
"series": "Foobar-Man Annual",
"title": "The Wrath of Foobar-Man, Part 1 of 2",
@ -396,6 +410,7 @@ names = [
"Plastic Man v1 #002 (1942).cbz",
"Example from userguide",
{
"archive": "cbz",
"issue": "2",
"series": "Plastic Man",
"title": "",
@ -410,6 +425,7 @@ names = [
"Blue Beetle #02.cbr",
"Example from userguide",
{
"archive": "cbr",
"issue": "2",
"series": "Blue Beetle",
"title": "",
@ -424,6 +440,7 @@ names = [
"Monster Island vol. 2 #2.cbz",
"Example from userguide",
{
"archive": "cbz",
"issue": "2",
"series": "Monster Island",
"title": "",
@ -438,6 +455,7 @@ names = [
"Crazy Weird Comics #2 (of 2) (1969).rar",
"Example from userguide",
{
"archive": "rar",
"issue": "2",
"series": "Crazy Weird Comics",
"title": "",
@ -452,6 +470,7 @@ names = [
"Super Strange Yarns (1957) #92 (1969).cbz",
"Example from userguide",
{
"archive": "cbz",
"issue": "92",
"series": "Super Strange Yarns",
"title": "",
@ -466,6 +485,7 @@ names = [
"Action Spy Tales v1965 #3.cbr",
"Example from userguide",
{
"archive": "cbr",
"issue": "3",
"series": "Action Spy Tales",
"title": "",
@ -480,6 +500,7 @@ names = [
" X-Men-V1-#067.cbr",
"hyphen separated with hyphen in series", # only parses correctly because v1 designates the volume
{
"archive": "cbr",
"issue": "67",
"series": "X-Men",
"title": "",
@ -494,6 +515,7 @@ names = [
"Amazing Spider-Man #078.BEY (2022) (Digital) (Zone-Empire).cbr",
"number issue with extra",
{
"archive": "cbr",
"issue": "78.BEY",
"series": "Amazing Spider-Man",
"title": "",
@ -508,6 +530,7 @@ names = [
"Angel Wings #02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
"title after #issue",
{
"archive": "cbr",
"issue": "2",
"series": "Angel Wings",
"title": "Black Widow",
@ -522,6 +545,7 @@ names = [
"Aquaman - Green Arrow - Deep Target #01 (of 07) (2021) (digital) (Son of Ultron-Empire).cbr",
"issue count",
{
"archive": "cbr",
"issue": "1",
"series": "Aquaman - Green Arrow - Deep Target",
"title": "",
@ -536,6 +560,7 @@ names = [
"Aquaman 80th Anniversary 100-Page Super Spectacular (2021) #001 (2021) (Digital) (BlackManta-Empire).cbz",
"numbers in series",
{
"archive": "cbz",
"issue": "1",
"series": "Aquaman 80th Anniversary 100-Page Super Spectacular",
"title": "",
@ -550,6 +575,7 @@ names = [
"Avatar - The Last Airbender - The Legend of Korra (FCBD 2021) (Digital) (mv-DCP).cbr",
"FCBD date",
{
"archive": "cbr",
"issue": "",
"series": "Avatar - The Last Airbender - The Legend of Korra",
"title": "",
@ -565,6 +591,7 @@ names = [
"Avengers By Brian Michael Bendis volume 03 (2013) (Digital) (F2) (Kileko-Empire).cbz",
"volume without issue",
{
"archive": "cbz",
"issue": "3",
"series": "Avengers By Brian Michael Bendis",
"title": "",
@ -579,6 +606,7 @@ names = [
"Avengers By Brian Michael Bendis v03 (2013) (Digital) (F2) (Kileko-Empire).cbz",
"volume without issue",
{
"archive": "cbz",
"issue": "3",
"series": "Avengers By Brian Michael Bendis",
"title": "",
@ -593,6 +621,7 @@ names = [
"Batman '89 (2021) (Webrip) (The Last Kryptonian-DCP).cbr",
"year in title without issue",
{
"archive": "cbr",
"issue": "",
"series": "Batman '89",
"title": "",
@ -607,6 +636,7 @@ names = [
"Batman_-_Superman_#020_(2021)_(digital)_(NeverAngel-Empire).cbr",
"underscores",
{
"archive": "cbr",
"issue": "20",
"series": "Batman - Superman",
"title": "",
@ -621,6 +651,7 @@ names = [
"Black Widow #009 (2021) (Digital) (Zone-Empire).cbr",
"standard",
{
"archive": "cbr",
"issue": "9",
"series": "Black Widow",
"title": "",
@ -635,6 +666,7 @@ names = [
"Blade Runner 2029 #006 (2021) (3 covers) (digital) (Son of Ultron-Empire).cbr",
"year before issue",
{
"archive": "cbr",
"issue": "6",
"series": "Blade Runner 2029",
"title": "",
@ -649,6 +681,7 @@ names = [
"Blade Runner Free Comic Book Day 2021 (2021) (digital-Empire).cbr",
"FCBD year and (year)",
{
"archive": "cbr",
"issue": "",
"series": "Blade Runner Free Comic Book Day 2021",
"title": "",
@ -664,6 +697,7 @@ names = [
"Bloodshot Book 03 (2020) (digital) (Son of Ultron-Empire).cbr",
"book",
{
"archive": "cbr",
"issue": "3",
"series": "Bloodshot",
"title": "Book 03",
@ -678,6 +712,7 @@ names = [
"book of eli #1 (2020) (digital) (Son of Ultron-Empire).cbr",
"book",
{
"archive": "cbr",
"issue": "1",
"series": "book of eli",
"title": "",
@ -692,6 +727,7 @@ names = [
"Cyberpunk 2077 - You Have My Word #02 (2021) (digital) (Son of Ultron-Empire).cbr",
"title",
{
"archive": "cbr",
"issue": "2",
"series": "Cyberpunk 2077",
"title": "You Have My Word",
@ -706,6 +742,7 @@ names = [
"Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
"volume count",
{
"archive": "cbr",
"issue": "8",
"series": "Elephantmen 2259",
"title": "Simple Truth",
@ -721,6 +758,7 @@ names = [
"Free Comic Book Day - Avengers.Hulk (2021) (2048px) (db).cbz",
"'.' in name",
{
"archive": "cbz",
"issue": "",
"series": "Free Comic Book Day - Avengers Hulk",
"title": "",
@ -730,12 +768,13 @@ names = [
"issue_count": "",
"fcbd": True,
},
(True,),
(True, False),
),
(
"Goblin (2021) (digital) (Son of Ultron-Empire).cbr",
"no-issue",
{
"archive": "cbr",
"issue": "",
"series": "Goblin",
"title": "",
@ -744,12 +783,13 @@ names = [
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
(False,),
(False, False),
),
(
"Marvel Previews #002 (January 2022) (Digital-Empire).cbr",
"(month year)",
{
"archive": "cbr",
"issue": "2",
"series": "Marvel Previews",
"title": "",
@ -765,6 +805,7 @@ names = [
"Marvel Two In One V1 #090 c2c (Comixbear-DCP).cbr",
"volume then issue",
{
"archive": "cbr",
"issue": "90",
"series": "Marvel Two In One",
"title": "",
@ -781,6 +822,7 @@ names = [
"Star Wars - War of the Bounty Hunters - IG-88 (2021) (Digital) (Kileko-Empire).cbz",
"number ends series, no-issue",
{
"archive": "cbz",
"issue": "",
"series": "Star Wars - War of the Bounty Hunters - IG-88",
"title": "",
@ -789,12 +831,13 @@ names = [
"remainder": "(Digital) (Kileko-Empire)",
"issue_count": "",
},
(True,),
(True, False),
),
(
"Star Wars - War of the Bounty Hunters - IG-88 #1 (2021) (Digital) (Kileko-Empire).cbz",
"number ends series",
{
"archive": "cbz",
"issue": "1",
"series": "Star Wars - War of the Bounty Hunters - IG-88",
"title": "",
@ -809,6 +852,7 @@ names = [
"The Defenders v1 #058 (1978) (digital).cbz",
"",
{
"archive": "cbz",
"issue": "58",
"series": "The Defenders",
"title": "",
@ -823,6 +867,7 @@ names = [
"The Defenders v1 Annual #01 (1976) (Digital) (Minutemen-Slayer).cbr",
" v in series",
{
"archive": "cbr",
"issue": "1",
"series": "The Defenders Annual",
"title": "",
@ -838,6 +883,7 @@ names = [
"The Magic Order 2 #06 (2022) (Digital) (Zone-Empire)[__913302__].cbz",
"ending id",
{
"archive": "cbz",
"issue": "6",
"series": "The Magic Order 2",
"title": "",
@ -852,6 +898,7 @@ names = [
"Wonder Woman #001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
"issue separates title",
{
"archive": "cbr",
"issue": "1",
"series": "Wonder Woman",
"title": "Wonder Woman Day Special Edition",
@ -866,6 +913,7 @@ names = [
"Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
"date-range, no paren, braces",
{
"archive": "cbz",
"issue": "49",
"series": "Wonder Woman",
"title": "digital", # Don't have a way to get rid of this
@ -881,6 +929,7 @@ names = [
"X-Men, 2021-08-04 (#02) (digital) (Glorith-HD).cbz",
"full-date, issue in parenthesis",
{
"archive": "cbz",
"issue": "2",
"series": "X-Men",
"title": "",
@ -895,6 +944,7 @@ names = [
"Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz",
"title",
{
"archive": "cbz",
"issue": "1",
"series": "Cory Doctorow's Futuristic Tales of the Here and Now",
"title": "Anda's Game",
@ -911,10 +961,34 @@ oldfnames = []
newfnames = []
for p in names:
filename, reason, info, xfail = p
filenameinfo = dict(
alternate="",
annual=False,
archive="",
c2c=False,
fcbd=False,
issue="",
issue_count="",
publisher="",
remainder="",
series="",
title="",
volume="",
volume_count="",
year="",
format="",
)
filenameinfo.update(info)
nxfail = xfail[0]
newfnames.append(pytest.param(filename, reason, info, nxfail))
newfnames.append(pytest.param(filename, reason, filenameinfo.copy(), nxfail))
oldfnames.append(
pytest.param(filename, reason, info, nxfail, marks=pytest.mark.xfail(condition=nxfail, reason="old parser"))
pytest.param(
filename,
reason,
filenameinfo.copy(),
nxfail,
marks=pytest.mark.xfail(condition=nxfail, reason="old parser"),
)
)
if "#" in filename:
filename = filename.replace("#", "")
@ -924,15 +998,21 @@ for p in names:
pytest.param(
filename,
reason,
info,
filenameinfo.copy(),
nxfail,
marks=pytest.mark.xfail(condition=nxfail, reason=reason),
)
)
else:
newfnames.append(pytest.param(filename, reason, info, nxfail))
newfnames.append(pytest.param(filename, reason, filenameinfo.copy(), nxfail))
oldfnames.append(
pytest.param(filename, reason, info, nxfail, marks=pytest.mark.xfail(condition=nxfail, reason="old parser"))
pytest.param(
filename,
reason,
filenameinfo.copy(),
nxfail,
marks=pytest.mark.xfail(condition=nxfail, reason="old parser"),
)
)
rnames = [

View File

@ -4,6 +4,7 @@ import pytest
import comicapi.filenamelexer
import comicapi.filenameparser
import comicapi.utils
from testing.filenames import newfnames, oldfnames
@ -20,31 +21,26 @@ def test_file_name_parser_new(filename, reason, expected, xfail):
)
fp = p.filename_info
for s in ["archive"]:
if s in fp:
del fp[s]
for s in ["alternate", "publisher", "volume_count"]:
if s not in expected:
expected[s] = ""
for s in ["fcbd", "c2c", "annual"]:
if s not in expected:
expected[s] = False
assert fp == expected
@pytest.mark.parametrize("filename, reason, expected, xfail", oldfnames)
def test_file_name_parser(filename, reason, expected, xfail):
p = comicapi.filenameparser.FileNameParser()
p.parse_filename(filename)
fp = p.__dict__
fp = comicapi.utils.parse_filename(filename)
# These are currently not tracked in this parser
for s in ["title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count", "remainder", "format"]:
if s in expected:
del expected[s]
# The remainder is not considered compatible between parsers
if "remainder" in fp:
del fp["remainder"]
for s in [
"title",
"alternate",
"publisher",
"fcbd",
"c2c",
"annual",
"volume_count",
"remainder",
"format",
"archive",
]:
del expected[s]
del fp[s]
assert fp == expected