Improve issue number handling regarding the '#'

This commit is contained in:
Timmy Welch 2022-12-31 02:15:17 -08:00
parent fb4786159d
commit f83f72fa12
No known key found for this signature in database
2 changed files with 107 additions and 187 deletions

View File

@ -132,8 +132,8 @@ class FileNameParser:
else:
# only one word? Check to see if there is a digit, if so use it as the issue number and the series
if any(char.isnumeric() for char in word_list[0][0]):
issue = word_list[0][0]
return issue, start, end
issue = word_list[0][0].removeprefix("#")
return issue, word_list[0][1], word_list[0][2]
# Now try to search for the likely issue number word in the list
@ -172,6 +172,8 @@ class FileNameParser:
if issue_start != 0:
filename = filename[:issue_start]
else:
filename = filename.lstrip("#")
# in case there is no issue number, remove some obvious stuff
if "--" in filename:
@ -232,7 +234,7 @@ class FileNameParser:
if volume:
series = re.sub(r"\s+v(|ol|olume)$", "", series)
return series, volume.strip()
return series.strip().strip("-_.").strip(), volume.strip()
def get_year(self, filename: str, issue_end: int) -> str:
@ -428,37 +430,35 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ign
p.firstItem = False
return parse_issue_number
# The issue number should hopefully not be in parentheses
if p.in_something == 0:
# Issue number is not 4 digits e.g. a year
# If this is still used in 7978 years, something is terribly wrong
if len(item.val.lstrip("0")) != 4:
# Assume that operators indicate a non-issue number e.g. IG-88 or 88-IG
if filenamelexer.ItemType.Operator not in (p.peek().typ, p.peek_back().typ):
# It is common to use '89 to refer to an annual reprint from 1989
if item.val[0] != "'":
# Issue number is not 4 digits e.g. a year
# If this is still used in 7978 years, something is terribly wrong
if len(item.val.lstrip("0")) != 4:
# An issue number starting with '#' was not found and no previous number was found
if p.issue_number_at is None:
# Series has already been started/parsed,
# filters out leading alternate numbers
if len(p.series_parts) > 0:
return parse_issue_number
# An issue number starting with '#' was not found and no previous number was found
if p.issue_number_at is None:
# Series has already been started/parsed,
# filters out leading alternate numbers
if len(p.series_parts) > 0:
return parse_issue_number
else:
p.operator_rejected.append(item)
# operator rejected used later to add back to the series/title
# It is more likely to be a year if it is inside parentheses.
if p.in_something > 0:
likely_year = True
likely_issue_number = len(item.val) < 4
likely_year = len(item.val.lstrip("0")) == 4
likely_issue_number = not likely_year
# If numbers are directly followed by text it most likely isn't a year e.g. 2048px
if p.peek().typ == filenamelexer.ItemType.Text:
likely_year = False
likely_issue_number = p.in_something <= 0
likely_issue_number = p.in_something == 0
# Is either a full year '2001' or a short year "'89"
if len(item.val) == 4 or item.val[0] == "'":
if len(item.val.lstrip("0")) == 4 or item.val[0] == "'":
series = " ".join([x.val for x in p.series_parts])
if p.series_parts and series.casefold().endswith("free comic book day"):
likely_issue_number = False
@ -784,10 +784,13 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty
p.filename_info["volume"] = t2do.convert(item.val)
break
# This is 6 in '1 of 6'
if series[current_part] and series[current_part][-1].val.casefold() == "of":
series[current_part].append(item)
if p.peek().typ == filenamelexer.ItemType.Space:
p.get()
# We have 2 numbers, add the first to the series and then go back to parse
if p.peek().typ == filenamelexer.ItemType.Number:
if p.peek().typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber]:
series[current_part].append(item)
break
@ -795,9 +798,6 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty
p.backup() # Whitespace
p.backup() # The number
break
# This is 6 in '1 of 6'
if series[current_part] and series[current_part][-1].val.casefold() == "of":
series[current_part].append(item)
# We have 1 number break here, it's possible it's the issue
else:
@ -1102,7 +1102,7 @@ def get_number(p: Parser, index: int) -> filenamelexer.Item | None:
filenamelexer.ItemType.Space,
]:
continue
if i.typ == filenamelexer.ItemType.Number:
if i.typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber]:
# We got our number, time to leave
return i
# This is not a number and not an ignorable type, give up looking for the number this count belongs to

View File

@ -22,9 +22,9 @@ import pytest
datadir = pathlib.Path(__file__).parent / "data"
cbz_path = datadir / "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz"
fnames = [
names = [
(
"batman 3 title (DC).cbz",
"batman #3 title (DC).cbz",
"honorific and publisher in series",
{
"issue": "3",
@ -37,10 +37,10 @@ fnames = [
"issue_count": "",
"alternate": "",
},
True,
(False, True),
),
(
"batman 3 title DC.cbz",
"batman #3 title DC.cbz",
"honorific and publisher in series",
{
"issue": "3",
@ -53,10 +53,10 @@ fnames = [
"issue_count": "",
"alternate": "",
},
True,
(False, True),
),
(
"ms. Marvel 3.cbz",
"ms. Marvel #3.cbz",
"honorific and publisher in series",
{
"issue": "3",
@ -69,23 +69,7 @@ fnames = [
"issue_count": "",
"alternate": "",
},
False,
),
(
f"action comics {datetime.datetime.now().year}.cbz",
"issue number is current year (digits == 4)",
{
"issue": f"{datetime.datetime.now().year}",
"series": "action comics",
"title": "",
"publisher": "",
"volume": "",
"year": "",
"remainder": "",
"issue_count": "",
"alternate": "",
},
False,
(False, False),
),
(
f"action comics #{datetime.datetime.now().year}.cbz",
@ -101,10 +85,10 @@ fnames = [
"issue_count": "",
"alternate": "",
},
False,
(False, False),
),
(
"january jones 2.cbz",
"january jones #2.cbz",
"month in series",
{
"issue": "2",
@ -116,10 +100,10 @@ fnames = [
"issue_count": "",
"alternate": "",
},
False,
(False, False),
),
(
"52.cbz",
"#52.cbz",
"issue number only",
{
"issue": "52",
@ -131,10 +115,10 @@ fnames = [
"issue_count": "",
"alternate": "",
},
False,
(False, False),
),
(
"52 Monster_Island_v1_2__repaired__c2c.cbz",
"52 Monster_Island_v1_#2__repaired__c2c.cbz",
"leading alternate",
{
"issue": "2",
@ -147,10 +131,10 @@ fnames = [
"alternate": "52",
"c2c": True,
},
True,
(True, True),
),
(
"Monster_Island_v1_2__repaired__c2c.cbz",
"Monster_Island_v1_#2__repaired__c2c.cbz",
"Example from userguide",
{
"issue": "2",
@ -162,10 +146,10 @@ fnames = [
"issue_count": "",
"c2c": True,
},
False,
(False, False),
),
(
"Monster Island v1 3 (1957) -- The Revenge Of King Klong (noads).cbz",
"Monster Island v1 #3 (1957) -- The Revenge Of King Klong (noads).cbz",
"Example from userguide",
{
"issue": "3",
@ -176,10 +160,10 @@ fnames = [
"remainder": "The Revenge Of King Klong (noads)",
"issue_count": "",
},
False,
(False, False),
),
(
"Foobar-Man Annual 121 - The Wrath of Foobar-Man, Part 1 of 2.cbz",
"Foobar-Man Annual #121 - The Wrath of Foobar-Man, Part 1 of 2.cbz",
"Example from userguide",
{
"issue": "121",
@ -191,10 +175,10 @@ fnames = [
"issue_count": "",
"annual": True,
},
True,
(False, True),
),
(
"Plastic Man v1 002 (1942).cbz",
"Plastic Man v1 #002 (1942).cbz",
"Example from userguide",
{
"issue": "2",
@ -205,10 +189,10 @@ fnames = [
"remainder": "",
"issue_count": "",
},
False,
(False, False),
),
(
"Blue Beetle 02.cbr",
"Blue Beetle #02.cbr",
"Example from userguide",
{
"issue": "2",
@ -219,7 +203,7 @@ fnames = [
"remainder": "",
"issue_count": "",
},
False,
(False, False),
),
(
"Monster Island vol. 2 #2.cbz",
@ -233,10 +217,10 @@ fnames = [
"remainder": "",
"issue_count": "",
},
False,
(False, False),
),
(
"Crazy Weird Comics 2 (of 2) (1969).rar",
"Crazy Weird Comics #2 (of 2) (1969).rar",
"Example from userguide",
{
"issue": "2",
@ -247,7 +231,7 @@ fnames = [
"remainder": "",
"issue_count": "2",
},
False,
(False, False),
),
(
"Super Strange Yarns (1957) #92 (1969).cbz",
@ -261,7 +245,7 @@ fnames = [
"remainder": "",
"issue_count": "",
},
False,
(False, False),
),
(
"Action Spy Tales v1965 #3.cbr",
@ -275,10 +259,10 @@ fnames = [
"remainder": "",
"issue_count": "",
},
False,
(False, False),
),
(
" X-Men-V1-067.cbr",
" X-Men-V1-#067.cbr",
"hyphen separated with hyphen in series", # only parses correctly because v1 designates the volume
{
"issue": "67",
@ -289,10 +273,10 @@ fnames = [
"remainder": "",
"issue_count": "",
},
True,
(False, False),
),
(
"Amazing Spider-Man 078.BEY (2022) (Digital) (Zone-Empire).cbr",
"Amazing Spider-Man #078.BEY (2022) (Digital) (Zone-Empire).cbr",
"number issue with extra",
{
"issue": "78.BEY",
@ -303,21 +287,7 @@ fnames = [
"remainder": "(Digital) (Zone-Empire)",
"issue_count": "",
},
False,
),
(
"Angel Wings 02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
"title after issue",
{
"issue": "2",
"series": "Angel Wings",
"title": "Black Widow",
"volume": "",
"year": "2015",
"remainder": "(Scanlation) (phillywilly)",
"issue_count": "",
},
True,
(False, False),
),
(
"Angel Wings #02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
@ -331,10 +301,10 @@ fnames = [
"remainder": "(Scanlation) (phillywilly)",
"issue_count": "",
},
False,
(False, True),
),
(
"Aquaman - Green Arrow - Deep Target 01 (of 07) (2021) (digital) (Son of Ultron-Empire).cbr",
"Aquaman - Green Arrow - Deep Target #01 (of 07) (2021) (digital) (Son of Ultron-Empire).cbr",
"issue count",
{
"issue": "1",
@ -345,10 +315,10 @@ fnames = [
"issue_count": "7",
"remainder": "(digital) (Son of Ultron-Empire)",
},
False,
(False, False),
),
(
"Aquaman 80th Anniversary 100-Page Super Spectacular (2021) 001 (2021) (Digital) (BlackManta-Empire).cbz",
"Aquaman 80th Anniversary 100-Page Super Spectacular (2021) #001 (2021) (Digital) (BlackManta-Empire).cbz",
"numbers in series",
{
"issue": "1",
@ -359,7 +329,7 @@ fnames = [
"remainder": "(Digital) (BlackManta-Empire)",
"issue_count": "",
},
False,
(False, False),
),
(
"Avatar - The Last Airbender - The Legend of Korra (FCBD 2021) (Digital) (mv-DCP).cbr",
@ -374,7 +344,7 @@ fnames = [
"issue_count": "",
"fcbd": True,
},
True,
(True, False),
),
(
"Avengers By Brian Michael Bendis volume 03 (2013) (Digital) (F2) (Kileko-Empire).cbz",
@ -388,7 +358,7 @@ fnames = [
"remainder": "(Digital) (F2) (Kileko-Empire)",
"issue_count": "",
},
False,
(False, False),
),
(
"Avengers By Brian Michael Bendis v03 (2013) (Digital) (F2) (Kileko-Empire).cbz",
@ -402,7 +372,7 @@ fnames = [
"remainder": "(Digital) (F2) (Kileko-Empire)",
"issue_count": "",
},
False,
(False, False),
),
(
"Batman '89 (2021) (Webrip) (The Last Kryptonian-DCP).cbr",
@ -416,10 +386,10 @@ fnames = [
"remainder": "(Webrip) (The Last Kryptonian-DCP)",
"issue_count": "",
},
False,
(False, False),
),
(
"Batman_-_Superman_020_(2021)_(digital)_(NeverAngel-Empire).cbr",
"Batman_-_Superman_#020_(2021)_(digital)_(NeverAngel-Empire).cbr",
"underscores",
{
"issue": "20",
@ -430,10 +400,10 @@ fnames = [
"remainder": "(digital) (NeverAngel-Empire)",
"issue_count": "",
},
False,
(False, False),
),
(
"Black Widow 009 (2021) (Digital) (Zone-Empire).cbr",
"Black Widow #009 (2021) (Digital) (Zone-Empire).cbr",
"standard",
{
"issue": "9",
@ -444,10 +414,10 @@ fnames = [
"remainder": "(Digital) (Zone-Empire)",
"issue_count": "",
},
False,
(False, False),
),
(
"Blade Runner 2029 006 (2021) (3 covers) (digital) (Son of Ultron-Empire).cbr",
"Blade Runner 2029 #006 (2021) (3 covers) (digital) (Son of Ultron-Empire).cbr",
"year before issue",
{
"issue": "6",
@ -458,7 +428,7 @@ fnames = [
"remainder": "(3 covers) (digital) (Son of Ultron-Empire)",
"issue_count": "",
},
False,
(False, False),
),
(
"Blade Runner Free Comic Book Day 2021 (2021) (digital-Empire).cbr",
@ -473,7 +443,7 @@ fnames = [
"issue_count": "",
"fcbd": True,
},
True,
(True, False),
),
(
"Bloodshot Book 03 (2020) (digital) (Son of Ultron-Empire).cbr",
@ -487,13 +457,13 @@ fnames = [
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
True,
(True, False),
),
(
"book of eli (2020) (digital) (Son of Ultron-Empire).cbr",
"book of eli #1 (2020) (digital) (Son of Ultron-Empire).cbr",
"book",
{
"issue": "",
"issue": "1",
"series": "book of eli",
"title": "",
"volume": "",
@ -501,10 +471,10 @@ fnames = [
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
False,
(False, False),
),
(
"Cyberpunk 2077 - You Have My Word 02 (2021) (digital) (Son of Ultron-Empire).cbr",
"Cyberpunk 2077 - You Have My Word #02 (2021) (digital) (Son of Ultron-Empire).cbr",
"title",
{
"issue": "2",
@ -515,22 +485,7 @@ fnames = [
"issue_count": "",
"remainder": "(digital) (Son of Ultron-Empire)",
},
True,
),
(
"Elephantmen 2259 008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
"volume count",
{
"issue": "8",
"series": "Elephantmen 2259",
"title": "Simple Truth",
"volume": "3",
"year": "2021",
"volume_count": "6",
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
True,
(True, True),
),
(
"Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
@ -545,7 +500,7 @@ fnames = [
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
True,
(True, True),
),
(
"Free Comic Book Day - Avengers.Hulk (2021) (2048px) (db).cbz",
@ -560,7 +515,7 @@ fnames = [
"issue_count": "",
"fcbd": True,
},
True,
(True,),
),
(
"Goblin (2021) (digital) (Son of Ultron-Empire).cbr",
@ -574,10 +529,10 @@ fnames = [
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
False,
(False,),
),
(
"Marvel Previews 002 (January 2022) (Digital-Empire).cbr",
"Marvel Previews #002 (January 2022) (Digital-Empire).cbr",
"(month year)",
{
"issue": "2",
@ -589,23 +544,7 @@ fnames = [
"remainder": "(Digital-Empire)",
"issue_count": "",
},
True,
),
(
"Marvel Two In One V1 090 c2c (Comixbear-DCP).cbr",
"volume issue ctc",
{
"issue": "90",
"series": "Marvel Two In One",
"title": "",
"publisher": "Marvel",
"volume": "1",
"year": "",
"remainder": "(Comixbear-DCP)",
"issue_count": "",
"c2c": True,
},
True,
(True, True),
),
(
"Marvel Two In One V1 #090 c2c (Comixbear-DCP).cbr",
@ -621,7 +560,7 @@ fnames = [
"issue_count": "",
"c2c": True,
},
False,
(False, True),
),
(
"Star Wars - War of the Bounty Hunters - IG-88 (2021) (Digital) (Kileko-Empire).cbz",
@ -635,7 +574,7 @@ fnames = [
"remainder": "(Digital) (Kileko-Empire)",
"issue_count": "",
},
True,
(True,),
),
(
"Star Wars - War of the Bounty Hunters - IG-88 #1 (2021) (Digital) (Kileko-Empire).cbz",
@ -649,10 +588,10 @@ fnames = [
"remainder": "(Digital) (Kileko-Empire)",
"issue_count": "",
},
False,
(False, False),
),
(
"The Defenders v1 058 (1978) (digital).cbz",
"The Defenders v1 #058 (1978) (digital).cbz",
"",
{
"issue": "58",
@ -663,10 +602,10 @@ fnames = [
"remainder": "(digital)",
"issue_count": "",
},
False,
(False, False),
),
(
"The Defenders v1 Annual 01 (1976) (Digital) (Minutemen-Slayer).cbr",
"The Defenders v1 Annual #01 (1976) (Digital) (Minutemen-Slayer).cbr",
" v in series",
{
"issue": "1",
@ -678,10 +617,10 @@ fnames = [
"issue_count": "",
"annual": True,
},
True,
(True, True),
),
(
"The Magic Order 2 06 (2022) (Digital) (Zone-Empire)[__913302__].cbz",
"The Magic Order 2 #06 (2022) (Digital) (Zone-Empire)[__913302__].cbz",
"ending id",
{
"issue": "6",
@ -692,21 +631,7 @@ fnames = [
"remainder": "(Digital) (Zone-Empire)[913302]", # Don't really care about double underscores
"issue_count": "",
},
False,
),
(
"Wonder Woman 001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
"issue separates title",
{
"issue": "1",
"series": "Wonder Woman",
"title": "Wonder Woman Day Special Edition",
"volume": "",
"year": "2021",
"remainder": "(digital-Empire)",
"issue_count": "",
},
True,
(False, False),
),
(
"Wonder Woman #001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
@ -720,22 +645,7 @@ fnames = [
"remainder": "(digital-Empire)",
"issue_count": "",
},
False,
),
(
"Wonder Woman 49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
"date-range, no paren, braces",
{
"issue": "49",
"series": "Wonder Woman",
"title": "digital", # Don't have a way to get rid of this
"publisher": "DC",
"volume": "",
"year": "1951",
"remainder": "[downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire)",
"issue_count": "",
},
True,
(False, True),
),
(
"Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
@ -750,7 +660,7 @@ fnames = [
"remainder": "[downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire)",
"issue_count": "",
},
True,
(True, True),
),
(
"X-Men, 2021-08-04 (#02) (digital) (Glorith-HD).cbz",
@ -764,11 +674,11 @@ fnames = [
"remainder": "(digital) (Glorith-HD)",
"issue_count": "",
},
True,
(True, True),
),
(
"Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz",
"full-date, issue in parenthesis",
"title",
{
"issue": "1",
"series": "Cory Doctorow's Futuristic Tales of the Here and Now",
@ -778,10 +688,20 @@ fnames = [
"remainder": "",
"issue_count": "",
},
True,
(True, True),
),
]
# Expand every entry of ``names`` into the concrete cases stored in ``fnames``.
# Each entry contributes the filename exactly as written, paired with the first
# element of its index-3 tuple. When the filename contains '#', a second case is
# added with every '#' removed, paired with the second element of that tuple.
fnames = []
for case in names:
    filename, flags = case[0], case[3]
    # Case 1: the filename as written.
    fnames.append((*case[:3], flags[0], *case[4:]))
    if "#" in filename:
        # Case 2: the same entry with the '#' marker(s) stripped from the filename.
        fnames.append((filename.replace("#", ""), *case[1:3], flags[1], *case[4:]))
rnames = [
(
"{series!c} {price} {year}", # Capitalize