Merge branch 'protofolius_issue_scheme' into develop
This commit is contained in:
commit
abfd97d915
@ -23,7 +23,7 @@ import sys
|
||||
import traceback
|
||||
from typing import cast
|
||||
|
||||
from comicapi import filenamelexer, filenameparser, utils
|
||||
from comicapi import utils
|
||||
from comicapi.archivers import Archiver, UnknownArchiver, ZipArchiver
|
||||
from comicapi.comet import CoMet
|
||||
from comicapi.comicbookinfo import ComicBookInfo
|
||||
@ -558,53 +558,39 @@ class ComicArchive:
|
||||
remove_fcbd: bool = False,
|
||||
remove_publisher: bool = False,
|
||||
split_words: bool = False,
|
||||
allow_issue_start_with_letter: bool = False,
|
||||
protofolius_issue_number_scheme: bool = False,
|
||||
) -> GenericMetadata:
|
||||
metadata = GenericMetadata()
|
||||
|
||||
filename = self.path.name
|
||||
if split_words:
|
||||
import wordninja
|
||||
filename_info = utils.parse_filename(
|
||||
self.path.name,
|
||||
complicated_parser=complicated_parser,
|
||||
remove_c2c=remove_c2c,
|
||||
remove_fcbd=remove_fcbd,
|
||||
remove_publisher=remove_publisher,
|
||||
split_words=split_words,
|
||||
allow_issue_start_with_letter=allow_issue_start_with_letter,
|
||||
protofolius_issue_number_scheme=protofolius_issue_number_scheme,
|
||||
)
|
||||
metadata.alternate_number = utils.xlate(filename_info.get("alternate", None))
|
||||
metadata.issue = utils.xlate(filename_info.get("issue", None))
|
||||
metadata.issue_count = utils.xlate_int(filename_info.get("issue_count", None))
|
||||
metadata.publisher = utils.xlate(filename_info.get("publisher", None))
|
||||
metadata.series = utils.xlate(filename_info.get("series", None))
|
||||
metadata.title = utils.xlate(filename_info.get("title", None))
|
||||
metadata.volume = utils.xlate_int(filename_info.get("volume", None))
|
||||
metadata.volume_count = utils.xlate_int(filename_info.get("volume_count", None))
|
||||
metadata.year = utils.xlate_int(filename_info.get("year", None))
|
||||
|
||||
filename = " ".join(wordninja.split(self.path.stem)) + self.path.suffix
|
||||
|
||||
if complicated_parser:
|
||||
lex = filenamelexer.Lex(filename)
|
||||
p = filenameparser.Parse(
|
||||
lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher
|
||||
)
|
||||
metadata.alternate_number = utils.xlate(p.filename_info["alternate"])
|
||||
metadata.issue = utils.xlate(p.filename_info["issue"])
|
||||
metadata.issue_count = utils.xlate_int(p.filename_info["issue_count"])
|
||||
metadata.publisher = utils.xlate(p.filename_info["publisher"])
|
||||
metadata.series = utils.xlate(p.filename_info["series"])
|
||||
metadata.title = utils.xlate(p.filename_info["title"])
|
||||
metadata.volume = utils.xlate_int(p.filename_info["volume"])
|
||||
metadata.volume_count = utils.xlate_int(p.filename_info["volume_count"])
|
||||
metadata.year = utils.xlate_int(p.filename_info["year"])
|
||||
|
||||
metadata.scan_info = utils.xlate(p.filename_info["remainder"])
|
||||
metadata.format = "FCBD" if p.filename_info["fcbd"] else None
|
||||
if p.filename_info["annual"]:
|
||||
metadata.format = "Annual"
|
||||
else:
|
||||
fnp = filenameparser.FileNameParser()
|
||||
fnp.parse_filename(filename)
|
||||
|
||||
if fnp.issue:
|
||||
metadata.issue = fnp.issue
|
||||
if fnp.series:
|
||||
metadata.series = fnp.series
|
||||
if fnp.volume:
|
||||
metadata.volume = utils.xlate_int(fnp.volume)
|
||||
if fnp.year:
|
||||
metadata.year = utils.xlate_int(fnp.year)
|
||||
if fnp.issue_count:
|
||||
metadata.issue_count = utils.xlate_int(fnp.issue_count)
|
||||
if fnp.remainder:
|
||||
metadata.scan_info = fnp.remainder
|
||||
metadata.scan_info = utils.xlate(filename_info.get("remainder", None))
|
||||
metadata.format = "FCBD" if filename_info.get("fcbd", None) else None
|
||||
if filename_info.get("annual", None):
|
||||
metadata.format = "Annual"
|
||||
if filename_info.get("format", None):
|
||||
metadata.format = filename_info["format"]
|
||||
|
||||
metadata.is_empty = False
|
||||
|
||||
return metadata
|
||||
|
||||
def export_as_zip(self, zip_filename: pathlib.Path) -> bool:
|
||||
|
@ -81,13 +81,14 @@ class Item:
|
||||
self.typ: ItemType = typ
|
||||
self.pos: int = pos
|
||||
self.val: str = val
|
||||
self.no_space = False
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"{self.val}: index: {self.pos}: {self.typ}"
|
||||
|
||||
|
||||
class Lexer:
|
||||
def __init__(self, string: str) -> None:
|
||||
def __init__(self, string: str, allow_issue_start_with_letter: bool = False) -> None:
|
||||
self.input: str = string # The string being scanned
|
||||
# The next lexing function to enter
|
||||
self.state: Callable[[Lexer], Callable | None] | None = None # type: ignore[type-arg]
|
||||
@ -98,6 +99,7 @@ class Lexer:
|
||||
self.brace_depth: int = 0 # Nesting depth of { }
|
||||
self.sbrace_depth: int = 0 # Nesting depth of [ ]
|
||||
self.items: list[Item] = []
|
||||
self.allow_issue_start_with_letter = allow_issue_start_with_letter
|
||||
|
||||
# Next returns the next rune in the input.
|
||||
def get(self) -> str:
|
||||
@ -143,23 +145,14 @@ class Lexer:
|
||||
self.backup()
|
||||
|
||||
def scan_number(self) -> bool:
|
||||
digits = "0123456789"
|
||||
digits = "0123456789.,"
|
||||
|
||||
self.accept_run(digits)
|
||||
if self.accept("."):
|
||||
if self.accept(digits):
|
||||
self.accept_run(digits)
|
||||
else:
|
||||
self.backup()
|
||||
if self.accept("s"):
|
||||
if not self.accept("t"):
|
||||
self.backup()
|
||||
elif self.accept("nr"):
|
||||
if not self.accept("d"):
|
||||
self.backup()
|
||||
elif self.accept("t"):
|
||||
if not self.accept("h"):
|
||||
self.backup()
|
||||
if self.input[self.pos] == ".":
|
||||
self.backup()
|
||||
while self.get().isalpha():
|
||||
...
|
||||
self.backup()
|
||||
|
||||
return True
|
||||
|
||||
@ -196,23 +189,21 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
|
||||
return lex_space
|
||||
elif r == ".":
|
||||
r = lex.peek()
|
||||
if r < "0" or "9" < r:
|
||||
lex.emit(ItemType.Dot)
|
||||
return lex_filename
|
||||
|
||||
lex.backup()
|
||||
return lex_number
|
||||
lex.emit(ItemType.Dot)
|
||||
return lex_filename
|
||||
elif r == "'":
|
||||
r = lex.peek()
|
||||
if r in "0123456789":
|
||||
if r.isdigit():
|
||||
return lex_number
|
||||
lex.emit(ItemType.Text) # TODO: Change to Text
|
||||
elif "0" <= r <= "9":
|
||||
lex.backup()
|
||||
return lex_number
|
||||
elif r == "#":
|
||||
if "0" <= lex.peek() <= "9":
|
||||
return lex_number
|
||||
if lex.allow_issue_start_with_letter and is_alpha_numeric(lex.peek()):
|
||||
return lex_issue_number
|
||||
elif lex.peek().isdigit() or lex.peek() in "-+.":
|
||||
return lex_issue_number
|
||||
lex.emit(ItemType.Symbol)
|
||||
elif is_operator(r):
|
||||
if r == "-" and lex.peek() == "-":
|
||||
@ -329,6 +320,28 @@ def lex_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # type
|
||||
return lex_filename
|
||||
|
||||
|
||||
def lex_issue_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None:  # type: ignore[type-arg]
    """Lex a candidate issue number and hand control back to ``lex_filename``.

    Only called when lex.input[lex.start] == "#".

    Characters are consumed for as long as ``is_alpha_numeric`` accepts them.
    If at least one consumed character was numeric an ``IssueNumber`` item is
    emitted; otherwise the lexer position is rewound to where this function
    started and a ``Symbol`` item is emitted instead.
    """
    rewind_pos = lex.pos
    saw_digit = False

    while True:
        char = lex.get()
        if not is_alpha_numeric(char):
            # Un-read the character that ended the run; it belongs to the next item.
            lex.backup()
            break
        if char.isnumeric():
            saw_digit = True

    if saw_digit:
        lex.emit(ItemType.IssueNumber)
    else:
        # Nothing numeric followed: discard what was consumed and emit a symbol.
        lex.pos = rewind_pos
        lex.emit(ItemType.Symbol)

    return lex_filename
|
||||
|
||||
|
||||
def is_space(character: str) -> bool:
|
||||
return character in "_ \t"
|
||||
|
||||
@ -346,7 +359,7 @@ def is_symbol(character: str) -> bool:
|
||||
return unicodedata.category(character)[0] in "PS"
|
||||
|
||||
|
||||
def Lex(filename: str) -> Lexer:
|
||||
lex = Lexer(string=os.path.basename(filename))
|
||||
def Lex(filename: str, allow_issue_start_with_letter: bool = False) -> Lexer:
|
||||
lex = Lexer(os.path.basename(filename), allow_issue_start_with_letter)
|
||||
lex.run()
|
||||
return lex
|
||||
|
@ -324,6 +324,21 @@ class FilenameInfo(TypedDict, total=False):
|
||||
volume: str
|
||||
volume_count: str
|
||||
year: str
|
||||
format: str
|
||||
|
||||
|
||||
# Maps the (upper-cased) leading letter of an issue number to the format
# description stored in filename_info["format"] when the protofolius issue
# number scheme is enabled.
protofolius_issue_number_scheme = {
    "B": "biography/best of",
    "C": "compact edition",
    "E": "entertainment/puzzle edition",
    "F": "family book edition",
    "J": "jubileum (anniversary) edition",
    "P": "pocket edition",
    "N": "newly brought out/restyled edition",
    "O": "old editions (or oblong format)",
    "S": "special edition",
    "X": "X-rated edition",
}
|
||||
|
||||
|
||||
eof = filenamelexer.Item(filenamelexer.ItemType.EOF, -1, "")
|
||||
@ -341,6 +356,7 @@ class Parser:
|
||||
remove_c2c: bool = False,
|
||||
remove_fcbd: bool = False,
|
||||
remove_publisher: bool = False,
|
||||
protofolius_issue_number_scheme: bool = False,
|
||||
) -> None:
|
||||
self.state: Callable[[Parser], Callable | None] | None = None # type: ignore[type-arg]
|
||||
self.pos = -1
|
||||
@ -350,6 +366,8 @@ class Parser:
|
||||
self.alt = False
|
||||
self.filename_info: FilenameInfo = {"series": ""}
|
||||
self.issue_number_at = None
|
||||
self.issue_number_marked = False
|
||||
self.issue_number_passed = False
|
||||
self.in_something = 0 # In some sort of brackets {}[]()
|
||||
self.in_brace = 0 # In {}
|
||||
self.in_s_brace = 0 # In []
|
||||
@ -366,6 +384,7 @@ class Parser:
|
||||
self.remove_c2c = remove_c2c
|
||||
self.remove_fcbd = remove_fcbd
|
||||
self.remove_publisher = remove_publisher
|
||||
self.protofolius_issue_number_scheme = protofolius_issue_number_scheme
|
||||
|
||||
self.remove_from_remainder = []
|
||||
if remove_c2c:
|
||||
@ -377,6 +396,7 @@ class Parser:
|
||||
for i, item in enumerate(self.input):
|
||||
if item.typ == filenamelexer.ItemType.IssueNumber:
|
||||
self.issue_number_at = i
|
||||
self.issue_number_marked = True
|
||||
|
||||
# Get returns the next Item in the input.
|
||||
def get(self) -> filenamelexer.Item:
|
||||
@ -395,11 +415,11 @@ class Parser:
|
||||
return self.input[self.pos + 1]
|
||||
|
||||
# Peek_back returns but does not step back the previous Item in the input.
|
||||
def peek_back(self) -> filenamelexer.Item:
|
||||
if int(self.pos) == 0:
|
||||
def peek_back(self, length: int = 1) -> filenamelexer.Item:
|
||||
if int(self.pos) - length < 0:
|
||||
return eof
|
||||
|
||||
return self.input[self.pos - 1]
|
||||
return self.input[self.pos - length]
|
||||
|
||||
# Backup steps back one Item.
|
||||
def backup(self) -> None:
|
||||
@ -413,7 +433,6 @@ class Parser:
|
||||
|
||||
def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ignore[type-arg]
|
||||
item: filenamelexer.Item = p.get()
|
||||
|
||||
# We're done, time to do final processing
|
||||
if item.typ == filenamelexer.ItemType.EOF:
|
||||
return parse_finish
|
||||
@ -429,7 +448,7 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ign
|
||||
|
||||
# Issue number is not 4 digits e.g. a year
|
||||
# If this is still used in 7978 years, something is terribly wrong
|
||||
if len(item.val.lstrip("0")) != 4:
|
||||
if len(item.val.lstrip("0")) < 4:
|
||||
# Assume that operators indicate a non-issue number e.g. IG-88 or 88-IG
|
||||
if filenamelexer.ItemType.Operator not in (p.peek().typ, p.peek_back().typ):
|
||||
# It is common to use '89 to refer to an annual reprint from 1989
|
||||
@ -443,7 +462,6 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ign
|
||||
else:
|
||||
p.operator_rejected.append(item)
|
||||
# operator rejected used later to add back to the series/title
|
||||
|
||||
# It is more likely to be a year if it is inside parentheses.
|
||||
if p.in_something > 0:
|
||||
likely_year = len(item.val.lstrip("0")) == 4
|
||||
@ -500,23 +518,30 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ign
|
||||
likely_issue_number = likely_issue_number and item.val[0] != "'"
|
||||
p.year_candidates.append((likely_year, likely_issue_number, item))
|
||||
# Ensures that IG-88 gets added back to the series/title
|
||||
elif (
|
||||
p.in_something == 0
|
||||
and p.peek_back().typ == filenamelexer.ItemType.Operator
|
||||
or p.peek().typ == filenamelexer.ItemType.Operator
|
||||
):
|
||||
# We're not in something and the next or previous type is an operator, add it to the series
|
||||
p.series_parts.append(item)
|
||||
p.used_items.append(item)
|
||||
else:
|
||||
if p.in_something == 0:
|
||||
if p.peek_back().typ in (filenamelexer.ItemType.IssueNumber, filenamelexer.ItemType.Number) or (
|
||||
p.peek_back().typ == filenamelexer.ItemType.Space
|
||||
and p.peek_back(2).typ in (filenamelexer.ItemType.IssueNumber, filenamelexer.ItemType.Number)
|
||||
):
|
||||
return parse_series
|
||||
if (
|
||||
p.peek_back().typ == filenamelexer.ItemType.Operator
|
||||
or p.peek().typ == filenamelexer.ItemType.Operator
|
||||
):
|
||||
# We're not in something and the next or previous type is an operator, add it to the series
|
||||
p.series_parts.append(item)
|
||||
p.used_items.append(item)
|
||||
|
||||
p.get()
|
||||
return parse_series
|
||||
p.get()
|
||||
return parse_series
|
||||
|
||||
# Number with a leading hash e.g. #003
|
||||
elif item.typ == filenamelexer.ItemType.IssueNumber:
|
||||
# Unset first item
|
||||
if p.firstItem:
|
||||
p.firstItem = False
|
||||
p.issue_number_passed = True
|
||||
return parse_issue_number
|
||||
|
||||
# Matches FCBD. Not added to p.used_items so it will show in "remainder"
|
||||
@ -706,23 +731,24 @@ def parse_issue_number(p: Parser) -> Callable[[Parser], Callable | None] | None:
|
||||
|
||||
def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ignore[type-arg]
|
||||
item = p.input[p.pos]
|
||||
|
||||
series: list[list[filenamelexer.Item]] = [[]]
|
||||
# Space and Dots are not useful at the beginning of a title/series
|
||||
if not p.skip and item.typ not in [filenamelexer.ItemType.Space, filenamelexer.ItemType.Dot]:
|
||||
series[0].append(item)
|
||||
|
||||
current_part = 0
|
||||
prev_space = False
|
||||
|
||||
title_parts: list[filenamelexer.Item] = []
|
||||
series_parts: list[filenamelexer.Item] = []
|
||||
|
||||
prev_space = False
|
||||
series: list[list[filenamelexer.Item]] = [[]]
|
||||
|
||||
# We stop parsing the series when certain things come up if nothing was done with them continue where we left off
|
||||
if p.peek_back().typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.Calendar]:
|
||||
series_parts = p.series_parts
|
||||
p.series_parts = []
|
||||
|
||||
# Space and Dots are not useful at the beginning of a title/series
|
||||
if not p.skip and item.typ not in [filenamelexer.ItemType.Space, filenamelexer.ItemType.Dot]:
|
||||
if item.typ == filenamelexer.ItemType.Text:
|
||||
p.backup()
|
||||
else:
|
||||
series[0].append(item)
|
||||
# Skip is only true if we have come across '--' or '__'
|
||||
while not p.skip:
|
||||
item = p.get()
|
||||
@ -738,9 +764,16 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty
|
||||
filenamelexer.ItemType.Honorific,
|
||||
]:
|
||||
series[current_part].append(item)
|
||||
if item.typ == filenamelexer.ItemType.Honorific and p.peek().typ == filenamelexer.ItemType.Dot:
|
||||
series[current_part].append(p.get())
|
||||
elif item.typ == filenamelexer.ItemType.Publisher:
|
||||
if p.peek().typ == filenamelexer.ItemType.Dot:
|
||||
dot = p.get()
|
||||
if item.typ == filenamelexer.ItemType.Honorific or (
|
||||
p.peek().typ == filenamelexer.ItemType.Space
|
||||
and item.typ in (filenamelexer.ItemType.Text, filenamelexer.ItemType.Publisher)
|
||||
):
|
||||
series[current_part].append(dot)
|
||||
else:
|
||||
p.backup()
|
||||
if item.typ == filenamelexer.ItemType.Publisher:
|
||||
p.filename_info["publisher"] = item.val
|
||||
|
||||
# Handle Volume
|
||||
@ -784,9 +817,12 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty
|
||||
p.filename_info["volume"] = t2do.convert(item.val)
|
||||
break
|
||||
|
||||
# This is 6 in '1 of 6'
|
||||
if series[current_part] and series[current_part][-1].val.casefold() == "of":
|
||||
series[current_part].append(item)
|
||||
count = get_number(p, p.pos + 1)
|
||||
# this is an issue or volume number
|
||||
if count is not None:
|
||||
p.backup()
|
||||
break
|
||||
|
||||
if p.peek().typ == filenamelexer.ItemType.Space:
|
||||
p.get()
|
||||
# We have 2 numbers, add the first to the series and then go back to parse
|
||||
@ -794,24 +830,52 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty
|
||||
series[current_part].append(item)
|
||||
break
|
||||
|
||||
# We have 1 number break here, it's possible it's the issue
|
||||
p.backup() # Whitespace
|
||||
p.backup() # The number
|
||||
break
|
||||
# the issue number has been marked and passed, keep it as a part of the series
|
||||
if (
|
||||
p.issue_number_marked
|
||||
and p.issue_number_passed
|
||||
or p.issue_number_at is not None
|
||||
and not p.issue_number_marked
|
||||
):
|
||||
# We already have an issue number, this should be a part of the series
|
||||
series[current_part].append(item)
|
||||
else:
|
||||
# We have 1 number break here, it's possible it's the issue
|
||||
p.backup() # Whitespace
|
||||
p.backup() # The number
|
||||
break
|
||||
|
||||
# We have 1 number break here, it's possible it's the issue
|
||||
else:
|
||||
p.backup() # The number
|
||||
break
|
||||
# the issue number has been #marked or passed, keep it as a part of the series
|
||||
if (
|
||||
p.issue_number_marked
|
||||
and p.issue_number_passed
|
||||
or p.issue_number_at is not None
|
||||
and not p.issue_number_marked
|
||||
):
|
||||
# We already have an issue number, this should be a part of the series
|
||||
series[current_part].append(item)
|
||||
else:
|
||||
p.backup() # The number
|
||||
break
|
||||
|
||||
else:
|
||||
# Ensure 'ms. marvel' parses 'ms.' correctly
|
||||
if item.typ == filenamelexer.ItemType.Dot and p.peek_back().typ == filenamelexer.ItemType.Honorific:
|
||||
series[current_part].append(item)
|
||||
# Allows avengers.hulk to parse correctly
|
||||
elif item.typ == filenamelexer.ItemType.Dot and p.peek().typ == filenamelexer.ItemType.Text:
|
||||
# Marks the dot as used so that the remainder is clean
|
||||
p.used_items.append(item)
|
||||
if item.typ == filenamelexer.ItemType.Dot:
|
||||
if p.peek_back().typ == filenamelexer.ItemType.Honorific:
|
||||
series[current_part].append(item)
|
||||
elif (
|
||||
p.peek().typ == filenamelexer.ItemType.Number
|
||||
or p.peek_back().typ == filenamelexer.ItemType.Text
|
||||
and len(p.peek_back().val) == 1
|
||||
):
|
||||
series[current_part].append(item)
|
||||
item.no_space = True
|
||||
# Allows avengers.hulk to parse correctly
|
||||
elif p.peek().typ in (filenamelexer.ItemType.Text,):
|
||||
# Marks the dot as used so that the remainder is clean
|
||||
p.used_items.append(item)
|
||||
else:
|
||||
p.backup()
|
||||
break
|
||||
@ -926,6 +990,16 @@ def resolve_issue(p: Parser) -> None:
|
||||
if "volume" in p.filename_info:
|
||||
p.filename_info["issue"] = p.filename_info["volume"]
|
||||
|
||||
if (
|
||||
"issue" in p.filename_info
|
||||
and p.protofolius_issue_number_scheme
|
||||
and len(p.filename_info["issue"]) > 1
|
||||
and p.filename_info["issue"][0].isalpha()
|
||||
and p.filename_info["issue"][0].upper() in protofolius_issue_number_scheme
|
||||
and p.filename_info["issue"][1].isnumeric()
|
||||
):
|
||||
p.filename_info["format"] = protofolius_issue_number_scheme[p.filename_info["issue"][0].upper()]
|
||||
|
||||
|
||||
def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ignore[type-arg]
|
||||
resolve_year(p)
|
||||
@ -944,7 +1018,7 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty
|
||||
p.filename_info["series"] = join_title(p.series_parts)
|
||||
p.used_items.extend(p.series_parts)
|
||||
else:
|
||||
p.filename_info["series"] = p.filename_info["issue"]
|
||||
p.filename_info["series"] = p.filename_info.get("issue", "")
|
||||
|
||||
if "free comic book" in p.filename_info["series"].casefold():
|
||||
p.filename_info["fcbd"] = True
|
||||
@ -1051,7 +1125,7 @@ def parse_info_specifier(p: Parser) -> Callable[[Parser], Callable | None] | Non
|
||||
|
||||
# 'of' is only special if it is inside a parenthesis.
|
||||
elif item.val.casefold() == "of":
|
||||
i = get_number(p, index)
|
||||
i = get_number_rev(p, index)
|
||||
if i is not None:
|
||||
if p.in_something > 0:
|
||||
if p.issue_number_at is None:
|
||||
@ -1087,7 +1161,7 @@ def parse_info_specifier(p: Parser) -> Callable[[Parser], Callable | None] | Non
|
||||
|
||||
|
||||
# Gets 03 in '03 of 6'
|
||||
def get_number(p: Parser, index: int) -> filenamelexer.Item | None:
|
||||
def get_number_rev(p: Parser, index: int) -> filenamelexer.Item | None:
|
||||
# Go backward through the filename to see if we can find what this is of eg '03 (of 6)' or '008 title 03 (of 6)'
|
||||
rev = p.input[:index]
|
||||
rev.reverse()
|
||||
@ -1105,6 +1179,36 @@ def get_number(p: Parser, index: int) -> filenamelexer.Item | None:
|
||||
# We got our number, time to leave
|
||||
return i
|
||||
# This is not a number and not an ignorable type, give up looking for the number this count belongs to
|
||||
break
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# Gets 6 in '03 of 6'
|
||||
def get_number(p: Parser, index: int) -> filenamelexer.Item | None:
    """Return the count that follows an 'of', e.g. the 6 in '03 of 6'.

    Scans ``p.input`` forward starting at ``index``. Opening brackets and
    spaces are skipped, and an item whose value is "of" (compared
    case-insensitively, matching the other 'of' checks in this parser) arms
    the search. The first Number/IssueNumber item seen after that is returned.

    Returns None when the scan reaches a number before any 'of', reaches any
    other non-ignorable item, or runs out of input.
    """
    # Go forward through the filename to see if we can find what this is of eg '03 (of 6)' or '008 title 03 (of 6)'
    of_found = False
    ignorable = (
        filenamelexer.ItemType.LeftParen,
        filenamelexer.ItemType.LeftBrace,
        filenamelexer.ItemType.LeftSBrace,
        filenamelexer.ItemType.Space,
    )
    numeric = (filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber)

    for item in p.input[index:]:
        # We don't care about these types, we are looking to see if there is a number that is possibly different from
        # the issue number for this count
        if item.typ in ignorable:
            continue
        # casefold so 'Of' / 'OF' are recognised the same way as elsewhere in the parser
        if item.val.casefold() == "of":
            of_found = True
            continue
        if of_found and item.typ in numeric:
            # We got our number, time to leave
            return item
        # This is not the number after 'of' and not an ignorable type, give up looking for the count
        break

    return None
|
||||
|
||||
@ -1122,11 +1226,21 @@ def join_title(lst: list[filenamelexer.Item]) -> str:
|
||||
if i == len(lst) - 1:
|
||||
continue
|
||||
# No space after honorifics with a dot
|
||||
if item.typ == filenamelexer.ItemType.Honorific and lst[i + 1].typ == filenamelexer.ItemType.Dot:
|
||||
if (
|
||||
item.typ in (filenamelexer.ItemType.Honorific, filenamelexer.ItemType.Text)
|
||||
and lst[i + 1].typ == filenamelexer.ItemType.Dot
|
||||
):
|
||||
continue
|
||||
if item.no_space:
|
||||
continue
|
||||
# No space if the next item is an operator or symbol
|
||||
if lst[i + 1].typ in [filenamelexer.ItemType.Operator, filenamelexer.ItemType.Symbol]:
|
||||
continue
|
||||
# except if followed by a dollar sign
|
||||
if not (
|
||||
lst[i].typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber]
|
||||
and lst[i + 1].val == "$"
|
||||
):
|
||||
continue
|
||||
|
||||
# Add a space
|
||||
title += " "
|
||||
@ -1140,6 +1254,7 @@ def Parse(
|
||||
remove_c2c: bool = False,
|
||||
remove_fcbd: bool = False,
|
||||
remove_publisher: bool = False,
|
||||
protofolius_issue_number_scheme: bool = False,
|
||||
) -> Parser:
|
||||
p = Parser(
|
||||
lexer_result=lexer_result,
|
||||
@ -1147,6 +1262,7 @@ def Parse(
|
||||
remove_c2c=remove_c2c,
|
||||
remove_fcbd=remove_fcbd,
|
||||
remove_publisher=remove_publisher,
|
||||
protofolius_issue_number_scheme=protofolius_issue_number_scheme,
|
||||
)
|
||||
p.run()
|
||||
return p
|
||||
|
@ -32,6 +32,7 @@ class IssueString:
|
||||
|
||||
self.num = None
|
||||
self.suffix = ""
|
||||
self.prefix = ""
|
||||
|
||||
if text is None:
|
||||
return
|
||||
@ -41,18 +42,25 @@ class IssueString:
|
||||
if len(text) == 0:
|
||||
return
|
||||
|
||||
for idx, r in enumerate(text):
|
||||
if not r.isalpha():
|
||||
break
|
||||
self.prefix = text[:idx]
|
||||
self.num, self.suffix = self.get_number(text[idx:])
|
||||
|
||||
def get_number(self, text: str) -> tuple[float | None, str]:
|
||||
num, suffix = None, ""
|
||||
start = 0
|
||||
# skip the minus sign if it's first
|
||||
if text[0] == "-":
|
||||
if text[0] in ("-", "+"):
|
||||
start = 1
|
||||
else:
|
||||
start = 0
|
||||
|
||||
# if it's still not numeric at start skip it
|
||||
if text[start].isdigit() or text[start] == ".":
|
||||
# walk through the string, look for split point (the first non-numeric)
|
||||
decimal_count = 0
|
||||
for idx in range(start, len(text)):
|
||||
if text[idx] not in "0123456789.":
|
||||
if not (text[idx].isdigit() or text[idx] in "."):
|
||||
break
|
||||
# special case: also split on second "."
|
||||
if text[idx] == ".":
|
||||
@ -71,42 +79,48 @@ class IssueString:
|
||||
if idx == 1 and start == 1:
|
||||
idx = 0
|
||||
|
||||
part1 = text[0:idx]
|
||||
part2 = text[idx : len(text)]
|
||||
|
||||
if part1 != "":
|
||||
self.num = float(part1)
|
||||
self.suffix = part2
|
||||
if text[0:idx]:
|
||||
num = float(text[0:idx])
|
||||
suffix = text[idx : len(text)]
|
||||
else:
|
||||
self.suffix = text
|
||||
suffix = text
|
||||
return num, suffix
|
||||
|
||||
def as_string(self, pad: int = 0) -> str:
|
||||
# return the float, left side zero-padded, with suffix attached
|
||||
"""return the number, left side zero-padded, with suffix attached"""
|
||||
|
||||
# if there is no number return the text
|
||||
if self.num is None:
|
||||
return self.suffix
|
||||
return self.prefix + self.suffix
|
||||
|
||||
# negative is added back in last
|
||||
negative = self.num < 0
|
||||
|
||||
num_f = abs(self.num)
|
||||
|
||||
# used for padding
|
||||
num_int = int(num_f)
|
||||
num_s = str(num_int)
|
||||
if float(num_int) != num_f:
|
||||
num_s = str(num_f)
|
||||
|
||||
num_s += self.suffix
|
||||
if num_f.is_integer():
|
||||
num_s = str(num_int)
|
||||
else:
|
||||
num_s = str(num_f)
|
||||
|
||||
# create padding
|
||||
padding = ""
|
||||
# we only pad the whole number part, we don't care about the decimal
|
||||
length = len(str(num_int))
|
||||
if length < pad:
|
||||
padding = "0" * (pad - length)
|
||||
|
||||
# add the padding to the front
|
||||
num_s = padding + num_s
|
||||
|
||||
# finally add the negative back in
|
||||
if negative:
|
||||
num_s = "-" + num_s
|
||||
|
||||
return num_s
|
||||
# return the prefix + formatted number + suffix
|
||||
return self.prefix + num_s + self.suffix
|
||||
|
||||
def as_float(self) -> float | None:
|
||||
# return the float, with no suffix
|
||||
|
@ -26,6 +26,7 @@ from shutil import which # noqa: F401
|
||||
from typing import Any
|
||||
|
||||
import comicapi.data
|
||||
from comicapi import filenamelexer, filenameparser
|
||||
|
||||
try:
|
||||
import icu
|
||||
@ -60,6 +61,51 @@ def os_sorted(lst: Iterable) -> Iterable:
|
||||
return sorted(lst, key=key)
|
||||
|
||||
|
||||
def parse_filename(
    filename: str,
    complicated_parser: bool = False,
    remove_c2c: bool = False,
    remove_fcbd: bool = False,
    remove_publisher: bool = False,
    split_words: bool = False,
    allow_issue_start_with_letter: bool = False,
    protofolius_issue_number_scheme: bool = False,
) -> filenameparser.FilenameInfo:
    """Extract comic metadata fields from *filename*.

    Args:
        filename: The file name to parse.
        complicated_parser: Use the lexer-based ``filenameparser.Parse``
            instead of the simple ``filenameparser.FileNameParser``.
        remove_c2c: Forwarded to the complicated parser only.
        remove_fcbd: Forwarded to the complicated parser only.
        remove_publisher: Forwarded to the complicated parser only.
        split_words: Split the name (without its extension) into words with
            ``wordninja`` and re-join them with spaces before parsing.
        allow_issue_start_with_letter: Forwarded to the lexer of the
            complicated parser only.
        protofolius_issue_number_scheme: Forwarded to the complicated parser only.

    Returns:
        The complicated parser's ``filename_info``, or, for the simple parser,
        a ``FilenameInfo`` holding only the fields that parsed to a truthy value.
    """
    if split_words:
        # Imported lazily so wordninja is only loaded when word splitting is requested.
        import wordninja

        stem, extension = os.path.splitext(filename)
        filename = " ".join(wordninja.split(stem)) + extension

    if not complicated_parser:
        simple_parser = filenameparser.FileNameParser()
        simple_parser.parse_filename(filename)

        info = filenameparser.FilenameInfo()
        # Copy across only the fields the simple parser actually found.
        if simple_parser.issue:
            info["issue"] = simple_parser.issue
        if simple_parser.series:
            info["series"] = simple_parser.series
        if simple_parser.volume:
            info["volume"] = simple_parser.volume
        if simple_parser.year:
            info["year"] = simple_parser.year
        if simple_parser.issue_count:
            info["issue_count"] = simple_parser.issue_count
        if simple_parser.remainder:
            info["remainder"] = simple_parser.remainder
        return info

    lexed = filenamelexer.Lex(filename, allow_issue_start_with_letter)
    parsed = filenameparser.Parse(
        lexed.items,
        remove_c2c=remove_c2c,
        remove_fcbd=remove_fcbd,
        remove_publisher=remove_publisher,
        protofolius_issue_number_scheme=protofolius_issue_number_scheme,
    )
    return parsed.filename_info
|
||||
|
||||
|
||||
def combine_notes(existing_notes: str | None, new_notes: str | None, split: str) -> str:
|
||||
split_notes, split_str, untouched_notes = (existing_notes or "").rpartition(split)
|
||||
if split_notes or split_str:
|
||||
|
@ -119,6 +119,18 @@ def filename(parser: settngs.Manager) -> None:
|
||||
action=argparse.BooleanOptionalAction,
|
||||
help="Attempts to remove publisher names from filenames, currently limited to Marvel and DC. Requires --complicated-parser",
|
||||
)
|
||||
parser.add_setting(
|
||||
"--protofolius-issue-number-scheme",
|
||||
default=False,
|
||||
action=argparse.BooleanOptionalAction,
|
||||
help="Use an issue number scheme devised by protofolius for encoding format informatino as a letter in front of an issue number. Implies --allow-issue-start-with-letter. Requires --complicated-parser",
|
||||
)
|
||||
parser.add_setting(
|
||||
"--allow-issue-start-with-letter",
|
||||
default=False,
|
||||
action=argparse.BooleanOptionalAction,
|
||||
help="Allows an issue number to start with a single letter (e.g. '#X01'). Requires --complicated-parser",
|
||||
)
|
||||
|
||||
|
||||
def talker(parser: settngs.Manager) -> None:
|
||||
@ -220,7 +232,7 @@ def autotag(parser: settngs.Manager) -> None:
|
||||
parser.add_setting("remove_archive_after_successful_match", default=False, cmdline=False)
|
||||
|
||||
|
||||
def validate_file_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]:
|
||||
def parse_filter(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]:
|
||||
new_filter = []
|
||||
remove = []
|
||||
for x in config[0].Issue_Identifier_publisher_filter:
|
||||
@ -235,6 +247,13 @@ def validate_file_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_n
|
||||
if x in new_filter:
|
||||
new_filter.remove(x)
|
||||
config[0].Issue_Identifier_publisher_filter = new_filter
|
||||
return config
|
||||
|
||||
|
||||
def validate_file_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]:
|
||||
config = parse_filter(config)
|
||||
if config[0].Filename_Parsing_protofolius_issue_number_scheme:
|
||||
config[0].Filename_Parsing_allow_issue_start_with_letter = True
|
||||
|
||||
config[0].File_Rename_replacements = Replacements(
|
||||
[Replacement(x[0], x[1], x[2]) for x in config[0].File_Rename_replacements[0]],
|
||||
|
@ -69,6 +69,8 @@ class settngs_namespace(settngs.TypedNS):
|
||||
Filename_Parsing_remove_c2c: bool
|
||||
Filename_Parsing_remove_fcbd: bool
|
||||
Filename_Parsing_remove_publisher: bool
|
||||
Filename_Parsing_protofolius_issue_number_scheme: bool
|
||||
Filename_Parsing_allow_issue_start_with_letter: bool
|
||||
|
||||
Sources_source: str
|
||||
Sources_remove_html_tables: bool
|
||||
|
@ -195,6 +195,8 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.settings_to_form()
|
||||
self.rename_test()
|
||||
self.dir_test()
|
||||
self.leFilenameParserTest.setText(self.lblRenameTest.text())
|
||||
self.filename_parser_test()
|
||||
|
||||
# Set General as start tab
|
||||
self.tabWidget.setCurrentIndex(0)
|
||||
@ -222,6 +224,15 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.twLiteralReplacements.cellChanged.connect(self.rename_test)
|
||||
self.twValueReplacements.cellChanged.connect(self.rename_test)
|
||||
|
||||
self.leFilenameParserTest.textEdited.connect(self.filename_parser_test)
|
||||
self.cbxRemoveC2C.clicked.connect(self.filename_parser_test)
|
||||
self.cbxRemoveFCBD.clicked.connect(self.filename_parser_test)
|
||||
self.cbxRemovePublisher.clicked.connect(self.filename_parser_test)
|
||||
self.cbxProtofoliusIssueNumberScheme.clicked.connect(self.filename_parser_test)
|
||||
self.cbxProtofoliusIssueNumberScheme.clicked.connect(self.protofolius_clicked)
|
||||
self.cbxAllowIssueStartWithLetter.clicked.connect(self.filename_parser_test)
|
||||
self.cbxSplitWords.clicked.connect(self.filename_parser_test)
|
||||
|
||||
def disconnect_signals(self) -> None:
|
||||
self.btnAddLiteralReplacement.clicked.disconnect()
|
||||
self.btnAddValueReplacement.clicked.disconnect()
|
||||
@ -241,6 +252,55 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.leRenameTemplate.textEdited.disconnect()
|
||||
self.twLiteralReplacements.cellChanged.disconnect()
|
||||
self.twValueReplacements.cellChanged.disconnect()
|
||||
self.leFilenameParserTest.textEdited.disconnect()
|
||||
self.cbxRemoveC2C.clicked.disconnect()
|
||||
self.cbxRemoveFCBD.clicked.disconnect()
|
||||
self.cbxRemovePublisher.clicked.disconnect()
|
||||
self.cbxProtofoliusIssueNumberScheme.clicked.disconnect()
|
||||
self.cbxAllowIssueStartWithLetter.clicked.disconnect()
|
||||
self.cbxSplitWords.clicked.disconnect()
|
||||
|
||||
def protofolius_clicked(self, *args: Any, **kwargs: Any) -> None:
    """Keep the "allow issue start with letter" checkbox in step with the protofolius checkbox.

    While the protofolius issue number scheme is checked, the
    "allow issue start with letter" checkbox is disabled and forced to checked.
    When the scheme is unchecked, that checkbox is re-enabled and its checked
    state is left untouched. The filename parser preview is refreshed afterwards.

    Any positional or keyword arguments are accepted and ignored.
    """
    scheme_selected = self.cbxProtofoliusIssueNumberScheme.isChecked()

    # The letter option is only user-editable when the scheme is off.
    self.cbxAllowIssueStartWithLetter.setEnabled(not scheme_selected)
    if scheme_selected:
        self.cbxAllowIssueStartWithLetter.setChecked(True)

    self.filename_parser_test()
|
||||
|
||||
def filename_parser_test(self, *args: Any, **kwargs: Any) -> None:
    """Re-run the filename parser preview on the current text of the test line edit.

    Any positional or keyword arguments are accepted and ignored, so this
    can be called with or without extra arguments.
    """
    sample_filename = self.leFilenameParserTest.text()
    self._filename_parser_test(sample_filename)
|
||||
|
||||
def _filename_parser_test(self, filename: str) -> None:
    """Parse *filename* with the parser options currently selected in the form and display the result.

    The options are read from the filename-parsing checkboxes of this dialog
    and passed to ``utils.parse_filename``. One "Label: value" line is written
    per reported field, in a fixed order, to ``lblFilenameParserTest``.

    Args:
        filename: The filename to run through the parser.

    Raises:
        KeyError: If the parse result lacks one of the reported fields.
    """
    # Convert to a plain dict once; the original rebuilt it for every field.
    filename_info = dict(
        utils.parse_filename(
            filename=filename,
            complicated_parser=self.cbxComplicatedParser.isChecked(),
            remove_c2c=self.cbxRemoveC2C.isChecked(),
            remove_fcbd=self.cbxRemoveFCBD.isChecked(),
            remove_publisher=self.cbxRemovePublisher.isChecked(),
            split_words=self.cbxSplitWords.isChecked(),
            allow_issue_start_with_letter=self.cbxAllowIssueStartWithLetter.isChecked(),
            protofolius_issue_number_scheme=self.cbxProtofoliusIssueNumberScheme.isChecked(),
        )
    )

    # Fields shown in the preview, in display order.
    report_fields = (
        "series",
        "issue",
        "issue_count",
        "title",
        "volume",
        "volume_count",
        "year",
        "alternate",
        "publisher",
        "archive",
        "remainder",
        "annual",
        "c2c",
        "fcbd",
    )

    # "issue_count" -> "Issue_Count" -> "Issue Count"; every line ends with a newline.
    report = "".join(f"{item.title().replace('_', ' ')}: {filename_info[item]}\n" for item in report_fields)
    self.lblFilenameParserTest.setText(report)
|
||||
|
||||
def addLiteralReplacement(self) -> None:
|
||||
self.insertRow(self.twLiteralReplacements, self.twLiteralReplacements.rowCount(), Replacement("", "", False))
|
||||
@ -319,6 +379,9 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.cbxRemoveC2C.setChecked(self.config[0].Filename_Parsing_remove_c2c)
|
||||
self.cbxRemoveFCBD.setChecked(self.config[0].Filename_Parsing_remove_fcbd)
|
||||
self.cbxRemovePublisher.setChecked(self.config[0].Filename_Parsing_remove_publisher)
|
||||
self.cbxProtofoliusIssueNumberScheme.setChecked(self.config[0].Filename_Parsing_protofolius_issue_number_scheme)
|
||||
self.cbxAllowIssueStartWithLetter.setChecked(self.config[0].Filename_Parsing_allow_issue_start_with_letter)
|
||||
|
||||
self.switch_parser()
|
||||
|
||||
self.cbxClearFormBeforePopulating.setChecked(self.config[0].Issue_Identifier_clear_form_before_populating)
|
||||
@ -434,6 +497,10 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.config[0].Filename_Parsing_remove_c2c = self.cbxRemoveC2C.isChecked()
|
||||
self.config[0].Filename_Parsing_remove_fcbd = self.cbxRemoveFCBD.isChecked()
|
||||
self.config[0].Filename_Parsing_remove_publisher = self.cbxRemovePublisher.isChecked()
|
||||
self.config[0].Filename_Parsing_allow_issue_start_with_letter = self.cbxAllowIssueStartWithLetter.isChecked()
|
||||
self.config.values.Filename_Parsing_protofolius_issue_number_scheme = (
|
||||
self.cbxProtofoliusIssueNumberScheme.isChecked()
|
||||
)
|
||||
|
||||
self.config[0].Issue_Identifier_clear_form_before_populating = self.cbxClearFormBeforePopulating.isChecked()
|
||||
self.config[0].Issue_Identifier_always_use_publisher_filter = self.cbxUseFilter.isChecked()
|
||||
|
@ -318,6 +318,46 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxProtofoliusIssueNumberScheme">
|
||||
<property name="text">
|
||||
<string>Use protofolius's issue number scheme</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxAllowIssueStartWithLetter">
|
||||
<property name="text">
|
||||
<string>Allow issue numbers to start with a letter</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QGroupBox" name="groupBox_3">
|
||||
<layout class="QVBoxLayout" name="verticalLayout_8">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxSplitWords">
|
||||
<property name="text">
|
||||
<string>!Preview only! Attempts to split words before parsing the filename. e.g. 'judgedredd' to 'judge dredd'</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLineEdit" name="leFilenameParserTest"/>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLabel" name="lblFilenameParserTest">
|
||||
<property name="textFormat">
|
||||
<enum>Qt::PlainText</enum>
|
||||
</property>
|
||||
<property name="textInteractionFlags">
|
||||
<set>Qt::LinksAccessibleByMouse|Qt::TextSelectableByKeyboard|Qt::TextSelectableByMouse</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
@ -23,6 +23,21 @@ datadir = pathlib.Path(__file__).parent / "data"
|
||||
cbz_path = datadir / "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz"
|
||||
|
||||
names = [
|
||||
(
|
||||
"Michel Vaillant #5 Nr. 13 aan de start",
|
||||
"Shortened word followed by a number eg No. 13, Mr. 13",
|
||||
{
|
||||
"issue": "5",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "Nr. 13 aan de start",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Karl May #001 Old Shatterhand.cbr",
|
||||
"Month in series",
|
||||
@ -39,9 +54,146 @@ names = [
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #8 De 8ste man",
|
||||
"Non english ordinal",
|
||||
{
|
||||
"issue": "8",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "De 8ste man",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #13 Mach 1 voor Steve Warson",
|
||||
"number in title",
|
||||
{
|
||||
"issue": "13",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "Mach 1 voor Steve Warson",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #19 5 Meisjes in de race",
|
||||
"number starting title",
|
||||
{
|
||||
"issue": "19",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "5 Meisjes in de race",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #34 Steve Warson gaat K.O.",
|
||||
"acronym",
|
||||
{
|
||||
"issue": "34",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "Steve Warson gaat K.O.",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #40 F.1 in oproer",
|
||||
"acronym with numbers",
|
||||
{
|
||||
"issue": "40",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "F.1 in oproer",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #42 300 kmu door Parijs",
|
||||
"number starting title",
|
||||
{
|
||||
"issue": "42",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "300 kmu door Parijs",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #52 F 3000",
|
||||
"title ends with number",
|
||||
{
|
||||
"issue": "52",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "F 3000",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #66 100.000.000 $ voor Steve Warson",
|
||||
"number separator is . and dollarsign after number",
|
||||
{
|
||||
"issue": "66",
|
||||
"series": "Michel Vaillant",
|
||||
"title": "100.000.000 $ voor Steve Warson",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"batman #B01 title (DC).cbz",
|
||||
"protofolius_issue_number_scheme",
|
||||
{
|
||||
"issue": "B1",
|
||||
"series": "batman",
|
||||
"title": "title",
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
"format": "biography/best of",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"batman #3 title (DC).cbz",
|
||||
"honorific and publisher in series",
|
||||
"publisher in parenthesis",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "batman",
|
||||
@ -57,7 +209,7 @@ names = [
|
||||
),
|
||||
(
|
||||
"batman #3 title DC.cbz",
|
||||
"honorific and publisher in series",
|
||||
"publisher in title",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "batman",
|
||||
@ -740,15 +892,33 @@ names = [
|
||||
),
|
||||
]
|
||||
|
||||
fnames = []
|
||||
oldfnames = []
|
||||
newfnames = []
|
||||
for p in names:
|
||||
pp = list(p)
|
||||
pp[3] = p[3][0]
|
||||
fnames.append(tuple(pp))
|
||||
if "#" in p[0]:
|
||||
pp[0] = p[0].replace("#", "")
|
||||
pp[3] = p[3][1]
|
||||
fnames.append(tuple(pp))
|
||||
filename, reason, info, xfail = p
|
||||
nxfail = xfail[0]
|
||||
newfnames.append(pytest.param(filename, reason, info, nxfail))
|
||||
oldfnames.append(
|
||||
pytest.param(filename, reason, info, nxfail, marks=pytest.mark.xfail(condition=nxfail, reason="old parser"))
|
||||
)
|
||||
if "#" in filename:
|
||||
filename = filename.replace("#", "")
|
||||
nxfail = xfail[1]
|
||||
if reason in ("protofolius_issue_number_scheme", "number starting title"):
|
||||
newfnames.append(
|
||||
pytest.param(
|
||||
filename,
|
||||
reason,
|
||||
info,
|
||||
nxfail,
|
||||
marks=pytest.mark.xfail(condition=nxfail, reason=reason),
|
||||
)
|
||||
)
|
||||
else:
|
||||
newfnames.append(pytest.param(filename, reason, info, nxfail))
|
||||
oldfnames.append(
|
||||
pytest.param(filename, reason, info, nxfail, marks=pytest.mark.xfail(condition=nxfail, reason="old parser"))
|
||||
)
|
||||
|
||||
rnames = [
|
||||
(
|
||||
|
@ -2,18 +2,21 @@ from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
import comicapi.filenamelexer
|
||||
import comicapi.filenameparser
|
||||
from testing.filenames import fnames
|
||||
from testing.filenames import newfnames, oldfnames
|
||||
|
||||
|
||||
@pytest.mark.parametrize("filename, reason, expected, xfail", fnames)
|
||||
@pytest.mark.parametrize("filename, reason, expected, xfail", newfnames)
|
||||
def test_file_name_parser_new(filename, reason, expected, xfail):
|
||||
lex = comicapi.filenamelexer.Lex(filename, "protofolius_issue_number_scheme" == reason)
|
||||
p = comicapi.filenameparser.Parse(
|
||||
comicapi.filenamelexer.Lex(filename).items,
|
||||
lex.items,
|
||||
first_is_alt=True,
|
||||
remove_c2c=True,
|
||||
remove_fcbd=True,
|
||||
remove_publisher=True,
|
||||
protofolius_issue_number_scheme="protofolius_issue_number_scheme" == reason,
|
||||
)
|
||||
fp = p.filename_info
|
||||
|
||||
@ -30,13 +33,13 @@ def test_file_name_parser_new(filename, reason, expected, xfail):
|
||||
assert fp == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("filename, reason, expected, xfail", fnames)
|
||||
@pytest.mark.parametrize("filename, reason, expected, xfail", oldfnames)
|
||||
def test_file_name_parser(filename, reason, expected, xfail):
|
||||
p = comicapi.filenameparser.FileNameParser()
|
||||
p.parse_filename(filename)
|
||||
fp = p.__dict__
|
||||
# These are currently not tracked in this parser
|
||||
for s in ["title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count", "remainder"]:
|
||||
for s in ["title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count", "remainder", "format"]:
|
||||
if s in expected:
|
||||
del expected[s]
|
||||
|
||||
@ -44,6 +47,4 @@ def test_file_name_parser(filename, reason, expected, xfail):
|
||||
if "remainder" in fp:
|
||||
del fp["remainder"]
|
||||
|
||||
if xfail and fp != expected:
|
||||
pytest.xfail("old parser")
|
||||
assert fp == expected
|
||||
|
@ -12,6 +12,9 @@ issues = [
|
||||
("1", 1.0, "001"),
|
||||
("22.BEY", 22.0, "022.BEY"),
|
||||
("22A", 22.0, "022A"),
|
||||
("A22A", 22.0, "A022A"),
|
||||
("A22", 22.0, "A022"),
|
||||
("A22½", 22.5, "A022½"),
|
||||
("22-A", 22.0, "022-A"),
|
||||
("", None, ""),
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user