diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py
index 660a8dc..b4cc108 100644
--- a/comicapi/comicarchive.py
+++ b/comicapi/comicarchive.py
@@ -42,10 +42,10 @@ try:
except ImportError:
pil_available = False
+from comicapi import filenamelexer, filenameparser
from comicapi.comet import CoMet
from comicapi.comicbookinfo import ComicBookInfo
from comicapi.comicinfoxml import ComicInfoXml
-from comicapi.filenameparser import FileNameParser
from comicapi.genericmetadata import GenericMetadata, PageType
logger = logging.getLogger(__name__)
@@ -1127,25 +1127,46 @@ class ComicArchive:
data = self.get_page(idx)
p["ImageSize"] = str(len(data))
- def metadata_from_filename(self, parse_scan_info=True):
+ def metadata_from_filename(
+ self, complicated_parser=False, remove_c2c=False, remove_fcbd=False, remove_publisher=False
+ ):
metadata = GenericMetadata()
- fnp = FileNameParser()
- fnp.parse_filename(self.path)
+ if complicated_parser:
+ lex = filenamelexer.Lex(self.path)
+ p = filenameparser.Parse(
+ lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher
+ )
+ metadata.alternate_number = p.filename_info["alternate"] or None
+ metadata.issue = p.filename_info["issue"] or None
+ metadata.issue_count = p.filename_info["issue_count"] or None
+ metadata.publisher = p.filename_info["publisher"] or None
+ metadata.series = p.filename_info["series"] or None
+ metadata.title = p.filename_info["title"] or None
+ metadata.volume = p.filename_info["volume"] or None
+ metadata.volume_count = p.filename_info["volume_count"] or None
+ metadata.year = p.filename_info["year"] or None
- if fnp.issue != "":
- metadata.issue = fnp.issue
- if fnp.series != "":
- metadata.series = fnp.series
- if fnp.volume != "":
- metadata.volume = fnp.volume
- if fnp.year != "":
- metadata.year = fnp.year
- if fnp.issue_count != "":
- metadata.issue_count = fnp.issue_count
- if parse_scan_info:
- if fnp.remainder != "":
+ metadata.scan_info = p.filename_info["remainder"] or None
+ metadata.format = "FCBD" if p.filename_info["fcbd"] else None
+ if p.filename_info["annual"]:
+ metadata.format = "Annual"
+ else:
+ fnp = filenameparser.FileNameParser()
+ fnp.parse_filename(self.path)
+
+ if fnp.issue:
+ metadata.issue = fnp.issue
+ if fnp.series:
+ metadata.series = fnp.series
+ if fnp.volume:
+ metadata.volume = fnp.volume
+ if fnp.year:
+ metadata.year = fnp.year
+ if fnp.issue_count:
+ metadata.issue_count = fnp.issue_count
+ if fnp.remainder:
metadata.scan_info = fnp.remainder
metadata.is_empty = False
diff --git a/comicapi/filenamelexer.py b/comicapi/filenamelexer.py
new file mode 100644
index 0000000..ed3f27a
--- /dev/null
+++ b/comicapi/filenamelexer.py
@@ -0,0 +1,353 @@
+import calendar
+import os
+import unicodedata
+from enum import Enum, auto
+
+
+class ItemType(Enum):
+ """Kinds of token (Item.typ) that the filename lexer can produce."""
+ Error = auto() # Lexing failed; the item's value is the error message
+ EOF = auto() # End of the input was reached
+ Text = auto() # Word, a lone apostrophe, or a number with a trailing suffix such as 80th
+ LeftParen = auto() # '('
+ Number = auto() # Simple number
+ IssueNumber = auto() # Preceded by a # Symbol
+ RightParen = auto() # ')'
+ Space = auto() # Run of spaces, tabs or underscores
+ Dot = auto() # '.' that is not followed by a digit
+ LeftBrace = auto() # '{'
+ RightBrace = auto() # '}'
+ LeftSBrace = auto() # '['
+ RightSBrace = auto() # ']'
+ Symbol = auto() # Unicode punctuation/symbol character, or a '#' that is not followed by a digit
+ Skip = auto() # __ or -- no title, issue or series information beyond
+ Operator = auto() # Separator characters: - | : ; / \
+ Calendar = auto() # Month or weekday name/abbreviation (see cal())
+ InfoSpecifier = auto() # Specifies type of info e.g. v1 for 'volume': 1
+ ArchiveType = auto() # Known archive extension such as cbz or rar
+ Honorific = auto() # mr, ms, mrs or dr
+ Keywords = auto() # NOTE(review): never emitted by the lexer in this file - confirm it is still needed
+ FCBD = auto() # 'fcbd' or 'freecomicbookday'
+ ComicType = auto() # 'annual' or 'book'
+ Publisher = auto() # 'dc' or 'marvel'
+ C2C = auto() # 'c2c'
+
+
+# Every bracket-like item type, opening and closing.
+# NOTE(review): not referenced anywhere else in this file - presumably consumed by importers, confirm
+braces = [
+ ItemType.LeftBrace,
+ ItemType.LeftParen,
+ ItemType.LeftSBrace,
+ ItemType.RightBrace,
+ ItemType.RightParen,
+ ItemType.RightSBrace,
+]
+
+# Sentinel character returned by Lexer.get()/Lexer.peek() once the input is exhausted
+eof = chr(0)
+
+# Lower-cased word -> item type. lex_text() looks every scanned word up here
+# before falling back to Calendar or Text.
+key = {
+ "fcbd": ItemType.FCBD,
+ "freecomicbookday": ItemType.FCBD,
+ "cbr": ItemType.ArchiveType,
+ "cbz": ItemType.ArchiveType,
+ "cbt": ItemType.ArchiveType,
+ "cb7": ItemType.ArchiveType,
+ "rar": ItemType.ArchiveType,
+ "zip": ItemType.ArchiveType,
+ "tar": ItemType.ArchiveType,
+ "7z": ItemType.ArchiveType,
+ "annual": ItemType.ComicType,
+ "book": ItemType.ComicType,
+ "volume": ItemType.InfoSpecifier,
+ "vol.": ItemType.InfoSpecifier,
+ "vol": ItemType.InfoSpecifier,
+ "v": ItemType.InfoSpecifier,
+ "of": ItemType.InfoSpecifier,
+ "dc": ItemType.Publisher,
+ "marvel": ItemType.Publisher,
+ "covers": ItemType.InfoSpecifier,
+ "c2c": ItemType.C2C,
+ "mr": ItemType.Honorific,
+ "ms": ItemType.Honorific,
+ "mrs": ItemType.Honorific,
+ "dr": ItemType.Honorific,
+}
+
+
+class Item:
+    """One lexed token: its type, its start offset in the input and its text.
+
+    Instances compare by identity (no ``__eq__`` is defined), which the
+    parser relies on when it checks ``item in some_list``.
+    """
+
+    def __init__(self, typ: ItemType, pos: int, val: str):
+        self.typ, self.pos, self.val = typ, pos, val
+
+    def __repr__(self):
+        text, offset, kind = self.val, self.pos, self.typ
+        return f"{text}: index: {offset}: {kind}"
+
+
+class Lexer:
+    """State-machine lexer that turns a string into a list of ``Item`` tokens.
+
+    ``pos`` is the index of the most recently consumed character (``-1``
+    before anything has been read), so the pending text that has not been
+    emitted yet is always ``input[start : pos + 1]``.
+    """
+
+    def __init__(self, string):
+        self.input: str = string  # The string being scanned
+        self.state = None  # The next lexing function to enter
+        self.pos: int = -1  # Index of the last character consumed
+        self.start: int = 0  # Start position of the pending item
+        self.lastPos: int = 0  # Never updated by this class; kept so the attribute stays available
+        self.paren_depth: int = 0  # Nesting depth of ( )
+        self.brace_depth: int = 0  # Nesting depth of { }
+        self.sbrace_depth: int = 0  # Nesting depth of [ ]
+        self.items = []  # Items emitted so far, in input order
+
+    def get(self) -> str:
+        """Consume and return the next character, or ``eof`` once the input is exhausted.
+
+        ``pos`` advances on every call, including calls made at the end of the input.
+        """
+        self.pos += 1
+        if self.pos >= len(self.input):
+            return eof
+        return self.input[self.pos]
+
+    def peek(self) -> str:
+        """Return the next character without consuming it (``eof`` at the end of the input)."""
+        upcoming = self.pos + 1
+        if upcoming >= len(self.input):
+            return eof
+        return self.input[upcoming]
+
+    def backup(self):
+        """Step back one character."""
+        self.pos -= 1
+
+    def emit(self, t: ItemType):
+        """Append the pending text as an item of type *t* and start a new pending item."""
+        self.items.append(Item(t, self.start, self.input[self.start : self.pos + 1]))
+        self.start = self.pos + 1
+
+    def ignore(self):
+        """Discard the pending text without emitting it.
+
+        The next item has to start *after* the last consumed character, the
+        same way ``emit`` advances ``start``; using ``pos`` itself would leak
+        the last discarded character into the next item.
+        """
+        self.start = self.pos + 1
+
+    def accept(self, valid: str):
+        """Consume the next character if it occurs in *valid*; report whether it did."""
+        if self.get() in valid:
+            return True
+
+        self.backup()
+        return False
+
+    def accept_run(self, valid: str):
+        """Consume characters for as long as they occur in *valid*."""
+        while self.get() in valid:
+            pass
+
+        # The loop consumed one character too many
+        self.backup()
+
+    def errorf(self, message: str):
+        """Append an ``Error`` item carrying *message*.
+
+        Returns ``None`` so that ``return lex.errorf(...)`` inside a state
+        function ends the loop in ``run``.
+        """
+        self.items.append(Item(ItemType.Error, self.start, message))
+
+    def scan_number(self):
+        """Consume a number and report success (currently always ``True``).
+
+        Accepts a run of digits, an optional ``.`` followed by more digits and
+        an optional ordinal suffix (``st``, ``nd``, ``rd`` or ``th``). A ``.``
+        or a suffix letter that is not completed is put back.
+        """
+        digits = "0123456789"
+
+        self.accept_run(digits)
+        if self.accept("."):
+            if self.accept(digits):
+                self.accept_run(digits)
+            else:
+                # Not a fraction: un-consume the dot
+                self.backup()
+        if self.accept("s"):
+            if not self.accept("t"):
+                self.backup()
+        elif self.accept("nr"):
+            if not self.accept("d"):
+                self.backup()
+        elif self.accept("t"):
+            if not self.accept("h"):
+                self.backup()
+
+        return True
+
+    def run(self):
+        """Run the state machine, starting in ``lex_filename``, until a state returns ``None``."""
+        self.state = lex_filename
+        while self.state is not None:
+            self.state = self.state(self)
+
+
+def lex_filename(lex: Lexer):
+    """Top-level lexer state: consume one character and decide what it starts.
+
+    Returns the next state function: a specialised one (``lex_space``,
+    ``lex_number``, ``lex_operator``, ``lex_text``) when the character begins
+    a longer token, itself after emitting a single-character token, or
+    ``None`` at the end of the input or after an error, which stops
+    ``Lexer.run``.
+    """
+    r = lex.get()
+    if r == eof:
+        if lex.paren_depth != 0:
+            return lex.errorf("unclosed left paren")
+
+        # NOTE: an unclosed '[' is not reported here; only ( and { are checked
+        if lex.brace_depth != 0:
+            return lex.errorf("unclosed left brace")
+        lex.emit(ItemType.EOF)
+        return None
+    elif is_space(r):
+        # A double underscore is a Skip marker rather than spacing
+        if r == "_" and lex.peek() == "_":
+            lex.get()
+            lex.emit(ItemType.Skip)
+        else:
+            return lex_space
+    elif r == ".":
+        r = lex.peek()
+        if r < "0" or "9" < r:
+            lex.emit(ItemType.Dot)
+            return lex_filename
+
+        # '.5' style number: hand the dot back so lex_number sees it
+        lex.backup()
+        return lex_number
+    elif r == "'":
+        # An apostrophe directly before a digit is lexed as part of the number, e.g. '89
+        r = lex.peek()
+        if r in "0123456789":
+            return lex_number
+        lex.emit(ItemType.Text)  # TODO: Change to Text
+    elif "0" <= r <= "9":
+        lex.backup()
+        return lex_number
+    elif r == "#":
+        if "0" <= lex.peek() <= "9":
+            return lex_number
+        lex.emit(ItemType.Symbol)
+    elif is_operator(r):
+        # A double dash is a Skip marker rather than an operator
+        if r == "-" and lex.peek() == "-":
+            lex.get()
+            lex.emit(ItemType.Skip)
+        else:
+            return lex_operator
+    elif is_alpha_numeric(r):
+        lex.backup()
+        return lex_text
+    elif r == "(":
+        lex.emit(ItemType.LeftParen)
+        lex.paren_depth += 1
+    elif r == ")":
+        lex.emit(ItemType.RightParen)
+        lex.paren_depth -= 1
+        if lex.paren_depth < 0:
+            return lex.errorf("unexpected right paren " + r)
+
+    elif r == "{":
+        lex.emit(ItemType.LeftBrace)
+        lex.brace_depth += 1
+    elif r == "}":
+        lex.emit(ItemType.RightBrace)
+        lex.brace_depth -= 1
+        if lex.brace_depth < 0:
+            return lex.errorf("unexpected right brace " + r)
+
+    elif r == "[":
+        lex.emit(ItemType.LeftSBrace)
+        lex.sbrace_depth += 1
+    elif r == "]":
+        lex.emit(ItemType.RightSBrace)
+        lex.sbrace_depth -= 1
+        if lex.sbrace_depth < 0:
+            return lex.errorf("unexpected right brace " + r)
+    elif is_symbol(r):
+        lex.emit(ItemType.Symbol)
+    else:
+        return lex.errorf("unrecognized character in action: " + r)
+
+    return lex_filename
+
+
+def lex_operator(lex: Lexer):
+    """Emit one ``Operator`` item for the operator just read and any that follow it.
+
+    The first operator character was already consumed by ``lex_filename``;
+    directly following ``-``, ``|``, ``:`` and ``;`` characters are folded
+    into the same item.
+    """
+    continuation = "-|:;"
+    lex.accept_run(continuation)
+    lex.emit(ItemType.Operator)
+    return lex_filename
+
+
+def lex_space(lex: Lexer):
+    """Emit one ``Space`` item for a run of space characters.
+
+    The first character of the run was already consumed by ``lex_filename``.
+    """
+    while True:
+        if not is_space(lex.peek()):
+            break
+        lex.get()
+
+    lex.emit(ItemType.Space)
+    return lex_filename
+
+
+# lex_text scans a run of letters/digits and emits it as a single item:
+# the type from the `key` table when the lower-cased word is listed there,
+# Calendar when cal() recognises it, and Text otherwise.
+# Always continues in lex_filename.
+def lex_text(lex: Lexer):
+ while True:
+ r = lex.get()
+ if is_alpha_numeric(r):
+ if r.isnumeric(): # E.g. v1
+ # Text scanned so far, without the digit that was just read
+ word = lex.input[lex.start : lex.pos]
+ # An InfoSpecifier keyword directly followed by a digit is emitted on its own;
+ # the digit is handed back so it gets lexed separately
+ if word.lower() in key and key[word.lower()] == ItemType.InfoSpecifier:
+ lex.backup()
+ lex.emit(key[word.lower()])
+ return lex_filename
+ else:
+ # A trailing 's is kept as part of the word
+ if r == "'" and lex.peek() == "s":
+ lex.get()
+ else:
+ # Hand back the character that ended the word
+ lex.backup()
+ word = lex.input[lex.start : lex.pos + 1]
+ # 'vol.' includes its dot
+ if word.lower() == "vol" and lex.peek() == ".":
+ lex.get()
+ word = lex.input[lex.start : lex.pos + 1]
+
+ if word.lower() in key:
+ lex.emit(key[word.lower()])
+ elif cal(word):
+ lex.emit(ItemType.Calendar)
+ else:
+ lex.emit(ItemType.Text)
+ break
+
+ return lex_filename
+
+
+def cal(value: str):
+    """Return the set of ``calendar`` table indices at which *value* is a month or weekday name.
+
+    *value* is title-cased and compared with the full and abbreviated month
+    and weekday names of the ``calendar`` module. The result is an empty set
+    when nothing matches, so callers can use it as a truth value.
+    """
+    wanted = value.title()
+    tables = (calendar.month_abbr, calendar.month_name, calendar.day_abbr, calendar.day_name)
+    return {index for table in tables for index, name in enumerate(table) if name == wanted}
+
+
+def lex_number(lex: Lexer):
+    """Scan a number and emit it as ``IssueNumber``, ``Text`` or ``Number``.
+
+    The item is an ``IssueNumber`` when the pending text starts with ``#``,
+    ``Text`` when it does not end in a digit (80th is treated as a word) and
+    a plain ``Number`` otherwise.
+    """
+    scanned = lex.scan_number()
+    if not scanned:
+        return lex.errorf("bad number syntax: " + lex.input[lex.start : lex.pos])
+
+    if lex.input[lex.start] == "#":
+        kind = ItemType.IssueNumber
+    elif lex.input[lex.pos].isdigit():
+        kind = ItemType.Number
+    else:
+        # Assume that 80th is just text and not a number
+        kind = ItemType.Text
+    lex.emit(kind)
+
+    return lex_filename
+
+
+def is_space(character: str):
+    """Report whether *character* counts as spacing: underscore, space or tab."""
+    spacing = "_ \t"
+    return character in spacing
+
+
+def is_alpha_numeric(character: str):
+    """Report whether *character* is alphabetic or numeric (underscore is not accepted)."""
+    if character.isalpha():
+        return True
+    return character.isnumeric()
+
+
+def is_operator(character: str):
+    """Report whether *character* is one of the separator characters ``- | : ; / \\``."""
+    operators = "-|:;/\\"
+    return character in operators
+
+
+def is_symbol(character: str):
+    """Report whether *character* is in a Unicode punctuation (P*) or symbol (S*) category."""
+    major_class = unicodedata.category(character)[0]
+    return major_class in ("P", "S")
+
+
+def Lex(filename: str):
+    """Lex the base name of *filename* and return the finished ``Lexer``.
+
+    Any directory part is dropped first; the tokens are available on the
+    returned object's ``items`` attribute.
+    """
+    basename = os.path.basename(filename)
+    lexer = Lexer(string=basename)
+    lexer.run()
+    return lexer
diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py
index 9d67231..e4f829d 100644
--- a/comicapi/filenameparser.py
+++ b/comicapi/filenameparser.py
@@ -23,8 +23,17 @@ This should probably be re-written, but, well, it mostly works!
import logging
import os
import re
+from operator import itemgetter
+from typing import TypedDict
from urllib.parse import unquote
+from text2digits import text2digits
+
+from comicapi import filenamelexer, issuestring
+
+# Converters the parser uses on item text to recognise spelled-out numbers (e.g. 'book three').
+# NOTE(review): going by the flag name, t2do presumably keeps ordinal endings and t2d drops them - confirm against the text2digits documentation
+t2d = text2digits.Text2Digits(add_ordinal_ending=False)
+t2do = text2digits.Text2Digits(add_ordinal_ending=True)
+
logger = logging.getLogger(__name__)
@@ -68,9 +77,7 @@ class FileNameParser:
if match:
count = match.group()
- count = count.lstrip("0")
-
- return count
+ return count.lstrip("0")
def get_issue_number(self, filename):
"""Returns a tuple of issue number string, and start and end indexes in the filename
@@ -222,7 +229,7 @@ class FileNameParser:
year = ""
# look for four digit number with "(" ")" or "--" around it
- match = re.search(r"(\(\d\d\d\d\))|(--\d\d\d\d--)", filename)
+ match = re.search(r"(\(\d{4}\))|(--\d{4}--)", filename)
if match:
year = match.group()
# remove non-digits
@@ -290,3 +297,814 @@ class FileNameParser:
self.issue = "0"
if self.issue[0] == ".":
self.issue = "0" + self.issue
+
+
+class FilenameInfo(TypedDict, total=False):
+    """Fields the filename parser can extract; every key is optional (``total=False``)."""
+
+    # Names
+    series: str
+    title: str
+    publisher: str
+
+    # Numbering and dates
+    issue: str
+    issue_count: str
+    alternate: str
+    volume: str
+    volume_count: str
+    year: str
+
+    # Flags
+    annual: bool
+    c2c: bool
+    fcbd: bool
+
+    # Archive extension and whatever text was not used for the fields above
+    archive: str
+    remainder: str
+
+
+# Sentinel item handed out by Parser.get/peek/peek_back when there is no item at the requested position
+eof = filenamelexer.Item(filenamelexer.ItemType.EOF, -1, "")
+
+
+class Parser:
+    """State-machine parser that extracts comic metadata from lexed filename items.
+
+    ``run`` repeatedly calls the current state function (starting with
+    ``parse``) until one returns ``None``; the state functions fill
+    ``filename_info``.
+    """
+
+    def __init__(
+        self,
+        lexer_result: list[filenamelexer.Item],
+        first_is_alt=False,
+        remove_c2c=False,
+        remove_fcbd=False,
+        remove_publisher=False,
+    ):
+        self.state = None  # The next state function to run
+        self.pos = -1  # Index of the last item consumed
+
+        self.firstItem = True
+        self.skip = False  # Set once a Skip item ('__' or '--') has been seen
+        self.alt = False  # The number being parsed is an alternate number
+        self.filename_info: FilenameInfo = {"series": ""}
+        # Only ever tested against None in the visible code. Seeded with a list
+        # index here, while the state functions store Item.pos (a character offset).
+        self.issue_number_at = None
+        self.in_something = 0  # In some sort of brackets {}[]()
+        self.in_brace = 0  # In {}
+        self.in_s_brace = 0  # In []
+        self.in_paren = 0  # In ()
+        self.year_candidates: list[tuple[bool, filenamelexer.Item]] = []  # (likely_year, item)
+        self.series_parts: list[filenamelexer.Item] = []
+        self.title_parts: list[filenamelexer.Item] = []
+        self.used_items: list[filenamelexer.Item] = []  # Left out of the remainder
+        self.irrelevant: list[filenamelexer.Item] = []  # Left out of the remainder
+        self.operator_rejected: list[filenamelexer.Item] = []
+        self.publisher_removed: list[filenamelexer.Item] = []
+
+        self.first_is_alt = first_is_alt
+        self.remove_c2c = remove_c2c
+        self.remove_fcbd = remove_fcbd
+        self.remove_publisher = remove_publisher
+
+        self.input = lexer_result
+        # Remember whether (and where) an explicit '#123' issue number exists; the last one wins
+        for i, item in enumerate(self.input):
+            if item.typ == filenamelexer.ItemType.IssueNumber:
+                self.issue_number_at = i
+
+    def get(self) -> filenamelexer.Item:
+        """Consume and return the next item, or ``eof`` once the input is exhausted.
+
+        ``pos`` advances on every call, including calls made at the end of the input.
+        """
+        self.pos += 1
+        if self.pos >= len(self.input):
+            return eof
+        return self.input[self.pos]
+
+    def peek(self) -> filenamelexer.Item:
+        """Return the next item without consuming it (``eof`` at the end of the input)."""
+        upcoming = self.pos + 1
+        if upcoming >= len(self.input):
+            return eof
+        return self.input[upcoming]
+
+    def peek_back(self) -> filenamelexer.Item:
+        """Return the item before the current one without moving.
+
+        ``eof`` is returned whenever there is no such item. That includes the
+        state before anything was consumed (a bare ``pos - 1`` index would
+        wrap around to the end of the list) and positions past the end.
+        """
+        previous = self.pos - 1
+        if previous < 0 or previous >= len(self.input):
+            return eof
+        return self.input[previous]
+
+    def backup(self):
+        """Step back one item."""
+        self.pos -= 1
+
+    def run(self):
+        """Run the state machine, starting in ``parse``, until a state returns ``None``."""
+        self.state = parse
+        while self.state is not None:
+            self.state = self.state(self)
+
+
+def parse(p: Parser):
+ """Main parser state: consume one item, act on its type and return the next state function.
+
+ Returns parse_finish at the end of the input, a more specific state function
+ (parse_issue_number, parse_series, parse_info_specifier) when the item calls
+ for one, and otherwise itself so that Parser.run keeps looping.
+ """
+ item: filenamelexer.Item = p.get()
+
+ # We're done, time to do final processing
+ if item.typ == filenamelexer.ItemType.EOF:
+ return parse_finish
+
+ # Need to figure out if this is the issue number
+ if item.typ == filenamelexer.ItemType.Number:
+ likely_year = False
+ if p.firstItem and p.first_is_alt:
+ p.alt = True
+ return parse_issue_number
+
+ # The issue number should hopefully not be in parentheses
+ if p.in_something == 0:
+ # Assume that operators indicate a non-issue number e.g. IG-88 or 88-IG
+ if filenamelexer.ItemType.Operator not in (p.peek().typ, p.peek_back().typ):
+ # It is common to use '89 to refer to an annual reprint from 1989
+ if item.val[0] != "'":
+ # Issue number is less than 4 digits. very few series go above 999
+ if len(item.val.lstrip("0")) < 4:
+ # An issue number starting with # Was not found and no previous number was found
+ if p.issue_number_at is None:
+ # Series has already been started/parsed, filters out leading alternate numbers
+ if len(p.series_parts) > 0:
+ # Unset first item
+ if p.firstItem:
+ p.firstItem = False
+ return parse_issue_number
+ else:
+ p.operator_rejected.append(item)
+ # operator rejected used later to add back to the series/title
+
+ # It is more likely to be a year if it is inside parentheses.
+ if p.in_something > 0:
+ likely_year = True
+
+ # If numbers are directly followed by text it most likely isn't a year e.g. 2048px
+ if p.peek().typ == filenamelexer.ItemType.Text:
+ likely_year = False
+
+ # Is either a full year '2001' or a short year "'89"
+ if len(item.val) == 4 or item.val[0] == "'":
+ if p.in_something == 0:
+ # Append to series in case it is a part of the title, but only if were not inside parenthesis
+ p.series_parts.append(item)
+
+ # Look for a full date as in 2022-04-22
+ if p.peek().typ in [
+ filenamelexer.ItemType.Symbol,
+ filenamelexer.ItemType.Operator,
+ filenamelexer.ItemType.Dot,
+ ]:
+ op = [p.get()]
+ if p.peek().typ == filenamelexer.ItemType.Number:
+ month = p.get()
+ if p.peek().typ in [
+ filenamelexer.ItemType.Symbol,
+ filenamelexer.ItemType.Operator,
+ filenamelexer.ItemType.Dot,
+ ]:
+ op.append(p.get())
+ if p.peek().typ == filenamelexer.ItemType.Number:
+ day = p.get()
+ # The separators and all three date numbers are kept out of the remainder
+ fulldate = [month, day, item]
+ p.used_items.extend(op)
+ p.used_items.extend(fulldate)
+ else:
+ # Not a full date: un-consume the separator, the number and the second separator
+ p.backup()
+ p.backup()
+ p.backup()
+ # TODO never happens
+ else:
+ p.backup()
+ p.backup()
+ # TODO never happens
+ else:
+ p.backup()
+ # TODO never happens
+
+ p.year_candidates.append((likely_year, item))
+ # Ensures that IG-88 gets added back to the series/title
+ # NOTE(review): `and` binds tighter than `or`, so this reads as
+ # (not in brackets and previous is an operator) or (next is an operator);
+ # the comment inside the branch describes the bracket check applying to both - confirm
+ elif (
+ p.in_something == 0
+ and p.peek_back().typ == filenamelexer.ItemType.Operator
+ or p.peek().typ == filenamelexer.ItemType.Operator
+ ):
+ # Were not in something and the next or previous type is an operator, add it to the series
+ p.series_parts.append(item)
+ p.used_items.append(item)
+
+ # Unset first item
+ if p.firstItem:
+ p.firstItem = False
+ p.get()
+ return parse_series
+
+ # Number with a leading hash e.g. #003
+ elif item.typ == filenamelexer.ItemType.IssueNumber:
+ # Unset first item
+ if p.firstItem:
+ p.firstItem = False
+ return parse_issue_number
+
+ # Matches FCBD. Not added to p.used_items so it will show in "remainder"
+ elif item.typ == filenamelexer.ItemType.FCBD:
+ p.filename_info["fcbd"] = True
+
+ # Matches c2c. Not added to p.used_items so it will show in "remainder"
+ elif item.typ == filenamelexer.ItemType.C2C:
+ p.filename_info["c2c"] = True
+
+ # Matches the extension if it is known to be an archive format e.g. cbt,cbz,zip,rar
+ elif item.typ == filenamelexer.ItemType.ArchiveType:
+ p.filename_info["archive"] = item.val.lower()
+ p.used_items.append(item)
+ # The dot in front of the extension is kept out of the remainder as well
+ if p.peek_back().typ == filenamelexer.ItemType.Dot:
+ p.used_items.append(p.peek_back())
+
+ # Allows removing DC from 'Wonder Woman 49 DC Sep-Oct 1951' dependent on publisher being in a static list in the lexer
+ elif item.typ == filenamelexer.ItemType.Publisher:
+ p.filename_info["publisher"] = item.val
+ p.used_items.append(item)
+ # NOTE(review): the bracket check and return appear twice below; the original
+ # indentation is not visible here, so confirm which of them are nested under firstItem
+ if p.firstItem:
+ p.firstItem = False
+ if p.in_something == 0:
+ return parse_series
+ p.publisher_removed.append(item)
+ if p.in_something == 0:
+ return parse_series
+
+ # Attempts to identify the type e.g. annual
+ elif item.typ == filenamelexer.ItemType.ComicType:
+ series_append = True
+
+ if p.peek().typ == filenamelexer.ItemType.Space:
+ p.get()
+
+ if p.series_parts and "free comic book" in (" ".join([x.val for x in p.series_parts]) + " " + item.val).lower():
+ p.filename_info["fcbd"] = True
+ # series_append is already True at this point
+ series_append = True
+ # If the next item is a number it's probably the volume
+ elif p.peek().typ == filenamelexer.ItemType.Number or (
+ p.peek().typ == filenamelexer.ItemType.Text and t2d.convert(p.peek().val).isnumeric()
+ ):
+ number = p.get()
+ # Mark volume info. Text will be added to the title/series later
+ if item.val.lower() in ["book", "tpb"]:
+ p.title_parts.extend([item, number])
+ p.filename_info["volume"] = t2do.convert(number.val)
+ p.filename_info["issue"] = t2do.convert(number.val)
+
+ p.used_items.append(item)
+ series_append = False
+
+ # Annuals usually mean the year
+ elif item.val.lower() in ["annual"]:
+ p.filename_info["annual"] = True
+ num = t2d.convert(number.val)
+ if num.isnumeric() and len(num) == 4:
+ p.year_candidates.append((True, number))
+ else:
+ # Not a year: hand the number back
+ p.backup()
+
+ elif item.val.lower() in ["annual"]:
+ p.filename_info["annual"] = True
+
+ # If we don't have a reason to exclude it from the series go back to parsing the series immediately
+ if series_append:
+ p.series_parts.append(item)
+ p.used_items.append(item)
+ return parse_series
+
+ # We found text, it's probably the title or series
+ elif item.typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Honorific]:
+ # Unset first item
+ if p.firstItem:
+ p.firstItem = False
+ if p.in_something == 0:
+ return parse_series
+
+ # Usually the word 'of' eg 1 (of 6)
+ elif item.typ == filenamelexer.ItemType.InfoSpecifier:
+ return parse_info_specifier
+
+ # Operator is a symbol that acts as some sort of separator eg - : ;
+ elif item.typ == filenamelexer.ItemType.Operator:
+ if p.in_something == 0:
+ p.irrelevant.append(item)
+
+ # Filter out Month and day names in filename
+ elif item.typ == filenamelexer.ItemType.Calendar:
+ # Month and day are currently irrelevant if they are inside parentheses e.g. (January 2002)
+ if p.in_something > 0:
+ p.irrelevant.append(item)
+
+ # assume Sep-Oct is not useful in the series/title
+ elif p.peek().typ in [filenamelexer.ItemType.Symbol, filenamelexer.ItemType.Operator]:
+ p.get()
+ if p.peek().typ == filenamelexer.ItemType.Calendar:
+ # Drops the first name, the separator just consumed and the second name
+ p.irrelevant.extend([item, p.input[p.pos], p.get()])
+ else:
+ p.backup()
+ return parse_series
+ # This is text that just happens to also be a month/day
+ else:
+ return parse_series
+
+ # Specifically '__' or '--', no further title/series parsing is done to keep compatibility with wiki
+ elif item.typ == filenamelexer.ItemType.Skip:
+ p.skip = True
+
+ # Keeping track of parentheses depth
+ elif item.typ == filenamelexer.ItemType.LeftParen:
+ p.in_paren += 1
+ p.in_something += 1
+ elif item.typ == filenamelexer.ItemType.LeftBrace:
+ p.in_brace += 1
+ p.in_something += 1
+ elif item.typ == filenamelexer.ItemType.LeftSBrace:
+ p.in_s_brace += 1
+ p.in_something += 1
+
+ elif item.typ == filenamelexer.ItemType.RightParen:
+ p.in_paren -= 1
+ p.in_something -= 1
+ elif item.typ == filenamelexer.ItemType.RightBrace:
+ p.in_brace -= 1
+ p.in_something -= 1
+ elif item.typ == filenamelexer.ItemType.RightSBrace:
+ p.in_s_brace -= 1
+ p.in_something -= 1
+
+ # Unset first item
+ if p.firstItem:
+ p.firstItem = False
+
+ # Brace management, I don't like negative numbers
+ # A negative per-kind counter has its magnitude added back to in_something.
+ # NOTE(review): the per-kind counter itself is not reset, so this correction looks like it
+ # is applied again on every later call that reaches this point - confirm that is intended
+ if p.in_paren < 0:
+ p.in_something += p.in_paren * -1
+ if p.in_brace < 0:
+ p.in_something += p.in_brace * -1
+ if p.in_s_brace < 0:
+ p.in_something += p.in_s_brace * -1
+
+ return parse
+
+
+# TODO: What about more esoteric numbers???
+def parse_issue_number(p: Parser):
+    """Record the current item as the issue number or as an alternate number.
+
+    The first number found becomes the issue, unless ``p.alt`` marks it as an
+    alternate. Once an issue exists, further numbers are collected in
+    ``alternate``, separated by commas. A directly following ``.`` plus a
+    text or number item is appended as a suffix to whichever field was just
+    written (e.g. ``1.HU``). Always returns ``parse``.
+    """
+    item = p.input[p.pos]
+
+    if "issue" in p.filename_info:
+        target = "alternate"
+        if "alternate" in p.filename_info:
+            # Keep the alternates that were already found instead of overwriting them
+            p.filename_info["alternate"] += "," + item.val
+        else:
+            p.filename_info["alternate"] = item.val
+    elif p.alt:
+        target = "alternate"
+        p.filename_info["alternate"] = item.val
+    else:
+        target = "issue"
+        p.filename_info["issue"] = item.val
+        p.issue_number_at = item.pos
+    p.used_items.append(item)
+
+    item = p.get()
+    if item.typ == filenamelexer.ItemType.Dot:
+        # As before, the dot is marked as used even when it turns out not to introduce a suffix
+        p.used_items.append(item)
+        item = p.get()
+        if item.typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Number]:
+            # The suffix belongs to the number recorded above, not necessarily to the issue
+            p.filename_info[target] += "." + item.val
+            p.used_items.append(item)
+        else:
+            # Hand back both the dot and the item after it
+            p.backup()
+            p.backup()
+    else:
+        p.backup()
+    p.alt = False
+    return parse
+
+
+def parse_series(p: Parser):
+ """Collect consecutive items into the series and/or title, then return to parse.
+
+ Items are gathered into sections that are split at an operator which has no
+ space before it but a space after it (e.g. ': '). With more than one section
+ the last one becomes the title and the rest the series. If p.series_parts
+ is already filled, everything collected here goes to the title instead.
+ """
+ item = p.input[p.pos]
+
+ series: list[list[filenamelexer.Item]] = [[]]
+ # Space and Dots are not useful at the beginning of a title/series
+ if not p.skip and item.typ not in [filenamelexer.ItemType.Space, filenamelexer.ItemType.Dot]:
+ series[0].append(item)
+
+ current_part = 0
+
+ title_parts: list[filenamelexer.Item] = []
+ series_parts: list[filenamelexer.Item] = []
+
+ prev_space = False
+
+ # 'free comic book day' screws things up. #TODO look into removing book from ComicType?
+
+ # We stop parsing the series when certain things come up if nothing was done with them continue where we left off
+ # NOTE(review): `and` binds tighter than `or`, so only the "book" test depends on p.series_parts being non-empty - confirm
+ if (
+ p.series_parts
+ and p.series_parts[-1].val.lower() == "book"
+ or p.peek_back().typ == filenamelexer.ItemType.Number
+ or item.typ == filenamelexer.ItemType.Calendar
+ ):
+ # Take over what was collected so far; it is extended below
+ series_parts = p.series_parts
+ p.series_parts = []
+ # Skip is only true if we have come across '--' or '__'
+ while not p.skip:
+ item = p.get()
+
+ # Spaces are evil
+ if item.typ == filenamelexer.ItemType.Space:
+ prev_space = True
+ continue
+ if item.typ in [
+ filenamelexer.ItemType.Text,
+ filenamelexer.ItemType.Symbol,
+ filenamelexer.ItemType.Publisher,
+ filenamelexer.ItemType.Honorific,
+ ]:
+ series[current_part].append(item)
+ if item.typ == filenamelexer.ItemType.Honorific and p.peek().typ == filenamelexer.ItemType.Dot:
+ series[current_part].append(p.get())
+ elif item.typ == filenamelexer.ItemType.Publisher:
+ p.filename_info["publisher"] = item.val
+
+ # Handle Volume
+ elif item.typ == filenamelexer.ItemType.InfoSpecifier:
+ # Exception for 'of'
+ if item.val.lower() == "of":
+ series[current_part].append(item)
+ else:
+ # This specifically lets 'X-Men-V1-067' parse correctly as Series: X-Men Volume: 1 Issue: 67
+ while len(series[current_part]) > 0 and series[current_part][-1].typ not in [
+ filenamelexer.ItemType.Text,
+ filenamelexer.ItemType.Symbol,
+ ]:
+ p.irrelevant.append(series[current_part].pop())
+ p.backup()
+ break
+
+ elif item.typ == filenamelexer.ItemType.Operator:
+ peek = p.peek()
+ # ': ' separates the title from the series, only the last section is considered the title
+ if not prev_space and peek.typ in [filenamelexer.ItemType.Space]:
+ series.append([]) # Starts a new section
+ series[current_part].append(item)
+ current_part += 1
+ else:
+ # Force space around '-' makes 'batman - superman' stay otherwise we get 'batman-superman'
+ # Note: this rewrites the lexer item's text in place
+ if prev_space and peek.typ in [filenamelexer.ItemType.Space]:
+ item.val = " " + item.val + " "
+ series[current_part].append(item)
+
+ # Stop processing series/title if a skip item is found
+ elif item.typ == filenamelexer.ItemType.Skip:
+ p.backup()
+ break
+
+ elif item.typ == filenamelexer.ItemType.Number:
+ if p.peek().typ == filenamelexer.ItemType.Space:
+ p.get()
+ # We have 2 numbers, add the first to the series and then go back to parse
+ if p.peek().typ == filenamelexer.ItemType.Number:
+ series[current_part].append(item)
+ break
+
+ # We have 1 number break here, it's possible it's the issue
+ p.backup() # Whitespace
+ p.backup() # The number
+ break
+ # This is 6 in '1 of 6'
+ if series[current_part] and series[current_part][-1].val.lower() == "of":
+ series[current_part].append(item)
+
+ # We have 1 number break here, it's possible it's the issue
+ else:
+ p.backup() # The number
+ break
+
+ else:
+ # Ensure 'ms. marvel' parses 'ms.' correctly
+ if item.typ == filenamelexer.ItemType.Dot and p.peek_back().typ == filenamelexer.ItemType.Honorific:
+ series[current_part].append(item)
+ # Allows avengers.hulk to parse correctly
+ elif item.typ == filenamelexer.ItemType.Dot and p.peek().typ == filenamelexer.ItemType.Text:
+ # Marks the dot as used so that the remainder is clean
+ p.used_items.append(item)
+ else:
+ # Anything else ends the series/title; hand the item back to parse
+ p.backup()
+ break
+
+ prev_space = False
+
+ # We have a title separator e.g. ': "
+ if len(series) > 1:
+ title_parts.extend(series.pop())
+ for s in series:
+ if s and s[-1].typ == filenamelexer.ItemType.Operator:
+ s[-1].val += " " # Ensures that when there are multiple separators that they display properly
+ series_parts.extend(s)
+ # The last item of the series is taken off again and only marked as used
+ p.used_items.append(series_parts.pop())
+ else:
+ series_parts.extend(series[0])
+
+ # If the series has already been set assume all of this is the title.
+ if len(p.series_parts) > 0:
+ p.title_parts.extend(series_parts)
+ p.title_parts.extend(title_parts)
+ else:
+ p.series_parts.extend(series_parts)
+ p.title_parts.extend(title_parts)
+ return parse
+
+
+def resolve_year(p: Parser):
+    """Choose the year, and possibly the volume, from ``p.year_candidates``.
+
+    Candidates are ``(likely_year, item)`` pairs. After a stable sort on the
+    flag the last candidate becomes the year. If no volume is known yet and
+    the next-to-last candidate is also flagged as likely, it becomes the
+    volume. Each chosen item is marked as used and taken out of the series
+    and title parts.
+    """
+    if not p.year_candidates:
+        return
+
+    # Stable sort on the likely_year flag: unlikely candidates first, likely ones last,
+    # each group keeping its filename order
+    p.year_candidates.sort(key=itemgetter(0))
+
+    # Take the last year e.g. (2007) 2099 (2008) becomes 2099 2007 2008 and takes 2008
+    selected_year = p.year_candidates.pop()[1]
+    p.filename_info["year"] = selected_year.val
+    p.used_items.append(selected_year)
+
+    # (2008) Title (2009) is many times used to denote the series year; if we don't have a volume we use it
+    if "volume" not in p.filename_info and p.year_candidates and p.year_candidates[-1][0]:
+        vol = p.year_candidates.pop()[1]
+        p.filename_info["volume"] = vol.val
+        p.used_items.append(vol)
+
+        # Remove the volume itself (not the year) from series and title
+        for parts in (p.series_parts, p.title_parts):
+            if vol in parts:
+                parts.remove(vol)
+
+    # Remove year from series and title
+    for parts in (p.series_parts, p.title_parts):
+        if selected_year in parts:
+            parts.remove(selected_year)
+
+
+def parse_finish(p: Parser):
+ """Final parser state: resolve the remaining fields and fill p.filename_info.
+
+ Picks the year/volume, falls back to a trailing series number or an alternate
+ number for the issue, joins the series and title, normalises issue and volume,
+ computes the remainder and makes sure every key exists. Returns None
+ implicitly, which ends the loop in Parser.run.
+ """
+ resolve_year(p)
+
+ # If we don't have an issue try to find it in the series
+ if "issue" not in p.filename_info and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number:
+ issue_num = p.series_parts.pop()
+
+ # If the number we just popped is a year put it back on it's probably part of the series e.g. Spider-Man 2099
+ # NOTE(review): resolve_year() above has already popped the selected year (and volume)
+ # from p.year_candidates, so only the remaining candidates are matched here - confirm
+ if issue_num in [x[1] for x in p.year_candidates]:
+ p.series_parts.append(issue_num)
+ else:
+ # If this number was rejected because of an operator and the operator is still there add it back e.g. 'IG-88'
+ if (
+ issue_num in p.operator_rejected
+ and p.series_parts
+ and p.series_parts[-1].typ == filenamelexer.ItemType.Operator
+ ):
+ p.series_parts.append(issue_num)
+ # We have no reason to not use this number as the issue number. Specifically happens when parsing 'X-Men-V1-067.cbr'
+ else:
+ p.filename_info["issue"] = issue_num.val
+ p.used_items.append(issue_num)
+ p.issue_number_at = issue_num.pos
+
+ # Remove publishers, currently only marvel and dc are defined,
+ # this is an option specifically because this can drastically screw up parsing
+ if p.remove_publisher:
+ for item in p.publisher_removed:
+ if item in p.series_parts:
+ p.series_parts.remove(item)
+ if item in p.title_parts:
+ p.title_parts.remove(item)
+
+ p.filename_info["series"] = join_title(p.series_parts)
+ p.used_items.extend(p.series_parts)
+
+ p.filename_info["title"] = join_title(p.title_parts)
+ p.used_items.extend(p.title_parts)
+
+ # Normalise the issue through IssueString after dropping a leading '#'
+ if "issue" in p.filename_info:
+ p.filename_info["issue"] = issuestring.IssueString(p.filename_info["issue"].lstrip("#")).as_string()
+
+ # Volume: drop a leading '#' and leading zeros
+ if "volume" in p.filename_info:
+ p.filename_info["volume"] = p.filename_info["volume"].lstrip("#").lstrip("0")
+
+ # Note: an issue filled in below does not go through the IssueString normalisation above
+ if "issue" not in p.filename_info:
+ # We have an alternate move it to the issue
+ if "alternate" in p.filename_info:
+ p.filename_info["issue"] = p.filename_info["alternate"]
+ p.filename_info["alternate"] = ""
+ else:
+ # TODO: This never happens
+ inp = [x for x in p.input if x not in p.irrelevant and x not in p.used_items and x.typ != eof.typ]
+ if len(inp) == 1 and inp[0].typ == filenamelexer.ItemType.Number:
+ p.filename_info["issue"] = inp[0].val
+ p.used_items.append(inp[0])
+
+ # Optionally keep FCBD / c2c markers out of the remainder
+ remove_items = []
+ if p.remove_fcbd:
+ remove_items.append(filenamelexer.ItemType.FCBD)
+ if p.remove_c2c:
+ remove_items.append(filenamelexer.ItemType.C2C)
+
+ p.irrelevant.extend([x for x in p.input if x.typ in remove_items])
+
+ p.filename_info["remainder"] = get_remainder(p)
+
+ # Ensure keys always exist
+ for s in [
+ "alternate",
+ "issue",
+ "archive",
+ "series",
+ "title",
+ "volume",
+ "year",
+ "remainder",
+ "issue_count",
+ "volume_count",
+ "publisher",
+ ]:
+ if s not in p.filename_info:
+ p.filename_info[s] = ""
+ for s in ["fcbd", "c2c", "annual"]:
+ if s not in p.filename_info:
+ p.filename_info[s] = False
+
+
+def get_remainder(p: Parser):
+ remainder = ""
+ rem = []
+
+ # Remove used items and irrelevant items e.g. the series and useless operators
+ inp = [x for x in p.input if x not in p.irrelevant and x not in p.used_items]
+ for i, item in enumerate(inp):
+ # No double space or space next to parentheses
+ if item.typ in [filenamelexer.ItemType.Space, filenamelexer.ItemType.Skip]:
+ if (
+ i > 0
+ and inp[i - 1].typ
+ not in [
+ filenamelexer.ItemType.Space,
+ filenamelexer.ItemType.LeftBrace,
+ filenamelexer.ItemType.LeftParen,
+ filenamelexer.ItemType.LeftSBrace,
+ ]
+ and i + 1 < len(inp)
+ and inp[i + 1].typ
+ not in [
+ filenamelexer.ItemType.RightBrace,
+ filenamelexer.ItemType.RightParen,
+ filenamelexer.ItemType.RightSBrace,
+ ]
+ ):
+ remainder += " "
+
+ # Strip off useless opening parenthesis
+ elif (
+ item.typ
+ in [
+ filenamelexer.ItemType.Space,
+ filenamelexer.ItemType.RightBrace,
+ filenamelexer.ItemType.RightParen,
+ filenamelexer.ItemType.RightSBrace,
+ ]
+ and i > 0
+ and inp[i - 1].typ
+ in [
+ filenamelexer.ItemType.LeftBrace,
+ filenamelexer.ItemType.LeftParen,
+ filenamelexer.ItemType.LeftSBrace,
+ ]
+ ):
+ remainder = remainder.rstrip("[{(")
+ continue
+
+ # Add the next item
+ else:
+ rem.append(item)
+ remainder += item.val
+
+ # Remove empty parentheses
+ remainder = re.sub(r"[\[{(]+[]})]+", "", remainder)
+ return remainder.strip()
+
+
+def parse_info_specifier(p: Parser):
+ item = p.input[p.pos]
+ index = p.pos
+
+ if p.peek().typ == filenamelexer.ItemType.Space:
+ p.get()
+
+ # Handles 'book 3' and 'book three'
+ if p.peek().typ == filenamelexer.ItemType.Number or (
+ p.peek().typ == filenamelexer.ItemType.Text and t2d.convert(p.peek().val).isnumeric()
+ ):
+
+ number = p.get()
+ if item.val.lower() in ["volume", "vol", "vol.", "v"]:
+ p.filename_info["volume"] = t2do.convert(number.val)
+ p.used_items.append(item)
+ p.used_items.append(number)
+
+ # 'of' is only special if it is inside a parenthesis.
+ elif item.val.lower() == "of":
+ i = get_number(p, index)
+ if p.in_something > 0:
+ if p.issue_number_at is None:
+ # TODO: Figure out what to do here if it ever happens
+ p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
+ p.used_items.append(item)
+ p.used_items.append(number)
+
+ # The preceding number is the issue number, so this is definitely the issue count
+ elif p.issue_number_at == i.pos:
+ p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
+ p.used_items.append(item)
+ p.used_items.append(number)
+
+ # This count is not for the issue number and the preceding number is in neither the series nor the title; assume it is the volume number and volume count
+ elif p.issue_number_at != i.pos and i not in p.series_parts and i not in p.title_parts:
+ p.filename_info["volume"] = i.val
+ p.filename_info["volume_count"] = str(int(t2do.convert(number.val)))
+ p.used_items.append(i)
+ p.used_items.append(item)
+ p.used_items.append(number)
+ else:
+ # TODO: Figure out what to do here if it ever happens
+ pass
+ else:
+ # Lets 'The Wrath of Foobar-Man, Part 1 of 2' parse correctly as the title
+ if i is not None:
+ p.pos = [ind for ind, x in enumerate(p.input) if x == i][0]
+
+ if not p.in_something:
+ return parse_series
+ return parse
+
+
+# Gets 03 in '03 of 6'
+def get_number(p: Parser, index: int):
+ # Go backward through the filename to see if we can find what this is of, e.g. '03 (of 6)' or '008 title 03 (of 6)'
+ rev = p.input[:index]
+ rev.reverse()
+ for i in rev:
+ # We don't care about these types, we are looking to see if there is a number that is possibly different from the issue number for this count
+ if i.typ in [
+ filenamelexer.ItemType.LeftParen,
+ filenamelexer.ItemType.LeftBrace,
+ filenamelexer.ItemType.LeftSBrace,
+ filenamelexer.ItemType.Space,
+ ]:
+ continue
+ if i.typ == filenamelexer.ItemType.Number:
+ # We got our number, time to leave
+ return i
+ # This is not a number and not an ignorable type, give up looking for the number this count belongs to
+
+ return None
+
+
+def join_title(lst: list[filenamelexer.Item]):
+ title = ""
+ for i, item in enumerate(lst):
+ if i + 1 == len(lst) and item.val == ",": # We ignore commas on the end
+ continue
+ title += item.val # Add the next item
+ # No space after operators
+ if item.typ == filenamelexer.ItemType.Operator:
+ continue
+ # No trailing space
+ if i == len(lst) - 1:
+ continue
+ # No space after honorifics with a dot
+ if item.typ == filenamelexer.ItemType.Honorific and lst[i + 1].typ == filenamelexer.ItemType.Dot:
+ continue
+ # No space if the next item is an operator or symbol
+ if lst[i + 1].typ in [
+ filenamelexer.ItemType.Operator,
+ filenamelexer.ItemType.Symbol,
+ ]:
+ continue
+
+ # Add a space
+ title += " "
+
+ return title
+
+
+def Parse(
+ lexer_result: list[filenamelexer.Item],
+ first_is_alt=False,
+ remove_c2c=False,
+ remove_fcbd=False,
+ remove_publisher=False,
+):
+ p = Parser(
+ lexer_result=lexer_result,
+ first_is_alt=first_is_alt,
+ remove_c2c=remove_c2c,
+ remove_fcbd=remove_fcbd,
+ remove_publisher=remove_publisher,
+ )
+ p.run()
+ return p
diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py
index 03fc7e9..764cc24 100644
--- a/comictaggerlib/autotagmatchwindow.py
+++ b/comictaggerlib/autotagmatchwindow.py
@@ -32,11 +32,13 @@ logger = logging.getLogger(__name__)
class AutoTagMatchWindow(QtWidgets.QDialog):
volume_id = 0
- def __init__(self, parent, match_set_list: List[MultipleMatch], style, fetch_func):
+ def __init__(self, parent, match_set_list: List[MultipleMatch], style, fetch_func, settings):
super().__init__(parent)
uic.loadUi(ComicTaggerSettings.get_ui_file("matchselectionwindow.ui"), self)
+ self.settings = settings
+
self.current_match_set: Optional[MultipleMatch] = None
self.altCoverWidget = CoverImageWidget(self.altCoverContainer, CoverImageWidget.AltCoverMode)
@@ -221,7 +223,12 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
md = ca.read_metadata(self.style)
if md.is_empty:
- md = ca.metadata_from_filename()
+ md = ca.metadata_from_filename(
+ self.settings.complicated_parser,
+ self.settings.remove_c2c,
+ self.settings.remove_fcbd,
+ self.settings.remove_publisher,
+ )
# now get the particular issue data
cv_md = self.fetch_func(match)
diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py
index 43e02ae..f6faf26 100644
--- a/comictaggerlib/cli.py
+++ b/comictaggerlib/cli.py
@@ -101,7 +101,7 @@ def display_match_set_for_choice(label, match_set: MultipleMatch, opts, settings
# save the data!
# we know at this point, that the file is all good to go
ca = match_set.ca
- md = create_local_metadata(opts, ca, ca.has_metadata(opts.data_style))
+ md = create_local_metadata(opts, ca, ca.has_metadata(opts.data_style), settings)
cv_md = actual_issue_data_fetch(match_set.matches[int(i)], settings, opts)
md.overlay(cv_md)
actual_metadata_save(ca, opts, md)
@@ -164,13 +164,17 @@ def cli_mode(opts, settings):
post_process_matches(match_results, opts, settings)
-def create_local_metadata(opts, ca: ComicArchive, has_desired_tags):
+def create_local_metadata(opts, ca: ComicArchive, has_desired_tags, settings):
md = GenericMetadata()
md.set_default_page_list(ca.get_number_of_pages())
# now, overlay the parsed filename info
if opts.parse_filename:
- md.overlay(ca.metadata_from_filename())
+ md.overlay(
+ ca.metadata_from_filename(
+ settings.complicated_parser, settings.remove_c2c, settings.remove_fcbd, settings.remove_publisher
+ )
+ )
if has_desired_tags:
md = ca.read_metadata(opts.data_style)
@@ -319,7 +323,7 @@ def process_file_cli(filename, opts, settings, match_results: OnlineMatchResults
if batch_mode:
print(f"Processing {ca.path}...")
- md = create_local_metadata(opts, ca, has[opts.data_style])
+ md = create_local_metadata(opts, ca, has[opts.data_style], settings)
if md.issue is None or md.issue == "":
if opts.assume_issue_is_one_if_not_set:
md.issue = "1"
@@ -430,7 +434,7 @@ def process_file_cli(filename, opts, settings, match_results: OnlineMatchResults
else:
use_tags = False
- md = create_local_metadata(opts, ca, use_tags)
+ md = create_local_metadata(opts, ca, use_tags, settings)
if md.series is None:
logger.error(msg_hdr + "Can't rename without series name")
diff --git a/comictaggerlib/issueidentifier.py b/comictaggerlib/issueidentifier.py
index b3c6ed7..954d0b6 100644
--- a/comictaggerlib/issueidentifier.py
+++ b/comictaggerlib/issueidentifier.py
@@ -63,6 +63,7 @@ class IssueIdentifier:
result_multiple_good_matches = 5
def __init__(self, comic_archive: ComicArchive, settings):
+ self.settings = settings
self.comic_archive: ComicArchive = comic_archive
self.image_hasher = 1
@@ -192,7 +193,12 @@ class IssueIdentifier:
internal_metadata = ca.read_cbi()
# try to get some metadata from filename
- md_from_filename = ca.metadata_from_filename()
+ md_from_filename = ca.metadata_from_filename(
+ self.settings.complicated_parser,
+ self.settings.remove_c2c,
+ self.settings.remove_fcbd,
+ self.settings.remove_publisher,
+ )
# preference order:
# 1. Additional metadata
diff --git a/comictaggerlib/renamewindow.py b/comictaggerlib/renamewindow.py
index a2be891..630a45c 100644
--- a/comictaggerlib/renamewindow.py
+++ b/comictaggerlib/renamewindow.py
@@ -81,7 +81,12 @@ class RenameWindow(QtWidgets.QDialog):
md = ca.read_metadata(self.data_style)
if md.is_empty:
- md = ca.metadata_from_filename(self.settings.parse_scan_info)
+ md = ca.metadata_from_filename(
+ self.settings.complicated_parser,
+ self.settings.remove_c2c,
+ self.settings.remove_fcbd,
+ self.settings.remove_publisher,
+ )
self.renamer.set_metadata(md)
self.renamer.move = self.settings.rename_move_dir
diff --git a/comictaggerlib/settings.py b/comictaggerlib/settings.py
index 584e954..1222a3e 100644
--- a/comictaggerlib/settings.py
+++ b/comictaggerlib/settings.py
@@ -88,7 +88,10 @@ class ComicTaggerSettings:
self.ask_about_usage_stats = True
# filename parsing settings
- self.parse_scan_info = True
+ self.complicated_parser = False
+ self.remove_c2c = False
+ self.remove_fcbd = False
+ self.remove_publisher = False
# Comic Vine settings
self.use_series_start_as_volume = False
@@ -161,7 +164,10 @@ class ComicTaggerSettings:
self.ask_about_usage_stats = True
# filename parsing settings
- self.parse_scan_info = True
+ self.complicated_parser = False
+ self.remove_c2c = False
+ self.remove_fcbd = False
+ self.remove_publisher = False
# Comic Vine settings
self.use_series_start_as_volume = False
@@ -287,8 +293,14 @@ class ComicTaggerSettings:
if self.config.has_option("identifier", "id_publisher_filter"):
self.id_publisher_filter = self.config.get("identifier", "id_publisher_filter")
- if self.config.has_option("filenameparser", "parse_scan_info"):
- self.parse_scan_info = self.config.getboolean("filenameparser", "parse_scan_info")
+ if self.config.has_option("filenameparser", "complicated_parser"):
+ self.complicated_parser = self.config.getboolean("filenameparser", "complicated_parser")
+ if self.config.has_option("filenameparser", "remove_c2c"):
+ self.remove_c2c = self.config.getboolean("filenameparser", "remove_c2c")
+ if self.config.has_option("filenameparser", "remove_fcbd"):
+ self.remove_fcbd = self.config.getboolean("filenameparser", "remove_fcbd")
+ if self.config.has_option("filenameparser", "remove_publisher"):
+ self.remove_publisher = self.config.getboolean("filenameparser", "remove_publisher")
if self.config.has_option("dialogflags", "ask_about_cbi_in_rar"):
self.ask_about_cbi_in_rar = self.config.getboolean("dialogflags", "ask_about_cbi_in_rar")
@@ -419,7 +431,10 @@ class ComicTaggerSettings:
if not self.config.has_section("filenameparser"):
self.config.add_section("filenameparser")
- self.config.set("filenameparser", "parse_scan_info", self.parse_scan_info)
+ self.config.set("filenameparser", "complicated_parser", self.complicated_parser)
+ self.config.set("filenameparser", "remove_c2c", self.remove_c2c)
+ self.config.set("filenameparser", "remove_fcbd", self.remove_fcbd)
+ self.config.set("filenameparser", "remove_publisher", self.remove_publisher)
if not self.config.has_section("comicvine"):
self.config.add_section("comicvine")
diff --git a/comictaggerlib/settingswindow.py b/comictaggerlib/settingswindow.py
index 409953a..be2ce3c 100644
--- a/comictaggerlib/settingswindow.py
+++ b/comictaggerlib/settingswindow.py
@@ -182,6 +182,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.cbxMoveFiles.clicked.connect(self.rename_test)
self.cbxRenameStrict.clicked.connect(self.rename_test)
self.leDirectory.textEdited.connect(self.rename_test)
+ self.cbxComplicatedParser.clicked.connect(self.switch_parser)
def rename_test(self):
self.rename__test(self.leRenameTemplate.text())
@@ -199,6 +200,13 @@ class SettingsWindow(QtWidgets.QDialog):
self.rename_error = e
self.lblRenameTest.setText(str(e))
+ def switch_parser(self):
+ complicated = self.cbxComplicatedParser.isChecked()
+
+ self.cbxRemoveC2C.setEnabled(complicated)
+ self.cbxRemoveFCBD.setEnabled(complicated)
+ self.cbxRemovePublisher.setEnabled(complicated)
+
def settings_to_form(self):
# Copy values from settings to form
self.leRarExePath.setText(self.settings.rar_exe_path)
@@ -208,8 +216,11 @@ class SettingsWindow(QtWidgets.QDialog):
if self.settings.check_for_new_version:
self.cbxCheckForNewVersion.setCheckState(QtCore.Qt.CheckState.Checked)
- if self.settings.parse_scan_info:
- self.cbxParseScanInfo.setCheckState(QtCore.Qt.CheckState.Checked)
+ self.cbxComplicatedParser.setChecked(self.settings.complicated_parser)
+ self.cbxRemoveC2C.setChecked(self.settings.remove_c2c)
+ self.cbxRemoveFCBD.setChecked(self.settings.remove_fcbd)
+ self.cbxRemovePublisher.setChecked(self.settings.remove_publisher)
+ self.switch_parser()
if self.settings.use_series_start_as_volume:
self.cbxUseSeriesStartAsVolume.setCheckState(QtCore.Qt.CheckState.Checked)
@@ -291,7 +302,10 @@ class SettingsWindow(QtWidgets.QDialog):
self.settings.id_length_delta_thresh = int(self.leNameLengthDeltaThresh.text())
self.settings.id_publisher_filter = str(self.tePublisherFilter.toPlainText())
- self.settings.parse_scan_info = self.cbxParseScanInfo.isChecked()
+ self.settings.complicated_parser = self.cbxComplicatedParser.isChecked()
+ self.settings.remove_c2c = self.cbxRemoveC2C.isChecked()
+ self.settings.remove_fcbd = self.cbxRemoveFCBD.isChecked()
+ self.settings.remove_publisher = self.cbxRemovePublisher.isChecked()
self.settings.use_series_start_as_volume = self.cbxUseSeriesStartAsVolume.isChecked()
self.settings.clear_form_before_populating_from_cv = self.cbxClearFormBeforePopulating.isChecked()
diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py
index 8e6f5b6..f7de3f3 100644
--- a/comictaggerlib/taggerwindow.py
+++ b/comictaggerlib/taggerwindow.py
@@ -557,7 +557,12 @@ Please choose options below, and select OK.
def actual_load_current_archive(self):
if self.metadata.is_empty:
- self.metadata = self.comic_archive.metadata_from_filename(self.settings.parse_scan_info)
+ self.metadata = self.comic_archive.metadata_from_filename(
+ self.settings.complicated_parser,
+ self.settings.remove_c2c,
+ self.settings.remove_fcbd,
+ remove_publisher=self.settings.remove_publisher,
+ )
if len(self.metadata.pages) == 0:
self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages())
@@ -928,7 +933,12 @@ Please choose options below, and select OK.
if self.comic_archive is not None:
# copy the form onto metadata object
self.form_to_metadata()
- new_metadata = self.comic_archive.metadata_from_filename(self.settings.parse_scan_info)
+ new_metadata = self.comic_archive.metadata_from_filename(
+ self.settings.complicated_parser,
+ self.settings.remove_c2c,
+ self.settings.remove_fcbd,
+ remove_publisher=self.settings.remove_publisher,
+ )
if new_metadata is not None:
self.metadata.overlay(new_metadata)
self.metadata_to_form()
@@ -1654,7 +1664,12 @@ Please choose options below, and select OK.
# read in metadata, and parse file name if not there
md = ca.read_metadata(self.save_data_style)
if md.is_empty:
- md = ca.metadata_from_filename(self.settings.parse_scan_info)
+ md = ca.metadata_from_filename(
+ self.settings.complicated_parser,
+ self.settings.remove_c2c,
+ self.settings.remove_fcbd,
+ remove_publisher=self.settings.remove_publisher,
+ )
if dlg.ignore_leading_digits_in_filename and md.series is not None:
# remove all leading numbers
md.series = re.sub(r"([\d.]*)(.*)", "\\2", md.series)
@@ -1846,7 +1861,9 @@ Please choose options below, and select OK to Auto-Tag.
match_results.multiple_matches.extend(match_results.low_confidence_matches)
if reply == QtWidgets.QMessageBox.StandardButton.Yes:
- matchdlg = AutoTagMatchWindow(self, match_results.multiple_matches, style, self.actual_issue_data_fetch)
+ matchdlg = AutoTagMatchWindow(
+ self, match_results.multiple_matches, style, self.actual_issue_data_fetch, self.settings
+ )
matchdlg.setModal(True)
matchdlg.exec()
self.fileSelectionList.update_selected_rows()
diff --git a/comictaggerlib/ui/settingswindow.ui b/comictaggerlib/ui/settingswindow.ui
index b8c97bc..bc8348c 100644
--- a/comictaggerlib/ui/settingswindow.ui
+++ b/comictaggerlib/ui/settingswindow.ui
@@ -229,19 +229,55 @@
Filename Parser
-
-
-
- 30
- 30
- 421
- 25
-
-
-
- Parse Scan Info From Filename (Experimental)
-
-
+
+ -
+
+
+
-
+
+
+ Use "Complicated" Parser
+
+
+
+ -
+
+
+ Remove 'C2C' from Scan Info
+
+
+
+ -
+
+
+ Remove 'FCBD' from Scan Info
+
+
+
+ -
+
+
+ Remove Publisher from filename
+
+
+
+
+
+
+ -
+
+
+ Qt::Vertical
+
+
+
+ 20
+ 40
+
+
+
+
+
diff --git a/requirements.txt b/requirements.txt
index c6e833d..5f177dd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ requests==2.*
pathvalidate
pycountry
py7zr
+text2digits
\ No newline at end of file
diff --git a/tests/data/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz b/tests/data/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz
index 035a7ec..bc281e8 100644
Binary files a/tests/data/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz and b/tests/data/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz differ
diff --git a/tests/filenames.py b/tests/filenames.py
index a535afe..89ea531 100644
--- a/tests/filenames.py
+++ b/tests/filenames.py
@@ -1,35 +1,122 @@
-import pytest
-
fnames = [
(
- "Monster_Island_v1_2__repaired__c2c.cbz",
- "stuff",
+ "batman 3 title (DC).cbz",
+ "honorific and publisher in series",
+ {
+ "issue": "3",
+ "series": "batman",
+ "title": "title",
+ "publisher": "DC",
+ "volume": "",
+ "year": "",
+ "remainder": "",
+ "issue_count": "",
+ "alternate": "",
+ },
+ ),
+ (
+ "batman 3 title DC.cbz",
+ "honorific and publisher in series",
+ {
+ "issue": "3",
+ "series": "batman",
+ "title": "title DC",
+ "publisher": "DC",
+ "volume": "",
+ "year": "",
+ "remainder": "",
+ "issue_count": "",
+ "alternate": "",
+ },
+ ),
+ (
+ "ms. Marvel 3.cbz",
+ "honorific and publisher in series",
+ {
+ "issue": "3",
+ "series": "ms. Marvel",
+ "title": "",
+ "publisher": "Marvel",
+ "volume": "",
+ "year": "",
+ "remainder": "",
+ "issue_count": "",
+ "alternate": "",
+ },
+ ),
+ (
+ "january jones 2.cbz",
+ "month in series",
+ {
+ "issue": "2",
+ "series": "january jones",
+ "title": "",
+ "volume": "",
+ "year": "",
+ "remainder": "",
+ "issue_count": "",
+ "alternate": "",
+ },
+ ),
+ (
+ "52.cbz",
+ "issue number only",
+ {
+ "issue": "52",
+ "series": "",
+ "title": "",
+ "volume": "",
+ "year": "",
+ "remainder": "",
+ "issue_count": "",
+ "alternate": "",
+ },
+ ),
+ (
+ "52 Monster_Island_v1_2__repaired__c2c.cbz",
+ "leading alternate",
{
"issue": "2",
"series": "Monster Island",
- "title": "The Wrath of Foobar-Man, Part 1 of 2",
+ "title": "",
"volume": "1",
"year": "",
- "remainder": "repaired c2c",
+ "remainder": "repaired",
"issue_count": "",
+ "alternate": "52",
+ "c2c": True,
+ },
+ ),
+ (
+ "Monster_Island_v1_2__repaired__c2c.cbz",
+ "Example from userguide",
+ {
+ "issue": "2",
+ "series": "Monster Island",
+ "title": "",
+ "volume": "1",
+ "year": "",
+ "remainder": "repaired",
+ "issue_count": "",
+ "c2c": True,
},
),
(
"Monster Island v1 3 (1957) -- The Revenge Of King Klong (noads).cbz",
- "stuff",
+ "Example from userguide",
{
"issue": "3",
"series": "Monster Island",
- "title": "The Wrath of Foobar-Man, Part 1 of 2",
+ "title": "",
"volume": "1",
"year": "1957",
"remainder": "The Revenge Of King Klong (noads)",
"issue_count": "",
},
),
- pytest.param(
+ (
"Foobar-Man Annual 121 - The Wrath of Foobar-Man, Part 1 of 2.cbz",
- "stuff",
+ "Example from userguide",
{
"issue": "121",
"series": "Foobar-Man Annual",
@@ -38,12 +125,12 @@ fnames = [
"year": "",
"remainder": "",
"issue_count": "",
+ "annual": True,
},
- marks=pytest.mark.xfail,
),
(
"Plastic Man v1 002 (1942).cbz",
- "stuff",
+ "Example from userguide",
{
"issue": "2",
"series": "Plastic Man",
@@ -56,7 +143,7 @@ fnames = [
),
(
"Blue Beetle 02.cbr",
- "stuff",
+ "Example from userguide",
{
"issue": "2",
"series": "Blue Beetle",
@@ -69,7 +156,7 @@ fnames = [
),
(
"Monster Island vol. 2 #2.cbz",
- "stuff",
+ "Example from userguide",
{
"issue": "2",
"series": "Monster Island",
@@ -82,7 +169,7 @@ fnames = [
),
(
"Crazy Weird Comics 2 (of 2) (1969).rar",
- "stuff",
+ "Example from userguide",
{
"issue": "2",
"series": "Crazy Weird Comics",
@@ -95,7 +182,7 @@ fnames = [
),
(
"Super Strange Yarns (1957) #92 (1969).cbz",
- "stuff",
+ "Example from userguide",
{
"issue": "92",
"series": "Super Strange Yarns",
@@ -108,7 +195,7 @@ fnames = [
),
(
"Action Spy Tales v1965 #3.cbr",
- "stuff",
+ "Example from userguide",
{
"issue": "3",
"series": "Action Spy Tales",
@@ -119,9 +206,9 @@ fnames = [
"issue_count": "",
},
),
- pytest.param(
+ (
" X-Men-V1-067.cbr",
- "hyphen separated with hyphen in series",
+ "hyphen separated with hyphen in series", # only parses correctly because v1 designates the volume
{
"issue": "67",
"series": "X-Men",
@@ -131,7 +218,6 @@ fnames = [
"remainder": "",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
(
"Amazing Spider-Man 078.BEY (2022) (Digital) (Zone-Empire).cbr",
@@ -139,15 +225,16 @@ fnames = [
{
"issue": "78.BEY",
"series": "Amazing Spider-Man",
+ "title": "",
"volume": "",
"year": "2022",
"remainder": "(Digital) (Zone-Empire)",
"issue_count": "",
},
),
- pytest.param(
+ (
"Angel Wings 02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
- "title after-issue",
+ "title after issue",
{
"issue": "2",
"series": "Angel Wings",
@@ -157,11 +244,10 @@ fnames = [
"remainder": "(Scanlation) (phillywilly)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Angel Wings #02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
- "title after-#issue",
+ "title after #issue",
{
"issue": "2",
"series": "Angel Wings",
@@ -171,20 +257,19 @@ fnames = [
"remainder": "(Scanlation) (phillywilly)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Aquaman - Green Arrow - Deep Target 01 (of 07) (2021) (digital) (Son of Ultron-Empire).cbr",
"issue count",
{
"issue": "1",
"series": "Aquaman - Green Arrow - Deep Target",
+ "title": "",
"volume": "",
"year": "2021",
"issue_count": "7",
"remainder": "(digital) (Son of Ultron-Empire)",
},
- marks=pytest.mark.xfail,
),
(
"Aquaman 80th Anniversary 100-Page Super Spectacular (2021) 001 (2021) (Digital) (BlackManta-Empire).cbz",
@@ -192,37 +277,39 @@ fnames = [
{
"issue": "1",
"series": "Aquaman 80th Anniversary 100-Page Super Spectacular",
+ "title": "",
"volume": "2021",
"year": "2021",
"remainder": "(Digital) (BlackManta-Empire)",
"issue_count": "",
},
),
- pytest.param(
+ (
"Avatar - The Last Airbender - The Legend of Korra (FCBD 2021) (Digital) (mv-DCP).cbr",
"FCBD date",
{
"issue": "",
"series": "Avatar - The Last Airbender - The Legend of Korra",
+ "title": "",
"volume": "",
"year": "2021",
- "remainder": "(FCBD) (Digital) (mv-DCP)",
+ "remainder": "(Digital) (mv-DCP)",
"issue_count": "",
+ "fcbd": True,
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Avengers By Brian Michael Bendis v03 (2013) (Digital) (F2) (Kileko-Empire).cbz",
"volume without issue",
{
"issue": "",
"series": "Avengers By Brian Michael Bendis",
+ "title": "",
"volume": "3",
"year": "2013",
"remainder": "(Digital) (F2) (Kileko-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
(
"Batman '89 (2021) (Webrip) (The Last Kryptonian-DCP).cbr",
@@ -230,6 +317,7 @@ fnames = [
{
"issue": "",
"series": "Batman '89",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(Webrip) (The Last Kryptonian-DCP)",
@@ -242,6 +330,7 @@ fnames = [
{
"issue": "20",
"series": "Batman - Superman",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(digital) (NeverAngel-Empire)",
@@ -254,6 +343,7 @@ fnames = [
{
"issue": "9",
"series": "Black Widow",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(Digital) (Zone-Empire)",
@@ -266,26 +356,28 @@ fnames = [
{
"issue": "6",
"series": "Blade Runner 2029",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(3 covers) (digital) (Son of Ultron-Empire)",
"issue_count": "",
},
),
- pytest.param(
+ (
"Blade Runner Free Comic Book Day 2021 (2021) (digital-Empire).cbr",
"FCBD year and (year)",
{
"issue": "",
"series": "Blade Runner Free Comic Book Day 2021",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(digital-Empire)",
"issue_count": "",
+ "fcbd": True,
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Bloodshot Book 03 (2020) (digital) (Son of Ultron-Empire).cbr",
"book",
{
@@ -297,9 +389,21 @@ fnames = [
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
+ "book of eli (2020) (digital) (Son of Ultron-Empire).cbr",
+ "book",
+ {
+ "issue": "",
+ "series": "book of eli",
+ "title": "",
+ "volume": "",
+ "year": "2020",
+ "remainder": "(digital) (Son of Ultron-Empire)",
+ "issue_count": "",
+ },
+ ),
+ (
"Cyberpunk 2077 - You Have My Word 02 (2021) (digital) (Son of Ultron-Empire).cbr",
"title",
{
@@ -311,9 +415,8 @@ fnames = [
"issue_count": "",
"remainder": "(digital) (Son of Ultron-Empire)",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Elephantmen 2259 008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
"volume count",
{
@@ -326,9 +429,8 @@ fnames = [
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
"volume count",
{
@@ -341,20 +443,20 @@ fnames = [
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Free Comic Book Day - Avengers.Hulk (2021) (2048px) (db).cbz",
"'.' in name",
{
"issue": "",
"series": "Free Comic Book Day - Avengers Hulk",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(2048px) (db)",
"issue_count": "",
+ "fcbd": True,
},
- marks=pytest.mark.xfail,
),
(
"Goblin (2021) (digital) (Son of Ultron-Empire).cbr",
@@ -362,37 +464,41 @@ fnames = [
{
"issue": "",
"series": "Goblin",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(digital) (Son of Ultron-Empire)",
"issue_count": "",
},
),
- pytest.param(
+ (
"Marvel Previews 002 (January 2022) (Digital-Empire).cbr",
"(month year)",
{
"issue": "2",
"series": "Marvel Previews",
+ "title": "",
+ "publisher": "Marvel",
"volume": "",
"year": "2022",
"remainder": "(Digital-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Marvel Two In One V1 090 c2c (Comixbear-DCP).cbr",
"volume issue ctc",
{
"issue": "90",
"series": "Marvel Two In One",
+ "title": "",
+ "publisher": "Marvel",
"volume": "1",
"year": "",
- "remainder": "c2c (Comixbear-DCP)",
+ "remainder": "(Comixbear-DCP)",
"issue_count": "",
+ "c2c": True,
},
- marks=pytest.mark.xfail,
),
(
"Marvel Two In One V1 #090 c2c (Comixbear-DCP).cbr",
@@ -400,24 +506,27 @@ fnames = [
{
"issue": "90",
"series": "Marvel Two In One",
+ "title": "",
+ "publisher": "Marvel",
"volume": "1",
"year": "",
- "remainder": "c2c (Comixbear-DCP)",
+ "remainder": "(Comixbear-DCP)",
"issue_count": "",
+ "c2c": True,
},
),
- pytest.param(
+ (
"Star Wars - War of the Bounty Hunters - IG-88 (2021) (Digital) (Kileko-Empire).cbz",
"number ends series, no-issue",
{
"issue": "",
"series": "Star Wars - War of the Bounty Hunters - IG-88",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(Digital) (Kileko-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
(
"Star Wars - War of the Bounty Hunters - IG-88 #1 (2021) (Digital) (Kileko-Empire).cbz",
@@ -425,6 +534,7 @@ fnames = [
{
"issue": "1",
"series": "Star Wars - War of the Bounty Hunters - IG-88",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(Digital) (Kileko-Empire)",
@@ -437,39 +547,41 @@ fnames = [
{
"issue": "58",
"series": "The Defenders",
+ "title": "",
"volume": "1",
"year": "1978",
"remainder": "(digital)",
"issue_count": "",
},
),
- pytest.param(
+ (
"The Defenders v1 Annual 01 (1976) (Digital) (Minutemen-Slayer).cbr",
" v in series",
{
"issue": "1",
"series": "The Defenders Annual",
+ "title": "",
"volume": "1",
"year": "1976",
"remainder": "(Digital) (Minutemen-Slayer)",
"issue_count": "",
+ "annual": True,
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"The Magic Order 2 06 (2022) (Digital) (Zone-Empire)[__913302__].cbz",
"ending id",
{
"issue": "6",
"series": "The Magic Order 2",
+ "title": "",
"volume": "",
"year": "2022",
- "remainder": "(Digital) (Zone-Empire)[__913302__]",
+ "remainder": "(Digital) (Zone-Empire)[913302]", # Don't really care about double underscores
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Wonder Woman 001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
"issue separates title",
{
@@ -481,9 +593,8 @@ fnames = [
"remainder": "(digital-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Wonder Woman #001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
"issue separates title",
{
@@ -495,46 +606,47 @@ fnames = [
"remainder": "(digital-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Wonder Woman 49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
"date-range, no paren, braces",
{
"issue": "49",
"series": "Wonder Woman",
+ "title": "digital", # Don't have a way to get rid of this
+ "publisher": "DC",
"volume": "",
"year": "1951",
- "remainder": "(Shadowcat-Empire)",
+ "remainder": "[downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
"date-range, no paren, braces",
{
"issue": "49",
"series": "Wonder Woman",
+ "title": "digital", # Don't have a way to get rid of this
+ "publisher": "DC",
"volume": "",
"year": "1951",
- "remainder": "(Shadowcat-Empire)",
+ "remainder": "[downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
- pytest.param(
+ (
"X-Men, 2021-08-04 (#02) (digital) (Glorith-HD).cbz",
"full-date, issue in parenthesis",
{
"issue": "2",
"series": "X-Men",
+ "title": "",
"volume": "",
"year": "2021",
"remainder": "(digital) (Glorith-HD)",
"issue_count": "",
},
- marks=pytest.mark.xfail,
),
]
diff --git a/tests/test_FilenameParser.py b/tests/test_FilenameParser.py
index 80de5e9..2371ee2 100644
--- a/tests/test_FilenameParser.py
+++ b/tests/test_FilenameParser.py
@@ -4,13 +4,39 @@ from filenames import fnames
import comicapi.filenameparser
+@pytest.mark.parametrize("filename,reason,expected", fnames)
+def test_file_name_parser_new(filename, reason, expected):
+ p = comicapi.filenameparser.Parse(
+ comicapi.filenamelexer.Lex(filename).items,
+ first_is_alt=True,
+ remove_c2c=True,
+ remove_fcbd=True,
+ remove_publisher=True,
+ )
+ fp = p.filename_info
+
+ for s in ["archive"]:
+ if s in fp:
+ del fp[s]
+ for s in ["alternate", "publisher", "volume_count"]:
+ if s not in expected:
+ expected[s] = ""
+ for s in ["fcbd", "c2c", "annual"]:
+ if s not in expected:
+ expected[s] = False
+
+ assert fp == expected
+
+
@pytest.mark.parametrize("filename,reason,expected", fnames)
def test_file_name_parser(filename, reason, expected):
p = comicapi.filenameparser.FileNameParser()
p.parse_filename(filename)
fp = p.__dict__
- for s in ["title"]:
+ for s in ["title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count"]:
if s in expected:
del expected[s]
+ if fp != expected:
+ pytest.xfail("old parser")
assert fp == expected