Add new filename parser
I created a new, mostly overcomplicated, filename parser. The new parser works well in many cases and will collect more data than the original parser, but it will sometimes give odd results because of how complicated it has been made; e.g. '100 page giant' will cause issues, whereas '100-page giant' will not. Remove the parse scan info setting, as it was not respected in many cases.
This commit is contained in:
parent
049971a78a
commit
205d337751
@ -42,10 +42,10 @@ try:
|
||||
except ImportError:
|
||||
pil_available = False
|
||||
|
||||
from comicapi import filenamelexer, filenameparser
|
||||
from comicapi.comet import CoMet
|
||||
from comicapi.comicbookinfo import ComicBookInfo
|
||||
from comicapi.comicinfoxml import ComicInfoXml
|
||||
from comicapi.filenameparser import FileNameParser
|
||||
from comicapi.genericmetadata import GenericMetadata, PageType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -1127,25 +1127,46 @@ class ComicArchive:
|
||||
data = self.get_page(idx)
|
||||
p["ImageSize"] = str(len(data))
|
||||
|
||||
def metadata_from_filename(self, parse_scan_info=True):
|
||||
def metadata_from_filename(
|
||||
self, complicated_parser=False, remove_c2c=False, remove_fcbd=False, remove_publisher=False
|
||||
):
|
||||
|
||||
metadata = GenericMetadata()
|
||||
|
||||
fnp = FileNameParser()
|
||||
fnp.parse_filename(self.path)
|
||||
if complicated_parser:
|
||||
lex = filenamelexer.Lex(self.path)
|
||||
p = filenameparser.Parse(
|
||||
lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher
|
||||
)
|
||||
metadata.alternate_number = p.filename_info["alternate"] or None
|
||||
metadata.issue = p.filename_info["issue"] or None
|
||||
metadata.issue_count = p.filename_info["issue_count"] or None
|
||||
metadata.publisher = p.filename_info["publisher"] or None
|
||||
metadata.series = p.filename_info["series"] or None
|
||||
metadata.title = p.filename_info["title"] or None
|
||||
metadata.volume = p.filename_info["volume"] or None
|
||||
metadata.volume_count = p.filename_info["volume_count"] or None
|
||||
metadata.year = p.filename_info["year"] or None
|
||||
|
||||
if fnp.issue != "":
|
||||
metadata.issue = fnp.issue
|
||||
if fnp.series != "":
|
||||
metadata.series = fnp.series
|
||||
if fnp.volume != "":
|
||||
metadata.volume = fnp.volume
|
||||
if fnp.year != "":
|
||||
metadata.year = fnp.year
|
||||
if fnp.issue_count != "":
|
||||
metadata.issue_count = fnp.issue_count
|
||||
if parse_scan_info:
|
||||
if fnp.remainder != "":
|
||||
metadata.scan_info = p.filename_info["remainder"] or None
|
||||
metadata.format = "FCBD" if p.filename_info["fcbd"] else None
|
||||
if p.filename_info["annual"]:
|
||||
metadata.format = "Annual"
|
||||
else:
|
||||
fnp = filenameparser.FileNameParser()
|
||||
fnp.parse_filename(self.path)
|
||||
|
||||
if fnp.issue:
|
||||
metadata.issue = fnp.issue
|
||||
if fnp.series:
|
||||
metadata.series = fnp.series
|
||||
if fnp.volume:
|
||||
metadata.volume = fnp.volume
|
||||
if fnp.year:
|
||||
metadata.year = fnp.year
|
||||
if fnp.issue_count:
|
||||
metadata.issue_count = fnp.issue_count
|
||||
if fnp.remainder:
|
||||
metadata.scan_info = fnp.remainder
|
||||
|
||||
metadata.is_empty = False
|
||||
|
353
comicapi/filenamelexer.py
Normal file
353
comicapi/filenamelexer.py
Normal file
@ -0,0 +1,353 @@
|
||||
import calendar
|
||||
import os
|
||||
import unicodedata
|
||||
from enum import Enum, auto
|
||||
|
||||
|
||||
class ItemType(Enum):
    """Kinds of item the filename lexer can produce."""

    Error = auto()  # Lexing failed; the item's value holds the error message
    EOF = auto()  # End of the input
    Text = auto()  # Plain word
    LeftParen = auto()  # '('
    Number = auto()  # Bare number
    IssueNumber = auto()  # Number introduced by a '#'
    RightParen = auto()  # ')'
    Space = auto()  # Run of space-like characters
    Dot = auto()  # '.'
    LeftBrace = auto()  # '{'
    RightBrace = auto()  # '}'
    LeftSBrace = auto()  # '['
    RightSBrace = auto()  # ']'
    Symbol = auto()  # Any other punctuation/symbol character
    Skip = auto()  # '__' or '--': no title, issue or series information beyond this point
    Operator = auto()  # Separator-like characters such as '-' or ':'
    Calendar = auto()  # Month or day name
    InfoSpecifier = auto()  # Says what the next value means, e.g. v1 for 'volume': 1
    ArchiveType = auto()  # Known archive extension
    Honorific = auto()  # mr, ms, mrs, dr
    Keywords = auto()
    FCBD = auto()  # Free Comic Book Day marker
    ComicType = auto()  # e.g. 'annual' or 'book'
    Publisher = auto()  # Publisher name from the static keyword table
    C2C = auto()  # 'c2c' marker
|
||||
|
||||
|
||||
# Every item type that opens or closes one of the three bracket styles.
braces = [
    ItemType.LeftBrace,
    ItemType.LeftParen,
    ItemType.LeftSBrace,
    ItemType.RightBrace,
    ItemType.RightParen,
    ItemType.RightSBrace,
]

# Sentinel "character" handed out by the lexer once the input is exhausted.
eof = "\x00"

# Lower-cased words that lex to something more specific than plain text.
# Groups are spelled with dict.fromkeys so related words sit together; the
# insertion order matches the flat literal this replaces.
key = {
    **dict.fromkeys(("fcbd", "freecomicbookday"), ItemType.FCBD),
    **dict.fromkeys(("cbr", "cbz", "cbt", "cb7", "rar", "zip", "tar", "7z"), ItemType.ArchiveType),
    **dict.fromkeys(("annual", "book"), ItemType.ComicType),
    **dict.fromkeys(("volume", "vol.", "vol", "v", "of"), ItemType.InfoSpecifier),
    **dict.fromkeys(("dc", "marvel"), ItemType.Publisher),
    "covers": ItemType.InfoSpecifier,
    "c2c": ItemType.C2C,
    **dict.fromkeys(("mr", "ms", "mrs", "dr"), ItemType.Honorific),
}
|
||||
|
||||
|
||||
class Item:
    """A single lexed token: its type, its start offset in the input and its text."""

    def __init__(self, typ: ItemType, pos: int, val: str):
        self.typ: ItemType = typ  # Kind of token
        self.pos: int = pos  # Offset of the token's first character in the input
        self.val: str = val  # Text of the token (or the message for Error items)

    def __repr__(self):
        return "{}: index: {}: {}".format(self.val, self.pos, self.typ)
|
||||
|
||||
|
||||
class Lexer:
    """Character-level state machine that splits a string into ``Item`` objects.

    ``pos`` is the index of the most recently consumed character (``-1``
    before anything has been read) and ``start`` is the index of the first
    character of the item currently being built, so the pending text is
    always ``input[start : pos + 1]``.
    """

    def __init__(self, string):
        self.input: str = string  # The string being scanned
        self.state = None  # The next lexing function to enter
        self.pos: int = -1  # Index of the last consumed character
        self.start: int = 0  # Start position of the pending item
        self.lastPos: int = 0  # Not updated by anything in this class; kept for compatibility
        self.paren_depth: int = 0  # Nesting depth of ( )
        self.brace_depth: int = 0  # Nesting depth of { }
        self.sbrace_depth: int = 0  # Nesting depth of [ ]
        self.items = []  # Items emitted so far, in input order

    def get(self) -> str:
        """Consume and return the next character, or ``eof`` past the end.

        ``pos`` is advanced even when ``eof`` is returned so that a
        following ``backup()`` restores the previous position.
        """
        self.pos += 1
        if self.pos >= len(self.input):
            return eof
        return self.input[self.pos]

    def peek(self) -> str:
        """Return the next character without consuming it (``eof`` at the end)."""
        upcoming = self.pos + 1
        if upcoming >= len(self.input):
            return eof
        return self.input[upcoming]

    def backup(self):
        """Step back one character."""
        self.pos -= 1

    def emit(self, t: ItemType):
        """Append the pending text as an item of type ``t`` and start a new item."""
        self.items.append(Item(t, self.start, self.input[self.start : self.pos + 1]))
        self.start = self.pos + 1

    def ignore(self):
        """Discard the pending text so the next item starts after ``pos``.

        ``pos`` points at the last consumed character, so the new start is
        ``pos + 1`` (the same convention ``emit`` uses); using ``pos`` would
        leave the last consumed character in the next item.
        """
        self.start = self.pos + 1

    def accept(self, valid: str):
        """Consume the next character if it is in ``valid``; report whether it was."""
        if self.get() in valid:
            return True

        self.backup()
        return False

    def accept_run(self, valid: str):
        """Consume characters for as long as they are in ``valid``."""
        while self.get() in valid:
            pass

        # The loop consumed one character too many
        self.backup()

    def errorf(self, message: str):
        """Record an Error item carrying ``message``.

        Returns ``None`` implicitly, so ``return lex.errorf(...)`` from a
        state function also stops the state machine.
        """
        self.items.append(Item(ItemType.Error, self.start, message))

    def scan_number(self):
        """Consume a number with an optional decimal part and ordinal suffix.

        Accepts digits, optionally followed by ``.`` and more digits, then
        optionally ``st``, ``nd``/``rd`` or ``th``. An incomplete suffix
        (e.g. the ``s`` of ``1s``) is handed back. Always returns ``True``.
        """
        digits = "0123456789"

        self.accept_run(digits)
        if self.accept("."):
            if self.accept(digits):
                self.accept_run(digits)
            else:
                # A trailing dot is not part of the number
                self.backup()
        if self.accept("s"):
            if not self.accept("t"):
                self.backup()
        elif self.accept("nr"):
            if not self.accept("d"):
                self.backup()
        elif self.accept("t"):
            if not self.accept("h"):
                self.backup()

        return True

    def run(self):
        """Run the state machine until a state function returns ``None``."""
        self.state = lex_filename
        while self.state is not None:
            self.state = self.state(self)
|
||||
|
||||
|
||||
def lex_filename(lex: Lexer):
    """Top-level lexer state: look at one character and decide what it starts.

    Single-character items are emitted here; numbers, text, operators and
    runs of space are handed to their own state functions. Returns the next
    state function, or ``None`` (end of input or an error) to stop the lexer.
    """
    r = lex.get()
    if r == eof:
        if lex.paren_depth != 0:
            return lex.errorf("unclosed left paren")

        if lex.brace_depth != 0:
            return lex.errorf("unclosed left brace")
        # NOTE(review): sbrace_depth is not checked here, so an unclosed '['
        # is not reported as an error.
        lex.emit(ItemType.EOF)
        return None
    elif is_space(r):
        # A double underscore is a Skip marker rather than whitespace
        if r == "_" and lex.peek() == "_":
            lex.get()
            lex.emit(ItemType.Skip)
        else:
            return lex_space
    elif r == ".":
        r = lex.peek()
        if r < "0" or "9" < r:
            lex.emit(ItemType.Dot)
            return lex_filename

        # '.5' style number: un-read the dot so the number scanner sees it
        lex.backup()
        return lex_number
    elif r == "'":
        # "'89" is lexed as a number including the apostrophe
        r = lex.peek()
        if r in "0123456789":
            return lex_number
        lex.emit(ItemType.Text)  # TODO: Change to Text
    elif "0" <= r <= "9":
        lex.backup()
        return lex_number
    elif r == "#":
        # '#' directly followed by a digit starts an issue number
        if "0" <= lex.peek() <= "9":
            return lex_number
        lex.emit(ItemType.Symbol)
    elif is_operator(r):
        # A double dash is a Skip marker rather than an operator
        if r == "-" and lex.peek() == "-":
            lex.get()
            lex.emit(ItemType.Skip)
        else:
            return lex_operator
    elif is_alpha_numeric(r):
        lex.backup()
        return lex_text
    elif r == "(":
        lex.emit(ItemType.LeftParen)
        lex.paren_depth += 1
    elif r == ")":
        lex.emit(ItemType.RightParen)
        lex.paren_depth -= 1
        if lex.paren_depth < 0:
            return lex.errorf("unexpected right paren " + r)

    elif r == "{":
        lex.emit(ItemType.LeftBrace)
        lex.brace_depth += 1
    elif r == "}":
        lex.emit(ItemType.RightBrace)
        lex.brace_depth -= 1
        if lex.brace_depth < 0:
            return lex.errorf("unexpected right brace " + r)

    elif r == "[":
        lex.emit(ItemType.LeftSBrace)
        lex.sbrace_depth += 1
    elif r == "]":
        lex.emit(ItemType.RightSBrace)
        lex.sbrace_depth -= 1
        if lex.sbrace_depth < 0:
            return lex.errorf("unexpected right brace " + r)
    elif is_symbol(r):
        lex.emit(ItemType.Symbol)
    else:
        return lex.errorf("unrecognized character in action: " + r)

    return lex_filename
|
||||
|
||||
|
||||
def lex_operator(lex: Lexer):
    """Gather a run of '-', '|', ':' or ';' into a single Operator item.

    The first operator character has already been consumed by the caller.
    """
    joinable = "-|:;"
    while lex.get() in joinable:
        pass
    # The loop read one character past the run
    lex.backup()

    lex.emit(ItemType.Operator)
    return lex_filename
|
||||
|
||||
|
||||
def lex_space(lex: Lexer):
    """Collect a run of space-like characters into one Space item.

    The first character of the run has already been consumed by the caller.
    """
    while True:
        if not is_space(lex.peek()):
            break
        lex.get()

    lex.emit(ItemType.Space)
    return lex_filename
|
||||
|
||||
|
||||
def lex_text(lex: Lexer):
    """Scan one alphanumeric word and emit it with the most specific type.

    A word that hits a digit right after an info specifier (e.g. the ``v``
    in ``v1``) is cut before the digit and emitted as that specifier.
    Otherwise the finished word is looked up in ``key``, then tested as a
    month/day name, and finally falls back to plain Text.
    """
    while True:
        char = lex.get()
        if not is_alpha_numeric(char):
            break
        if not char.isnumeric():
            continue

        # E.g. v1: everything before this digit may be an info specifier
        prefix = lex.input[lex.start : lex.pos].lower()
        if key.get(prefix) == ItemType.InfoSpecifier:
            lex.backup()
            lex.emit(key[prefix])
            return lex_filename

    # Keep a possessive "'s" attached to the word, otherwise un-read the
    # character that ended it
    if char == "'" and lex.peek() == "s":
        lex.get()
    else:
        lex.backup()

    word = lex.input[lex.start : lex.pos + 1]
    # 'vol.' is a keyword of its own, so take the dot along
    if word.lower() == "vol" and lex.peek() == ".":
        lex.get()
        word = lex.input[lex.start : lex.pos + 1]

    lowered = word.lower()
    if lowered in key:
        kind = key[lowered]
    elif cal(word):
        kind = ItemType.Calendar
    else:
        kind = ItemType.Text
    lex.emit(kind)

    return lex_filename
|
||||
|
||||
|
||||
def cal(value: str):
    """Return the calendar indexes at which ``value`` names a month or a day.

    ``value`` is title-cased and compared against the month and day names
    and abbreviations from the ``calendar`` module. The result is a set of
    the matching indexes, empty when nothing matches.
    """
    titled = value.title()
    tables = (
        calendar.month_abbr,
        calendar.month_name,
        calendar.day_abbr,
        calendar.day_name,
    )

    matches = set()
    for table in tables:
        matches.update(index for index, name in enumerate(table) if name == titled)
    return matches
|
||||
|
||||
|
||||
def lex_number(lex: Lexer):
    """Scan a number and emit it as IssueNumber, Number or Text.

    A leading '#' makes it an IssueNumber. If the scanned text does not end
    in a digit (e.g. '80th') it is assumed to be text rather than a number.
    """
    if not lex.scan_number():
        return lex.errorf("bad number syntax: " + lex.input[lex.start : lex.pos])
    # Complex number logic removed. Messes with math operations without space

    if lex.input[lex.start] == "#":
        kind = ItemType.IssueNumber
    elif lex.input[lex.pos].isdigit():
        kind = ItemType.Number
    else:
        # Assume that 80th is just text and not a number
        kind = ItemType.Text
    lex.emit(kind)

    return lex_filename
|
||||
|
||||
|
||||
def is_space(character: str):
    """Report whether ``character`` is an underscore, a space or a tab."""
    space_like = "_ \t"
    return character in space_like
|
||||
|
||||
|
||||
def is_alpha_numeric(character: str):
    """Report whether ``character`` is alphabetic or numeric.

    Underscore is not accepted here; it is handled as a space.
    """
    checks = (character.isalpha(), character.isnumeric())
    return any(checks)
|
||||
|
||||
|
||||
def is_operator(character: str):
    """Report whether ``character`` is one of - | : ; / or a backslash."""
    operator_chars = "-|:;/\\"
    return character in operator_chars
|
||||
|
||||
|
||||
def is_symbol(character: str):
    """Report whether ``character`` is in a Unicode punctuation (P*) or symbol (S*) category."""
    major_class = unicodedata.category(character)[0]
    return major_class in "PS"
|
||||
|
||||
|
||||
def Lex(filename: str):
    """Lex the base name of ``filename`` and return the finished ``Lexer``.

    Any directory part of the path is dropped before lexing; the resulting
    items are available on the returned lexer's ``items`` attribute.
    """
    basename = os.path.basename(filename)
    lexer = Lexer(basename)
    lexer.run()
    return lexer
|
@ -23,8 +23,17 @@ This should probably be re-written, but, well, it mostly works!
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from operator import itemgetter
|
||||
from typing import TypedDict
|
||||
from urllib.parse import unquote
|
||||
|
||||
from text2digits import text2digits
|
||||
|
||||
from comicapi import filenamelexer, issuestring
|
||||
|
||||
t2d = text2digits.Text2Digits(add_ordinal_ending=False)
|
||||
t2do = text2digits.Text2Digits(add_ordinal_ending=True)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -68,9 +77,7 @@ class FileNameParser:
|
||||
if match:
|
||||
count = match.group()
|
||||
|
||||
count = count.lstrip("0")
|
||||
|
||||
return count
|
||||
return count.lstrip("0")
|
||||
|
||||
def get_issue_number(self, filename):
|
||||
"""Returns a tuple of issue number string, and start and end indexes in the filename
|
||||
@ -222,7 +229,7 @@ class FileNameParser:
|
||||
|
||||
year = ""
|
||||
# look for four digit number with "(" ")" or "--" around it
|
||||
match = re.search(r"(\(\d\d\d\d\))|(--\d\d\d\d--)", filename)
|
||||
match = re.search(r"(\(\d{4}\))|(--\d{4}--)", filename)
|
||||
if match:
|
||||
year = match.group()
|
||||
# remove non-digits
|
||||
@ -290,3 +297,814 @@ class FileNameParser:
|
||||
self.issue = "0"
|
||||
if self.issue[0] == ".":
|
||||
self.issue = "0" + self.issue
|
||||
|
||||
|
||||
class FilenameInfo(TypedDict, total=False):
    """Fields the filename parser may fill in; every key is optional."""

    alternate: str  # Alternate issue number(s)
    annual: bool  # The word 'annual' was seen
    archive: str  # Lower-cased archive extension
    c2c: bool  # A 'c2c' marker was seen
    fcbd: bool  # A Free Comic Book Day marker was seen
    issue: str
    issue_count: str
    publisher: str
    remainder: str
    series: str
    title: str
    volume: str
    volume_count: str
    year: str
|
||||
|
||||
|
||||
# Sentinel item returned by Parser.get/peek/peek_back when there is no item
# at the requested position.
eof = filenamelexer.Item(filenamelexer.ItemType.EOF, -1, "")


class Parser:
    """Item-level state machine that turns lexer output into filename info.

    ``pos`` is the index of the most recently consumed item (``-1`` before
    anything has been read). Results accumulate in ``filename_info`` and in
    the various ``*_parts`` / item lists while the state functions run.
    """

    def __init__(
        self,
        lexer_result: list[filenamelexer.Item],
        first_is_alt=False,
        remove_c2c=False,
        remove_fcbd=False,
        remove_publisher=False,
    ):
        self.state = None  # The next parsing function to enter
        self.pos = -1  # Index of the last consumed item

        self.firstItem = True  # No item has been fully handled yet
        self.skip = False  # A Skip item ('--' or '__') has been seen
        self.alt = False  # The number being parsed is an alternate number
        self.filename_info: FilenameInfo = {"series": ""}
        self.issue_number_at = None  # Set once an issue number is known
        self.in_something = 0  # In some sort of brackets {}[]()
        self.in_brace = 0  # In {}
        self.in_s_brace = 0  # In []
        self.in_paren = 0  # In ()
        self.year_candidates: list[tuple[bool, filenamelexer.Item]] = []  # (likely_year, item)
        self.series_parts: list[filenamelexer.Item] = []
        self.title_parts: list[filenamelexer.Item] = []
        self.used_items: list[filenamelexer.Item] = []
        self.irrelevant: list[filenamelexer.Item] = []
        self.operator_rejected: list[filenamelexer.Item] = []
        self.publisher_removed: list[filenamelexer.Item] = []

        self.first_is_alt = first_is_alt
        self.remove_c2c = remove_c2c
        self.remove_fcbd = remove_fcbd
        self.remove_publisher = remove_publisher

        self.input = lexer_result
        # Remember the last '#'-prefixed number, if any; its presence stops
        # bare numbers from being taken as the issue.
        for i, item in enumerate(self.input):
            if item.typ == filenamelexer.ItemType.IssueNumber:
                self.issue_number_at = i

    def get(self) -> filenamelexer.Item:
        """Consume and return the next item, or ``eof`` past the end.

        ``pos`` is advanced even when ``eof`` is returned so that a
        following ``backup()`` restores the previous position.
        """
        self.pos += 1
        if self.pos >= len(self.input):
            return eof
        return self.input[self.pos]

    def peek(self) -> filenamelexer.Item:
        """Return the next item without consuming it (``eof`` at the end)."""
        upcoming = self.pos + 1
        if upcoming >= len(self.input):
            return eof
        return self.input[upcoming]

    def peek_back(self) -> filenamelexer.Item:
        """Return the item before the current one without moving.

        Returns ``eof`` when there is no such item, both at the very start
        (including before the first ``get``) and when ``pos`` has run more
        than one step past the end of the input.
        """
        previous = self.pos - 1
        if previous < 0 or previous >= len(self.input):
            return eof
        return self.input[previous]

    def backup(self):
        """Step back one item."""
        self.pos -= 1

    def run(self):
        """Run the state machine until a state function returns ``None``."""
        self.state = parse
        while self.state is not None:
            self.state = self.state(self)
|
||||
|
||||
|
||||
def parse(p: Parser):
    """Main parser state: consume one item and route it by type.

    Depending on the item this records information directly on ``p`` or
    returns a more specific state function (``parse_issue_number``,
    ``parse_series``, ``parse_info_specifier``, ``parse_finish``). When the
    item needs no hand-off it returns itself to process the next item.
    """
    kinds = filenamelexer.ItemType
    item = p.get()

    # We're done, time to do final processing
    if item.typ == kinds.EOF:
        return parse_finish

    # Need to figure out if this is the issue number
    if item.typ == kinds.Number:
        likely_year = False
        if p.firstItem and p.first_is_alt:
            p.alt = True
            return parse_issue_number

        # The issue number should hopefully not be in parentheses
        if p.in_something == 0:
            # Assume that operators indicate a non-issue number e.g. IG-88 or 88-IG
            if kinds.Operator not in (p.peek().typ, p.peek_back().typ):
                if (
                    # It is common to use '89 to refer to an annual reprint from 1989
                    item.val[0] != "'"
                    # Issue number is less than 4 digits. very few series go above 999
                    and len(item.val.lstrip("0")) < 4
                    # An issue number starting with # was not found and no previous number was found
                    and p.issue_number_at is None
                    # Series has already been started/parsed, filters out leading alternate numbers
                    and len(p.series_parts) > 0
                ):
                    # Unset first item
                    if p.firstItem:
                        p.firstItem = False
                    return parse_issue_number
            else:
                # operator rejected used later to add back to the series/title
                p.operator_rejected.append(item)

        # It is more likely to be a year if it is inside parentheses.
        if p.in_something > 0:
            likely_year = True

        # If numbers are directly followed by text it most likely isn't a year e.g. 2048px
        if p.peek().typ == kinds.Text:
            likely_year = False

        # Is either a full year '2001' or a short year "'89"
        if len(item.val) == 4 or item.val[0] == "'":
            if p.in_something == 0:
                # Append to series in case it is a part of the title, but only if were not inside parenthesis
                p.series_parts.append(item)

            # Look for a full date as in 2022-04-22
            separators = [kinds.Symbol, kinds.Operator, kinds.Dot]
            if p.peek().typ in separators:
                op = [p.get()]
                if p.peek().typ == kinds.Number:
                    month = p.get()
                    if p.peek().typ in separators:
                        op.append(p.get())
                        if p.peek().typ == kinds.Number:
                            day = p.get()
                            fulldate = [month, day, item]
                            p.used_items.extend(op)
                            p.used_items.extend(fulldate)
                        else:
                            # Not a full date: un-read separator, month, separator
                            p.backup()
                            p.backup()
                            p.backup()
                            # TODO never happens
                    else:
                        # Un-read month and separator
                        p.backup()
                        p.backup()
                        # TODO never happens
                else:
                    # Un-read the separator
                    p.backup()
                    # TODO never happens

            p.year_candidates.append((likely_year, item))
        # Ensures that IG-88 gets added back to the series/title.
        # Were not in something and the next or previous type is an operator,
        # add it to the series. The bracket check applies to both operator
        # positions.
        elif p.in_something == 0 and kinds.Operator in (p.peek_back().typ, p.peek().typ):
            p.series_parts.append(item)
            p.used_items.append(item)

            # Unset first item
            if p.firstItem:
                p.firstItem = False
            p.get()
            return parse_series

    # Number with a leading hash e.g. #003
    elif item.typ == kinds.IssueNumber:
        # Unset first item
        if p.firstItem:
            p.firstItem = False
        return parse_issue_number

    # Matches FCBD. Not added to p.used_items so it will show in "remainder"
    elif item.typ == kinds.FCBD:
        p.filename_info["fcbd"] = True

    # Matches c2c. Not added to p.used_items so it will show in "remainder"
    elif item.typ == kinds.C2C:
        p.filename_info["c2c"] = True

    # Matches the extension if it is known to be an archive format e.g. cbt,cbz,zip,rar
    elif item.typ == kinds.ArchiveType:
        p.filename_info["archive"] = item.val.lower()
        p.used_items.append(item)
        if p.peek_back().typ == kinds.Dot:
            p.used_items.append(p.peek_back())

    # Allows removing DC from 'Wonder Woman 49 DC Sep-Oct 1951' dependent on publisher being in a static list in the lexer
    elif item.typ == kinds.Publisher:
        p.filename_info["publisher"] = item.val
        p.used_items.append(item)
        if p.firstItem:
            p.firstItem = False
            if p.in_something == 0:
                return parse_series
        p.publisher_removed.append(item)
        if p.in_something == 0:
            return parse_series

    # Attempts to identify the type e.g. annual
    elif item.typ == kinds.ComicType:
        series_append = True

        if p.peek().typ == kinds.Space:
            p.get()

        if p.series_parts and "free comic book" in (" ".join([x.val for x in p.series_parts]) + " " + item.val).lower():
            p.filename_info["fcbd"] = True
        # If the next item is a number it's probably the volume
        elif p.peek().typ == kinds.Number or (
            p.peek().typ == kinds.Text and t2d.convert(p.peek().val).isnumeric()
        ):
            number = p.get()
            # Mark volume info. Text will be added to the title/series later
            if item.val.lower() in ["book", "tpb"]:
                p.title_parts.extend([item, number])
                p.filename_info["volume"] = t2do.convert(number.val)
                p.filename_info["issue"] = t2do.convert(number.val)

                p.used_items.append(item)
                series_append = False

            # Annuals usually mean the year
            elif item.val.lower() == "annual":
                p.filename_info["annual"] = True
                num = t2d.convert(number.val)
                if num.isnumeric() and len(num) == 4:
                    p.year_candidates.append((True, number))
                else:
                    # Not a year: hand the number back
                    p.backup()

        elif item.val.lower() == "annual":
            p.filename_info["annual"] = True

        # If we don't have a reason to exclude it from the series go back to parsing the series immediately
        if series_append:
            p.series_parts.append(item)
            p.used_items.append(item)
            return parse_series

    # We found text, it's probably the title or series
    elif item.typ in [kinds.Text, kinds.Honorific]:
        # Unset first item
        if p.firstItem:
            p.firstItem = False
        if p.in_something == 0:
            return parse_series

    # Usually the word 'of' eg 1 (of 6)
    elif item.typ == kinds.InfoSpecifier:
        return parse_info_specifier

    # Operator is a symbol that acts as some sort of separator eg - : ;
    elif item.typ == kinds.Operator:
        if p.in_something == 0:
            p.irrelevant.append(item)

    # Filter out Month and day names in filename
    elif item.typ == kinds.Calendar:
        # Month and day are currently irrelevant if they are inside parentheses e.g. (January 2002)
        if p.in_something > 0:
            p.irrelevant.append(item)

        # assume Sep-Oct is not useful in the series/title
        elif p.peek().typ in [kinds.Symbol, kinds.Operator]:
            p.get()
            if p.peek().typ == kinds.Calendar:
                p.irrelevant.extend([item, p.input[p.pos], p.get()])
            else:
                p.backup()
                return parse_series
        # This is text that just happens to also be a month/day
        else:
            return parse_series

    # Specifically '__' or '--', no further title/series parsing is done to keep compatibility with wiki
    elif item.typ == kinds.Skip:
        p.skip = True

    # Keeping track of parentheses depth
    elif item.typ == kinds.LeftParen:
        p.in_paren += 1
        p.in_something += 1
    elif item.typ == kinds.LeftBrace:
        p.in_brace += 1
        p.in_something += 1
    elif item.typ == kinds.LeftSBrace:
        p.in_s_brace += 1
        p.in_something += 1

    elif item.typ == kinds.RightParen:
        p.in_paren -= 1
        p.in_something -= 1
    elif item.typ == kinds.RightBrace:
        p.in_brace -= 1
        p.in_something -= 1
    elif item.typ == kinds.RightSBrace:
        p.in_s_brace -= 1
        p.in_something -= 1

    # Unset first item
    if p.firstItem:
        p.firstItem = False

    # Brace management, I don't like negative numbers
    if p.in_paren < 0:
        p.in_something += p.in_paren * -1
    if p.in_brace < 0:
        p.in_something += p.in_brace * -1
    if p.in_s_brace < 0:
        p.in_something += p.in_s_brace * -1

    return parse
|
||||
|
||||
|
||||
# TODO: What about more esoteric numbers???
def parse_issue_number(p: Parser):
    """Record the current item as the issue number or an alternate number.

    The first number becomes ``issue`` unless ``p.alt`` is set; any number
    seen once an issue exists is stored in ``alternate``, comma-separated
    when there are several. A following ``.`` plus Text/Number item is
    appended as a decimal part to whichever field was just written.
    Always returns ``parse``.
    """
    item = p.input[p.pos]
    info = p.filename_info

    if "issue" in info:
        target = "alternate"
        if "alternate" in info:
            # Keep earlier alternates instead of overwriting them
            info["alternate"] += "," + item.val
        else:
            info["alternate"] = item.val
    elif p.alt:
        target = "alternate"
        info["alternate"] = item.val
    else:
        target = "issue"
        info["issue"] = item.val
        p.issue_number_at = item.pos
    p.used_items.append(item)

    item = p.get()
    if item.typ == filenamelexer.ItemType.Dot:
        p.used_items.append(item)
        item = p.get()
        if item.typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Number]:
            # The suffix belongs to the number stored above, not always to 'issue'
            info[target] += "." + item.val
            p.used_items.append(item)
        else:
            # Un-read both the item after the dot and the dot itself
            p.backup()
            p.backup()
    else:
        p.backup()
    p.alt = False
    return parse
|
||||
|
||||
|
||||
def parse_series(p: Parser):
    """Collect consecutive items into the series and/or title.

    Items are gathered into sections; an operator directly after a word and
    followed by a space (e.g. ': ') starts a new section. When there is more
    than one section the last one is the title and the rest form the series.
    If a series is already recorded on ``p``, everything gathered here goes
    to the title instead. Always returns ``parse``.
    """
    kinds = filenamelexer.ItemType
    item = p.input[p.pos]

    sections: list[list[filenamelexer.Item]] = [[]]
    # Space and Dots are not useful at the beginning of a title/series
    if not p.skip and item.typ not in [kinds.Space, kinds.Dot]:
        sections[0].append(item)

    section_index = 0

    found_title: list[filenamelexer.Item] = []
    found_series: list[filenamelexer.Item] = []

    after_space = False

    # 'free comic book day' screws things up. #TODO look into removing book from ComicType?

    # We stop parsing the series when certain things come up if nothing was done with them continue where we left off
    resume_previous = (
        p.series_parts
        and p.series_parts[-1].val.lower() == "book"
        or p.peek_back().typ == kinds.Number
        or item.typ == kinds.Calendar
    )
    if resume_previous:
        found_series = p.series_parts
        p.series_parts = []

    # Skip is only true if we have come across '--' or '__'
    while not p.skip:
        item = p.get()

        # Spaces are evil
        if item.typ == kinds.Space:
            after_space = True
            continue

        current = sections[section_index]

        if item.typ in [kinds.Text, kinds.Symbol, kinds.Publisher, kinds.Honorific]:
            current.append(item)
            if item.typ == kinds.Honorific and p.peek().typ == kinds.Dot:
                current.append(p.get())
            elif item.typ == kinds.Publisher:
                p.filename_info["publisher"] = item.val

        # Handle Volume
        elif item.typ == kinds.InfoSpecifier:
            # Exception for 'of'
            if item.val.lower() == "of":
                current.append(item)
            else:
                # This specifically lets 'X-Men-V1-067' parse correctly as Series: X-Men Volume: 1 Issue: 67
                while len(current) > 0 and current[-1].typ not in [kinds.Text, kinds.Symbol]:
                    p.irrelevant.append(current.pop())
                p.backup()
                break

        elif item.typ == kinds.Operator:
            upcoming = p.peek()
            # ': ' separates the title from the series, only the last section is considered the title
            if not after_space and upcoming.typ in [kinds.Space]:
                sections.append([])  # Starts a new section
                current.append(item)
                section_index += 1
            else:
                # Force space around '-' makes 'batman - superman' stay otherwise we get 'batman-superman'
                if after_space and upcoming.typ in [kinds.Space]:
                    item.val = " " + item.val + " "
                current.append(item)

        # Stop processing series/title if a skip item is found
        elif item.typ == kinds.Skip:
            p.backup()
            break

        elif item.typ == kinds.Number:
            if p.peek().typ == kinds.Space:
                p.get()
                # We have 2 numbers, add the first to the series and then go back to parse
                if p.peek().typ == kinds.Number:
                    current.append(item)
                    break

                # We have 1 number break here, it's possible it's the issue
                p.backup()  # Whitespace
                p.backup()  # The number
                break

            # This is 6 in '1 of 6'
            if current and current[-1].val.lower() == "of":
                current.append(item)
            # We have 1 number break here, it's possible it's the issue
            else:
                p.backup()  # The number
                break

        else:
            # Ensure 'ms. marvel' parses 'ms.' correctly
            if item.typ == kinds.Dot and p.peek_back().typ == kinds.Honorific:
                current.append(item)
            # Allows avengers.hulk to parse correctly
            elif item.typ == kinds.Dot and p.peek().typ == kinds.Text:
                # Marks the dot as used so that the remainder is clean
                p.used_items.append(item)
            else:
                p.backup()
                break

        after_space = False

    # We have a title separator e.g. ': "
    if len(sections) > 1:
        found_title.extend(sections.pop())
        for section in sections:
            if section and section[-1].typ == kinds.Operator:
                section[-1].val += " "  # Ensures that when there are multiple separators that they display properly
            found_series.extend(section)
        # The trailing separator is not part of the series text
        p.used_items.append(found_series.pop())
    else:
        found_series.extend(sections[0])

    # If the series has already been set assume all of this is the title.
    if len(p.series_parts) > 0:
        p.title_parts.extend(found_series)
    else:
        p.series_parts.extend(found_series)
    p.title_parts.extend(found_title)
    return parse
|
||||
|
||||
|
||||
def resolve_year(p: Parser):
    """Choose the year (and possibly the volume) from ``p.year_candidates``.

    Each candidate is a pair whose first element is the ``likely_year`` boolean
    and whose second element is the lexer item. The chosen item's ``val`` is
    stored in ``p.filename_info["year"]``. If no volume is known yet and
    another *likely* candidate remains, that one is stored as the volume.
    Every item consumed this way is recorded in ``p.used_items`` and removed
    from the series and title parts.

    Does nothing when there are no candidates.
    """
    if not p.year_candidates:
        return

    # Sort by likely_year boolean so the likely candidates end up last
    p.year_candidates.sort(key=itemgetter(0))

    # Take the last year e.g. (2007) 2099 (2008) becomes 2099 2007 2008 and takes 2008
    selected_year = p.year_candidates.pop()[1]

    p.filename_info["year"] = selected_year.val
    p.used_items.append(selected_year)

    # (2008) Title (2009) is many times used to denote the series year if we don't have a volume we use it
    if "volume" not in p.filename_info and p.year_candidates and p.year_candidates[-1][0]:
        vol = p.year_candidates.pop()[1]
        p.filename_info["volume"] = vol.val
        p.used_items.append(vol)

        # Remove volume from series and title.
        # This must test `vol`, not `selected_year`; the year is handled below.
        if vol in p.series_parts:
            p.series_parts.remove(vol)
        if vol in p.title_parts:
            p.title_parts.remove(vol)

    # Remove year from series and title
    if selected_year in p.series_parts:
        p.series_parts.remove(selected_year)
    if selected_year in p.title_parts:
        p.title_parts.remove(selected_year)
|
||||
|
||||
|
||||
def parse_finish(p: Parser):
    """Finalise ``p.filename_info`` after all items have been processed.

    Steps, in order: resolve the year, try to recover an issue number from the
    end of the series, optionally drop publisher items, join the series and
    title text, normalise issue and volume, fall back to the alternate number
    for the issue, compute the remainder, and make sure every expected key is
    present in ``p.filename_info``.
    """
    resolve_year(p)

    # If we don't have an issue try to find it in the series
    if "issue" not in p.filename_info and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number:
        candidate = p.series_parts.pop()

        # Keep the number in the series when it is a year candidate
        # (e.g. Spider-Man 2099), or when it was rejected because of an operator
        # and that operator is still the last series item (e.g. 'IG-88').
        stays_in_series = candidate in [pair[1] for pair in p.year_candidates] or (
            candidate in p.operator_rejected
            and p.series_parts
            and p.series_parts[-1].typ == filenamelexer.ItemType.Operator
        )
        if stays_in_series:
            p.series_parts.append(candidate)
        else:
            # We have no reason to not use this number as the issue number.
            # Specifically happens when parsing 'X-Men-V1-067.cbr'
            p.filename_info["issue"] = candidate.val
            p.used_items.append(candidate)
            p.issue_number_at = candidate.pos

    # Remove publishers, currently only marvel and dc are defined,
    # this is an option specifically because this can drastically screw up parsing
    if p.remove_publisher:
        for publisher_item in p.publisher_removed:
            for parts in (p.series_parts, p.title_parts):
                if publisher_item in parts:
                    parts.remove(publisher_item)

    # Join series first, then title; both sets of items are now considered used
    for key, parts in (("series", p.series_parts), ("title", p.title_parts)):
        p.filename_info[key] = join_title(parts)
        p.used_items.extend(parts)

    if "issue" in p.filename_info:
        bare_issue = p.filename_info["issue"].lstrip("#")
        p.filename_info["issue"] = issuestring.IssueString(bare_issue).as_string()

    if "volume" in p.filename_info:
        p.filename_info["volume"] = p.filename_info["volume"].lstrip("#").lstrip("0")

    if "issue" not in p.filename_info:
        if "alternate" in p.filename_info:
            # We have an alternate move it to the issue
            p.filename_info["issue"] = p.filename_info["alternate"]
            p.filename_info["alternate"] = ""
        else:
            # TODO: This never happens
            leftovers = [
                x for x in p.input if x not in p.irrelevant and x not in p.used_items and x.typ != eof.typ
            ]
            if len(leftovers) == 1 and leftovers[0].typ == filenamelexer.ItemType.Number:
                p.filename_info["issue"] = leftovers[0].val
                p.used_items.append(leftovers[0])

    # Item types the caller asked to keep out of the remainder
    removable = [
        typ
        for enabled, typ in (
            (p.remove_fcbd, filenamelexer.ItemType.FCBD),
            (p.remove_c2c, filenamelexer.ItemType.C2C),
        )
        if enabled
    ]
    p.irrelevant.extend([x for x in p.input if x.typ in removable])

    p.filename_info["remainder"] = get_remainder(p)

    # Ensure keys always exist
    for text_key in (
        "alternate",
        "issue",
        "archive",
        "series",
        "title",
        "volume",
        "year",
        "remainder",
        "issue_count",
        "volume_count",
        "publisher",
    ):
        p.filename_info.setdefault(text_key, "")
    for flag_key in ("fcbd", "c2c", "annual"):
        p.filename_info.setdefault(flag_key, False)
|
||||
|
||||
|
||||
def get_remainder(p: Parser):
    """Build the remainder string from the items nothing else claimed.

    Items listed in ``p.irrelevant`` or ``p.used_items`` are skipped. The
    ``val`` of each remaining item is concatenated, with spacing collapsed and
    bracket pairs that ended up empty removed.

    Returns the resulting text with surrounding whitespace stripped.
    """
    item_type = filenamelexer.ItemType
    opening = (item_type.LeftBrace, item_type.LeftParen, item_type.LeftSBrace)
    closing = (item_type.RightBrace, item_type.RightParen, item_type.RightSBrace)
    spacing = (item_type.Space, item_type.Skip)
    no_space_after = (item_type.Space,) + opening

    remainder = ""

    # Remove used items and irrelevant items e.g. the series and useless operators
    inp = [x for x in p.input if x not in p.irrelevant and x not in p.used_items]
    for i, item in enumerate(inp):
        if item.typ in spacing:
            # No double space or space next to parentheses; also never emit a
            # leading or trailing space.
            if (
                i > 0
                and inp[i - 1].typ not in no_space_after
                and i + 1 < len(inp)
                and inp[i + 1].typ not in closing
            ):
                remainder += " "

        # A closing bracket directly after an opening one: strip the useless
        # opening bracket(s) already emitted and skip the closing one.
        # (Space items never reach this branch; they are handled above.)
        elif item.typ in closing and i > 0 and inp[i - 1].typ in opening:
            remainder = remainder.rstrip("[{(")

        # Add the next item
        else:
            remainder += item.val

    # Remove empty parentheses
    remainder = re.sub(r"[\[{(]+[]})]+", "", remainder)
    return remainder.strip()
|
||||
|
||||
|
||||
def parse_info_specifier(p: Parser):
    """Handle a specifier word ('volume', 'vol', 'vol.', 'v' or 'of') followed by a number.

    The item at the parser's current position is the specifier. If the next
    item (after an optional space) is a number, or text that converts to a
    number, it is consumed and interpreted:

    * volume specifiers store the converted number as the volume;
    * 'of' inside brackets (``p.in_something > 0``) stores an issue count or a
      volume plus volume count, depending on which number precedes it;
    * 'of' outside brackets rewinds the parser to the preceding number so that
      e.g. 'Part 1 of 2' is parsed as ordinary text.

    Returns the next state function: ``parse_series`` when not inside
    brackets, otherwise ``parse``.
    """
    item = p.input[p.pos]
    index = p.pos

    if p.peek().typ == filenamelexer.ItemType.Space:
        p.get()

    # Handles 'book 3' and 'book three'
    if p.peek().typ == filenamelexer.ItemType.Number or (
        p.peek().typ == filenamelexer.ItemType.Text and t2d.convert(p.peek().val).isnumeric()
    ):
        number = p.get()
        specifier = item.val.lower()

        if specifier in ["volume", "vol", "vol.", "v"]:
            p.filename_info["volume"] = t2do.convert(number.val)
            p.used_items.append(item)
            p.used_items.append(number)

        # 'of' is only special if it is inside a parenthesis.
        elif specifier == "of":
            # The number this count belongs to, or None when there is none
            i = get_number(p, index)
            if p.in_something > 0:
                if p.issue_number_at is None:
                    # TODO: Figure out what to do here if it ever happens
                    p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(item)
                    p.used_items.append(number)

                elif i is None:
                    # No number precedes this 'of', so there is nothing to
                    # compare with the issue number; leave the items unclaimed
                    # instead of failing on `i.pos`.
                    pass

                # This is definitely the issue number
                elif p.issue_number_at == i.pos:
                    p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(item)
                    p.used_items.append(number)

                # This is not for the issue number it is not in either the issue or the title,
                # assume it is the volume number and count
                elif p.issue_number_at != i.pos and i not in p.series_parts and i not in p.title_parts:
                    p.filename_info["volume"] = i.val
                    p.filename_info["volume_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(i)
                    p.used_items.append(item)
                    p.used_items.append(number)
                else:
                    # TODO: Figure out what to do here if it ever happens
                    pass
            else:
                # Lets 'The Wrath of Foobar-Man, Part 1 of 2' parse correctly as the title
                if i is not None:
                    p.pos = p.input.index(i)

    if not p.in_something:
        return parse_series
    return parse
|
||||
|
||||
|
||||
# Gets 03 in '03 of 6'
|
||||
def get_number(p: Parser, index: int):
    """Find the number that a count at ``index`` refers to, e.g. 03 in '03 (of 6)'.

    Walks backwards through ``p.input`` starting just before ``index``.
    Opening brackets and spaces are skipped. The first other item decides the
    result: a Number item is returned, anything else ends the search.

    Returns the Number item, or ``None`` when no number directly precedes the
    count.
    """
    # Go backward through the filename to see if we can find what this is of eg '03 (of 6)' or '008 title 03 (of 6)'
    for candidate in reversed(p.input[:index]):
        # We don't care about these types, we are looking to see if there is a number
        # that is possibly different from the issue number for this count
        if candidate.typ in [
            filenamelexer.ItemType.LeftParen,
            filenamelexer.ItemType.LeftBrace,
            filenamelexer.ItemType.LeftSBrace,
            filenamelexer.ItemType.Space,
        ]:
            continue
        if candidate.typ == filenamelexer.ItemType.Number:
            # We got our number, time to leave
            return candidate
        # This is not a number and not an ignorable type, give up looking for the number this count belongs to
        break

    return None
|
||||
|
||||
|
||||
def join_title(lst: list[filenamelexer.Item]):
    """Concatenate the ``val`` of each item in ``lst`` into one display string.

    A single space is placed after an item unless the item is an operator, is
    the last item, is an honorific followed by a dot, or is followed by an
    operator or symbol. A comma in the final position is dropped.
    """
    pieces = []
    final_index = len(lst) - 1
    for index, token in enumerate(lst):
        at_end = index == final_index

        # We ignore commas on the end
        if at_end and token.val == ",":
            continue

        pieces.append(token.val)

        # No space after operators, and no trailing space
        if at_end or token.typ == filenamelexer.ItemType.Operator:
            continue

        following = lst[index + 1].typ

        # No space after honorifics with a dot
        if token.typ == filenamelexer.ItemType.Honorific and following == filenamelexer.ItemType.Dot:
            continue

        # No space if the next item is an operator or symbol
        if following in [filenamelexer.ItemType.Operator, filenamelexer.ItemType.Symbol]:
            continue

        pieces.append(" ")

    return "".join(pieces)
|
||||
|
||||
|
||||
def Parse(
    lexer_result: list[filenamelexer.Item],
    first_is_alt=False,
    remove_c2c=False,
    remove_fcbd=False,
    remove_publisher=False,
):
    """Create a ``Parser`` for ``lexer_result``, run it, and return it.

    All keyword options are forwarded to ``Parser`` unchanged. The finished
    parser object is returned, not a plain dictionary.
    """
    parser = Parser(
        lexer_result=lexer_result,
        first_is_alt=first_is_alt,
        remove_c2c=remove_c2c,
        remove_fcbd=remove_fcbd,
        remove_publisher=remove_publisher,
    )
    parser.run()
    return parser
|
||||
|
@ -32,11 +32,13 @@ logger = logging.getLogger(__name__)
|
||||
class AutoTagMatchWindow(QtWidgets.QDialog):
|
||||
volume_id = 0
|
||||
|
||||
def __init__(self, parent, match_set_list: List[MultipleMatch], style, fetch_func):
|
||||
def __init__(self, parent, match_set_list: List[MultipleMatch], style, fetch_func, settings):
|
||||
super().__init__(parent)
|
||||
|
||||
uic.loadUi(ComicTaggerSettings.get_ui_file("matchselectionwindow.ui"), self)
|
||||
|
||||
self.settings = settings
|
||||
|
||||
self.current_match_set: Optional[MultipleMatch] = None
|
||||
|
||||
self.altCoverWidget = CoverImageWidget(self.altCoverContainer, CoverImageWidget.AltCoverMode)
|
||||
@ -221,7 +223,12 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
|
||||
|
||||
md = ca.read_metadata(self.style)
|
||||
if md.is_empty:
|
||||
md = ca.metadata_from_filename()
|
||||
md = ca.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
self.settings.remove_publisher,
|
||||
)
|
||||
|
||||
# now get the particular issue data
|
||||
cv_md = self.fetch_func(match)
|
||||
|
@ -101,7 +101,7 @@ def display_match_set_for_choice(label, match_set: MultipleMatch, opts, settings
|
||||
# save the data!
|
||||
# we know at this point, that the file is all good to go
|
||||
ca = match_set.ca
|
||||
md = create_local_metadata(opts, ca, ca.has_metadata(opts.data_style))
|
||||
md = create_local_metadata(opts, ca, ca.has_metadata(opts.data_style), settings)
|
||||
cv_md = actual_issue_data_fetch(match_set.matches[int(i)], settings, opts)
|
||||
md.overlay(cv_md)
|
||||
actual_metadata_save(ca, opts, md)
|
||||
@ -164,13 +164,17 @@ def cli_mode(opts, settings):
|
||||
post_process_matches(match_results, opts, settings)
|
||||
|
||||
|
||||
def create_local_metadata(opts, ca: ComicArchive, has_desired_tags):
|
||||
def create_local_metadata(opts, ca: ComicArchive, has_desired_tags, settings):
|
||||
md = GenericMetadata()
|
||||
md.set_default_page_list(ca.get_number_of_pages())
|
||||
|
||||
# now, overlay the parsed filename info
|
||||
if opts.parse_filename:
|
||||
md.overlay(ca.metadata_from_filename())
|
||||
md.overlay(
|
||||
ca.metadata_from_filename(
|
||||
settings.complicated_parser, settings.remove_c2c, settings.remove_fcbd, settings.remove_publisher
|
||||
)
|
||||
)
|
||||
|
||||
if has_desired_tags:
|
||||
md = ca.read_metadata(opts.data_style)
|
||||
@ -319,7 +323,7 @@ def process_file_cli(filename, opts, settings, match_results: OnlineMatchResults
|
||||
if batch_mode:
|
||||
print(f"Processing {ca.path}...")
|
||||
|
||||
md = create_local_metadata(opts, ca, has[opts.data_style])
|
||||
md = create_local_metadata(opts, ca, has[opts.data_style], settings)
|
||||
if md.issue is None or md.issue == "":
|
||||
if opts.assume_issue_is_one_if_not_set:
|
||||
md.issue = "1"
|
||||
@ -430,7 +434,7 @@ def process_file_cli(filename, opts, settings, match_results: OnlineMatchResults
|
||||
else:
|
||||
use_tags = False
|
||||
|
||||
md = create_local_metadata(opts, ca, use_tags)
|
||||
md = create_local_metadata(opts, ca, use_tags, settings)
|
||||
|
||||
if md.series is None:
|
||||
logger.error(msg_hdr + "Can't rename without series name")
|
||||
|
@ -63,6 +63,7 @@ class IssueIdentifier:
|
||||
result_multiple_good_matches = 5
|
||||
|
||||
def __init__(self, comic_archive: ComicArchive, settings):
|
||||
self.settings = settings
|
||||
self.comic_archive: ComicArchive = comic_archive
|
||||
self.image_hasher = 1
|
||||
|
||||
@ -192,7 +193,12 @@ class IssueIdentifier:
|
||||
internal_metadata = ca.read_cbi()
|
||||
|
||||
# try to get some metadata from filename
|
||||
md_from_filename = ca.metadata_from_filename()
|
||||
md_from_filename = ca.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
self.settings.remove_publisher,
|
||||
)
|
||||
|
||||
# preference order:
|
||||
# 1. Additional metadata
|
||||
|
@ -81,7 +81,12 @@ class RenameWindow(QtWidgets.QDialog):
|
||||
|
||||
md = ca.read_metadata(self.data_style)
|
||||
if md.is_empty:
|
||||
md = ca.metadata_from_filename(self.settings.parse_scan_info)
|
||||
md = ca.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
self.settings.remove_publisher,
|
||||
)
|
||||
self.renamer.set_metadata(md)
|
||||
self.renamer.move = self.settings.rename_move_dir
|
||||
|
||||
|
@ -88,7 +88,10 @@ class ComicTaggerSettings:
|
||||
self.ask_about_usage_stats = True
|
||||
|
||||
# filename parsing settings
|
||||
self.parse_scan_info = True
|
||||
self.complicated_parser = False
|
||||
self.remove_c2c = False
|
||||
self.remove_fcbd = False
|
||||
self.remove_publisher = False
|
||||
|
||||
# Comic Vine settings
|
||||
self.use_series_start_as_volume = False
|
||||
@ -161,7 +164,10 @@ class ComicTaggerSettings:
|
||||
self.ask_about_usage_stats = True
|
||||
|
||||
# filename parsing settings
|
||||
self.parse_scan_info = True
|
||||
self.complicated_parser = False
|
||||
self.remove_c2c = False
|
||||
self.remove_fcbd = False
|
||||
self.remove_publisher = False
|
||||
|
||||
# Comic Vine settings
|
||||
self.use_series_start_as_volume = False
|
||||
@ -287,8 +293,14 @@ class ComicTaggerSettings:
|
||||
if self.config.has_option("identifier", "id_publisher_filter"):
|
||||
self.id_publisher_filter = self.config.get("identifier", "id_publisher_filter")
|
||||
|
||||
if self.config.has_option("filenameparser", "parse_scan_info"):
|
||||
self.parse_scan_info = self.config.getboolean("filenameparser", "parse_scan_info")
|
||||
if self.config.has_option("filenameparser", "complicated_parser"):
|
||||
self.complicated_parser = self.config.getboolean("filenameparser", "complicated_parser")
|
||||
if self.config.has_option("filenameparser", "remove_c2c"):
|
||||
self.remove_c2c = self.config.getboolean("filenameparser", "remove_c2c")
|
||||
if self.config.has_option("filenameparser", "remove_fcbd"):
|
||||
self.remove_fcbd = self.config.getboolean("filenameparser", "remove_fcbd")
|
||||
if self.config.has_option("filenameparser", "remove_publisher"):
|
||||
self.remove_publisher = self.config.getboolean("filenameparser", "remove_publisher")
|
||||
|
||||
if self.config.has_option("dialogflags", "ask_about_cbi_in_rar"):
|
||||
self.ask_about_cbi_in_rar = self.config.getboolean("dialogflags", "ask_about_cbi_in_rar")
|
||||
@ -419,7 +431,10 @@ class ComicTaggerSettings:
|
||||
if not self.config.has_section("filenameparser"):
|
||||
self.config.add_section("filenameparser")
|
||||
|
||||
self.config.set("filenameparser", "parse_scan_info", self.parse_scan_info)
|
||||
self.config.set("filenameparser", "complicated_parser", self.complicated_parser)
|
||||
self.config.set("filenameparser", "remove_c2c", self.remove_c2c)
|
||||
self.config.set("filenameparser", "remove_fcbd", self.remove_fcbd)
|
||||
self.config.set("filenameparser", "remove_publisher", self.remove_publisher)
|
||||
|
||||
if not self.config.has_section("comicvine"):
|
||||
self.config.add_section("comicvine")
|
||||
|
@ -182,6 +182,7 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.cbxMoveFiles.clicked.connect(self.rename_test)
|
||||
self.cbxRenameStrict.clicked.connect(self.rename_test)
|
||||
self.leDirectory.textEdited.connect(self.rename_test)
|
||||
self.cbxComplicatedParser.clicked.connect(self.switch_parser)
|
||||
|
||||
def rename_test(self):
|
||||
self.rename__test(self.leRenameTemplate.text())
|
||||
@ -199,6 +200,13 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.rename_error = e
|
||||
self.lblRenameTest.setText(str(e))
|
||||
|
||||
def switch_parser(self):
|
||||
complicated = self.cbxComplicatedParser.isChecked()
|
||||
|
||||
self.cbxRemoveC2C.setEnabled(complicated)
|
||||
self.cbxRemoveFCBD.setEnabled(complicated)
|
||||
self.cbxRemovePublisher.setEnabled(complicated)
|
||||
|
||||
def settings_to_form(self):
|
||||
# Copy values from settings to form
|
||||
self.leRarExePath.setText(self.settings.rar_exe_path)
|
||||
@ -208,8 +216,11 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
if self.settings.check_for_new_version:
|
||||
self.cbxCheckForNewVersion.setCheckState(QtCore.Qt.CheckState.Checked)
|
||||
|
||||
if self.settings.parse_scan_info:
|
||||
self.cbxParseScanInfo.setCheckState(QtCore.Qt.CheckState.Checked)
|
||||
self.cbxComplicatedParser.setChecked(self.settings.complicated_parser)
|
||||
self.cbxRemoveC2C.setChecked(self.settings.remove_c2c)
|
||||
self.cbxRemoveFCBD.setChecked(self.settings.remove_fcbd)
|
||||
self.cbxRemovePublisher.setChecked(self.settings.remove_publisher)
|
||||
self.switch_parser()
|
||||
|
||||
if self.settings.use_series_start_as_volume:
|
||||
self.cbxUseSeriesStartAsVolume.setCheckState(QtCore.Qt.CheckState.Checked)
|
||||
@ -291,7 +302,10 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.settings.id_length_delta_thresh = int(self.leNameLengthDeltaThresh.text())
|
||||
self.settings.id_publisher_filter = str(self.tePublisherFilter.toPlainText())
|
||||
|
||||
self.settings.parse_scan_info = self.cbxParseScanInfo.isChecked()
|
||||
self.settings.complicated_parser = self.cbxComplicatedParser.isChecked()
|
||||
self.settings.remove_c2c = self.cbxRemoveC2C.isChecked()
|
||||
self.settings.remove_fcbd = self.cbxRemoveFCBD.isChecked()
|
||||
self.settings.remove_publisher = self.cbxRemovePublisher.isChecked()
|
||||
|
||||
self.settings.use_series_start_as_volume = self.cbxUseSeriesStartAsVolume.isChecked()
|
||||
self.settings.clear_form_before_populating_from_cv = self.cbxClearFormBeforePopulating.isChecked()
|
||||
|
@ -557,7 +557,12 @@ Please choose options below, and select OK.
|
||||
|
||||
def actual_load_current_archive(self):
|
||||
if self.metadata.is_empty:
|
||||
self.metadata = self.comic_archive.metadata_from_filename(self.settings.parse_scan_info)
|
||||
self.metadata = self.comic_archive.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
remove_publisher=self.settings.remove_publisher,
|
||||
)
|
||||
if len(self.metadata.pages) == 0:
|
||||
self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages())
|
||||
|
||||
@ -928,7 +933,12 @@ Please choose options below, and select OK.
|
||||
if self.comic_archive is not None:
|
||||
# copy the form onto metadata object
|
||||
self.form_to_metadata()
|
||||
new_metadata = self.comic_archive.metadata_from_filename(self.settings.parse_scan_info)
|
||||
new_metadata = self.comic_archive.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
remove_publisher=self.settings.remove_publisher,
|
||||
)
|
||||
if new_metadata is not None:
|
||||
self.metadata.overlay(new_metadata)
|
||||
self.metadata_to_form()
|
||||
@ -1654,7 +1664,12 @@ Please choose options below, and select OK.
|
||||
# read in metadata, and parse file name if not there
|
||||
md = ca.read_metadata(self.save_data_style)
|
||||
if md.is_empty:
|
||||
md = ca.metadata_from_filename(self.settings.parse_scan_info)
|
||||
md = ca.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
remove_publisher=self.settings.remove_publisher,
|
||||
)
|
||||
if dlg.ignore_leading_digits_in_filename and md.series is not None:
|
||||
# remove all leading numbers
|
||||
md.series = re.sub(r"([\d.]*)(.*)", "\\2", md.series)
|
||||
@ -1846,7 +1861,9 @@ Please choose options below, and select OK to Auto-Tag.
|
||||
|
||||
match_results.multiple_matches.extend(match_results.low_confidence_matches)
|
||||
if reply == QtWidgets.QMessageBox.StandardButton.Yes:
|
||||
matchdlg = AutoTagMatchWindow(self, match_results.multiple_matches, style, self.actual_issue_data_fetch)
|
||||
matchdlg = AutoTagMatchWindow(
|
||||
self, match_results.multiple_matches, style, self.actual_issue_data_fetch, self.settings
|
||||
)
|
||||
matchdlg.setModal(True)
|
||||
matchdlg.exec()
|
||||
self.fileSelectionList.update_selected_rows()
|
||||
|
@ -229,19 +229,55 @@
|
||||
<attribute name="title">
|
||||
<string>Filename Parser</string>
|
||||
</attribute>
|
||||
<widget class="QCheckBox" name="cbxParseScanInfo">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>30</x>
|
||||
<y>30</y>
|
||||
<width>421</width>
|
||||
<height>25</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Parse Scan Info From Filename (Experimental)</string>
|
||||
</property>
|
||||
</widget>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_6">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="groupBox_2">
|
||||
<layout class="QVBoxLayout" name="verticalLayout_7">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxComplicatedParser">
|
||||
<property name="text">
|
||||
<string>Use "Complicated" Parser</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxRemoveC2C">
|
||||
<property name="text">
|
||||
<string>Remove 'C2C' from Scan Info</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxRemoveFCBD">
|
||||
<property name="text">
|
||||
<string>Remove 'FCBD' from Scan Info</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxRemovePublisher">
|
||||
<property name="text">
|
||||
<string>Remove Publisher from filename</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="verticalSpacer_4">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>20</width>
|
||||
<height>40</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="tab_3">
|
||||
<attribute name="title">
|
||||
|
@ -5,3 +5,4 @@ requests==2.*
|
||||
pathvalidate
|
||||
pycountry
|
||||
py7zr
|
||||
text2digits
|
Binary file not shown.
@ -1,35 +1,122 @@
|
||||
import pytest
|
||||
|
||||
fnames = [
|
||||
(
|
||||
"Monster_Island_v1_2__repaired__c2c.cbz",
|
||||
"stuff",
|
||||
"batman 3 title (DC).cbz",
|
||||
"honorific and publisher in series",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "batman",
|
||||
"title": "title",
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"batman 3 title DC.cbz",
|
||||
"honorific and publisher in series",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "batman",
|
||||
"title": "title DC",
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"ms. Marvel 3.cbz",
|
||||
"honorific and publisher in series",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "ms. Marvel",
|
||||
"title": "",
|
||||
"publisher": "Marvel",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"january jones 2.cbz",
|
||||
"month in series",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "january jones",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"52.cbz",
|
||||
"issue number only",
|
||||
{
|
||||
"issue": "52",
|
||||
"series": "",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"52 Monster_Island_v1_2__repaired__c2c.cbz",
|
||||
"leading alternate",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Monster Island",
|
||||
"title": "The Wrath of Foobar-Man, Part 1 of 2",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "",
|
||||
"remainder": "repaired c2c",
|
||||
"remainder": "repaired",
|
||||
"issue_count": "",
|
||||
"alternate": "52",
|
||||
"c2c": True,
|
||||
},
|
||||
),
|
||||
(
|
||||
"Monster_Island_v1_2__repaired__c2c.cbz",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Monster Island",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "",
|
||||
"remainder": "repaired",
|
||||
"issue_count": "",
|
||||
"c2c": True,
|
||||
},
|
||||
),
|
||||
(
|
||||
"Monster Island v1 3 (1957) -- The Revenge Of King Klong (noads).cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "Monster Island",
|
||||
"title": "The Wrath of Foobar-Man, Part 1 of 2",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "1957",
|
||||
"remainder": "The Revenge Of King Klong (noads)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Foobar-Man Annual 121 - The Wrath of Foobar-Man, Part 1 of 2.cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "121",
|
||||
"series": "Foobar-Man Annual",
|
||||
@ -38,12 +125,12 @@ fnames = [
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"annual": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Plastic Man v1 002 (1942).cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Plastic Man",
|
||||
@ -56,7 +143,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Blue Beetle 02.cbr",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Blue Beetle",
|
||||
@ -69,7 +156,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Monster Island vol. 2 #2.cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Monster Island",
|
||||
@ -82,7 +169,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Crazy Weird Comics 2 (of 2) (1969).rar",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Crazy Weird Comics",
|
||||
@ -95,7 +182,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Super Strange Yarns (1957) #92 (1969).cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "92",
|
||||
"series": "Super Strange Yarns",
|
||||
@ -108,7 +195,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Action Spy Tales v1965 #3.cbr",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "Action Spy Tales",
|
||||
@ -119,9 +206,9 @@ fnames = [
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
" X-Men-V1-067.cbr",
|
||||
"hyphen separated with hyphen in series",
|
||||
"hyphen separated with hyphen in series", # only parses corretly because v1 designates the volume
|
||||
{
|
||||
"issue": "67",
|
||||
"series": "X-Men",
|
||||
@ -131,7 +218,6 @@ fnames = [
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Amazing Spider-Man 078.BEY (2022) (Digital) (Zone-Empire).cbr",
|
||||
@ -139,15 +225,16 @@ fnames = [
|
||||
{
|
||||
"issue": "78.BEY",
|
||||
"series": "Amazing Spider-Man",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2022",
|
||||
"remainder": "(Digital) (Zone-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Angel Wings 02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
|
||||
"title after-issue",
|
||||
"title after issue",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Angel Wings",
|
||||
@ -157,11 +244,10 @@ fnames = [
|
||||
"remainder": "(Scanlation) (phillywilly)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Angel Wings #02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
|
||||
"title after-#issue",
|
||||
"title after #issue",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Angel Wings",
|
||||
@ -171,20 +257,19 @@ fnames = [
|
||||
"remainder": "(Scanlation) (phillywilly)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Aquaman - Green Arrow - Deep Target 01 (of 07) (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
"issue count",
|
||||
{
|
||||
"issue": "1",
|
||||
"series": "Aquaman - Green Arrow - Deep Target",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"issue_count": "7",
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Aquaman 80th Anniversary 100-Page Super Spectacular (2021) 001 (2021) (Digital) (BlackManta-Empire).cbz",
|
||||
@ -192,37 +277,39 @@ fnames = [
|
||||
{
|
||||
"issue": "1",
|
||||
"series": "Aquaman 80th Anniversary 100-Page Super Spectacular",
|
||||
"title": "",
|
||||
"volume": "2021",
|
||||
"year": "2021",
|
||||
"remainder": "(Digital) (BlackManta-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Avatar - The Last Airbender - The Legend of Korra (FCBD 2021) (Digital) (mv-DCP).cbr",
|
||||
"FCBD date",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Avatar - The Last Airbender - The Legend of Korra",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(FCBD) (Digital) (mv-DCP)",
|
||||
"remainder": "(Digital) (mv-DCP)",
|
||||
"issue_count": "",
|
||||
"fcbd": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Avengers By Brian Michael Bendis v03 (2013) (Digital) (F2) (Kileko-Empire).cbz",
|
||||
"volume without issue",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Avengers By Brian Michael Bendis",
|
||||
"title": "",
|
||||
"volume": "3",
|
||||
"year": "2013",
|
||||
"remainder": "(Digital) (F2) (Kileko-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Batman '89 (2021) (Webrip) (The Last Kryptonian-DCP).cbr",
|
||||
@ -230,6 +317,7 @@ fnames = [
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Batman '89",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(Webrip) (The Last Kryptonian-DCP)",
|
||||
@ -242,6 +330,7 @@ fnames = [
|
||||
{
|
||||
"issue": "20",
|
||||
"series": "Batman - Superman",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(digital) (NeverAngel-Empire)",
|
||||
@ -254,6 +343,7 @@ fnames = [
|
||||
{
|
||||
"issue": "9",
|
||||
"series": "Black Widow",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(Digital) (Zone-Empire)",
|
||||
@ -266,26 +356,28 @@ fnames = [
|
||||
{
|
||||
"issue": "6",
|
||||
"series": "Blade Runner 2029",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(3 covers) (digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Blade Runner Free Comic Book Day 2021 (2021) (digital-Empire).cbr",
|
||||
"FCBD year and (year)",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Blade Runner Free Comic Book Day 2021",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(digital-Empire)",
|
||||
"issue_count": "",
|
||||
"fcbd": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Bloodshot Book 03 (2020) (digital) (Son of Ultron-Empire).cbr",
|
||||
"book",
|
||||
{
|
||||
@ -297,9 +389,21 @@ fnames = [
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"book of eli (2020) (digital) (Son of Ultron-Empire).cbr",
|
||||
"book",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "book of eli",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2020",
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"Cyberpunk 2077 - You Have My Word 02 (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
"title",
|
||||
{
|
||||
@ -311,9 +415,8 @@ fnames = [
|
||||
"issue_count": "",
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Elephantmen 2259 008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
"volume count",
|
||||
{
|
||||
@ -326,9 +429,8 @@ fnames = [
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
"volume count",
|
||||
{
|
||||
@ -341,20 +443,20 @@ fnames = [
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Free Comic Book Day - Avengers.Hulk (2021) (2048px) (db).cbz",
|
||||
"'.' in name",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Free Comic Book Day - Avengers Hulk",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(2048px) (db)",
|
||||
"issue_count": "",
|
||||
"fcbd": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Goblin (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
@ -362,37 +464,41 @@ fnames = [
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Goblin",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Marvel Previews 002 (January 2022) (Digital-Empire).cbr",
|
||||
"(month year)",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Marvel Previews",
|
||||
"title": "",
|
||||
"publisher": "Marvel",
|
||||
"volume": "",
|
||||
"year": "2022",
|
||||
"remainder": "(Digital-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Marvel Two In One V1 090 c2c (Comixbear-DCP).cbr",
|
||||
"volume issue ctc",
|
||||
{
|
||||
"issue": "90",
|
||||
"series": "Marvel Two In One",
|
||||
"title": "",
|
||||
"publisher": "Marvel",
|
||||
"volume": "1",
|
||||
"year": "",
|
||||
"remainder": "c2c (Comixbear-DCP)",
|
||||
"remainder": "(Comixbear-DCP)",
|
||||
"issue_count": "",
|
||||
"c2c": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Marvel Two In One V1 #090 c2c (Comixbear-DCP).cbr",
|
||||
@ -400,24 +506,27 @@ fnames = [
|
||||
{
|
||||
"issue": "90",
|
||||
"series": "Marvel Two In One",
|
||||
"title": "",
|
||||
"publisher": "Marvel",
|
||||
"volume": "1",
|
||||
"year": "",
|
||||
"remainder": "c2c (Comixbear-DCP)",
|
||||
"remainder": "(Comixbear-DCP)",
|
||||
"issue_count": "",
|
||||
"c2c": True,
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Star Wars - War of the Bounty Hunters - IG-88 (2021) (Digital) (Kileko-Empire).cbz",
|
||||
"number ends series, no-issue",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Star Wars - War of the Bounty Hunters - IG-88",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(Digital) (Kileko-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Star Wars - War of the Bounty Hunters - IG-88 #1 (2021) (Digital) (Kileko-Empire).cbz",
|
||||
@ -425,6 +534,7 @@ fnames = [
|
||||
{
|
||||
"issue": "1",
|
||||
"series": "Star Wars - War of the Bounty Hunters - IG-88",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(Digital) (Kileko-Empire)",
|
||||
@ -437,39 +547,41 @@ fnames = [
|
||||
{
|
||||
"issue": "58",
|
||||
"series": "The Defenders",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "1978",
|
||||
"remainder": "(digital)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"The Defenders v1 Annual 01 (1976) (Digital) (Minutemen-Slayer).cbr",
|
||||
" v in series",
|
||||
{
|
||||
"issue": "1",
|
||||
"series": "The Defenders Annual",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "1976",
|
||||
"remainder": "(Digital) (Minutemen-Slayer)",
|
||||
"issue_count": "",
|
||||
"annual": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"The Magic Order 2 06 (2022) (Digital) (Zone-Empire)[__913302__].cbz",
|
||||
"ending id",
|
||||
{
|
||||
"issue": "6",
|
||||
"series": "The Magic Order 2",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2022",
|
||||
"remainder": "(Digital) (Zone-Empire)[__913302__]",
|
||||
"remainder": "(Digital) (Zone-Empire)[913302]", # Don't really care about double underscores
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Wonder Woman 001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
|
||||
"issue separates title",
|
||||
{
|
||||
@ -481,9 +593,8 @@ fnames = [
|
||||
"remainder": "(digital-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Wonder Woman #001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
|
||||
"issue separates title",
|
||||
{
|
||||
@ -495,46 +606,47 @@ fnames = [
|
||||
"remainder": "(digital-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Wonder Woman 49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
|
||||
"date-range, no paren, braces",
|
||||
{
|
||||
"issue": "49",
|
||||
"series": "Wonder Woman",
|
||||
"title": "digital", # Don't have a way to get rid of this
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "1951",
|
||||
"remainder": "(Shadowcat-Empire)",
|
||||
"remainder": "[downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
|
||||
"date-range, no paren, braces",
|
||||
{
|
||||
"issue": "49",
|
||||
"series": "Wonder Woman",
|
||||
"title": "digital", # Don't have a way to get rid of this
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "1951",
|
||||
"remainder": "(Shadowcat-Empire)",
|
||||
"remainder": "[downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"X-Men, 2021-08-04 (#02) (digital) (Glorith-HD).cbz",
|
||||
"full-date, issue in parenthesis",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "X-Men",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(digital) (Glorith-HD)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
]
|
||||
|
||||
|
@ -4,13 +4,39 @@ from filenames import fnames
|
||||
import comicapi.filenameparser
|
||||
|
||||
|
||||
@pytest.mark.parametrize("filename,reason,expected", fnames)
def test_file_name_parser_new(filename, reason, expected):
    """Compare the lexer-based parser's ``filename_info`` with ``expected``.

    ``filename`` is lexed with ``filenamelexer.Lex`` and the resulting items
    are handed to ``filenameparser.Parse`` with every ``remove_*`` option and
    ``first_is_alt`` enabled.  ``reason`` is only a human-readable label for
    the test case and is not used by the test body.
    """
    # Work on a copy: the dicts in ``fnames`` are shared with every other test
    # parametrized over the same list, so filling in defaults in place would
    # make the outcome depend on test execution order.
    expected = dict(expected)

    p = comicapi.filenameparser.Parse(
        comicapi.filenamelexer.Lex(filename).items,
        first_is_alt=True,
        remove_c2c=True,
        remove_fcbd=True,
        remove_publisher=True,
    )
    fp = p.filename_info

    # "archive" is not part of the expectations, so drop it before comparing.
    fp.pop("archive", None)

    # Keys a test case does not spell out default to "empty".
    for key in ("alternate", "publisher", "volume_count"):
        expected.setdefault(key, "")
    for key in ("fcbd", "c2c", "annual"):
        expected.setdefault(key, False)

    assert fp == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("filename,reason,expected", fnames)
def test_file_name_parser(filename, reason, expected):
    """Compare ``FileNameParser`` output with the shared expectations.

    The parser instance's ``__dict__`` is compared against ``expected`` after
    the keys listed in ``ignored`` have been left out.  A mismatch is reported
    as an expected failure ("old parser") instead of a hard failure.
    ``reason`` is only a human-readable label and is not used by the test body.
    """
    p = comicapi.filenameparser.FileNameParser()
    p.parse_filename(filename)
    fp = p.__dict__

    # Keys excluded from the comparison for this parser.
    # NOTE(review): presumably because FileNameParser does not populate them
    # (or populates "title" differently) - confirm against the parser.
    ignored = {"title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count"}

    # Build a filtered copy instead of deleting from ``expected``: the dict is
    # shared with the other test parametrized over ``fnames``, which still
    # needs these keys.
    expected = {key: value for key, value in expected.items() if key not in ignored}

    if fp != expected:
        pytest.xfail("old parser")
    assert fp == expected
|
||||
|
Loading…
Reference in New Issue
Block a user