Merge branch 'filenameParser' into develop
This commit is contained in:
commit
1bbdebff42
@ -42,10 +42,10 @@ try:
|
||||
except ImportError:
|
||||
pil_available = False
|
||||
|
||||
from comicapi import filenamelexer, filenameparser
|
||||
from comicapi.comet import CoMet
|
||||
from comicapi.comicbookinfo import ComicBookInfo
|
||||
from comicapi.comicinfoxml import ComicInfoXml
|
||||
from comicapi.filenameparser import FileNameParser
|
||||
from comicapi.genericmetadata import GenericMetadata, PageType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -1127,25 +1127,46 @@ class ComicArchive:
|
||||
data = self.get_page(idx)
|
||||
p["ImageSize"] = str(len(data))
|
||||
|
||||
def metadata_from_filename(self, parse_scan_info=True):
|
||||
def metadata_from_filename(
|
||||
self, complicated_parser=False, remove_c2c=False, remove_fcbd=False, remove_publisher=False
|
||||
):
|
||||
|
||||
metadata = GenericMetadata()
|
||||
|
||||
fnp = FileNameParser()
|
||||
fnp.parse_filename(self.path)
|
||||
if complicated_parser:
|
||||
lex = filenamelexer.Lex(self.path)
|
||||
p = filenameparser.Parse(
|
||||
lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher
|
||||
)
|
||||
metadata.alternate_number = p.filename_info["alternate"] or None
|
||||
metadata.issue = p.filename_info["issue"] or None
|
||||
metadata.issue_count = p.filename_info["issue_count"] or None
|
||||
metadata.publisher = p.filename_info["publisher"] or None
|
||||
metadata.series = p.filename_info["series"] or None
|
||||
metadata.title = p.filename_info["title"] or None
|
||||
metadata.volume = p.filename_info["volume"] or None
|
||||
metadata.volume_count = p.filename_info["volume_count"] or None
|
||||
metadata.year = p.filename_info["year"] or None
|
||||
|
||||
if fnp.issue != "":
|
||||
metadata.issue = fnp.issue
|
||||
if fnp.series != "":
|
||||
metadata.series = fnp.series
|
||||
if fnp.volume != "":
|
||||
metadata.volume = fnp.volume
|
||||
if fnp.year != "":
|
||||
metadata.year = fnp.year
|
||||
if fnp.issue_count != "":
|
||||
metadata.issue_count = fnp.issue_count
|
||||
if parse_scan_info:
|
||||
if fnp.remainder != "":
|
||||
metadata.scan_info = p.filename_info["remainder"] or None
|
||||
metadata.format = "FCBD" if p.filename_info["fcbd"] else None
|
||||
if p.filename_info["annual"]:
|
||||
metadata.format = "Annual"
|
||||
else:
|
||||
fnp = filenameparser.FileNameParser()
|
||||
fnp.parse_filename(self.path)
|
||||
|
||||
if fnp.issue:
|
||||
metadata.issue = fnp.issue
|
||||
if fnp.series:
|
||||
metadata.series = fnp.series
|
||||
if fnp.volume:
|
||||
metadata.volume = fnp.volume
|
||||
if fnp.year:
|
||||
metadata.year = fnp.year
|
||||
if fnp.issue_count:
|
||||
metadata.issue_count = fnp.issue_count
|
||||
if fnp.remainder:
|
||||
metadata.scan_info = fnp.remainder
|
||||
|
||||
metadata.is_empty = False
|
||||
|
353
comicapi/filenamelexer.py
Normal file
353
comicapi/filenamelexer.py
Normal file
@ -0,0 +1,353 @@
|
||||
import calendar
|
||||
import os
|
||||
import unicodedata
|
||||
from enum import Enum, auto
|
||||
|
||||
|
||||
class ItemType(Enum):
    """Kinds of token the filename lexer can emit.

    Member order matters: ``auto()`` assigns values by position, so new
    members should be appended rather than inserted.
    """

    Error = auto()  # Error occurred; value is text of error
    EOF = auto()  # End of the input
    Text = auto()  # Text
    LeftParen = auto()  # '('
    Number = auto()  # Simple number
    IssueNumber = auto()  # Number preceded by a '#' symbol
    RightParen = auto()  # ')'
    Space = auto()  # Run of space characters
    Dot = auto()  # '.'
    LeftBrace = auto()  # '{'
    RightBrace = auto()  # '}'
    LeftSBrace = auto()  # '['
    RightSBrace = auto()  # ']'
    Symbol = auto()
    Skip = auto()  # __ or -- no title, issue or series information beyond
    Operator = auto()
    Calendar = auto()  # Month or day name
    InfoSpecifier = auto()  # Specifies type of info e.g. v1 for 'volume': 1
    ArchiveType = auto()
    Honorific = auto()
    Keywords = auto()
    FCBD = auto()
    ComicType = auto()
    Publisher = auto()
    C2C = auto()
|
||||
|
||||
|
||||
# Every opening and closing bracket token type.
braces = [
    ItemType.LeftBrace,
    ItemType.LeftParen,
    ItemType.LeftSBrace,
    ItemType.RightBrace,
    ItemType.RightParen,
    ItemType.RightSBrace,
]
|
||||
|
||||
# Sentinel character returned by Lexer.get()/Lexer.peek() once the input is exhausted.
eof = chr(0)
|
||||
|
||||
# Lower-cased keywords mapped to the token type the lexer emits for them.
key = {
    "fcbd": ItemType.FCBD,
    "freecomicbookday": ItemType.FCBD,
    "cbr": ItemType.ArchiveType,
    "cbz": ItemType.ArchiveType,
    "cbt": ItemType.ArchiveType,
    "cb7": ItemType.ArchiveType,
    "rar": ItemType.ArchiveType,
    "zip": ItemType.ArchiveType,
    "tar": ItemType.ArchiveType,
    "7z": ItemType.ArchiveType,
    "annual": ItemType.ComicType,
    "book": ItemType.ComicType,
    "volume": ItemType.InfoSpecifier,
    "vol.": ItemType.InfoSpecifier,
    "vol": ItemType.InfoSpecifier,
    "v": ItemType.InfoSpecifier,
    "of": ItemType.InfoSpecifier,
    "dc": ItemType.Publisher,
    "marvel": ItemType.Publisher,
    "covers": ItemType.InfoSpecifier,
    "c2c": ItemType.C2C,
    "mr": ItemType.Honorific,
    "ms": ItemType.Honorific,
    "mrs": ItemType.Honorific,
    "dr": ItemType.Honorific,
}
|
||||
|
||||
|
||||
class Item:
    """A single token produced by the lexer.

    Attributes are plain and mutable; the parser rewrites ``val`` in place
    for some operators.
    """

    def __init__(self, typ: "ItemType", pos: int, val: str):
        self.typ: ItemType = typ  # Token category
        self.pos: int = pos  # Start index of the token in the lexed string
        self.val: str = val  # Token text (or the message for Error items)

    def __repr__(self):
        return f"{self.val}: index: {self.pos}: {self.typ}"
|
||||
|
||||
|
||||
class Lexer:
    """Character-level scanner that splits a string into ``Item`` tokens.

    ``pos`` is the index of the most recently consumed character (``-1``
    before anything is read) and ``start`` is the index of the first
    character of the token currently being built. Tokens accumulate in
    ``items``.
    """

    def __init__(self, string):
        self.input: str = string  # The string being scanned
        self.state = None  # The next lexing function to enter
        self.pos: int = -1  # Index of the last consumed character
        self.start: int = 0  # Start position of this item
        self.lastPos: int = 0  # Not updated anywhere in this class
        self.paren_depth: int = 0  # Nesting depth of ( ) exprs
        self.brace_depth: int = 0  # Nesting depth of { }
        self.sbrace_depth: int = 0  # Nesting depth of [ ]
        self.items = []

    def get(self) -> str:
        """Consume and return the next character, or ``eof`` past the end.

        ``pos`` is advanced even when ``eof`` is returned, so a following
        ``backup()`` undoes the read symmetrically.
        """
        self.pos += 1
        if self.pos >= len(self.input):
            return eof
        return self.input[self.pos]

    def peek(self) -> str:
        """Return the next character without consuming it (``eof`` at the end)."""
        if self.pos >= len(self.input) - 1:
            return eof
        return self.input[self.pos + 1]

    def backup(self):
        """Step back one character."""
        self.pos -= 1

    def emit(self, t: "ItemType"):
        """Append the pending text ``input[start:pos + 1]`` as an item of type ``t``."""
        self.items.append(Item(t, self.start, self.input[self.start : self.pos + 1]))
        self.start = self.pos + 1

    def ignore(self):
        """Skip over the pending input before this point."""
        # NOTE(review): emit() restarts at pos + 1, whereas this restarts at pos,
        # which keeps the last consumed character pending. Left unchanged because
        # nothing visible calls it; confirm the intent before relying on it.
        self.start = self.pos

    def accept(self, valid: str):
        """Consume the next character if it is in ``valid``; report whether it was."""
        if self.get() in valid:
            return True

        self.backup()
        return False

    def accept_run(self, valid: str):
        """Consume a run of characters that are all in ``valid``."""
        while self.get() in valid:
            pass

        # The loop consumed one character too many.
        self.backup()

    def errorf(self, message: str):
        """Record an Error item carrying ``message``.

        Returns ``None`` so that state functions can ``return lex.errorf(...)``
        to stop the state machine in ``run()``.
        """
        self.items.append(Item(ItemType.Error, self.start, message))

    def scan_number(self):
        """Consume digits, an optional fractional part and an ordinal suffix.

        Always returns ``True``.
        """
        digits = "0123456789"

        self.accept_run(digits)
        if self.accept("."):
            if self.accept(digits):
                self.accept_run(digits)
            else:
                # A dot with no digit after it is not part of the number.
                self.backup()
        # Ordinal suffixes: 1st, 2nd/3rd, 4th
        if self.accept("s"):
            if not self.accept("t"):
                self.backup()
        elif self.accept("nr"):
            if not self.accept("d"):
                self.backup()
        elif self.accept("t"):
            if not self.accept("h"):
                self.backup()

        return True

    def run(self):
        """Run the state machine until a state function returns ``None``."""
        self.state = lex_filename
        while self.state is not None:
            self.state = self.state(self)
|
||||
|
||||
|
||||
def lex_filename(lex: Lexer):
    """Top-level lexer state: dispatch on the next character.

    Single-character tokens are emitted here. For longer tokens the matching
    state function (``lex_space``, ``lex_number``, ``lex_operator``,
    ``lex_text``) is returned. Returns ``None`` to stop the lexer at the end
    of input or after recording an error.
    """
    r = lex.get()
    if r == eof:
        if lex.paren_depth != 0:
            return lex.errorf("unclosed left paren")

        if lex.brace_depth != 0:
            # The message previously said "paren" for an unclosed '{'.
            return lex.errorf("unclosed left brace")
        lex.emit(ItemType.EOF)
        return None
    elif is_space(r):
        # A double underscore is a Skip marker, not whitespace.
        if r == "_" and lex.peek() == "_":
            lex.get()
            lex.emit(ItemType.Skip)
        else:
            return lex_space
    elif r == ".":
        r = lex.peek()
        if r < "0" or "9" < r:
            lex.emit(ItemType.Dot)
            return lex_filename

        # '.5' style number: hand the dot back to the number scanner.
        lex.backup()
        return lex_number
    elif r == "'":
        r = lex.peek()
        if r in "0123456789":
            # Short year such as '89; the quote stays part of the token.
            return lex_number
        lex.emit(ItemType.Text)
    elif "0" <= r <= "9":
        lex.backup()
        return lex_number
    elif r == "#":
        if "0" <= lex.peek() <= "9":
            # '#' stays part of the token so lex_number can mark it as an issue number.
            return lex_number
        lex.emit(ItemType.Symbol)
    elif is_operator(r):
        # A double dash is a Skip marker, not an operator.
        if r == "-" and lex.peek() == "-":
            lex.get()
            lex.emit(ItemType.Skip)
        else:
            return lex_operator
    elif is_alpha_numeric(r):
        lex.backup()
        return lex_text
    elif r == "(":
        lex.emit(ItemType.LeftParen)
        lex.paren_depth += 1
    elif r == ")":
        lex.emit(ItemType.RightParen)
        lex.paren_depth -= 1
        if lex.paren_depth < 0:
            return lex.errorf("unexpected right paren " + r)

    elif r == "{":
        lex.emit(ItemType.LeftBrace)
        lex.brace_depth += 1
    elif r == "}":
        lex.emit(ItemType.RightBrace)
        lex.brace_depth -= 1
        if lex.brace_depth < 0:
            return lex.errorf("unexpected right brace " + r)

    elif r == "[":
        lex.emit(ItemType.LeftSBrace)
        lex.sbrace_depth += 1
    elif r == "]":
        lex.emit(ItemType.RightSBrace)
        lex.sbrace_depth -= 1
        if lex.sbrace_depth < 0:
            return lex.errorf("unexpected right brace " + r)
    elif is_symbol(r):
        lex.emit(ItemType.Symbol)
    else:
        return lex.errorf("unrecognized character in action: " + r)

    return lex_filename
|
||||
|
||||
|
||||
def lex_operator(lex: Lexer):
    """Emit an Operator item for the operator already read plus any run of ``-|:;``."""
    lex.accept_run("-|:;")
    lex.emit(ItemType.Operator)
    return lex_filename
|
||||
|
||||
|
||||
# LexSpace scans a run of space characters.
|
||||
# One space has already been seen.
|
||||
def lex_space(lex: Lexer):
    """Consume the rest of a run of space characters and emit one Space item."""
    while is_space(lex.peek()):
        lex.get()

    lex.emit(ItemType.Space)
    return lex_filename
|
||||
|
||||
|
||||
# Lex_text scans an alphanumeric.
|
||||
def lex_text(lex: Lexer):
    """Scan an alphanumeric word and emit it with the most specific type.

    A word found in ``key`` gets that keyword's type, a month/day name
    becomes Calendar, anything else is Text. An info specifier directly
    followed by a digit (e.g. ``v1``) is emitted on its own so the number is
    lexed separately.
    """
    while True:
        r = lex.get()
        if is_alpha_numeric(r):
            if r.isnumeric():  # E.g. v1
                # Text before the digit just read
                prefix = lex.input[lex.start : lex.pos].lower()
                if prefix in key and key[prefix] == ItemType.InfoSpecifier:
                    lex.backup()
                    lex.emit(key[prefix])
                    return lex_filename
        else:
            # Keep a possessive "'s" attached to the word.
            if r == "'" and lex.peek() == "s":
                lex.get()
            else:
                lex.backup()
            word = lex.input[lex.start : lex.pos + 1]
            # "vol." is a keyword including its trailing dot.
            if word.lower() == "vol" and lex.peek() == ".":
                lex.get()
                word = lex.input[lex.start : lex.pos + 1]

            lowered = word.lower()
            if lowered in key:
                lex.emit(key[lowered])
            elif cal(word):
                lex.emit(ItemType.Calendar)
            else:
                lex.emit(ItemType.Text)
            break

    return lex_filename
|
||||
|
||||
|
||||
def cal(value: str):
    """Return the calendar indexes whose month or day name matches ``value``.

    ``value`` is compared in title case against the full and abbreviated
    month and day names of the ``calendar`` module. The result is a set of
    matching indexes, empty when nothing matches; callers test it for
    truthiness.
    """
    wanted = value.title()
    # Index 0 of the month tables is an empty placeholder; without this guard
    # an empty string would be reported as a calendar word.
    if not wanted:
        return set()

    matches = set()
    for names in (calendar.month_abbr, calendar.month_name, calendar.day_abbr, calendar.day_name):
        matches.update(i for i, name in enumerate(names) if name == wanted)
    return matches
|
||||
|
||||
|
||||
def lex_number(lex: Lexer):
    """Scan a number and emit it as IssueNumber, Text or Number.

    A token starting with ``#`` is an issue number. A token that does not
    end in a digit (an ordinal such as ``80th``) is treated as text.
    """
    if not lex.scan_number():
        return lex.errorf("bad number syntax: " + lex.input[lex.start : lex.pos])
    # Complex number logic removed. Messes with math operations without space

    if lex.input[lex.start] == "#":
        lex.emit(ItemType.IssueNumber)
    elif not lex.input[lex.pos].isdigit():
        # Assume that 80th is just text and not a number
        lex.emit(ItemType.Text)
    else:
        lex.emit(ItemType.Number)

    return lex_filename
|
||||
|
||||
|
||||
def is_space(character: str):
    """Report whether ``character`` is an underscore, a space or a tab."""
    return character in "_ \t"
|
||||
|
||||
|
||||
def is_alpha_numeric(character: str):
    """Report whether ``character`` is alphabetic or numeric (underscore is not)."""
    return character.isalpha() or character.isnumeric()
|
||||
|
||||
|
||||
def is_operator(character: str):
    """Report whether ``character`` is one of ``- | : ; /`` or a backslash."""
    return character in "-|:;/\\"
|
||||
|
||||
|
||||
def is_symbol(character: str):
    """Report whether ``character`` is in a Unicode punctuation (P*) or symbol (S*) category."""
    return unicodedata.category(character)[0] in "PS"
|
||||
|
||||
|
||||
def Lex(filename: str):
    """Lex the base name of ``filename`` and return the finished ``Lexer``.

    Only the final path component is scanned; the tokens are available on
    the returned lexer's ``items`` attribute.
    """
    lex = Lexer(string=os.path.basename(filename))
    lex.run()
    return lex
|
@ -23,8 +23,17 @@ This should probably be re-written, but, well, it mostly works!
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from operator import itemgetter
|
||||
from typing import TypedDict
|
||||
from urllib.parse import unquote
|
||||
|
||||
from text2digits import text2digits
|
||||
|
||||
from comicapi import filenamelexer, issuestring
|
||||
|
||||
t2d = text2digits.Text2Digits(add_ordinal_ending=False)
|
||||
t2do = text2digits.Text2Digits(add_ordinal_ending=True)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -68,9 +77,7 @@ class FileNameParser:
|
||||
if match:
|
||||
count = match.group()
|
||||
|
||||
count = count.lstrip("0")
|
||||
|
||||
return count
|
||||
return count.lstrip("0")
|
||||
|
||||
def get_issue_number(self, filename):
|
||||
"""Returns a tuple of issue number string, and start and end indexes in the filename
|
||||
@ -222,7 +229,7 @@ class FileNameParser:
|
||||
|
||||
year = ""
|
||||
# look for four digit number with "(" ")" or "--" around it
|
||||
match = re.search(r"(\(\d\d\d\d\))|(--\d\d\d\d--)", filename)
|
||||
match = re.search(r"(\(\d{4}\))|(--\d{4}--)", filename)
|
||||
if match:
|
||||
year = match.group()
|
||||
# remove non-digits
|
||||
@ -290,3 +297,814 @@ class FileNameParser:
|
||||
self.issue = "0"
|
||||
if self.issue[0] == ".":
|
||||
self.issue = "0" + self.issue
|
||||
|
||||
|
||||
class FilenameInfo(TypedDict, total=False):
    """Fields the lexer-based parser can extract from a filename.

    Declared with ``total=False``, so every key is optional.
    """

    alternate: str
    annual: bool
    archive: str
    c2c: bool
    fcbd: bool
    issue: str
    issue_count: str
    publisher: str
    remainder: str
    series: str
    title: str
    volume: str
    volume_count: str
    year: str
|
||||
|
||||
|
||||
# Sentinel item returned by Parser.get()/peek()/peek_back() when there is no item at that position.
eof = filenamelexer.Item(filenamelexer.ItemType.EOF, -1, "")
|
||||
|
||||
|
||||
class Parser:
    """State machine that walks lexer items and fills in ``filename_info``.

    ``pos`` is the index of the most recently consumed item (``-1`` before
    anything is read). State functions take the parser and return the next
    state function, or ``None`` to stop.
    """

    def __init__(
        self,
        lexer_result: "list[filenamelexer.Item]",
        first_is_alt=False,
        remove_c2c=False,
        remove_fcbd=False,
        remove_publisher=False,
    ):
        self.state = None
        self.pos = -1

        self.firstItem = True
        self.skip = False
        self.alt = False
        self.filename_info: FilenameInfo = {"series": ""}
        self.issue_number_at = None
        self.in_something = 0  # In some sort of brackets {}[]()
        self.in_brace = 0  # In {}
        self.in_s_brace = 0  # In []
        self.in_paren = 0  # In ()
        self.year_candidates: list[tuple[bool, filenamelexer.Item]] = []
        self.series_parts: list[filenamelexer.Item] = []
        self.title_parts: list[filenamelexer.Item] = []
        self.used_items: list[filenamelexer.Item] = []
        self.irrelevant: list[filenamelexer.Item] = []
        self.operator_rejected: list[filenamelexer.Item] = []
        self.publisher_removed: list[filenamelexer.Item] = []

        self.first_is_alt = first_is_alt
        self.remove_c2c = remove_c2c
        self.remove_fcbd = remove_fcbd
        self.remove_publisher = remove_publisher

        self.input = lexer_result
        # Remember the index of the last '#'-prefixed issue number, if any.
        for i, item in enumerate(self.input):
            if item.typ == filenamelexer.ItemType.IssueNumber:
                self.issue_number_at = i

    def get(self) -> "filenamelexer.Item":
        """Consume and return the next item, or ``eof`` past the end.

        ``pos`` advances even when ``eof`` is returned.
        """
        self.pos += 1
        if self.pos >= len(self.input):
            return eof
        return self.input[self.pos]

    def peek(self) -> "filenamelexer.Item":
        """Return the next item without consuming it (``eof`` at the end)."""
        if self.pos >= len(self.input) - 1:
            return eof
        return self.input[self.pos + 1]

    def peek_back(self) -> "filenamelexer.Item":
        """Return the item before the current one without stepping back.

        Returns ``eof`` when there is no previous item. This includes
        ``pos == -1``, which used to wrap around to ``input[-2]``.
        """
        if self.pos <= 0:
            return eof
        return self.input[self.pos - 1]

    def backup(self):
        """Step back one item."""
        self.pos -= 1

    def run(self):
        """Run the state machine until a state function returns ``None``."""
        self.state = parse
        while self.state is not None:
            self.state = self.state(self)
|
||||
|
||||
|
||||
def parse(p: Parser):
    """Main parser state: consume one item and decide what it means.

    Returns the next state function: ``parse_finish`` at the end of input, a
    specialised state (``parse_issue_number``, ``parse_series``,
    ``parse_info_specifier``) when the item starts such a construct, or
    ``parse`` itself to continue with the following item.
    """
    ItemType = filenamelexer.ItemType
    date_separators = [ItemType.Symbol, ItemType.Operator, ItemType.Dot]

    item: filenamelexer.Item = p.get()

    # We're done, time to do final processing
    if item.typ == ItemType.EOF:
        return parse_finish

    # Need to figure out if this is the issue number
    if item.typ == ItemType.Number:
        likely_year = False
        if p.firstItem and p.first_is_alt:
            p.alt = True
            return parse_issue_number

        # The issue number should hopefully not be in parentheses
        if p.in_something == 0:
            # Assume that operators indicate a non-issue number e.g. IG-88 or 88-IG
            if ItemType.Operator in (p.peek().typ, p.peek_back().typ):
                # operator rejected used later to add back to the series/title
                p.operator_rejected.append(item)
            elif (
                # It is common to use '89 to refer to an annual reprint from 1989
                item.val[0] != "'"
                # Issue number is less than 4 digits. very few series go above 999
                and len(item.val.lstrip("0")) < 4
                # An issue number starting with # was not found and no previous number was found
                and p.issue_number_at is None
                # Series has already been started/parsed, filters out leading alternate numbers
                and len(p.series_parts) > 0
            ):
                p.firstItem = False
                return parse_issue_number

        # It is more likely to be a year if it is inside parentheses.
        if p.in_something > 0:
            likely_year = True

        # If numbers are directly followed by text it most likely isn't a year e.g. 2048px
        if p.peek().typ == ItemType.Text:
            likely_year = False

        # Is either a full year '2001' or a short year "'89"
        if len(item.val) == 4 or item.val[0] == "'":
            if p.in_something == 0:
                # Append to series in case it is a part of the title, but only if we're not inside parentheses
                p.series_parts.append(item)

            # Look for a full date as in 2022-04-22
            if p.peek().typ in date_separators:
                op = [p.get()]
                if p.peek().typ == ItemType.Number:
                    month = p.get()
                    if p.peek().typ in date_separators:
                        op.append(p.get())
                        if p.peek().typ == ItemType.Number:
                            day = p.get()
                            fulldate = [month, day, item]
                            p.used_items.extend(op)
                            p.used_items.extend(fulldate)
                        else:
                            # Undo second separator, month and first separator
                            p.backup()
                            p.backup()
                            p.backup()
                            # TODO never happens
                    else:
                        # Undo month and separator
                        p.backup()
                        p.backup()
                        # TODO never happens
                else:
                    # Undo separator
                    p.backup()
                    # TODO never happens

            p.year_candidates.append((likely_year, item))
        # Ensures that IG-88 gets added back to the series/title
        # NOTE(review): the original relied on `and` binding tighter than `or`; the
        # parentheses below keep that evaluation. The intent may have been
        # `in_something == 0 and (previous or next is an operator)` - confirm.
        elif (p.in_something == 0 and p.peek_back().typ == ItemType.Operator) or (
            p.peek().typ == ItemType.Operator
        ):
            p.series_parts.append(item)
            p.used_items.append(item)

            p.firstItem = False
            p.get()
            return parse_series

    # Number with a leading hash e.g. #003
    elif item.typ == ItemType.IssueNumber:
        p.firstItem = False
        return parse_issue_number

    # Matches FCBD. Not added to p.used_items so it will show in "remainder"
    elif item.typ == ItemType.FCBD:
        p.filename_info["fcbd"] = True

    # Matches c2c. Not added to p.used_items so it will show in "remainder"
    elif item.typ == ItemType.C2C:
        p.filename_info["c2c"] = True

    # Matches the extension if it is known to be an archive format e.g. cbt,cbz,zip,rar
    elif item.typ == ItemType.ArchiveType:
        p.filename_info["archive"] = item.val.lower()
        p.used_items.append(item)
        if p.peek_back().typ == ItemType.Dot:
            p.used_items.append(p.peek_back())

    # Allows removing DC from 'Wonder Woman 49 DC Sep-Oct 1951' dependent on publisher being in a static list in the lexer
    elif item.typ == ItemType.Publisher:
        p.filename_info["publisher"] = item.val
        p.used_items.append(item)
        if p.firstItem:
            p.firstItem = False
            if p.in_something == 0:
                return parse_series
        p.publisher_removed.append(item)
        if p.in_something == 0:
            return parse_series

    # Attempts to identify the type e.g. annual
    elif item.typ == ItemType.ComicType:
        series_append = True
        word = item.val.lower()

        if p.peek().typ == ItemType.Space:
            p.get()

        if p.series_parts and "free comic book" in (" ".join([x.val for x in p.series_parts]) + " " + item.val).lower():
            p.filename_info["fcbd"] = True
        # If the next item is a number it's probably the volume
        elif p.peek().typ == ItemType.Number or (
            p.peek().typ == ItemType.Text and t2d.convert(p.peek().val).isnumeric()
        ):
            number = p.get()
            # Mark volume info. Text will be added to the title/series later
            if word in ["book", "tpb"]:
                p.title_parts.extend([item, number])
                p.filename_info["volume"] = t2do.convert(number.val)
                p.filename_info["issue"] = t2do.convert(number.val)

                p.used_items.append(item)
                series_append = False

            # Annuals usually mean the year
            elif word in ["annual"]:
                p.filename_info["annual"] = True
                num = t2d.convert(number.val)
                if num.isnumeric() and len(num) == 4:
                    p.year_candidates.append((True, number))
                else:
                    # Not a year: hand the number back to the main loop.
                    p.backup()

        elif word in ["annual"]:
            p.filename_info["annual"] = True

        # If we don't have a reason to exclude it from the series go back to parsing the series immediately
        if series_append:
            p.series_parts.append(item)
            p.used_items.append(item)
            return parse_series

    # We found text, it's probably the title or series
    elif item.typ in [ItemType.Text, ItemType.Honorific]:
        p.firstItem = False
        if p.in_something == 0:
            return parse_series

    # Usually the word 'of' eg 1 (of 6)
    elif item.typ == ItemType.InfoSpecifier:
        return parse_info_specifier

    # Operator is a symbol that acts as some sort of separator eg - : ;
    elif item.typ == ItemType.Operator:
        if p.in_something == 0:
            p.irrelevant.append(item)

    # Filter out Month and day names in filename
    elif item.typ == ItemType.Calendar:
        # Month and day are currently irrelevant if they are inside parentheses e.g. (January 2002)
        if p.in_something > 0:
            p.irrelevant.append(item)

        # assume Sep-Oct is not useful in the series/title
        elif p.peek().typ in [ItemType.Symbol, ItemType.Operator]:
            p.get()
            if p.peek().typ == ItemType.Calendar:
                # The first name, the separator just consumed, and the second name
                p.irrelevant.extend([item, p.input[p.pos], p.get()])
            else:
                p.backup()
                return parse_series
        # This is text that just happens to also be a month/day
        else:
            return parse_series

    # Specifically '__' or '--', no further title/series parsing is done to keep compatibility with wiki
    elif item.typ == ItemType.Skip:
        p.skip = True

    # Keeping track of parentheses depth
    elif item.typ == ItemType.LeftParen:
        p.in_paren += 1
        p.in_something += 1
    elif item.typ == ItemType.LeftBrace:
        p.in_brace += 1
        p.in_something += 1
    elif item.typ == ItemType.LeftSBrace:
        p.in_s_brace += 1
        p.in_something += 1

    elif item.typ == ItemType.RightParen:
        p.in_paren -= 1
        p.in_something -= 1
    elif item.typ == ItemType.RightBrace:
        p.in_brace -= 1
        p.in_something -= 1
    elif item.typ == ItemType.RightSBrace:
        p.in_s_brace -= 1
        p.in_something -= 1

    p.firstItem = False

    # Brace management: an unmatched closing bracket must not leave a counter
    # negative. Each counter is reset after compensating; previously it stayed
    # negative, so in_something was bumped again on every later call.
    if p.in_paren < 0:
        p.in_something += -p.in_paren
        p.in_paren = 0
    if p.in_brace < 0:
        p.in_something += -p.in_brace
        p.in_brace = 0
    if p.in_s_brace < 0:
        p.in_something += -p.in_s_brace
        p.in_s_brace = 0

    return parse
|
||||
|
||||
|
||||
# TODO: What about more esoteric numbers???
|
||||
def parse_issue_number(p: Parser):
    """Record the current item as the issue number or an alternate number.

    The first number becomes ``issue`` (or ``alternate`` when ``p.alt`` is
    set). A directly following ``.<text-or-number>`` is appended to it. Once
    an issue is known, further numbers are collected in ``alternate``,
    comma-separated. Always returns ``parse``.
    """
    item = p.input[p.pos]

    if "issue" in p.filename_info:
        if "alternate" in p.filename_info:
            p.filename_info["alternate"] += "," + item.val
        else:
            # Without this `else` the append above was immediately overwritten.
            p.filename_info["alternate"] = item.val
    else:
        if p.alt:
            p.filename_info["alternate"] = item.val
        else:
            p.filename_info["issue"] = item.val
            p.issue_number_at = item.pos
        p.used_items.append(item)

        # Look for a decimal/suffix part such as the '.1' in '12.1'
        item = p.get()
        if item.typ == filenamelexer.ItemType.Dot:
            p.used_items.append(item)
            item = p.get()
            if item.typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Number]:
                if p.alt:
                    p.filename_info["alternate"] += "." + item.val
                else:
                    p.filename_info["issue"] += "." + item.val
                p.used_items.append(item)
            else:
                # Undo both the item after the dot and the dot itself
                p.backup()
                p.backup()
        else:
            p.backup()
    p.alt = False
    return parse
|
||||
|
||||
|
||||
def parse_series(p: Parser):
    """Collect consecutive items that make up the series and/or title.

    Items are gathered into sections; an operator directly followed by a
    space (e.g. ``': '``) starts a new section, and the last section is
    treated as the title. If ``p.series_parts`` is already populated, the
    whole run is added to the title instead. Always returns ``parse``.
    """
    ItemType = filenamelexer.ItemType

    item = p.input[p.pos]

    series: list[list[filenamelexer.Item]] = [[]]
    # Space and Dots are not useful at the beginning of a title/series
    if not p.skip and item.typ not in [ItemType.Space, ItemType.Dot]:
        series[0].append(item)

    current_part = 0

    title_parts: list[filenamelexer.Item] = []
    series_parts: list[filenamelexer.Item] = []

    prev_space = False

    # 'free comic book day' screws things up. #TODO look into removing book from ComicType?

    # We stop parsing the series when certain things come up if nothing was done with them continue where we left off
    if (
        (p.series_parts and p.series_parts[-1].val.lower() == "book")
        or p.peek_back().typ == ItemType.Number
        or item.typ == ItemType.Calendar
    ):
        series_parts = p.series_parts
        p.series_parts = []

    # Skip is only true if we have come across '--' or '__'
    while not p.skip:
        item = p.get()

        # Spaces are evil
        if item.typ == ItemType.Space:
            prev_space = True
            continue

        if item.typ in [ItemType.Text, ItemType.Symbol, ItemType.Publisher, ItemType.Honorific]:
            series[current_part].append(item)
            if item.typ == ItemType.Honorific and p.peek().typ == ItemType.Dot:
                series[current_part].append(p.get())
            elif item.typ == ItemType.Publisher:
                p.filename_info["publisher"] = item.val

        # Handle Volume
        elif item.typ == ItemType.InfoSpecifier:
            # Exception for 'of'
            if item.val.lower() == "of":
                series[current_part].append(item)
            else:
                # This specifically lets 'X-Men-V1-067' parse correctly as Series: X-Men Volume: 1 Issue: 67
                while len(series[current_part]) > 0 and series[current_part][-1].typ not in [
                    ItemType.Text,
                    ItemType.Symbol,
                ]:
                    p.irrelevant.append(series[current_part].pop())
                p.backup()
                break

        elif item.typ == ItemType.Operator:
            peek = p.peek()
            # ': ' separates the title from the series, only the last section is considered the title
            if not prev_space and peek.typ in [ItemType.Space]:
                series.append([])  # Starts a new section
                series[current_part].append(item)
                current_part += 1
            else:
                # Force space around '-' makes 'batman - superman' stay otherwise we get 'batman-superman'
                if prev_space and peek.typ in [ItemType.Space]:
                    item.val = " " + item.val + " "
                series[current_part].append(item)

        # Stop processing series/title if a skip item is found
        elif item.typ == ItemType.Skip:
            p.backup()
            break

        elif item.typ == ItemType.Number:
            if p.peek().typ == ItemType.Space:
                p.get()
                # We have 2 numbers, add the first to the series and then go back to parse
                if p.peek().typ == ItemType.Number:
                    series[current_part].append(item)
                    break

                # We have 1 number break here, it's possible it's the issue
                p.backup()  # Whitespace
                p.backup()  # The number
                break
            # This is 6 in '1 of 6'
            if series[current_part] and series[current_part][-1].val.lower() == "of":
                series[current_part].append(item)

            # We have 1 number break here, it's possible it's the issue
            else:
                p.backup()  # The number
                break

        else:
            # Ensure 'ms. marvel' parses 'ms.' correctly
            if item.typ == ItemType.Dot and p.peek_back().typ == ItemType.Honorific:
                series[current_part].append(item)
            # Allows avengers.hulk to parse correctly
            elif item.typ == ItemType.Dot and p.peek().typ == ItemType.Text:
                # Marks the dot as used so that the remainder is clean
                p.used_items.append(item)
            else:
                p.backup()
                break

        prev_space = False

    # We have a title separator e.g. ': '
    if len(series) > 1:
        title_parts.extend(series.pop())
        for s in series:
            if s and s[-1].typ == ItemType.Operator:
                s[-1].val += " "  # Ensures that when there are multiple separators that they display properly
            series_parts.extend(s)
        # The trailing item of the series sections is marked used instead of kept
        p.used_items.append(series_parts.pop())
    else:
        series_parts.extend(series[0])

    # If the series has already been set assume all of this is the title.
    if len(p.series_parts) > 0:
        p.title_parts.extend(series_parts)
    else:
        p.series_parts.extend(series_parts)
    p.title_parts.extend(title_parts)
    return parse
|
||||
|
||||
|
||||
def resolve_year(p: Parser):
    """Pick the year (and possibly the volume) from the collected year candidates.

    Each entry of ``p.year_candidates`` is indexed as ``(likely_year, item)``:
    element 0 is used as the sort key and as a truthiness flag, element 1 is the
    token whose ``val`` is stored. The function mutates ``p`` in place
    (``filename_info``, ``used_items``, ``series_parts``, ``title_parts`` and
    ``year_candidates``) and returns nothing. It does nothing when there are no
    candidates.
    """
    if not p.year_candidates:
        return

    # Sort by likely_year boolean so the most likely candidates end up last
    p.year_candidates.sort(key=itemgetter(0))

    # Take the last year e.g. (2007) 2099 (2008) becomes 2099 2007 2008 and takes 2008
    selected_year = p.year_candidates.pop()[1]
    p.filename_info["year"] = selected_year.val
    p.used_items.append(selected_year)

    # (2008) Title (2009) is many times used to denote the series year; if we don't have a volume we use it
    if "volume" not in p.filename_info and p.year_candidates and p.year_candidates[-1][0]:
        vol = p.year_candidates.pop()[1]
        p.filename_info["volume"] = vol.val
        p.used_items.append(vol)

        # Remove volume from series and title
        # (the token promoted to volume, not the year that was selected above)
        for parts in (p.series_parts, p.title_parts):
            if vol in parts:
                parts.remove(vol)

    # Remove year from series and title
    for parts in (p.series_parts, p.title_parts):
        if selected_year in parts:
            parts.remove(selected_year)
|
||||
|
||||
|
||||
def parse_finish(p: Parser):
    """Finalize ``p.filename_info`` once all tokens have been consumed.

    In order: resolves the year, tries to take a trailing number of the series
    as the issue, optionally strips publisher tokens, joins series and title,
    normalizes issue and volume strings, falls back to the alternate number as
    the issue, computes the remainder and fills in every expected key with a
    default. Mutates ``p`` in place and returns nothing.
    """
    resolve_year(p)

    info = p.filename_info

    # If we don't have an issue try to find it in the series
    if "issue" not in info and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number:
        candidate = p.series_parts.pop()

        if candidate in [x[1] for x in p.year_candidates]:
            # The number we just popped is a year, put it back on;
            # it's probably part of the series e.g. Spider-Man 2099
            p.series_parts.append(candidate)
        elif (
            candidate in p.operator_rejected
            and p.series_parts
            and p.series_parts[-1].typ == filenamelexer.ItemType.Operator
        ):
            # This number was rejected because of an operator and the operator
            # is still there, add it back e.g. 'IG-88'
            p.series_parts.append(candidate)
        else:
            # We have no reason to not use this number as the issue number.
            # Specifically happens when parsing 'X-Men-V1-067.cbr'
            info["issue"] = candidate.val
            p.used_items.append(candidate)
            p.issue_number_at = candidate.pos

    # Remove publishers, currently only marvel and dc are defined,
    # this is an option specifically because this can drastically screw up parsing
    if p.remove_publisher:
        for publisher in p.publisher_removed:
            for parts in (p.series_parts, p.title_parts):
                if publisher in parts:
                    parts.remove(publisher)

    info["series"] = join_title(p.series_parts)
    p.used_items.extend(p.series_parts)

    info["title"] = join_title(p.title_parts)
    p.used_items.extend(p.title_parts)

    if "issue" in info:
        info["issue"] = issuestring.IssueString(info["issue"].lstrip("#")).as_string()

    if "volume" in info:
        info["volume"] = info["volume"].lstrip("#").lstrip("0")

    if "issue" not in info:
        if "alternate" in info:
            # We have an alternate, move it to the issue
            info["issue"] = info["alternate"]
            info["alternate"] = ""
        else:
            # TODO: This never happens
            unused = [x for x in p.input if x not in p.irrelevant and x not in p.used_items and x.typ != eof.typ]
            if len(unused) == 1 and unused[0].typ == filenamelexer.ItemType.Number:
                info["issue"] = unused[0].val
                p.used_items.append(unused[0])

    # Token types the caller asked to keep out of the remainder
    unwanted_types = []
    if p.remove_fcbd:
        unwanted_types.append(filenamelexer.ItemType.FCBD)
    if p.remove_c2c:
        unwanted_types.append(filenamelexer.ItemType.C2C)

    p.irrelevant.extend([x for x in p.input if x.typ in unwanted_types])

    info["remainder"] = get_remainder(p)

    # Ensure keys always exist
    for key in (
        "alternate",
        "issue",
        "archive",
        "series",
        "title",
        "volume",
        "year",
        "remainder",
        "issue_count",
        "volume_count",
        "publisher",
    ):
        info.setdefault(key, "")
    for flag in ("fcbd", "c2c", "annual"):
        info.setdefault(flag, False)
|
||||
|
||||
|
||||
def get_remainder(p: Parser):
    """Build the leftover text from tokens that were neither used nor irrelevant.

    Walks the items of ``p.input`` that are in neither ``p.irrelevant`` nor
    ``p.used_items`` and concatenates their ``val``. Space and skip tokens
    collapse to at most one space and are dropped next to brackets; a closing
    bracket that directly follows an opening bracket removes that opening
    bracket. Any empty bracket pairs still left are deleted by the final regex.

    Returns the resulting string with surrounding whitespace stripped.
    """
    remainder = ""

    # Remove used items and irrelevant items e.g. the series and useless operators
    inp = [x for x in p.input if x not in p.irrelevant and x not in p.used_items]
    for i, item in enumerate(inp):
        # No double space or space next to parentheses
        if item.typ in (filenamelexer.ItemType.Space, filenamelexer.ItemType.Skip):
            if (
                i > 0
                and inp[i - 1].typ
                not in (
                    filenamelexer.ItemType.Space,
                    filenamelexer.ItemType.LeftBrace,
                    filenamelexer.ItemType.LeftParen,
                    filenamelexer.ItemType.LeftSBrace,
                )
                and i + 1 < len(inp)
                and inp[i + 1].typ
                not in (
                    filenamelexer.ItemType.RightBrace,
                    filenamelexer.ItemType.RightParen,
                    filenamelexer.ItemType.RightSBrace,
                )
            ):
                remainder += " "

        # Strip off useless opening parenthesis: a closer right after an opener
        # means everything between them was consumed elsewhere.
        # (Space tokens never reach this branch; the branch above handles them.)
        elif (
            item.typ
            in (
                filenamelexer.ItemType.RightBrace,
                filenamelexer.ItemType.RightParen,
                filenamelexer.ItemType.RightSBrace,
            )
            and i > 0
            and inp[i - 1].typ
            in (
                filenamelexer.ItemType.LeftBrace,
                filenamelexer.ItemType.LeftParen,
                filenamelexer.ItemType.LeftSBrace,
            )
        ):
            remainder = remainder.rstrip("[{(")

        # Add the next item
        else:
            remainder += item.val

    # Remove empty parentheses
    remainder = re.sub(r"[\[{(]+[]})]+", "", remainder)
    return remainder.strip()
|
||||
|
||||
|
||||
def parse_info_specifier(p: Parser):
    """Handle a specifier word (a volume marker or 'of') that may precede a number.

    The token at ``p.pos`` is the specifier. If a number (or a text token that
    ``t2d.convert`` turns into a numeric string, e.g. 'three') follows,
    optionally after one space, it is consumed and recorded as the volume, the
    issue count or the volume count in ``p.filename_info``.

    Returns ``parse_series`` when ``p.in_something`` is falsy, otherwise
    ``parse`` (presumably the next parser state functions; both are defined
    elsewhere in this module).
    """
    item = p.input[p.pos]
    index = p.pos

    if p.peek().typ == filenamelexer.ItemType.Space:
        p.get()

    # Handles 'book 3' and 'book three'
    if p.peek().typ == filenamelexer.ItemType.Number or (
        p.peek().typ == filenamelexer.ItemType.Text and t2d.convert(p.peek().val).isnumeric()
    ):
        number = p.get()
        specifier = item.val.lower()

        if specifier in ["volume", "vol", "vol.", "v"]:
            p.filename_info["volume"] = t2do.convert(number.val)
            p.used_items.append(item)
            p.used_items.append(number)

        # 'of' is only special if it is inside a parenthesis.
        elif specifier == "of":
            # The number this count belongs to, e.g. 03 in '03 (of 6)'; may be None
            i = get_number(p, index)
            if p.in_something > 0:
                if p.issue_number_at is None:
                    # TODO: Figure out what to do here if it ever happens
                    p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(item)
                    p.used_items.append(number)

                elif i is None:
                    # No number precedes this count, so there is nothing to compare
                    # against the issue position; leave the tokens unused instead of
                    # failing on ``i.pos``.
                    pass

                # This is definitely the issue number
                elif p.issue_number_at == i.pos:
                    p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(item)
                    p.used_items.append(number)

                # This is not for the issue number it is not in either the issue or the title,
                # assume it is the volume number and count
                elif p.issue_number_at != i.pos and i not in p.series_parts and i not in p.title_parts:
                    p.filename_info["volume"] = i.val
                    p.filename_info["volume_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(i)
                    p.used_items.append(item)
                    p.used_items.append(number)
                else:
                    # TODO: Figure out what to do here if it ever happens
                    pass
            else:
                # Lets 'The Wrath of Foobar-Man, Part 1 of 2' parse correctly as the title
                if i is not None:
                    # Rewind to the first item equal to the number found
                    p.pos = p.input.index(i)

    if not p.in_something:
        return parse_series
    return parse
|
||||
|
||||
|
||||
# Gets 03 in '03 of 6'
|
||||
def get_number(p: Parser, index: int):
    """Find the number that a count at ``index`` refers to, e.g. 03 in '03 (of 6)'.

    Scans ``p.input`` backwards from just before ``index``. Opening brackets and
    spaces are skipped; the first other item decides the result.

    Returns the item if it is a number, otherwise ``None`` (also ``None`` when
    nothing but ignorable items precede ``index``). ``p.input`` is not modified.
    """
    # Go backward through the filename to see if we can find what this is of eg '03 (of 6)' or '008 title 03 (of 6)'
    for candidate in reversed(p.input[:index]):
        # We don't care about these types, we are looking to see if there is a number
        # that is possibly different from the issue number for this count
        if candidate.typ in (
            filenamelexer.ItemType.LeftParen,
            filenamelexer.ItemType.LeftBrace,
            filenamelexer.ItemType.LeftSBrace,
            filenamelexer.ItemType.Space,
        ):
            continue
        if candidate.typ == filenamelexer.ItemType.Number:
            # We got our number, time to leave
            return candidate
        # This is not a number and not an ignorable type, give up looking for the number this count belongs to
        break

    return None
|
||||
|
||||
|
||||
def join_title(lst: list[filenamelexer.Item]):
    """Concatenate the ``val`` of each item into a display string.

    Items are separated by a single space except: after an operator, after the
    last item, between an honorific and a following dot, and before an operator
    or symbol. A comma in the final position is dropped.
    """
    pieces = []
    final = len(lst) - 1
    for pos, token in enumerate(lst):
        at_end = pos == final
        # We ignore commas on the end
        if at_end and token.val == ",":
            continue

        pieces.append(token.val)

        # No trailing space, and no space after operators
        if at_end or token.typ == filenamelexer.ItemType.Operator:
            continue

        following = lst[pos + 1].typ
        # No space after honorifics with a dot
        if token.typ == filenamelexer.ItemType.Honorific and following == filenamelexer.ItemType.Dot:
            continue
        # No space if the next item is an operator or symbol
        if following in (filenamelexer.ItemType.Operator, filenamelexer.ItemType.Symbol):
            continue

        pieces.append(" ")

    return "".join(pieces)
|
||||
|
||||
|
||||
def Parse(
    lexer_result: list[filenamelexer.Item],
    first_is_alt=False,
    remove_c2c=False,
    remove_fcbd=False,
    remove_publisher=False,
):
    """Create a ``Parser`` for the lexed items, run it and return it.

    All arguments are forwarded unchanged, by keyword, to ``Parser``. The
    returned object is the parser itself after ``run()`` has been called.
    """
    options = {
        "first_is_alt": first_is_alt,
        "remove_c2c": remove_c2c,
        "remove_fcbd": remove_fcbd,
        "remove_publisher": remove_publisher,
    }
    parser = Parser(lexer_result=lexer_result, **options)
    parser.run()
    return parser
|
||||
|
@ -32,11 +32,13 @@ logger = logging.getLogger(__name__)
|
||||
class AutoTagMatchWindow(QtWidgets.QDialog):
|
||||
volume_id = 0
|
||||
|
||||
def __init__(self, parent, match_set_list: List[MultipleMatch], style, fetch_func):
|
||||
def __init__(self, parent, match_set_list: List[MultipleMatch], style, fetch_func, settings):
|
||||
super().__init__(parent)
|
||||
|
||||
uic.loadUi(ComicTaggerSettings.get_ui_file("matchselectionwindow.ui"), self)
|
||||
|
||||
self.settings = settings
|
||||
|
||||
self.current_match_set: Optional[MultipleMatch] = None
|
||||
|
||||
self.altCoverWidget = CoverImageWidget(self.altCoverContainer, CoverImageWidget.AltCoverMode)
|
||||
@ -221,7 +223,12 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
|
||||
|
||||
md = ca.read_metadata(self.style)
|
||||
if md.is_empty:
|
||||
md = ca.metadata_from_filename()
|
||||
md = ca.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
self.settings.remove_publisher,
|
||||
)
|
||||
|
||||
# now get the particular issue data
|
||||
cv_md = self.fetch_func(match)
|
||||
|
@ -101,7 +101,7 @@ def display_match_set_for_choice(label, match_set: MultipleMatch, opts, settings
|
||||
# save the data!
|
||||
# we know at this point, that the file is all good to go
|
||||
ca = match_set.ca
|
||||
md = create_local_metadata(opts, ca, ca.has_metadata(opts.data_style))
|
||||
md = create_local_metadata(opts, ca, ca.has_metadata(opts.data_style), settings)
|
||||
cv_md = actual_issue_data_fetch(match_set.matches[int(i)], settings, opts)
|
||||
md.overlay(cv_md)
|
||||
actual_metadata_save(ca, opts, md)
|
||||
@ -164,13 +164,17 @@ def cli_mode(opts, settings):
|
||||
post_process_matches(match_results, opts, settings)
|
||||
|
||||
|
||||
def create_local_metadata(opts, ca: ComicArchive, has_desired_tags):
|
||||
def create_local_metadata(opts, ca: ComicArchive, has_desired_tags, settings):
|
||||
md = GenericMetadata()
|
||||
md.set_default_page_list(ca.get_number_of_pages())
|
||||
|
||||
# now, overlay the parsed filename info
|
||||
if opts.parse_filename:
|
||||
md.overlay(ca.metadata_from_filename())
|
||||
md.overlay(
|
||||
ca.metadata_from_filename(
|
||||
settings.complicated_parser, settings.remove_c2c, settings.remove_fcbd, settings.remove_publisher
|
||||
)
|
||||
)
|
||||
|
||||
if has_desired_tags:
|
||||
md = ca.read_metadata(opts.data_style)
|
||||
@ -319,7 +323,7 @@ def process_file_cli(filename, opts, settings, match_results: OnlineMatchResults
|
||||
if batch_mode:
|
||||
print(f"Processing {ca.path}...")
|
||||
|
||||
md = create_local_metadata(opts, ca, has[opts.data_style])
|
||||
md = create_local_metadata(opts, ca, has[opts.data_style], settings)
|
||||
if md.issue is None or md.issue == "":
|
||||
if opts.assume_issue_is_one_if_not_set:
|
||||
md.issue = "1"
|
||||
@ -430,7 +434,7 @@ def process_file_cli(filename, opts, settings, match_results: OnlineMatchResults
|
||||
else:
|
||||
use_tags = False
|
||||
|
||||
md = create_local_metadata(opts, ca, use_tags)
|
||||
md = create_local_metadata(opts, ca, use_tags, settings)
|
||||
|
||||
if md.series is None:
|
||||
logger.error(msg_hdr + "Can't rename without series name")
|
||||
|
@ -63,6 +63,7 @@ class IssueIdentifier:
|
||||
result_multiple_good_matches = 5
|
||||
|
||||
def __init__(self, comic_archive: ComicArchive, settings):
|
||||
self.settings = settings
|
||||
self.comic_archive: ComicArchive = comic_archive
|
||||
self.image_hasher = 1
|
||||
|
||||
@ -192,7 +193,12 @@ class IssueIdentifier:
|
||||
internal_metadata = ca.read_cbi()
|
||||
|
||||
# try to get some metadata from filename
|
||||
md_from_filename = ca.metadata_from_filename()
|
||||
md_from_filename = ca.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
self.settings.remove_publisher,
|
||||
)
|
||||
|
||||
# preference order:
|
||||
# 1. Additional metadata
|
||||
|
@ -81,7 +81,12 @@ class RenameWindow(QtWidgets.QDialog):
|
||||
|
||||
md = ca.read_metadata(self.data_style)
|
||||
if md.is_empty:
|
||||
md = ca.metadata_from_filename(self.settings.parse_scan_info)
|
||||
md = ca.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
self.settings.remove_publisher,
|
||||
)
|
||||
self.renamer.set_metadata(md)
|
||||
self.renamer.move = self.settings.rename_move_dir
|
||||
|
||||
|
@ -88,7 +88,10 @@ class ComicTaggerSettings:
|
||||
self.ask_about_usage_stats = True
|
||||
|
||||
# filename parsing settings
|
||||
self.parse_scan_info = True
|
||||
self.complicated_parser = False
|
||||
self.remove_c2c = False
|
||||
self.remove_fcbd = False
|
||||
self.remove_publisher = False
|
||||
|
||||
# Comic Vine settings
|
||||
self.use_series_start_as_volume = False
|
||||
@ -161,7 +164,10 @@ class ComicTaggerSettings:
|
||||
self.ask_about_usage_stats = True
|
||||
|
||||
# filename parsing settings
|
||||
self.parse_scan_info = True
|
||||
self.complicated_parser = False
|
||||
self.remove_c2c = False
|
||||
self.remove_fcbd = False
|
||||
self.remove_publisher = False
|
||||
|
||||
# Comic Vine settings
|
||||
self.use_series_start_as_volume = False
|
||||
@ -287,8 +293,14 @@ class ComicTaggerSettings:
|
||||
if self.config.has_option("identifier", "id_publisher_filter"):
|
||||
self.id_publisher_filter = self.config.get("identifier", "id_publisher_filter")
|
||||
|
||||
if self.config.has_option("filenameparser", "parse_scan_info"):
|
||||
self.parse_scan_info = self.config.getboolean("filenameparser", "parse_scan_info")
|
||||
if self.config.has_option("filenameparser", "complicated_parser"):
|
||||
self.complicated_parser = self.config.getboolean("filenameparser", "complicated_parser")
|
||||
if self.config.has_option("filenameparser", "remove_c2c"):
|
||||
self.remove_c2c = self.config.getboolean("filenameparser", "remove_c2c")
|
||||
if self.config.has_option("filenameparser", "remove_fcbd"):
|
||||
self.remove_fcbd = self.config.getboolean("filenameparser", "remove_fcbd")
|
||||
if self.config.has_option("filenameparser", "remove_publisher"):
|
||||
self.remove_publisher = self.config.getboolean("filenameparser", "remove_publisher")
|
||||
|
||||
if self.config.has_option("dialogflags", "ask_about_cbi_in_rar"):
|
||||
self.ask_about_cbi_in_rar = self.config.getboolean("dialogflags", "ask_about_cbi_in_rar")
|
||||
@ -419,7 +431,10 @@ class ComicTaggerSettings:
|
||||
if not self.config.has_section("filenameparser"):
|
||||
self.config.add_section("filenameparser")
|
||||
|
||||
self.config.set("filenameparser", "parse_scan_info", self.parse_scan_info)
|
||||
self.config.set("filenameparser", "complicated_parser", self.complicated_parser)
|
||||
self.config.set("filenameparser", "remove_c2c", self.remove_c2c)
|
||||
self.config.set("filenameparser", "remove_fcbd", self.remove_fcbd)
|
||||
self.config.set("filenameparser", "remove_publisher", self.remove_publisher)
|
||||
|
||||
if not self.config.has_section("comicvine"):
|
||||
self.config.add_section("comicvine")
|
||||
|
@ -182,6 +182,7 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.cbxMoveFiles.clicked.connect(self.rename_test)
|
||||
self.cbxRenameStrict.clicked.connect(self.rename_test)
|
||||
self.leDirectory.textEdited.connect(self.rename_test)
|
||||
self.cbxComplicatedParser.clicked.connect(self.switch_parser)
|
||||
|
||||
def rename_test(self):
|
||||
self.rename__test(self.leRenameTemplate.text())
|
||||
@ -199,6 +200,13 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.rename_error = e
|
||||
self.lblRenameTest.setText(str(e))
|
||||
|
||||
def switch_parser(self):
    """Enable the C2C/FCBD/publisher removal checkboxes only while the complicated parser box is checked."""
    enabled = self.cbxComplicatedParser.isChecked()
    for checkbox in (self.cbxRemoveC2C, self.cbxRemoveFCBD, self.cbxRemovePublisher):
        checkbox.setEnabled(enabled)
|
||||
|
||||
def settings_to_form(self):
|
||||
# Copy values from settings to form
|
||||
self.leRarExePath.setText(self.settings.rar_exe_path)
|
||||
@ -208,8 +216,11 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
if self.settings.check_for_new_version:
|
||||
self.cbxCheckForNewVersion.setCheckState(QtCore.Qt.CheckState.Checked)
|
||||
|
||||
if self.settings.parse_scan_info:
|
||||
self.cbxParseScanInfo.setCheckState(QtCore.Qt.CheckState.Checked)
|
||||
self.cbxComplicatedParser.setChecked(self.settings.complicated_parser)
|
||||
self.cbxRemoveC2C.setChecked(self.settings.remove_c2c)
|
||||
self.cbxRemoveFCBD.setChecked(self.settings.remove_fcbd)
|
||||
self.cbxRemovePublisher.setChecked(self.settings.remove_publisher)
|
||||
self.switch_parser()
|
||||
|
||||
if self.settings.use_series_start_as_volume:
|
||||
self.cbxUseSeriesStartAsVolume.setCheckState(QtCore.Qt.CheckState.Checked)
|
||||
@ -291,7 +302,10 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.settings.id_length_delta_thresh = int(self.leNameLengthDeltaThresh.text())
|
||||
self.settings.id_publisher_filter = str(self.tePublisherFilter.toPlainText())
|
||||
|
||||
self.settings.parse_scan_info = self.cbxParseScanInfo.isChecked()
|
||||
self.settings.complicated_parser = self.cbxComplicatedParser.isChecked()
|
||||
self.settings.remove_c2c = self.cbxRemoveC2C.isChecked()
|
||||
self.settings.remove_fcbd = self.cbxRemoveFCBD.isChecked()
|
||||
self.settings.remove_publisher = self.cbxRemovePublisher.isChecked()
|
||||
|
||||
self.settings.use_series_start_as_volume = self.cbxUseSeriesStartAsVolume.isChecked()
|
||||
self.settings.clear_form_before_populating_from_cv = self.cbxClearFormBeforePopulating.isChecked()
|
||||
|
@ -557,7 +557,12 @@ Please choose options below, and select OK.
|
||||
|
||||
def actual_load_current_archive(self):
|
||||
if self.metadata.is_empty:
|
||||
self.metadata = self.comic_archive.metadata_from_filename(self.settings.parse_scan_info)
|
||||
self.metadata = self.comic_archive.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
remove_publisher=self.settings.remove_publisher,
|
||||
)
|
||||
if len(self.metadata.pages) == 0:
|
||||
self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages())
|
||||
|
||||
@ -928,7 +933,12 @@ Please choose options below, and select OK.
|
||||
if self.comic_archive is not None:
|
||||
# copy the form onto metadata object
|
||||
self.form_to_metadata()
|
||||
new_metadata = self.comic_archive.metadata_from_filename(self.settings.parse_scan_info)
|
||||
new_metadata = self.comic_archive.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
remove_publisher=self.settings.remove_publisher,
|
||||
)
|
||||
if new_metadata is not None:
|
||||
self.metadata.overlay(new_metadata)
|
||||
self.metadata_to_form()
|
||||
@ -1654,7 +1664,12 @@ Please choose options below, and select OK.
|
||||
# read in metadata, and parse file name if not there
|
||||
md = ca.read_metadata(self.save_data_style)
|
||||
if md.is_empty:
|
||||
md = ca.metadata_from_filename(self.settings.parse_scan_info)
|
||||
md = ca.metadata_from_filename(
|
||||
self.settings.complicated_parser,
|
||||
self.settings.remove_c2c,
|
||||
self.settings.remove_fcbd,
|
||||
remove_publisher=self.settings.remove_publisher,
|
||||
)
|
||||
if dlg.ignore_leading_digits_in_filename and md.series is not None:
|
||||
# remove all leading numbers
|
||||
md.series = re.sub(r"([\d.]*)(.*)", "\\2", md.series)
|
||||
@ -1846,7 +1861,9 @@ Please choose options below, and select OK to Auto-Tag.
|
||||
|
||||
match_results.multiple_matches.extend(match_results.low_confidence_matches)
|
||||
if reply == QtWidgets.QMessageBox.StandardButton.Yes:
|
||||
matchdlg = AutoTagMatchWindow(self, match_results.multiple_matches, style, self.actual_issue_data_fetch)
|
||||
matchdlg = AutoTagMatchWindow(
|
||||
self, match_results.multiple_matches, style, self.actual_issue_data_fetch, self.settings
|
||||
)
|
||||
matchdlg.setModal(True)
|
||||
matchdlg.exec()
|
||||
self.fileSelectionList.update_selected_rows()
|
||||
|
@ -229,19 +229,55 @@
|
||||
<attribute name="title">
|
||||
<string>Filename Parser</string>
|
||||
</attribute>
|
||||
<widget class="QCheckBox" name="cbxParseScanInfo">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>30</x>
|
||||
<y>30</y>
|
||||
<width>421</width>
|
||||
<height>25</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Parse Scan Info From Filename (Experimental)</string>
|
||||
</property>
|
||||
</widget>
|
||||
<layout class="QVBoxLayout" name="verticalLayout_6">
|
||||
<item>
|
||||
<widget class="QGroupBox" name="groupBox_2">
|
||||
<layout class="QVBoxLayout" name="verticalLayout_7">
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxComplicatedParser">
|
||||
<property name="text">
|
||||
<string>Use "Complicated" Parser</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxRemoveC2C">
|
||||
<property name="text">
|
||||
<string>Remove 'C2C' from Scan Info</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxRemoveFCBD">
|
||||
<property name="text">
|
||||
<string>Remove 'FCBD' from Scan Info</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cbxRemovePublisher">
|
||||
<property name="text">
|
||||
<string>Remove Publisher from filename</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<spacer name="verticalSpacer_4">
|
||||
<property name="orientation">
|
||||
<enum>Qt::Vertical</enum>
|
||||
</property>
|
||||
<property name="sizeHint" stdset="0">
|
||||
<size>
|
||||
<width>20</width>
|
||||
<height>40</height>
|
||||
</size>
|
||||
</property>
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="tab_3">
|
||||
<attribute name="title">
|
||||
|
@ -5,3 +5,4 @@ requests==2.*
|
||||
pathvalidate
|
||||
pycountry
|
||||
py7zr
|
||||
text2digits
|
Binary file not shown.
@ -1,35 +1,122 @@
|
||||
import pytest
|
||||
|
||||
fnames = [
|
||||
(
|
||||
"Monster_Island_v1_2__repaired__c2c.cbz",
|
||||
"stuff",
|
||||
"batman 3 title (DC).cbz",
|
||||
"honorific and publisher in series",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "batman",
|
||||
"title": "title",
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"batman 3 title DC.cbz",
|
||||
"honorific and publisher in series",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "batman",
|
||||
"title": "title DC",
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"ms. Marvel 3.cbz",
|
||||
"honorific and publisher in series",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "ms. Marvel",
|
||||
"title": "",
|
||||
"publisher": "Marvel",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"january jones 2.cbz",
|
||||
"month in series",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "january jones",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"52.cbz",
|
||||
"issue number only",
|
||||
{
|
||||
"issue": "52",
|
||||
"series": "",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"52 Monster_Island_v1_2__repaired__c2c.cbz",
|
||||
"leading alternate",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Monster Island",
|
||||
"title": "The Wrath of Foobar-Man, Part 1 of 2",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "",
|
||||
"remainder": "repaired c2c",
|
||||
"remainder": "repaired",
|
||||
"issue_count": "",
|
||||
"alternate": "52",
|
||||
"c2c": True,
|
||||
},
|
||||
),
|
||||
(
|
||||
"Monster_Island_v1_2__repaired__c2c.cbz",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Monster Island",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "",
|
||||
"remainder": "repaired",
|
||||
"issue_count": "",
|
||||
"c2c": True,
|
||||
},
|
||||
),
|
||||
(
|
||||
"Monster Island v1 3 (1957) -- The Revenge Of King Klong (noads).cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "Monster Island",
|
||||
"title": "The Wrath of Foobar-Man, Part 1 of 2",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "1957",
|
||||
"remainder": "The Revenge Of King Klong (noads)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Foobar-Man Annual 121 - The Wrath of Foobar-Man, Part 1 of 2.cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "121",
|
||||
"series": "Foobar-Man Annual",
|
||||
@ -38,12 +125,12 @@ fnames = [
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"annual": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Plastic Man v1 002 (1942).cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Plastic Man",
|
||||
@ -56,7 +143,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Blue Beetle 02.cbr",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Blue Beetle",
|
||||
@ -69,7 +156,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Monster Island vol. 2 #2.cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Monster Island",
|
||||
@ -82,7 +169,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Crazy Weird Comics 2 (of 2) (1969).rar",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Crazy Weird Comics",
|
||||
@ -95,7 +182,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Super Strange Yarns (1957) #92 (1969).cbz",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "92",
|
||||
"series": "Super Strange Yarns",
|
||||
@ -108,7 +195,7 @@ fnames = [
|
||||
),
|
||||
(
|
||||
"Action Spy Tales v1965 #3.cbr",
|
||||
"stuff",
|
||||
"Example from userguide",
|
||||
{
|
||||
"issue": "3",
|
||||
"series": "Action Spy Tales",
|
||||
@ -119,9 +206,9 @@ fnames = [
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
" X-Men-V1-067.cbr",
|
||||
"hyphen separated with hyphen in series",
|
||||
"hyphen separated with hyphen in series", # only parses correctly because v1 designates the volume
|
||||
{
|
||||
"issue": "67",
|
||||
"series": "X-Men",
|
||||
@ -131,7 +218,6 @@ fnames = [
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Amazing Spider-Man 078.BEY (2022) (Digital) (Zone-Empire).cbr",
|
||||
@ -139,15 +225,16 @@ fnames = [
|
||||
{
|
||||
"issue": "78.BEY",
|
||||
"series": "Amazing Spider-Man",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2022",
|
||||
"remainder": "(Digital) (Zone-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Angel Wings 02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
|
||||
"title after-issue",
|
||||
"title after issue",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Angel Wings",
|
||||
@ -157,11 +244,10 @@ fnames = [
|
||||
"remainder": "(Scanlation) (phillywilly)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Angel Wings #02 - Black Widow (2015) (Scanlation) (phillywilly).cbr",
|
||||
"title after-#issue",
|
||||
"title after #issue",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Angel Wings",
|
||||
@ -171,20 +257,19 @@ fnames = [
|
||||
"remainder": "(Scanlation) (phillywilly)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Aquaman - Green Arrow - Deep Target 01 (of 07) (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
"issue count",
|
||||
{
|
||||
"issue": "1",
|
||||
"series": "Aquaman - Green Arrow - Deep Target",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"issue_count": "7",
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Aquaman 80th Anniversary 100-Page Super Spectacular (2021) 001 (2021) (Digital) (BlackManta-Empire).cbz",
|
||||
@ -192,37 +277,39 @@ fnames = [
|
||||
{
|
||||
"issue": "1",
|
||||
"series": "Aquaman 80th Anniversary 100-Page Super Spectacular",
|
||||
"title": "",
|
||||
"volume": "2021",
|
||||
"year": "2021",
|
||||
"remainder": "(Digital) (BlackManta-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Avatar - The Last Airbender - The Legend of Korra (FCBD 2021) (Digital) (mv-DCP).cbr",
|
||||
"FCBD date",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Avatar - The Last Airbender - The Legend of Korra",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(FCBD) (Digital) (mv-DCP)",
|
||||
"remainder": "(Digital) (mv-DCP)",
|
||||
"issue_count": "",
|
||||
"fcbd": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Avengers By Brian Michael Bendis v03 (2013) (Digital) (F2) (Kileko-Empire).cbz",
|
||||
"volume without issue",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Avengers By Brian Michael Bendis",
|
||||
"title": "",
|
||||
"volume": "3",
|
||||
"year": "2013",
|
||||
"remainder": "(Digital) (F2) (Kileko-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Batman '89 (2021) (Webrip) (The Last Kryptonian-DCP).cbr",
|
||||
@ -230,6 +317,7 @@ fnames = [
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Batman '89",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(Webrip) (The Last Kryptonian-DCP)",
|
||||
@ -242,6 +330,7 @@ fnames = [
|
||||
{
|
||||
"issue": "20",
|
||||
"series": "Batman - Superman",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(digital) (NeverAngel-Empire)",
|
||||
@ -254,6 +343,7 @@ fnames = [
|
||||
{
|
||||
"issue": "9",
|
||||
"series": "Black Widow",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(Digital) (Zone-Empire)",
|
||||
@ -266,26 +356,28 @@ fnames = [
|
||||
{
|
||||
"issue": "6",
|
||||
"series": "Blade Runner 2029",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(3 covers) (digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Blade Runner Free Comic Book Day 2021 (2021) (digital-Empire).cbr",
|
||||
"FCBD year and (year)",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Blade Runner Free Comic Book Day 2021",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(digital-Empire)",
|
||||
"issue_count": "",
|
||||
"fcbd": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Bloodshot Book 03 (2020) (digital) (Son of Ultron-Empire).cbr",
|
||||
"book",
|
||||
{
|
||||
@ -297,9 +389,21 @@ fnames = [
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"book of eli (2020) (digital) (Son of Ultron-Empire).cbr",
|
||||
"book",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "book of eli",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2020",
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
(
|
||||
"Cyberpunk 2077 - You Have My Word 02 (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
"title",
|
||||
{
|
||||
@ -311,9 +415,8 @@ fnames = [
|
||||
"issue_count": "",
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Elephantmen 2259 008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
"volume count",
|
||||
{
|
||||
@ -326,9 +429,8 @@ fnames = [
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
"volume count",
|
||||
{
|
||||
@ -341,20 +443,20 @@ fnames = [
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Free Comic Book Day - Avengers.Hulk (2021) (2048px) (db).cbz",
|
||||
"'.' in name",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Free Comic Book Day - Avengers Hulk",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(2048px) (db)",
|
||||
"issue_count": "",
|
||||
"fcbd": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Goblin (2021) (digital) (Son of Ultron-Empire).cbr",
|
||||
@ -362,37 +464,41 @@ fnames = [
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Goblin",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(digital) (Son of Ultron-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Marvel Previews 002 (January 2022) (Digital-Empire).cbr",
|
||||
"(month year)",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "Marvel Previews",
|
||||
"title": "",
|
||||
"publisher": "Marvel",
|
||||
"volume": "",
|
||||
"year": "2022",
|
||||
"remainder": "(Digital-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Marvel Two In One V1 090 c2c (Comixbear-DCP).cbr",
|
||||
"volume issue ctc",
|
||||
{
|
||||
"issue": "90",
|
||||
"series": "Marvel Two In One",
|
||||
"title": "",
|
||||
"publisher": "Marvel",
|
||||
"volume": "1",
|
||||
"year": "",
|
||||
"remainder": "c2c (Comixbear-DCP)",
|
||||
"remainder": "(Comixbear-DCP)",
|
||||
"issue_count": "",
|
||||
"c2c": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Marvel Two In One V1 #090 c2c (Comixbear-DCP).cbr",
|
||||
@ -400,24 +506,27 @@ fnames = [
|
||||
{
|
||||
"issue": "90",
|
||||
"series": "Marvel Two In One",
|
||||
"title": "",
|
||||
"publisher": "Marvel",
|
||||
"volume": "1",
|
||||
"year": "",
|
||||
"remainder": "c2c (Comixbear-DCP)",
|
||||
"remainder": "(Comixbear-DCP)",
|
||||
"issue_count": "",
|
||||
"c2c": True,
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Star Wars - War of the Bounty Hunters - IG-88 (2021) (Digital) (Kileko-Empire).cbz",
|
||||
"number ends series, no-issue",
|
||||
{
|
||||
"issue": "",
|
||||
"series": "Star Wars - War of the Bounty Hunters - IG-88",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(Digital) (Kileko-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
(
|
||||
"Star Wars - War of the Bounty Hunters - IG-88 #1 (2021) (Digital) (Kileko-Empire).cbz",
|
||||
@ -425,6 +534,7 @@ fnames = [
|
||||
{
|
||||
"issue": "1",
|
||||
"series": "Star Wars - War of the Bounty Hunters - IG-88",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(Digital) (Kileko-Empire)",
|
||||
@ -437,39 +547,41 @@ fnames = [
|
||||
{
|
||||
"issue": "58",
|
||||
"series": "The Defenders",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "1978",
|
||||
"remainder": "(digital)",
|
||||
"issue_count": "",
|
||||
},
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"The Defenders v1 Annual 01 (1976) (Digital) (Minutemen-Slayer).cbr",
|
||||
" v in series",
|
||||
{
|
||||
"issue": "1",
|
||||
"series": "The Defenders Annual",
|
||||
"title": "",
|
||||
"volume": "1",
|
||||
"year": "1976",
|
||||
"remainder": "(Digital) (Minutemen-Slayer)",
|
||||
"issue_count": "",
|
||||
"annual": True,
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"The Magic Order 2 06 (2022) (Digital) (Zone-Empire)[__913302__].cbz",
|
||||
"ending id",
|
||||
{
|
||||
"issue": "6",
|
||||
"series": "The Magic Order 2",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2022",
|
||||
"remainder": "(Digital) (Zone-Empire)[__913302__]",
|
||||
"remainder": "(Digital) (Zone-Empire)[913302]", # Don't really care about double underscores
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Wonder Woman 001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
|
||||
"issue separates title",
|
||||
{
|
||||
@ -481,9 +593,8 @@ fnames = [
|
||||
"remainder": "(digital-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Wonder Woman #001 Wonder Woman Day Special Edition (2021) (digital-Empire).cbr",
|
||||
"issue separates title",
|
||||
{
|
||||
@ -495,46 +606,47 @@ fnames = [
|
||||
"remainder": "(digital-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Wonder Woman 49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
|
||||
"date-range, no paren, braces",
|
||||
{
|
||||
"issue": "49",
|
||||
"series": "Wonder Woman",
|
||||
"title": "digital", # Don't have a way to get rid of this
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "1951",
|
||||
"remainder": "(Shadowcat-Empire)",
|
||||
"remainder": "[downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz",
|
||||
"date-range, no paren, braces",
|
||||
{
|
||||
"issue": "49",
|
||||
"series": "Wonder Woman",
|
||||
"title": "digital", # Don't have a way to get rid of this
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "1951",
|
||||
"remainder": "(Shadowcat-Empire)",
|
||||
"remainder": "[downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
pytest.param(
|
||||
(
|
||||
"X-Men, 2021-08-04 (#02) (digital) (Glorith-HD).cbz",
|
||||
"full-date, issue in parenthesis",
|
||||
{
|
||||
"issue": "2",
|
||||
"series": "X-Men",
|
||||
"title": "",
|
||||
"volume": "",
|
||||
"year": "2021",
|
||||
"remainder": "(digital) (Glorith-HD)",
|
||||
"issue_count": "",
|
||||
},
|
||||
marks=pytest.mark.xfail,
|
||||
),
|
||||
]
|
||||
|
||||
|
@ -4,13 +4,39 @@ from filenames import fnames
|
||||
import comicapi.filenameparser
|
||||
|
||||
|
||||
@pytest.mark.parametrize("filename,reason,expected", fnames)
def test_file_name_parser_new(filename, reason, expected):
    """Check the lexer-based ``Parse`` result against the ``fnames`` expectations.

    ``filename`` is lexed with ``comicapi.filenamelexer.Lex`` and the resulting
    items are handed to ``comicapi.filenameparser.Parse``. The produced
    ``filename_info`` mapping (minus its "archive" entry) must equal
    ``expected`` once the optional keys have been given their defaults.

    ``reason`` is the human-readable label of the test case; it is not used
    by the assertions.
    """
    p = comicapi.filenameparser.Parse(
        comicapi.filenamelexer.Lex(filename).items,
        first_is_alt=True,
        remove_c2c=True,
        remove_fcbd=True,
        remove_publisher=True,
    )

    # Work on a copy so the parser's own mapping is left untouched; "archive"
    # is not part of the expectations and is dropped before comparing.
    fp = dict(p.filename_info)
    fp.pop("archive", None)

    # The dicts in ``fnames`` are shared by every test parametrized over them.
    # Fill in the optional keys on a fresh dict instead of mutating the
    # fixture, so the result does not depend on which test ran first.
    expected = {
        "alternate": "",
        "publisher": "",
        "volume_count": "",
        "fcbd": False,
        "c2c": False,
        "annual": False,
        **expected,
    }

    assert fp == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("filename,reason,expected", fnames)
def test_file_name_parser(filename, reason, expected):
    """Check the original ``FileNameParser`` against the ``fnames`` expectations.

    The parser instance's attribute dict is compared with ``expected`` after
    the keys listed in ``ignored`` have been left out of the comparison. A
    mismatch is reported as an expected failure ("old parser") rather than a
    hard failure.

    ``reason`` is the human-readable label of the test case; it is not used
    by the assertions.
    """
    p = comicapi.filenameparser.FileNameParser()
    p.parse_filename(filename)
    fp = p.__dict__

    # Keys excluded from the comparison for this parser.
    ignored = {"title", "alternate", "publisher", "fcbd", "c2c", "annual", "volume_count"}

    # Build a filtered copy instead of deleting from ``expected``: the dicts in
    # ``fnames`` are shared with the other parametrized tests, and removing
    # keys in place would make their outcome depend on execution order.
    expected = {key: value for key, value in expected.items() if key not in ignored}

    if fp != expected:
        pytest.xfail("old parser")
    assert fp == expected
|
||||
|
Loading…
Reference in New Issue
Block a user