Merge branch 'filenameParser' into develop

commit 42da653b6e
comicapi/comicarchive.py
@@ -42,10 +42,10 @@ try:
 except ImportError:
     pil_available = False
 
+from comicapi import filenamelexer, filenameparser
 from comicapi.comet import CoMet
 from comicapi.comicbookinfo import ComicBookInfo
 from comicapi.comicinfoxml import ComicInfoXml
-from comicapi.filenameparser import FileNameParser
 from comicapi.genericmetadata import GenericMetadata, PageType
 
 logger = logging.getLogger(__name__)
@@ -1127,25 +1127,46 @@ class ComicArchive:
         data = self.get_page(idx)
         p["ImageSize"] = str(len(data))
 
-    def metadata_from_filename(self, parse_scan_info=True):
+    def metadata_from_filename(
+        self, complicated_parser=False, remove_c2c=False, remove_fcbd=False, remove_publisher=False
+    ):
 
         metadata = GenericMetadata()
 
-        fnp = FileNameParser()
-        fnp.parse_filename(self.path)
+        if complicated_parser:
+            lex = filenamelexer.Lex(self.path)
+            p = filenameparser.Parse(
+                lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher
+            )
+            metadata.alternate_number = p.filename_info["alternate"] or None
+            metadata.issue = p.filename_info["issue"] or None
+            metadata.issue_count = p.filename_info["issue_count"] or None
+            metadata.publisher = p.filename_info["publisher"] or None
+            metadata.series = p.filename_info["series"] or None
+            metadata.title = p.filename_info["title"] or None
+            metadata.volume = p.filename_info["volume"] or None
+            metadata.volume_count = p.filename_info["volume_count"] or None
+            metadata.year = p.filename_info["year"] or None
 
-        if fnp.issue != "":
-            metadata.issue = fnp.issue
-        if fnp.series != "":
-            metadata.series = fnp.series
-        if fnp.volume != "":
-            metadata.volume = fnp.volume
-        if fnp.year != "":
-            metadata.year = fnp.year
-        if fnp.issue_count != "":
-            metadata.issue_count = fnp.issue_count
-        if parse_scan_info:
-            if fnp.remainder != "":
+            metadata.scan_info = p.filename_info["remainder"] or None
+            metadata.format = "FCBD" if p.filename_info["fcbd"] else None
+            if p.filename_info["annual"]:
+                metadata.format = "Annual"
+        else:
+            fnp = filenameparser.FileNameParser()
+            fnp.parse_filename(self.path)
+
+            if fnp.issue:
+                metadata.issue = fnp.issue
+            if fnp.series:
+                metadata.series = fnp.series
+            if fnp.volume:
+                metadata.volume = fnp.volume
+            if fnp.year:
+                metadata.year = fnp.year
+            if fnp.issue_count:
+                metadata.issue_count = fnp.issue_count
+            if fnp.remainder:
                 metadata.scan_info = fnp.remainder
 
         metadata.is_empty = False
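
Below is a minimal usage sketch (not part of the diff) of the new signature; `ca` stands in for a hypothetical ComicArchive instance opened elsewhere.

# Hypothetical caller: `ca` is an already-constructed ComicArchive.
md = ca.metadata_from_filename(
    complicated_parser=True, remove_c2c=True, remove_fcbd=True, remove_publisher=False
)
print(md.series, md.issue, md.volume, md.year, md.scan_info)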

comicapi/filenamelexer.py (new file, 353 lines)
@@ -0,0 +1,353 @@
import calendar
import os
import unicodedata
from enum import Enum, auto


class ItemType(Enum):
    Error = auto()  # Error occurred; value is text of error
    EOF = auto()
    Text = auto()  # Text
    LeftParen = auto()  # '(' inside action
    Number = auto()  # Simple number
    IssueNumber = auto()  # Preceded by a # Symbol
    RightParen = auto()  # ')' inside action
    Space = auto()  # Run of spaces separating arguments
    Dot = auto()
    LeftBrace = auto()
    RightBrace = auto()
    LeftSBrace = auto()
    RightSBrace = auto()
    Symbol = auto()
    Skip = auto()  # __ or -- no title, issue or series information beyond
    Operator = auto()
    Calendar = auto()
    InfoSpecifier = auto()  # Specifies type of info e.g. v1 for 'volume': 1
    ArchiveType = auto()
    Honorific = auto()
    Keywords = auto()
    FCBD = auto()
    ComicType = auto()
    Publisher = auto()
    C2C = auto()


braces = [
    ItemType.LeftBrace,
    ItemType.LeftParen,
    ItemType.LeftSBrace,
    ItemType.RightBrace,
    ItemType.RightParen,
    ItemType.RightSBrace,
]

eof = chr(0)

key = {
    "fcbd": ItemType.FCBD,
    "freecomicbookday": ItemType.FCBD,
    "cbr": ItemType.ArchiveType,
    "cbz": ItemType.ArchiveType,
    "cbt": ItemType.ArchiveType,
    "cb7": ItemType.ArchiveType,
    "rar": ItemType.ArchiveType,
    "zip": ItemType.ArchiveType,
    "tar": ItemType.ArchiveType,
    "7z": ItemType.ArchiveType,
    "annual": ItemType.ComicType,
    "book": ItemType.ComicType,
    "volume": ItemType.InfoSpecifier,
    "vol.": ItemType.InfoSpecifier,
    "vol": ItemType.InfoSpecifier,
    "v": ItemType.InfoSpecifier,
    "of": ItemType.InfoSpecifier,
    "dc": ItemType.Publisher,
    "marvel": ItemType.Publisher,
    "covers": ItemType.InfoSpecifier,
    "c2c": ItemType.C2C,
    "mr": ItemType.Honorific,
    "ms": ItemType.Honorific,
    "mrs": ItemType.Honorific,
    "dr": ItemType.Honorific,
}


class Item:
    def __init__(self, typ: ItemType, pos: int, val: str):
        self.typ: ItemType = typ
        self.pos: int = pos
        self.val: str = val

    def __repr__(self):
        return f"{self.val}: index: {self.pos}: {self.typ}"


class Lexer:
    def __init__(self, string):
        self.input: str = string  # The string being scanned
        self.state = None  # The next lexing function to enter
        self.pos: int = -1  # Current position in the input
        self.start: int = 0  # Start position of this item
        self.lastPos: int = 0  # Position of most recent item returned by nextItem
        self.paren_depth: int = 0  # Nesting depth of ( ) exprs
        self.brace_depth: int = 0  # Nesting depth of { }
        self.sbrace_depth: int = 0  # Nesting depth of [ ]
        self.items = []

    # Next returns the next rune in the input.
    def get(self) -> str:
        if int(self.pos) >= len(self.input) - 1:
            self.pos += 1
            return eof

        self.pos += 1
        return self.input[self.pos]

    # Peek returns but does not consume the next rune in the input.
    def peek(self) -> str:
        if int(self.pos) >= len(self.input) - 1:
            return eof

        return self.input[self.pos + 1]

    def backup(self):
        self.pos -= 1

    # Emit passes an item back to the client.
    def emit(self, t: ItemType):
        self.items.append(Item(t, self.start, self.input[self.start : self.pos + 1]))
        self.start = self.pos + 1

    # Ignore skips over the pending input before this point.
    def ignore(self):
        self.start = self.pos

    # Accept consumes the next rune if it's from the valid set.
    def accept(self, valid: str):
        if self.get() in valid:
            return True

        self.backup()
        return False

    # AcceptRun consumes a run of runes from the valid set.
    def accept_run(self, valid: str):
        while self.get() in valid:
            pass

        self.backup()

    # Errorf returns an error token and terminates the scan by passing
    # back a nil pointer that will be the next state, terminating self.nextItem.
    def errorf(self, message: str):
        self.items.append(Item(ItemType.Error, self.start, message))

    # NextItem returns the next item from the input.
    # Called by the parser, not in the lexing goroutine.
    # def next_item(self) -> Item:
    #     item: Item = self.items.get()
    #     self.lastPos = item.pos
    #     return item

    def scan_number(self):
        digits = "0123456789"

        self.accept_run(digits)
        if self.accept("."):
            if self.accept(digits):
                self.accept_run(digits)
            else:
                self.backup()
        if self.accept("s"):
            if not self.accept("t"):
                self.backup()
        elif self.accept("nr"):
            if not self.accept("d"):
                self.backup()
        elif self.accept("t"):
            if not self.accept("h"):
                self.backup()

        return True

    # Runs the state machine for the lexer.
    def run(self):
        self.state = lex_filename
        while self.state is not None:
            self.state = self.state(self)


# Scans the elements inside action delimiters.
def lex_filename(lex: Lexer):
    r = lex.get()
    if r == eof:
        if lex.paren_depth != 0:
            return lex.errorf("unclosed left paren")

        if lex.brace_depth != 0:
            return lex.errorf("unclosed left brace")
        lex.emit(ItemType.EOF)
        return None
    elif is_space(r):
        if r == "_" and lex.peek() == "_":
            lex.get()
            lex.emit(ItemType.Skip)
        else:
            return lex_space
    elif r == ".":
        r = lex.peek()
        if r < "0" or "9" < r:
            lex.emit(ItemType.Dot)
            return lex_filename

        lex.backup()
        return lex_number
    elif r == "'":
        r = lex.peek()
        if r in "0123456789":
            return lex_number
        lex.emit(ItemType.Text)  # TODO: Change to Text
    elif "0" <= r <= "9":
        lex.backup()
        return lex_number
    elif r == "#":
        if "0" <= lex.peek() <= "9":
            return lex_number
        lex.emit(ItemType.Symbol)
    elif is_operator(r):
        if r == "-" and lex.peek() == "-":
            lex.get()
            lex.emit(ItemType.Skip)
        else:
            return lex_operator
    elif is_alpha_numeric(r):
        lex.backup()
        return lex_text
    elif r == "(":
        lex.emit(ItemType.LeftParen)
        lex.paren_depth += 1
    elif r == ")":
        lex.emit(ItemType.RightParen)
        lex.paren_depth -= 1
        if lex.paren_depth < 0:
            return lex.errorf("unexpected right paren " + r)

    elif r == "{":
        lex.emit(ItemType.LeftBrace)
        lex.brace_depth += 1
    elif r == "}":
        lex.emit(ItemType.RightBrace)
        lex.brace_depth -= 1
        if lex.brace_depth < 0:
            return lex.errorf("unexpected right brace " + r)

    elif r == "[":
        lex.emit(ItemType.LeftSBrace)
        lex.sbrace_depth += 1
    elif r == "]":
        lex.emit(ItemType.RightSBrace)
        lex.sbrace_depth -= 1
        if lex.sbrace_depth < 0:
            return lex.errorf("unexpected right square brace " + r)
    elif is_symbol(r):
        # L.backup()
        lex.emit(ItemType.Symbol)
    else:
        return lex.errorf("unrecognized character in action: " + r)

    return lex_filename


def lex_operator(lex: Lexer):
    lex.accept_run("-|:;")
    lex.emit(ItemType.Operator)
    return lex_filename


# LexSpace scans a run of space characters.
# One space has already been seen.
def lex_space(lex: Lexer):
    while is_space(lex.peek()):
        lex.get()

    lex.emit(ItemType.Space)
    return lex_filename


# Lex_text scans an alphanumeric.
def lex_text(lex: Lexer):
    while True:
        r = lex.get()
        if is_alpha_numeric(r):
            if r.isnumeric():  # E.g. v1
                word = lex.input[lex.start : lex.pos]
                if word.lower() in key and key[word.lower()] == ItemType.InfoSpecifier:
                    lex.backup()
                    lex.emit(key[word.lower()])
                    return lex_filename
        else:
            if r == "'" and lex.peek() == "s":
                lex.get()
            else:
                lex.backup()
            word = lex.input[lex.start : lex.pos + 1]
            if word.lower() == "vol" and lex.peek() == ".":
                lex.get()
                word = lex.input[lex.start : lex.pos + 1]

            if word.lower() in key:
                lex.emit(key[word.lower()])
            elif cal(word):
                lex.emit(ItemType.Calendar)
            else:
                lex.emit(ItemType.Text)
            break

    return lex_filename


def cal(value: str):
    month_abbr = [i for i, x in enumerate(calendar.month_abbr) if x == value.title()]
    month_name = [i for i, x in enumerate(calendar.month_name) if x == value.title()]
    day_abbr = [i for i, x in enumerate(calendar.day_abbr) if x == value.title()]
    day_name = [i for i, x in enumerate(calendar.day_name) if x == value.title()]
    return set(month_abbr + month_name + day_abbr + day_name)


def lex_number(lex: Lexer):
    if not lex.scan_number():
        return lex.errorf("bad number syntax: " + lex.input[lex.start : lex.pos])
    # Complex number logic removed. Messes with math operations without space

    if lex.input[lex.start] == "#":
        lex.emit(ItemType.IssueNumber)
    elif not lex.input[lex.pos].isdigit():
        # Assume that 80th is just text and not a number
        lex.emit(ItemType.Text)
    else:
        lex.emit(ItemType.Number)

    return lex_filename


def is_space(character: str):
    return character in "_ \t"


# IsAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
def is_alpha_numeric(character: str):
    return character.isalpha() or character.isnumeric()


def is_operator(character: str):
    return character in "-|:;/\\"


def is_symbol(character: str):
    return unicodedata.category(character)[0] in "PS"


def Lex(filename: str):
    lex = Lexer(string=os.path.basename(filename))
    lex.run()
    return lex
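
A quick sketch of driving the new lexer on its own, using only the Lex() entry point added above; the sample filename is made up.

from comicapi import filenamelexer

# Tokenize a sample filename and print each Item (value, index, ItemType).
lex = filenamelexer.Lex("Amazing Adventures v2 #003 (of 6) (2022).cbz")
for item in lex.items:
    print(item)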

comicapi/filenameparser.py
@@ -23,8 +23,17 @@ This should probably be re-written, but, well, it mostly works!
 import logging
 import os
 import re
+from operator import itemgetter
+from typing import TypedDict
 from urllib.parse import unquote
+
+from text2digits import text2digits
+
+from comicapi import filenamelexer, issuestring
+
+t2d = text2digits.Text2Digits(add_ordinal_ending=False)
+t2do = text2digits.Text2Digits(add_ordinal_ending=True)
 
 logger = logging.getLogger(__name__)
 
 
@@ -68,9 +77,7 @@ class FileNameParser:
         if match:
             count = match.group()
 
-            count = count.lstrip("0")
-
-            return count
+            return count.lstrip("0")
 
     def get_issue_number(self, filename):
         """Returns a tuple of issue number string, and start and end indexes in the filename
@@ -222,7 +229,7 @@ class FileNameParser:
 
         year = ""
         # look for four digit number with "(" ")" or "--" around it
-        match = re.search(r"(\(\d\d\d\d\))|(--\d\d\d\d--)", filename)
+        match = re.search(r"(\(\d{4}\))|(--\d{4}--)", filename)
         if match:
             year = match.group()
             # remove non-digits
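
For illustration only, a small check of the tightened year pattern against made-up filenames:

import re

pattern = re.compile(r"(\(\d{4}\))|(--\d{4}--)")
print(pattern.search("Wonder Woman 49 DC Sep-Oct 1951 (1951)"))  # matches "(1951)"
print(pattern.search("Some Series --2004-- 003"))  # matches "--2004--"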

@@ -290,3 +297,814 @@ class FileNameParser:
            self.issue = "0"
        if self.issue[0] == ".":
            self.issue = "0" + self.issue


class FilenameInfo(TypedDict, total=False):
    alternate: str
    annual: bool
    archive: str
    c2c: bool
    fcbd: bool
    issue: str
    issue_count: str
    publisher: str
    remainder: str
    series: str
    title: str
    volume: str
    volume_count: str
    year: str


eof = filenamelexer.Item(filenamelexer.ItemType.EOF, -1, "")


class Parser:
    """docstring for FilenameParser"""

    def __init__(
        self,
        lexer_result: list[filenamelexer.Item],
        first_is_alt=False,
        remove_c2c=False,
        remove_fcbd=False,
        remove_publisher=False,
    ):
        self.state = None
        self.pos = -1

        self.firstItem = True
        self.skip = False
        self.alt = False
        self.filename_info: FilenameInfo = {"series": ""}
        self.issue_number_at = None
        self.in_something = 0  # In some sort of brackets {}[]()
        self.in_brace = 0  # In {}
        self.in_s_brace = 0  # In []
        self.in_paren = 0  # In ()
        self.year_candidates: list[tuple[bool, filenamelexer.Item]] = []
        self.series_parts: list[filenamelexer.Item] = []
        self.title_parts: list[filenamelexer.Item] = []
        self.used_items: list[filenamelexer.Item] = []
        self.irrelevant: list[filenamelexer.Item] = []
        self.operator_rejected: list[filenamelexer.Item] = []
        self.publisher_removed: list[filenamelexer.Item] = []

        self.first_is_alt = first_is_alt
        self.remove_c2c = remove_c2c
        self.remove_fcbd = remove_fcbd
        self.remove_publisher = remove_publisher

        self.input = lexer_result
        for i, item in enumerate(self.input):
            if item.typ == filenamelexer.ItemType.IssueNumber:
                self.issue_number_at = i

    # Get returns the next Item in the input.
    def get(self) -> filenamelexer.Item:
        if int(self.pos) >= len(self.input) - 1:
            self.pos += 1
            return eof

        self.pos += 1
        return self.input[self.pos]

    # Peek returns but does not consume the next Item in the input.
    def peek(self) -> filenamelexer.Item:
        if int(self.pos) >= len(self.input) - 1:
            return eof

        return self.input[self.pos + 1]

    # Peek_back returns but does not step back the previous Item in the input.
    def peek_back(self) -> filenamelexer.Item:
        if int(self.pos) == 0:
            return eof

        return self.input[self.pos - 1]

    # Backup steps back one Item.
    def backup(self):
        self.pos -= 1

    def run(self):
        self.state = parse
        while self.state is not None:
            self.state = self.state(self)


def parse(p: Parser):
    item: filenamelexer.Item = p.get()

    # We're done, time to do final processing
    if item.typ == filenamelexer.ItemType.EOF:
        return parse_finish

    # Need to figure out if this is the issue number
    if item.typ == filenamelexer.ItemType.Number:
        likely_year = False
        if p.firstItem and p.first_is_alt:
            # raise Exception("fuck you")
            p.alt = True
            return parse_issue_number

        # The issue number should hopefully not be in parentheses
        if p.in_something == 0:
            # Assume that operators indicate a non-issue number e.g. IG-88 or 88-IG
            if filenamelexer.ItemType.Operator not in (p.peek().typ, p.peek_back().typ):
                # It is common to use '89 to refer to an annual reprint from 1989
                if item.val[0] != "'":
                    # Issue number is less than 4 digits. very few series go above 999
                    if len(item.val.lstrip("0")) < 4:
                        # An issue number starting with # Was not found and no previous number was found
                        if p.issue_number_at is None:
                            # Series has already been started/parsed, filters out leading alternate numbers
                            if len(p.series_parts) > 0:
                                # Unset first item
                                if p.firstItem:
                                    p.firstItem = False
                                return parse_issue_number
            else:
                p.operator_rejected.append(item)
                # operator rejected used later to add back to the series/title

        # It is more likely to be a year if it is inside parentheses.
        if p.in_something > 0:
            likely_year = True

        # If numbers are directly followed by text it most likely isn't a year e.g. 2048px
        if p.peek().typ == filenamelexer.ItemType.Text:
            likely_year = False

        # Is either a full year '2001' or a short year "'89"
        if len(item.val) == 4 or item.val[0] == "'":
            if p.in_something == 0:
                # Append to series in case it is a part of the title, but only if were not inside parenthesis
                p.series_parts.append(item)

            # Look for a full date as in 2022-04-22
            if p.peek().typ in [
                filenamelexer.ItemType.Symbol,
                filenamelexer.ItemType.Operator,
                filenamelexer.ItemType.Dot,
            ]:
                op = [p.get()]
                if p.peek().typ == filenamelexer.ItemType.Number:
                    month = p.get()
                    if p.peek().typ in [
                        filenamelexer.ItemType.Symbol,
                        filenamelexer.ItemType.Operator,
                        filenamelexer.ItemType.Dot,
                    ]:
                        op.append(p.get())
                        if p.peek().typ == filenamelexer.ItemType.Number:
                            day = p.get()
                            fulldate = [month, day, item]
                            p.used_items.extend(op)
                            p.used_items.extend(fulldate)
                        else:
                            p.backup()
                            p.backup()
                            p.backup()
                            # TODO never happens
                    else:
                        p.backup()
                        p.backup()
                        # TODO never happens
                else:
                    p.backup()
                    # TODO never happens

            p.year_candidates.append((likely_year, item))
        # Ensures that IG-88 gets added back to the series/title
        elif (
            p.in_something == 0
            and p.peek_back().typ == filenamelexer.ItemType.Operator
            or p.peek().typ == filenamelexer.ItemType.Operator
        ):
            # Were not in something and the next or previous type is an operator, add it to the series
            p.series_parts.append(item)
            p.used_items.append(item)

            # Unset first item
            if p.firstItem:
                p.firstItem = False
            p.get()
            return parse_series

    # Number with a leading hash e.g. #003
    elif item.typ == filenamelexer.ItemType.IssueNumber:
        # Unset first item
        if p.firstItem:
            p.firstItem = False
        return parse_issue_number

    # Matches FCBD. Not added to p.used_items so it will show in "remainder"
    elif item.typ == filenamelexer.ItemType.FCBD:
        p.filename_info["fcbd"] = True

    # Matches c2c. Not added to p.used_items so it will show in "remainder"
    elif item.typ == filenamelexer.ItemType.C2C:
        p.filename_info["c2c"] = True

    # Matches the extension if it is known to be an archive format e.g. cbt,cbz,zip,rar
    elif item.typ == filenamelexer.ItemType.ArchiveType:
        p.filename_info["archive"] = item.val.lower()
        p.used_items.append(item)
        if p.peek_back().typ == filenamelexer.ItemType.Dot:
            p.used_items.append(p.peek_back())

    # Allows removing DC from 'Wonder Woman 49 DC Sep-Oct 1951' dependent on publisher being in a static list in the lexer
    elif item.typ == filenamelexer.ItemType.Publisher:
        p.filename_info["publisher"] = item.val
        p.used_items.append(item)
        if p.firstItem:
            p.firstItem = False
            if p.in_something == 0:
                return parse_series
        p.publisher_removed.append(item)
        if p.in_something == 0:
            return parse_series

    # Attempts to identify the type e.g. annual
    elif item.typ == filenamelexer.ItemType.ComicType:
        series_append = True

        if p.peek().typ == filenamelexer.ItemType.Space:
            p.get()

        if p.series_parts and "free comic book" in (" ".join([x.val for x in p.series_parts]) + " " + item.val).lower():
            p.filename_info["fcbd"] = True
            series_append = True
        # If the next item is a number it's probably the volume
        elif p.peek().typ == filenamelexer.ItemType.Number or (
            p.peek().typ == filenamelexer.ItemType.Text and t2d.convert(p.peek().val).isnumeric()
        ):
            number = p.get()
            # Mark volume info. Text will be added to the title/series later
            if item.val.lower() in ["book", "tpb"]:
                p.title_parts.extend([item, number])
                p.filename_info["volume"] = t2do.convert(number.val)
                p.filename_info["issue"] = t2do.convert(number.val)

                p.used_items.append(item)
                series_append = False

            # Annuals usually mean the year
            elif item.val.lower() in ["annual"]:
                p.filename_info["annual"] = True
                num = t2d.convert(number.val)
                if num.isnumeric() and len(num) == 4:
                    p.year_candidates.append((True, number))
                else:
                    p.backup()

        elif item.val.lower() in ["annual"]:
            p.filename_info["annual"] = True

        # If we don't have a reason to exclude it from the series go back to parsing the series immediately
        if series_append:
            p.series_parts.append(item)
            p.used_items.append(item)
            return parse_series

    # We found text, it's probably the title or series
    elif item.typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Honorific]:
        # Unset first item
        if p.firstItem:
            p.firstItem = False
        if p.in_something == 0:
            return parse_series

    # Usually the word 'of' eg 1 (of 6)
    elif item.typ == filenamelexer.ItemType.InfoSpecifier:
        return parse_info_specifier

    # Operator is a symbol that acts as some sort of separator eg - : ;
    elif item.typ == filenamelexer.ItemType.Operator:
        if p.in_something == 0:
            p.irrelevant.append(item)

    # Filter out Month and day names in filename
    elif item.typ == filenamelexer.ItemType.Calendar:
        # Month and day are currently irrelevant if they are inside parentheses e.g. (January 2002)
        if p.in_something > 0:
            p.irrelevant.append(item)

        # assume Sep-Oct is not useful in the series/title
        elif p.peek().typ in [filenamelexer.ItemType.Symbol, filenamelexer.ItemType.Operator]:
            p.get()
            if p.peek().typ == filenamelexer.ItemType.Calendar:
                p.irrelevant.extend([item, p.input[p.pos], p.get()])
            else:
                p.backup()
                return parse_series
        # This is text that just happens to also be a month/day
        else:
            return parse_series

    # Specifically '__' or '--', no further title/series parsing is done to keep compatibility with wiki
    elif item.typ == filenamelexer.ItemType.Skip:
        p.skip = True

    # Keeping track of parentheses depth
    elif item.typ == filenamelexer.ItemType.LeftParen:
        p.in_paren += 1
        p.in_something += 1
    elif item.typ == filenamelexer.ItemType.LeftBrace:
        p.in_brace += 1
        p.in_something += 1
    elif item.typ == filenamelexer.ItemType.LeftSBrace:
        p.in_s_brace += 1
        p.in_something += 1

    elif item.typ == filenamelexer.ItemType.RightParen:
        p.in_paren -= 1
        p.in_something -= 1
    elif item.typ == filenamelexer.ItemType.RightBrace:
        p.in_brace -= 1
        p.in_something -= 1
    elif item.typ == filenamelexer.ItemType.RightSBrace:
        p.in_s_brace -= 1
        p.in_something -= 1

    # Unset first item
    if p.firstItem:
        p.firstItem = False

    # Brace management, I don't like negative numbers
    if p.in_paren < 0:
        p.in_something += p.in_paren * -1
    if p.in_brace < 0:
        p.in_something += p.in_brace * -1
    if p.in_s_brace < 0:
        p.in_something += p.in_s_brace * -1

    return parse


# TODO: What about more esoteric numbers???
def parse_issue_number(p: Parser):
    item = p.input[p.pos]

    if "issue" in p.filename_info:
        if "alternate" in p.filename_info:
            p.filename_info["alternate"] += "," + item.val
        p.filename_info["alternate"] = item.val
    else:
        if p.alt:
            p.filename_info["alternate"] = item.val
        else:
            p.filename_info["issue"] = item.val
            p.issue_number_at = item.pos
        p.used_items.append(item)
        item = p.get()
        if item.typ == filenamelexer.ItemType.Dot:
            p.used_items.append(item)
            item = p.get()
            if item.typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Number]:
                if p.alt:
                    p.filename_info["alternate"] += "." + item.val
                else:
                    p.filename_info["issue"] += "." + item.val
                p.used_items.append(item)
            else:
                p.backup()
                p.backup()
        else:
            p.backup()
    p.alt = False
    return parse


def parse_series(p: Parser):
    item = p.input[p.pos]

    series: list[list[filenamelexer.Item]] = [[]]
    # Space and Dots are not useful at the beginning of a title/series
    if not p.skip and item.typ not in [filenamelexer.ItemType.Space, filenamelexer.ItemType.Dot]:
        series[0].append(item)

    current_part = 0

    title_parts: list[filenamelexer.Item] = []
    series_parts: list[filenamelexer.Item] = []

    prev_space = False

    # 'free comic book day' screws things up. #TODO look into removing book from ComicType?

    # We stop parsing the series when certain things come up if nothing was done with them continue where we left off
    if (
        p.series_parts
        and p.series_parts[-1].val.lower() == "book"
        or p.peek_back().typ == filenamelexer.ItemType.Number
        or item.typ == filenamelexer.ItemType.Calendar
    ):
        series_parts = p.series_parts
        p.series_parts = []
    # Skip is only true if we have come across '--' or '__'
    while not p.skip:
        item = p.get()

        # Spaces are evil
        if item.typ == filenamelexer.ItemType.Space:
            prev_space = True
            continue
        if item.typ in [
            filenamelexer.ItemType.Text,
            filenamelexer.ItemType.Symbol,
            filenamelexer.ItemType.Publisher,
            filenamelexer.ItemType.Honorific,
        ]:
            series[current_part].append(item)
            if item.typ == filenamelexer.ItemType.Honorific and p.peek().typ == filenamelexer.ItemType.Dot:
                series[current_part].append(p.get())
            elif item.typ == filenamelexer.ItemType.Publisher:
                p.filename_info["publisher"] = item.val

        # Handle Volume
        elif item.typ == filenamelexer.ItemType.InfoSpecifier:
            # Exception for 'of'
            if item.val.lower() == "of":
                series[current_part].append(item)
            else:
                # This specifically lets 'X-Men-V1-067' parse correctly as Series: X-Men Volume: 1 Issue: 67
                while len(series[current_part]) > 0 and series[current_part][-1].typ not in [
                    filenamelexer.ItemType.Text,
                    filenamelexer.ItemType.Symbol,
                ]:
                    p.irrelevant.append(series[current_part].pop())
                p.backup()
                break

        elif item.typ == filenamelexer.ItemType.Operator:
            peek = p.peek()
            # ': ' separates the title from the series, only the last section is considered the title
            if not prev_space and peek.typ in [filenamelexer.ItemType.Space]:
                series.append([])  # Starts a new section
                series[current_part].append(item)
                current_part += 1
            else:
                # Force space around '-' makes 'batman - superman' stay otherwise we get 'batman-superman'
                if prev_space and peek.typ in [filenamelexer.ItemType.Space]:
                    item.val = " " + item.val + " "
                series[current_part].append(item)

        # Stop processing series/title if a skip item is found
        elif item.typ == filenamelexer.ItemType.Skip:
            p.backup()
            break

        elif item.typ == filenamelexer.ItemType.Number:
            if p.peek().typ == filenamelexer.ItemType.Space:
                p.get()
                # We have 2 numbers, add the first to the series and then go back to parse
                if p.peek().typ == filenamelexer.ItemType.Number:
                    series[current_part].append(item)
                    break

                # We have 1 number break here, it's possible it's the issue
                p.backup()  # Whitespace
                p.backup()  # The number
                break
            # This is 6 in '1 of 6'
            if series[current_part] and series[current_part][-1].val.lower() == "of":
                series[current_part].append(item)

            # We have 1 number break here, it's possible it's the issue
            else:
                p.backup()  # The number
                break

        else:
            # Ensure 'ms. marvel' parses 'ms.' correctly
            if item.typ == filenamelexer.ItemType.Dot and p.peek_back().typ == filenamelexer.ItemType.Honorific:
                series[current_part].append(item)
            # Allows avengers.hulk to parse correctly
            elif item.typ == filenamelexer.ItemType.Dot and p.peek().typ == filenamelexer.ItemType.Text:
                # Marks the dot as used so that the remainder is clean
                p.used_items.append(item)
            else:
                p.backup()
                break

        prev_space = False

    # We have a title separator e.g. ': '
    if len(series) > 1:
        title_parts.extend(series.pop())
        for s in series:
            if s and s[-1].typ == filenamelexer.ItemType.Operator:
                s[-1].val += " "  # Ensures that when there are multiple separators that they display properly
            series_parts.extend(s)
        p.used_items.append(series_parts.pop())
    else:
        series_parts.extend(series[0])

    # If the series has already been set assume all of this is the title.
    if len(p.series_parts) > 0:
        p.title_parts.extend(series_parts)
        p.title_parts.extend(title_parts)
    else:
        p.series_parts.extend(series_parts)
        p.title_parts.extend(title_parts)
    return parse


def resolve_year(p: Parser):
    if len(p.year_candidates) > 0:
        # Sort by likely_year boolean
        p.year_candidates.sort(key=itemgetter(0))

        # Take the last year e.g. (2007) 2099 (2008) becomes 2099 2007 2008 and takes 2008
        selected_year = p.year_candidates.pop()[1]

        p.filename_info["year"] = selected_year.val
        p.used_items.append(selected_year)

        # (2008) Title (2009) is many times used to denote the series year if we don't have a volume we use it
        if "volume" not in p.filename_info and p.year_candidates and p.year_candidates[-1][0]:
            vol = p.year_candidates.pop()[1]
            p.filename_info["volume"] = vol.val
            p.used_items.append(vol)

            # Remove volume from series and title
            if selected_year in p.series_parts:
                p.series_parts.remove(selected_year)
            if selected_year in p.title_parts:
                p.title_parts.remove(selected_year)

        # Remove year from series and title
        if selected_year in p.series_parts:
            p.series_parts.remove(selected_year)
        if selected_year in p.title_parts:
            p.title_parts.remove(selected_year)


def parse_finish(p: Parser):
    resolve_year(p)

    # If we don't have an issue try to find it in the series
    if "issue" not in p.filename_info and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number:
        issue_num = p.series_parts.pop()

        # If the number we just popped is a year put it back on it's probably part of the series e.g. Spider-Man 2099
        if issue_num in [x[1] for x in p.year_candidates]:
            p.series_parts.append(issue_num)
        else:
            # If this number was rejected because of an operator and the operator is still there add it back e.g. 'IG-88'
            if (
                issue_num in p.operator_rejected
                and p.series_parts
                and p.series_parts[-1].typ == filenamelexer.ItemType.Operator
            ):
                p.series_parts.append(issue_num)
            # We have no reason to not use this number as the issue number. Specifically happens when parsing 'X-Men-V1-067.cbr'
            else:
                p.filename_info["issue"] = issue_num.val
                p.used_items.append(issue_num)
                p.issue_number_at = issue_num.pos

    # Remove publishers, currently only marvel and dc are defined,
    # this is an option specifically because this can drastically screw up parsing
    if p.remove_publisher:
        for item in p.publisher_removed:
            if item in p.series_parts:
                p.series_parts.remove(item)
            if item in p.title_parts:
                p.title_parts.remove(item)

    p.filename_info["series"] = join_title(p.series_parts)
    p.used_items.extend(p.series_parts)

    p.filename_info["title"] = join_title(p.title_parts)
    p.used_items.extend(p.title_parts)

    if "issue" in p.filename_info:
        p.filename_info["issue"] = issuestring.IssueString(p.filename_info["issue"].lstrip("#")).as_string()

    if "volume" in p.filename_info:
        p.filename_info["volume"] = p.filename_info["volume"].lstrip("#").lstrip("0")

    if "issue" not in p.filename_info:
        # We have an alternate move it to the issue
        if "alternate" in p.filename_info:
            p.filename_info["issue"] = p.filename_info["alternate"]
            p.filename_info["alternate"] = ""
        else:
            # TODO: This never happens
            inp = [x for x in p.input if x not in p.irrelevant and x not in p.used_items and x.typ != eof.typ]
            if len(inp) == 1 and inp[0].typ == filenamelexer.ItemType.Number:
                p.filename_info["issue"] = inp[0].val
                p.used_items.append(inp[0])

    remove_items = []
    if p.remove_fcbd:
        remove_items.append(filenamelexer.ItemType.FCBD)
    if p.remove_c2c:
        remove_items.append(filenamelexer.ItemType.C2C)

    p.irrelevant.extend([x for x in p.input if x.typ in remove_items])

    p.filename_info["remainder"] = get_remainder(p)

    # Ensure keys always exist
    for s in [
        "alternate",
        "issue",
        "archive",
        "series",
        "title",
        "volume",
        "year",
        "remainder",
        "issue_count",
        "volume_count",
        "publisher",
    ]:
        if s not in p.filename_info:
            p.filename_info[s] = ""
    for s in ["fcbd", "c2c", "annual"]:
        if s not in p.filename_info:
            p.filename_info[s] = False


def get_remainder(p: Parser):
    remainder = ""
    rem = []

    # Remove used items and irrelevant items e.g. the series and useless operators
    inp = [x for x in p.input if x not in p.irrelevant and x not in p.used_items]
    for i, item in enumerate(inp):
        # No double space or space next to parentheses
        if item.typ in [filenamelexer.ItemType.Space, filenamelexer.ItemType.Skip]:
            if (
                i > 0
                and inp[i - 1].typ
                not in [
                    filenamelexer.ItemType.Space,
                    filenamelexer.ItemType.LeftBrace,
                    filenamelexer.ItemType.LeftParen,
                    filenamelexer.ItemType.LeftSBrace,
                ]
                and i + 1 < len(inp)
                and inp[i + 1].typ
                not in [
                    filenamelexer.ItemType.RightBrace,
                    filenamelexer.ItemType.RightParen,
                    filenamelexer.ItemType.RightSBrace,
                ]
            ):
                remainder += " "

        # Strip off useless opening parenthesis
        elif (
            item.typ
            in [
                filenamelexer.ItemType.Space,
                filenamelexer.ItemType.RightBrace,
                filenamelexer.ItemType.RightParen,
                filenamelexer.ItemType.RightSBrace,
            ]
            and i > 0
            and inp[i - 1].typ
            in [
                filenamelexer.ItemType.LeftBrace,
                filenamelexer.ItemType.LeftParen,
                filenamelexer.ItemType.LeftSBrace,
            ]
        ):
            remainder = remainder.rstrip("[{(")
            continue

        # Add the next item
        else:
            rem.append(item)
            remainder += item.val

    # Remove empty parentheses
    remainder = re.sub(r"[\[{(]+[]})]+", "", remainder)
    return remainder.strip()


def parse_info_specifier(p: Parser):
    item = p.input[p.pos]
    index = p.pos

    if p.peek().typ == filenamelexer.ItemType.Space:
        p.get()

    # Handles 'book 3' and 'book three'
    if p.peek().typ == filenamelexer.ItemType.Number or (
        p.peek().typ == filenamelexer.ItemType.Text and t2d.convert(p.peek().val).isnumeric()
    ):

        number = p.get()
        if item.val.lower() in ["volume", "vol", "vol.", "v"]:
            p.filename_info["volume"] = t2do.convert(number.val)
            p.used_items.append(item)
            p.used_items.append(number)

        # 'of' is only special if it is inside a parenthesis.
        elif item.val.lower() == "of":
            i = get_number(p, index)
            if p.in_something > 0:
                if p.issue_number_at is None:
                    # TODO: Figure out what to do here if it ever happens
                    p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(item)
                    p.used_items.append(number)

                # This is definitely the issue number
                elif p.issue_number_at == i.pos:
                    p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(item)
                    p.used_items.append(number)

                # This is not for the issue number it is not in either the issue or the title, assume it is the volume number and count
                elif p.issue_number_at != i.pos and i not in p.series_parts and i not in p.title_parts:
                    p.filename_info["volume"] = i.val
                    p.filename_info["volume_count"] = str(int(t2do.convert(number.val)))
                    p.used_items.append(i)
                    p.used_items.append(item)
                    p.used_items.append(number)
                else:
                    # TODO: Figure out what to do here if it ever happens
                    pass
            else:
                # Lets 'The Wrath of Foobar-Man, Part 1 of 2' parse correctly as the title
                if i is not None:
                    p.pos = [ind for ind, x in enumerate(p.input) if x == i][0]

    if not p.in_something:
        return parse_series
    return parse


# Gets 03 in '03 of 6'
def get_number(p: Parser, index: int):
    # Go backward through the filename to see if we can find what this is of eg '03 (of 6)' or '008 title 03 (of 6)'
    rev = p.input[:index]
    rev.reverse()
    for i in rev:
        # We don't care about these types, we are looking to see if there is a number that is possibly different from the issue number for this count
        if i.typ in [
            filenamelexer.ItemType.LeftParen,
            filenamelexer.ItemType.LeftBrace,
            filenamelexer.ItemType.LeftSBrace,
            filenamelexer.ItemType.Space,
        ]:
            continue
        if i.typ == filenamelexer.ItemType.Number:
            # We got our number, time to leave
            return i
        # This is not a number and not an ignorable type, give up looking for the number this count belongs to

    return None


def join_title(lst: list[filenamelexer.Item]):
    title = ""
    for i, item in enumerate(lst):
        if i + 1 == len(lst) and item.val == ",":  # We ignore commas on the end
            continue
        title += item.val  # Add the next item
        # No space after operators
        if item.typ == filenamelexer.ItemType.Operator:
            continue
        # No trailing space
        if i == len(lst) - 1:
            continue
        # No space after honorifics with a dot
        if item.typ == filenamelexer.ItemType.Honorific and lst[i + 1].typ == filenamelexer.ItemType.Dot:
            continue
        # No space if the next item is an operator or symbol
        if lst[i + 1].typ in [
            filenamelexer.ItemType.Operator,
            filenamelexer.ItemType.Symbol,
        ]:
            continue

        # Add a space
        title += " "

    return title


def Parse(
    lexer_result: list[filenamelexer.Item],
    first_is_alt=False,
    remove_c2c=False,
    remove_fcbd=False,
    remove_publisher=False,
):
    p = Parser(
        lexer_result=lexer_result,
        first_is_alt=first_is_alt,
        remove_c2c=remove_c2c,
        remove_fcbd=remove_fcbd,
        remove_publisher=remove_publisher,
    )
    p.run()
    return p
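
A sketch of the full pipeline the new ComicArchive.metadata_from_filename drives when complicated_parser is enabled: lex the filename, parse the token stream, then read filename_info. The sample name is taken from a comment in the parser above.

from comicapi import filenamelexer, filenameparser

lex = filenamelexer.Lex("X-Men-V1-067.cbr")
p = filenameparser.Parse(lex.items, remove_c2c=False, remove_fcbd=False, remove_publisher=False)
print(p.filename_info["series"], p.filename_info["volume"], p.filename_info["issue"])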