Merge branch 'lexNumbers' into develop
commit b23c3195e3
@@ -30,10 +30,10 @@ class ItemType(Enum):
    InfoSpecifier = auto()  # Specifies type of info e.g. v1 for 'volume': 1
    ArchiveType = auto()
    Honorific = auto()
    Publisher = auto()
    Keywords = auto()
    FCBD = auto()
    ComicType = auto()
    Publisher = auto()
    C2C = auto()

@@ -130,17 +130,25 @@ class Lexer:
        self.start = self.pos

    # Accept consumes the next rune if it's from the valid set.
    def accept(self, valid: str) -> bool:
        if self.get() in valid:
            return True
    def accept(self, valid: str | Callable[[str], bool]) -> bool:
        if isinstance(valid, str):
            if self.get() in valid:
                return True
        else:
            if valid(self.get()):
                return True

        self.backup()
        return False

    # AcceptRun consumes a run of runes from the valid set.
    def accept_run(self, valid: str) -> None:
        while self.get() in valid:
            continue
    def accept_run(self, valid: str | Callable[[str], bool]) -> None:
        if isinstance(valid, str):
            while self.get() in valid:
                continue
        else:
            while valid(self.get()):
                continue

        self.backup()
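
Editor's note: accept() and accept_run() now take either a character set (str) or a predicate such as str.isalpha. A minimal, self-contained sketch of that pattern (hypothetical MiniLexer, not the module's real Lexer):

from __future__ import annotations

from typing import Callable


# Editor's sketch of the str-or-predicate pattern used by accept()/accept_run() above;
# names and behaviour are simplified and hypothetical, not the module's real Lexer.
class MiniLexer:
    def __init__(self, text: str) -> None:
        self.input = text
        self.pos = 0

    def get(self) -> str:
        ch = self.input[self.pos] if self.pos < len(self.input) else "\0"
        self.pos += 1
        return ch

    def backup(self) -> None:
        self.pos -= 1

    def accept(self, valid: str | Callable[[str], bool]) -> bool:
        # A plain string is treated as a set of characters, anything else as a predicate
        ok = self.get() in valid if isinstance(valid, str) else valid(self.get())
        if ok:
            return True
        self.backup()
        return False

    def accept_run(self, valid: str | Callable[[str], bool]) -> None:
        while self.accept(valid):
            continue


lex = MiniLexer("80th ")
lex.accept_run("0123456789")  # consumes "80"
lex.accept_run(str.isalpha)   # consumes "th" via the predicate form
print(lex.input[:lex.pos])    # -> "80th"
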
@@ -150,9 +158,7 @@ class Lexer:
        self.accept_run(digits)
        if self.input[self.pos] == ".":
            self.backup()
        while self.get().isalpha():
            ...
        self.backup()
        self.accept_run(str.isalpha)

        return True
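
Editor's note: with accept_run(str.isalpha) the number scanner now folds a trailing alphabetic suffix into the token. A rough stand-alone illustration (hypothetical helper, not the real scan_number):

# Editor's illustration (hypothetical helper, not the module's scan_number): the scan
# accepts a run of digits (plus '.' and ',') and then any alphabetic suffix, so tokens
# like "80th", "0.5", or "003a" are kept together as one candidate number.
def scan_number_like(text: str) -> str:
    i = 0
    while i < len(text) and text[i] in "0123456789.,":
        i += 1
    if i and text[i - 1] == ".":  # drop a trailing dot, mirroring the backup() above
        i -= 1
    while i < len(text) and text[i].isalpha():
        i += 1
    return text[:i]


assert scan_number_like("80th Anniversary") == "80th"
assert scan_number_like("0.5 ") == "0.5"
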
@@ -189,14 +195,17 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None:  # ty
            return lex_space
        elif r == ".":
            r = lex.peek()
            if r.isnumeric() and lex.pos > 0 and is_space(lex.input[lex.pos - 1]):
                return lex_number
            lex.emit(ItemType.Dot)
            return lex_filename
        elif r == "'":
            r = lex.peek()
            if r.isdigit():
                return lex_number
            lex.emit(ItemType.Text)  # TODO: Change to Text
        elif "0" <= r <= "9":
            lex.accept_run(is_symbol)
            lex.emit(ItemType.Symbol)
        elif r.isnumeric():
            lex.backup()
            return lex_number
        elif r == "#":
@@ -241,13 +250,28 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None:  # ty
        if lex.sbrace_depth < 0:
            return errorf(lex, "unexpected right brace " + r)
        elif is_symbol(r):
            if unicodedata.category(r) == "Sc":
                return lex_currency
            lex.accept_run(is_symbol)
            lex.emit(ItemType.Symbol)
        else:
            return errorf(lex, "unrecognized character in action: " + r)
            return errorf(lex, "unrecognized character in action: " + repr(r))

    return lex_filename


def lex_currency(lex: Lexer) -> Callable:
    orig = lex.pos
    lex.accept_run(is_space)
    if lex.peek().isnumeric():
        return lex_number
    else:
        lex.pos = orig
        # We don't have a number with this currency symbol. Don't treat it specially
        lex.emit(ItemType.Symbol)
    return lex_filename
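
Editor's note: a minimal sketch of the decision lex_currency makes, assuming only that a currency symbol is Unicode category "Sc" (illustrative, not the module's code):

import unicodedata


# Editor's sketch of lex_currency's choice above: a currency symbol defers to number
# lexing when a digit follows (possibly after spaces), otherwise it is just a symbol.
def classify_currency(text: str, pos: int) -> str:
    assert unicodedata.category(text[pos]) == "Sc"
    look = pos + 1
    while look < len(text) and text[look].isspace():  # skip any spaces, like accept_run(is_space)
        look += 1
    if look < len(text) and text[look].isnumeric():
        return "part of a number"
    return "plain symbol"


print(classify_currency("$ 1.99", 0))          # -> part of a number
print(classify_currency("Spider-Man $", 11))   # -> plain symbol
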
def lex_operator(lex: Lexer) -> Callable:  # type: ignore[type-arg]
    lex.accept_run("-|:;")
    lex.emit(ItemType.Operator)

@@ -257,8 +281,7 @@ def lex_operator(lex: Lexer) -> Callable:  # type: ignore[type-arg]
# LexSpace scans a run of space characters.
# One space has already been seen.
def lex_space(lex: Lexer) -> Callable:  # type: ignore[type-arg]
    while is_space(lex.peek()):
        lex.get()
    lex.accept_run(is_space)

    lex.emit(ItemType.Space)
    return lex_filename
@@ -315,7 +338,37 @@ def lex_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None:  # type
        # Assume that 80th is just text and not a number
        lex.emit(ItemType.Text)
    else:
        lex.emit(ItemType.Number)
        # Used to check for a '$'
        endNumber = lex.pos

        # Consume any spaces
        lex.accept_run(is_space)

        # This number starts with a '$'; emit it as Text instead of a Number
        if "Sc" == unicodedata.category(lex.input[lex.start]):
            lex.pos = endNumber
            lex.emit(ItemType.Text)

        # This number ends in a '$'; if there is a number on the other side we assume the '$' belongs to the following number
        elif "Sc" == unicodedata.category(lex.get()):
            # Store the end of the '$'. We still need to check to see if there is a number coming up
            endCurrency = lex.pos
            # Consume any spaces
            lex.accept_run(is_space)

            # This is a number
            if lex.peek().isnumeric():
                # We go back to the original number before the '$' and emit a number
                lex.pos = endNumber
                lex.emit(ItemType.Number)
            else:
                # There was no following number; reset to the '$' and emit as text
                lex.pos = endCurrency
                lex.emit(ItemType.Text)
        else:
            # We go back to the original number; there is no '$'
            lex.pos = endNumber
            lex.emit(ItemType.Number)

    return lex_filename
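
Editor's note: a hedged paraphrase of the new '$' handling above as a stand-alone function (illustrative only; the real lexer works on positions in the input, not separate strings):

import unicodedata


def is_currency(ch: str) -> bool:
    return bool(ch) and unicodedata.category(ch) == "Sc"


# Editor's paraphrase of the branch above: given the scanned number token and the text
# that follows it, decide what gets emitted.
def classify_scanned_number(token: str, rest: str) -> str:
    if is_currency(token[0]):
        return "Text"        # "$1.50" is a price, not an issue/volume number
    trailing = rest.lstrip(" ")
    if trailing and is_currency(trailing[0]):
        after_currency = trailing[1:].lstrip(" ")
        if after_currency and after_currency[0].isnumeric():
            return "Number"  # the '$' belongs to the next number, e.g. "2 $1.99"
        return "Text"        # a number with a lone trailing '$' is emitted together as text
    return "Number"


print(classify_scanned_number("$1.50", ""))         # Text
print(classify_scanned_number("2", " $1.99 ..."))   # Number
print(classify_scanned_number("3", " $"))           # Text
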
@@ -323,21 +376,13 @@
def lex_issue_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None:  # type: ignore[type-arg]
    # Only called when lex.input[lex.start] == "#"
    original_start = lex.pos
    found_number = False
    while True:
        r = lex.get()
        if is_alpha_numeric(r):
            if r.isnumeric():
                found_number = True
        else:
            lex.backup()
            break
    lex.accept_run(str.isalpha)

    if not found_number:
        if lex.peek().isnumeric():
            return lex_number
        else:
            lex.pos = original_start
            lex.emit(ItemType.Symbol)
    else:
        lex.emit(ItemType.IssueNumber)

    return lex_filename
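
Editor's note: a simplified sketch of the '#' handling (lex_issue_number plus lex_number's IssueNumber emit), not the real lexer:

# Editor's illustration: a '#' followed (possibly after letters) by a digit becomes an
# issue number, otherwise the '#' is treated as an ordinary symbol.
def classify_hash(text: str) -> str:
    assert text.startswith("#")
    i = 1
    while i < len(text) and text[i].isalpha():
        i += 1
    if i < len(text) and text[i].isnumeric():
        return "IssueNumber"
    return "Symbol"


print(classify_hash("#025"))       # IssueNumber
print(classify_hash("#HS01"))      # IssueNumber (letters then digits)
print(classify_hash("# Batman"))   # Symbol
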
@@ -20,6 +20,8 @@ This should probably be re-written, but, well, it mostly works!
# http://code.google.com/p/pycomicmetathis/
from __future__ import annotations

import functools
import itertools
import logging
import os
import re
@@ -389,6 +391,7 @@ class Parser:
        self.in_s_brace = 0  # In []
        self.in_paren = 0  # In ()
        self.year_candidates: list[tuple[bool, bool, filenamelexer.Item]] = []
        self.series: list[list[filenamelexer.Item]] = []
        self.series_parts: list[filenamelexer.Item] = []
        self.title_parts: list[filenamelexer.Item] = []
        self.used_items: list[filenamelexer.Item] = []
@@ -424,11 +427,11 @@ class Parser:
        return self.input[self.pos]

    # Peek returns but does not consume the next Item in the input.
    def peek(self) -> filenamelexer.Item:
        if int(self.pos) >= len(self.input) - 1:
    def peek(self, length: int = 1) -> filenamelexer.Item:
        if int(self.pos) + length >= len(self.input):
            return eof

        return self.input[self.pos + 1]
        return self.input[self.pos + length]

    # Peek_back returns but does not step back the previous Item in the input.
    def peek_back(self, length: int = 1) -> filenamelexer.Item:
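
Editor's note: peek() now takes an optional length for bounded look-ahead. A tiny illustration with plain strings standing in for Items (EOF stands in for the module's eof sentinel):

EOF = "<eof>"


def peek(items: list[str], pos: int, length: int = 1) -> str:
    if pos + length >= len(items):
        return EOF
    return items[pos + length]


tokens = ["Text", "Space", "Number", "<eof>"]
print(peek(tokens, 0))      # "Space"  (one ahead, the default)
print(peek(tokens, 0, 2))   # "Number" (two ahead)
print(peek(tokens, 0, 5))   # "<eof>"
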
@@ -473,7 +476,7 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ign
        if p.issue_number_at is None:
            # Series has already been started/parsed,
            # filters out leading alternate numbers
            if len(p.series_parts) > 0:
            if len(p.series) > 0:
                return parse_issue_number
        else:
            p.operator_rejected.append(item)
@@ -490,12 +493,9 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ign

        # Is either a full year '2001' or a short year "'89"
        if len(item.val.lstrip("0")) == 4 or item.val[0] == "'":
            series = " ".join([x.val for x in p.series_parts])
            if p.series_parts and series.casefold().endswith("free comic book day"):
            series = " ".join([x.val for x in (p.series[-1] if p.series else [])])
            if p.series and series.casefold().endswith("free comic book day"):
                likely_issue_number = False
            if p.in_something == 0:
                # Append to series in case it is a part of the title, but only if we're not inside parentheses
                p.series_parts.append(item)

            # Look for a full date as in 2022-04-22
            if p.peek().typ in [
@@ -533,24 +533,19 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ign

            likely_issue_number = likely_issue_number and item.val[0] != "'"
            p.year_candidates.append((likely_year, likely_issue_number, item))
            if p.in_something == 0:
                # Append to series in case it is a part of the title, but only if we're not inside parentheses
                if not p.series:
                    p.series.append([])
                p.series[-1].append(item)

                # We would use i=item but we want to force a split after year candidates
                return functools.partial(parse_series, i=None)
        # Ensures that IG-88 gets added back to the series/title
        else:
            if p.in_something == 0:
                if p.peek_back().typ in (filenamelexer.ItemType.IssueNumber, filenamelexer.ItemType.Number) or (
                    p.peek_back().typ == filenamelexer.ItemType.Space
                    and p.peek_back(2).typ in (filenamelexer.ItemType.IssueNumber, filenamelexer.ItemType.Number)
                ):
                    return parse_series
                if (
                    p.peek_back().typ == filenamelexer.ItemType.Operator
                    or p.peek().typ == filenamelexer.ItemType.Operator
                ):
                    # We're not in something and the next or previous type is an operator, add it to the series
                    p.series_parts.append(item)
                    p.used_items.append(item)

                    p.get()
                    return parse_series
                # We're not in something, add it to the series
                return functools.partial(parse_series, i=item)

    # Number with a leading hash e.g. #003
    elif item.typ == filenamelexer.ItemType.IssueNumber:
@@ -583,10 +578,10 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ign
        if p.firstItem:
            p.firstItem = False
            if p.in_something == 0:
                return parse_series
            return functools.partial(parse_series, i=item)
        p.publisher_removed.append(item)
        if p.in_something == 0:
            return parse_series
            return functools.partial(parse_series, i=item)

    # Attempts to identify the type e.g. annual
    elif item.typ == filenamelexer.ItemType.ComicType:
@@ -601,7 +596,7 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ign
        number = p.get()
        # Mark volume info. Text will be added to the title/series later
        if item.val.casefold() in ["tpb"]:
            p.title_parts.extend([item, number])
            # p.title_parts.extend([item, number])
            p.filename_info["volume"] = t2do.convert(number.val)
            p.filename_info["issue"] = t2do.convert(number.val)

@@ -622,19 +617,19 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ign

        # If we don't have a reason to exclude it from the series go back to parsing the series immediately
        if series_append:
            p.series_parts.append(item)
            p.used_items.append(item)
            if p.firstItem:
                p.firstItem = False
            return parse_series
            return functools.partial(parse_series, i=item)

    # We found text, it's probably the title or series
    elif item.typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Honorific]:
        # Unset first item
        if p.firstItem:
            p.firstItem = False
            if p.in_something == 0:
                return parse_series
        if p.in_something == 0 and not p.skip:
            p.backup()
            return functools.partial(parse_series, i=None)

    # Usually the word 'of' e.g. 1 (of 6)
    elif item.typ == filenamelexer.ItemType.InfoSpecifier:
@@ -662,15 +657,13 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ign
            p.backup()
            if p.firstItem:
                p.firstItem = False
                return parse_series
                return functools.partial(parse_series, i=item)
        # This is text that just happens to also be a month/day
        else:
            # Add this to the series and get the next item, parse_series expects the next item to be the current item
            p.series_parts.append(item)
            p.get()
            if p.firstItem:
                p.firstItem = False
                return parse_series
            return functools.partial(parse_series, i=item)

    # Specifically '__' or '--', no further title/series parsing is done to keep compatibility with wiki
    elif item.typ == filenamelexer.ItemType.Skip:
@@ -745,26 +738,29 @@ def parse_issue_number(p: Parser) -> Callable[[Parser], Callable | None] | None:
    return parse


def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ignore[type-arg]
    item = p.input[p.pos]
    current_part = 0
# i=None is a split in the series
def parse_series(p: Parser, i: filenamelexer.Item | None) -> Callable[[Parser], Callable | None] | None:
    current = []
    prev_space = False

    title_parts: list[filenamelexer.Item] = []
    series_parts: list[filenamelexer.Item] = []
    series: list[list[filenamelexer.Item]] = [[]]
    issue_marked_or_passed = (
        p.issue_number_marked and p.issue_number_passed or p.issue_number_at is not None and not p.issue_number_marked
    )

    # We stop parsing the series when certain things come up; if nothing was done with them, continue where we left off
    if p.peek_back().typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.Calendar]:
        series_parts = p.series_parts
        p.series_parts = []
    if i:
        if not issue_marked_or_passed:
            if p.series:
                current = p.series.pop()
        current.append(i)
    else:
        # If we are splitting we don't want to start with these
        while p.peek().typ in [
            filenamelexer.ItemType.Space,
            filenamelexer.ItemType.Operator,
            filenamelexer.ItemType.Symbol,
        ]:
            p.irrelevant.append(p.get())

    # Space and Dots are not useful at the beginning of a title/series
    if not p.skip and item.typ not in [filenamelexer.ItemType.Space, filenamelexer.ItemType.Dot]:
        if item.typ == filenamelexer.ItemType.Text:
            p.backup()
        else:
            series[0].append(item)
    # Skip is only true if we have come across '--' or '__'
    while not p.skip:
        item = p.get()
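
Editor's note: p.series now stores the series as a list of sections rather than a single flat list; parse_series(i=None) forces a new section, while i=<item> continues the current one, and split_series() later decides which sections form the series and which form the title. A tiny illustration using the 'bloodshot book 3' example from the comments in this commit:

# Editor's illustration only: plain strings stand in for filenamelexer.Item objects.
series: list[list[str]] = [["bloodshot"], ["book", "3"]]
print([" ".join(part) for part in series])
# -> ['bloodshot', 'book 3']
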
@@ -779,14 +775,14 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty
            filenamelexer.ItemType.Publisher,
            filenamelexer.ItemType.Honorific,
        ]:
            series[current_part].append(item)
            current.append(item)
            if p.peek().typ == filenamelexer.ItemType.Dot:
                dot = p.get()
                if item.typ == filenamelexer.ItemType.Honorific or (
                    p.peek().typ == filenamelexer.ItemType.Space
                    and item.typ in (filenamelexer.ItemType.Text, filenamelexer.ItemType.Publisher)
                ):
                    series[current_part].append(dot)
                    current.append(dot)
                else:
                    p.backup()
            if item.typ == filenamelexer.ItemType.Publisher:
@@ -796,14 +792,14 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty
        elif item.typ == filenamelexer.ItemType.InfoSpecifier:
            # Exception for 'of'
            if item.val.casefold() == "of":
                series[current_part].append(item)
                current.append(item)
            else:
                # This specifically lets 'X-Men-V1-067' parse correctly as Series: X-Men Volume: 1 Issue: 67
                while len(series[current_part]) > 0 and series[current_part][-1].typ not in [
                while len(current) > 0 and current[-1].typ not in [
                    filenamelexer.ItemType.Text,
                    filenamelexer.ItemType.Symbol,
                ]:
                    p.irrelevant.append(series[current_part].pop())
                    p.irrelevant.append(current.pop())
                p.backup()
                break

@@ -811,14 +807,13 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty
            peek = p.peek()
            # ': ' separates the title from the series, only the last section is considered the title
            if not prev_space and peek.typ in [filenamelexer.ItemType.Space]:
                series.append([])  # Starts a new section
                series[current_part].append(item)
                current_part += 1
                current.append(item)
                break
            else:
                # Force space around '-'; makes 'batman - superman' stay, otherwise we get 'batman-superman'
                if prev_space and peek.typ in [filenamelexer.ItemType.Space]:
                    item.val = " " + item.val + " "
                series[current_part].append(item)
                current.append(item)

        # Stop processing series/title if a skip item is found
        elif item.typ == filenamelexer.ItemType.Skip:
@@ -827,14 +822,18 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty

        elif item.typ == filenamelexer.ItemType.Number:
            # Special case for the word 'book'
            if series[current_part] and series[current_part][-1].val.casefold() == "book":
                title_parts.append(series[current_part].pop())
                title_parts.append(item)
            if current and current[-1].val.casefold() == "book":
                # Mark the volume
                p.filename_info["volume"] = t2do.convert(item.val)

                # Add this section to the series e.g. [['bloodshot', 'book']]
                p.series.append(current)
                # Pop the last item and break to end this section e.g. [['bloodshot'], ['book', '3']]
                current = [current.pop(), item]
                break

            count = get_number(p, p.pos + 1)
            # this is an issue or volume number
            # this is an issue or volume number e.g. '1 of 2'
            if count is not None:
                p.backup()
                break
@@ -843,18 +842,13 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty
                p.get()
                # We have 2 numbers, add the first to the series and then go back to parse
                if p.peek().typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber]:
                    series[current_part].append(item)
                    current.append(item)
                    break

                # the issue number has been marked and passed, keep it as a part of the series
                if (
                    p.issue_number_marked
                    and p.issue_number_passed
                    or p.issue_number_at is not None
                    and not p.issue_number_marked
                ):
                if issue_marked_or_passed:
                    # We already have an issue number, this should be a part of the series
                    series[current_part].append(item)
                    current.append(item)
                else:
                    # We have 1 number break here, it's possible it's the issue
                    p.backup()  # Whitespace
@@ -864,14 +858,9 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty
                # We have 1 number break here, it's possible it's the issue
                else:
                    # the issue number has been marked or passed, keep it as a part of the series
                    if (
                        p.issue_number_marked
                        and p.issue_number_passed
                        or p.issue_number_at is not None
                        and not p.issue_number_marked
                    ):
                    if issue_marked_or_passed:
                        # We already have an issue number, this should be a part of the series
                        series[current_part].append(item)
                        current.append(item)
                    else:
                        p.backup()  # The number
                        break
@@ -880,13 +869,13 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty
        # Ensure 'ms. marvel' parses 'ms.' correctly
        if item.typ == filenamelexer.ItemType.Dot:
            if p.peek_back().typ == filenamelexer.ItemType.Honorific:
                series[current_part].append(item)
                current.append(item)
            elif (
                p.peek().typ == filenamelexer.ItemType.Number
                or p.peek_back().typ == filenamelexer.ItemType.Text
                and len(p.peek_back().val) == 1
            ):
                series[current_part].append(item)
                current.append(item)
                item.no_space = True
            # Allows avengers.hulk to parse correctly
            elif p.peek().typ in (filenamelexer.ItemType.Text,):
@@ -898,24 +887,7 @@ def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty

        prev_space = False

    # We have a title separator e.g. ': '
    if len(series) > 1:
        title_parts.extend(series.pop())
        for s in series:
            if s and s[-1].typ == filenamelexer.ItemType.Operator:
                s[-1].val += " "  # Ensures that when there are multiple separators they display properly
            series_parts.extend(s)
        p.used_items.append(series_parts.pop())
    else:
        series_parts.extend(series[0])

    # If the series has already been set assume all of this is the title.
    if len(p.series_parts) > 0:
        p.title_parts.extend(series_parts)
        p.title_parts.extend(title_parts)
    else:
        p.series_parts.extend(series_parts)
        p.title_parts.extend(title_parts)
    p.series.append(current)
    return parse

@@ -1017,7 +989,38 @@ def resolve_issue(p: Parser) -> None:
            p.filename_info["format"] = protofolius_issue_number_scheme[p.filename_info["issue"][0].upper()]


def split_series(items: list[list[filenamelexer.Item]]) -> tuple[list[filenamelexer.Item], list[filenamelexer.Item]]:
    series_parts: list[list[filenamelexer.Item]] = []
    title_parts: list[list[filenamelexer.Item]] = []
    current = series_parts
    # We probably have a title
    if len(items) > 1:
        for i, s in enumerate(items):
            # Switch to title if we are on the last part
            if i == len(items) - 1:
                current = title_parts
            if s:
                current.append(s)
                if s[-1].typ == filenamelexer.ItemType.Operator:
                    s[-1].val += " "  # Ensures that when there are multiple separators they display properly
                else:  # We don't have an operator separating the parts, it's probably an issue number
                    current = title_parts
    else:
        if items:
            series_parts.extend(items)

    series: list[filenamelexer.Item] = list(itertools.chain.from_iterable(series_parts))
    title: list[filenamelexer.Item] = list(itertools.chain.from_iterable(title_parts))
    if series and series[-1].typ == filenamelexer.ItemType.Operator:
        series.pop()
    return series, title
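
Editor's note: a simplified usage sketch of what split_series does, with plain strings in place of Items (the rule that moves non-operator-terminated middle sections to the title is omitted here):

# Editor's sketch (illustrative only): all sections but the last become the series, the
# last section becomes the title, and a dangling separator at the end of the series is
# dropped.
def split_series_like(sections: list[list[str]]) -> tuple[str, str]:
    if len(sections) > 1:
        series_secs, title_secs = sections[:-1], sections[-1:]
    else:
        series_secs, title_secs = sections, []
    series = [w for sec in series_secs for w in sec]
    if series and series[-1] in {":", "-"}:  # stand-in for ItemType.Operator
        series.pop()
    return " ".join(series), " ".join(w for sec in title_secs for w in sec)


print(split_series_like([["Batman", ":"], ["Year", "One"]]))   # ('Batman', 'Year One')
print(split_series_like([["Usagi", "Yojimbo"]]))               # ('Usagi Yojimbo', '')
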
def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ignore[type-arg]
    for part in p.series:
        p.used_items.extend(part)
    p.series_parts, p.title_parts = split_series(p.series)

    resolve_year(p)
    resolve_issue(p)

@@ -1032,7 +1035,6 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty

    if p.series_parts:
        p.filename_info["series"] = join_title(p.series_parts)
        p.used_items.extend(p.series_parts)
    else:
        p.filename_info["series"] = p.filename_info.get("issue", "")

@@ -1040,7 +1042,6 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None:  # ty
        p.filename_info["fcbd"] = True

    p.filename_info["title"] = join_title(p.title_parts)
    p.used_items.extend(p.title_parts)

    p.irrelevant.extend([x for x in p.input if x.typ in p.remove_from_remainder])

@@ -1137,11 +1138,15 @@ def parse_info_specifier(p: Parser) -> Callable[[Parser], Callable | None] | Non
            p.used_items.append(item)
            p.used_items.append(number)

        # This is not for the issue number; it is not in either the issue or the title,
        # assume it is the volume number and count
        elif p.issue_number_at != i.pos and i not in p.series_parts and i not in p.title_parts:
        # This is not for the issue number,
        # assume it is the volume number and count, remove from series
        elif p.issue_number_at != i.pos:
            p.filename_info["volume"] = i.val
            p.filename_info["volume_count"] = str(int(t2do.convert(number.val)))
            for part in p.series:
                if i in part:
                    part.remove(i)
                    break
            p.used_items.append(i)
            p.used_items.append(item)
            p.used_items.append(number)
@@ -1149,11 +1154,12 @@ def parse_info_specifier(p: Parser) -> Callable[[Parser], Callable | None] | Non
            # TODO: Figure out what to do here if it ever happens
            pass
    else:
        # Lets 'The Wrath of Foobar-Man, Part 1 of 2' parse correctly as the title
        # Resets back to '1' in 'The Wrath of Foobar-Man, Part 1 of 2'
        # we then go to parse_series; it adds i (the '1') and then continues parsing at 'of'
        p.pos = [ind for ind, x in enumerate(p.input) if x == i][0]

        if not p.in_something:
            return parse_series
            return functools.partial(parse_series, i=i)
    return parse

@@ -1233,13 +1239,7 @@ def join_title(lst: list[filenamelexer.Item]) -> str:
        # No space if the next item is an operator or symbol
        if lst[i + 1].typ in [filenamelexer.ItemType.Operator, filenamelexer.ItemType.Symbol]:
            # except if followed by a dollar sign
            if not (
                (
                    lst[i].typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber]
                    and lst[i + 1].val == "$"
                )
                or lst[i + 1].val == "&"
            ):
            if lst[i + 1].val != "&":
                continue

        # Add a space
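
Editor's note: with the simplified condition, only '&' among the operators and symbols still gets a space inserted before it. A tiny paraphrase of that rule (illustrative, not the real join_title):

def wants_space_before(next_val: str, next_is_op_or_symbol: bool) -> bool:
    if next_is_op_or_symbol:
        return next_val == "&"
    return True


print(wants_space_before("&", True))   # True  -> "Superman & Batman"
print(wants_space_before(":", True))   # False -> "Batman:" stays tight
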
@@ -955,6 +955,21 @@ names: list[tuple[str, str, dict[str, str | bool], tuple[bool, bool]]] = [
        },
        (True, True),
    ),
    (
        "Cory Doctorow's Futuristic Tales of the Here and Now $1$2 3 #0.0.1 (2007).cbz",
        "$",
        {
            "archive": "cbz",
            "issue": "0.1",
            "series": "Cory Doctorow's Futuristic Tales of the Here and Now $1 $2 3",
            "title": "",
            "volume": "",
            "year": "2007",
            "remainder": "",
            "issue_count": "",
        },
        (True, True),
    ),
]

oldfnames = []