Improve lexing numbers
Lex currency amounts as text. Lex a '.' followed by a number as a number if there is a preceding space.
This commit is contained in:
parent
29ddc3779a
commit
f03b2e58cf
@ -30,10 +30,10 @@ class ItemType(Enum):
|
||||
InfoSpecifier = auto() # Specifies type of info e.g. v1 for 'volume': 1
|
||||
ArchiveType = auto()
|
||||
Honorific = auto()
|
||||
Publisher = auto()
|
||||
Keywords = auto()
|
||||
FCBD = auto()
|
||||
ComicType = auto()
|
||||
Publisher = auto()
|
||||
C2C = auto()
|
||||
|
||||
|
||||
@ -189,6 +189,8 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
|
||||
return lex_space
|
||||
elif r == ".":
|
||||
r = lex.peek()
|
||||
if r.isnumeric() and lex.pos > 0 and is_space(lex.input[lex.pos - 1]):
|
||||
return lex_number
|
||||
lex.emit(ItemType.Dot)
|
||||
return lex_filename
|
||||
elif r == "'":
|
||||
@ -196,7 +198,7 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
|
||||
if r.isdigit():
|
||||
return lex_number
|
||||
lex.emit(ItemType.Text) # TODO: Change to Text
|
||||
elif "0" <= r <= "9":
|
||||
elif r.isnumeric():
|
||||
lex.backup()
|
||||
return lex_number
|
||||
elif r == "#":
|
||||
@ -241,6 +243,8 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
|
||||
if lex.sbrace_depth < 0:
|
||||
return errorf(lex, "unexpected right brace " + r)
|
||||
elif is_symbol(r):
|
||||
if unicodedata.category(r) == "Sc":
|
||||
return lex_currency
|
||||
lex.emit(ItemType.Symbol)
|
||||
else:
|
||||
return errorf(lex, "unrecognized character in action: " + r)
|
||||
@ -248,6 +252,19 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
|
||||
return lex_filename
|
||||
|
||||
|
||||
def lex_currency(lex: Lexer) -> Callable:
    """Decide how to lex a currency symbol that has just been consumed.

    lex_filename hands control to this state when the current character has
    the Unicode category "Sc" (currency symbol).

    Any run of characters accepted by is_space after the symbol is skipped.
    If the next character is numeric, nothing is emitted here and lex_number
    is returned, with the position left after the skipped spacing. Otherwise
    the position is put back to where it was on entry, the symbol is emitted
    as ItemType.Symbol, and lexing continues with lex_filename.
    """
    symbol_pos = lex.pos

    # Look past any spacing that separates the symbol from a possible amount.
    while True:
        if not is_space(lex.peek()):
            break
        lex.get()

    if not lex.peek().isnumeric():
        # We don't have a number with this currency symbol. Don't treat it special
        lex.pos = symbol_pos
        lex.emit(ItemType.Symbol)
        return lex_filename

    return lex_number
|
||||
|
||||
|
||||
def lex_operator(lex: Lexer) -> Callable: # type: ignore[type-arg]
|
||||
lex.accept_run("-|:;")
|
||||
lex.emit(ItemType.Operator)
|
||||
@ -315,7 +332,14 @@ def lex_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # type
|
||||
# Assume that 80th is just text and not a number
|
||||
lex.emit(ItemType.Text)
|
||||
else:
|
||||
lex.emit(ItemType.Number)
|
||||
orig = lex.pos
|
||||
while is_space(lex.peek()):
|
||||
lex.get()
|
||||
if "Sc" in [unicodedata.category(lex.input[lex.start]), unicodedata.category(lex.get())]:
|
||||
lex.emit(ItemType.Text)
|
||||
else:
|
||||
lex.pos = orig
|
||||
lex.emit(ItemType.Number)
|
||||
|
||||
return lex_filename
|
||||
|
||||
|
@ -1233,13 +1233,7 @@ def join_title(lst: list[filenamelexer.Item]) -> str:
|
||||
# No space if the next item is an operator or symbol
|
||||
if lst[i + 1].typ in [filenamelexer.ItemType.Operator, filenamelexer.ItemType.Symbol]:
|
||||
# exept if followed by a dollarsign
|
||||
if not (
|
||||
(
|
||||
lst[i].typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber]
|
||||
and lst[i + 1].val == "$"
|
||||
)
|
||||
or lst[i + 1].val == "&"
|
||||
):
|
||||
if lst[i + 1].val != "&":
|
||||
continue
|
||||
|
||||
# Add a space
|
||||
|
Loading…
Reference in New Issue
Block a user