Improve lexing of numbers

lex currency amounts as text
lex a '.' followed by a number as a number if there is a preceding space
This commit is contained in:
Timmy Welch 2023-10-15 15:47:04 -07:00
parent 29ddc3779a
commit f03b2e58cf
2 changed files with 28 additions and 10 deletions

View File

@ -30,10 +30,10 @@ class ItemType(Enum):
InfoSpecifier = auto() # Specifies type of info e.g. v1 for 'volume': 1
ArchiveType = auto()
Honorific = auto()
Publisher = auto()
Keywords = auto()
FCBD = auto()
ComicType = auto()
Publisher = auto()
C2C = auto()
@ -189,6 +189,8 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
return lex_space
elif r == ".":
r = lex.peek()
if r.isnumeric() and lex.pos > 0 and is_space(lex.input[lex.pos - 1]):
return lex_number
lex.emit(ItemType.Dot)
return lex_filename
elif r == "'":
@ -196,7 +198,7 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
if r.isdigit():
return lex_number
lex.emit(ItemType.Text) # TODO: Change to Text
elif "0" <= r <= "9":
elif r.isnumeric():
lex.backup()
return lex_number
elif r == "#":
@ -241,6 +243,8 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
if lex.sbrace_depth < 0:
return errorf(lex, "unexpected right brace " + r)
elif is_symbol(r):
if unicodedata.category(r) == "Sc":
return lex_currency
lex.emit(ItemType.Symbol)
else:
return errorf(lex, "unrecognized character in action: " + r)
@ -248,6 +252,19 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty
return lex_filename
def lex_currency(lex: Lexer) -> Callable:
    """Choose the next lexer state after a currency symbol has been read.

    Looks ahead, past any whitespace, for a numeric character.

    Returns:
        ``lex_number`` when a numeric character follows the symbol; the
        whitespace skipped on the way stays consumed.  Otherwise the lexer
        position is restored to where it was on entry, the symbol is emitted
        as ``ItemType.Symbol`` and ``lex_filename`` is returned.
    """
    symbol_end = lex.pos

    # Step over any whitespace separating the symbol from a possible amount.
    while is_space(lex.peek()):
        lex.get()

    if not lex.peek().isnumeric():
        # We don't have a number with this currency symbol. Don't treat it special
        lex.pos = symbol_end
        lex.emit(ItemType.Symbol)
        return lex_filename

    return lex_number
def lex_operator(lex: Lexer) -> Callable: # type: ignore[type-arg]
lex.accept_run("-|:;")
lex.emit(ItemType.Operator)
@ -315,7 +332,14 @@ def lex_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # type
# Assume that 80th is just text and not a number
lex.emit(ItemType.Text)
else:
lex.emit(ItemType.Number)
orig = lex.pos
while is_space(lex.peek()):
lex.get()
if "Sc" in [unicodedata.category(lex.input[lex.start]), unicodedata.category(lex.get())]:
lex.emit(ItemType.Text)
else:
lex.pos = orig
lex.emit(ItemType.Number)
return lex_filename

View File

@ -1233,13 +1233,7 @@ def join_title(lst: list[filenamelexer.Item]) -> str:
# No space if the next item is an operator or symbol
if lst[i + 1].typ in [filenamelexer.ItemType.Operator, filenamelexer.ItemType.Symbol]:
# except if followed by a dollar sign
if not (
(
lst[i].typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber]
and lst[i + 1].val == "$"
)
or lst[i + 1].val == "&"
):
if lst[i + 1].val != "&":
continue
# Add a space