From bf0a46055af21f8fe6a10ac2033329baeb29c698 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Fri, 6 Dec 2024 21:29:26 -0800 Subject: [PATCH] Fix parsing ' in filenames Fixes #672 --- comicapi/filenamelexer.py | 14 +++++++------- testing/filenames.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/comicapi/filenamelexer.py b/comicapi/filenamelexer.py index e08a628..62cd880 100644 --- a/comicapi/filenamelexer.py +++ b/comicapi/filenamelexer.py @@ -213,8 +213,11 @@ def lex_filename(lex: Lexer) -> LexerFunc | None: r = lex.peek() if r.isdigit(): return lex_number - lex.accept_run(is_symbol) - lex.emit(ItemType.Symbol) + if is_symbol(r): + lex.accept_run(is_symbol) + lex.emit(ItemType.Symbol) + else: + return lex_text elif r.isnumeric(): lex.backup() return lex_number @@ -305,7 +308,7 @@ def lex_space(lex: Lexer) -> LexerFunc: def lex_text(lex: Lexer) -> LexerFunc: while True: r = lex.get() - if is_alpha_numeric(r): + if is_alpha_numeric(r) or r in "'": if r.isnumeric(): # E.g. v1 word = lex.input[lex.start : lex.pos] if key.get(word.casefold(), None) == ItemType.InfoSpecifier: @@ -313,10 +316,7 @@ def lex_text(lex: Lexer) -> LexerFunc: lex.emit(key[word.casefold()]) return lex_filename else: - if r == "'" and lex.peek().casefold() == "s": - lex.get() - else: - lex.backup() + lex.backup() word = lex.input[lex.start : lex.pos + 1] if word.casefold() in key: diff --git a/testing/filenames.py b/testing/filenames.py index 14a1c6d..2cabfd2 100644 --- a/testing/filenames.py +++ b/testing/filenames.py @@ -25,6 +25,38 @@ datadir = importlib.resources.files(__package__).joinpath("data") cbz_path = datadir.joinpath("Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz") names: list[tuple[str, str, dict[str, str | bool], tuple[bool, bool]]] = [ + ( + "De Psy #6 Bonjour l'angoisse!.cbz", + "'", + { + "issue": "6", + "series": "De Psy", + "title": "Bonjour l'angoisse!", + "volume": "", + "year": "", + "remainder": "", + "issue_count": "", + "alternate": "", + "archive": "cbz", + }, + (False, True), + ), + ( + "Airfiles #4 The 'Big Show'.cbz", + "'", + { + "issue": "4", + "series": "Airfiles", + "title": "The 'Big Show'", + "volume": "", + "year": "", + "remainder": "", + "issue_count": "", + "alternate": "", + "archive": "cbz", + }, + (False, True), + ), ( "Conceptions #1 Conceptions I.cbz", "&",