diff --git a/comicapi/filenamelexer.py b/comicapi/filenamelexer.py index e780c48..7dc713a 100644 --- a/comicapi/filenamelexer.py +++ b/comicapi/filenamelexer.py @@ -130,17 +130,25 @@ class Lexer: self.start = self.pos # Accept consumes the next rune if it's from the valid se: - def accept(self, valid: str) -> bool: - if self.get() in valid: - return True + def accept(self, valid: str | Callable[[str], bool]) -> bool: + if isinstance(valid, str): + if self.get() in valid: + return True + else: + if valid(self.get()): + return True self.backup() return False # AcceptRun consumes a run of runes from the valid set. - def accept_run(self, valid: str) -> None: - while self.get() in valid: - continue + def accept_run(self, valid: str | Callable[[str], bool]) -> None: + if isinstance(valid, str): + while self.get() in valid: + continue + else: + while valid(self.get()): + continue self.backup() @@ -150,9 +158,7 @@ class Lexer: self.accept_run(digits) if self.input[self.pos] == ".": self.backup() - while self.get().isalpha(): - ... - self.backup() + self.accept_run(str.isalpha) return True @@ -197,7 +203,8 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty r = lex.peek() if r.isdigit(): return lex_number - lex.emit(ItemType.Text) # TODO: Change to Text + lex.accept_run(is_symbol) + lex.emit(ItemType.Symbol) elif r.isnumeric(): lex.backup() return lex_number @@ -245,17 +252,17 @@ def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # ty elif is_symbol(r): if unicodedata.category(r) == "Sc": return lex_currency + lex.accept_run(is_symbol) lex.emit(ItemType.Symbol) else: - return errorf(lex, "unrecognized character in action: " + r) + return errorf(lex, "unrecognized character in action: " + repr(r)) return lex_filename def lex_currency(lex: Lexer) -> Callable: orig = lex.pos - while is_space(lex.peek()): - lex.get() + lex.accept_run(is_space) if lex.peek().isnumeric(): return lex_number else: @@ -274,8 +281,7 @@ def lex_operator(lex: Lexer) -> Callable: # type: ignore[type-arg] # LexSpace scans a run of space characters. # One space has already been seen. def lex_space(lex: Lexer) -> Callable: # type: ignore[type-arg] - while is_space(lex.peek()): - lex.get() + lex.accept_run(is_space) lex.emit(ItemType.Space) return lex_filename @@ -332,17 +338,37 @@ def lex_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # type # Assume that 80th is just text and not a number lex.emit(ItemType.Text) else: - orig = lex.pos - while is_space(lex.peek()): - lex.get() - if "Sc" == unicodedata.category(lex.get()): + # Used to check for a '$' + endNumber = lex.pos + + # Consume any spaces + lex.accept_run(is_space) + + # This number starts with a '$' emit it as Text instead of a Number + if "Sc" == unicodedata.category(lex.input[lex.start]): + lex.pos = endNumber lex.emit(ItemType.Text) - else: - lex.pos = orig - if "Sc" == unicodedata.category(lex.input[lex.start]): - lex.emit(ItemType.Text) - else: + + # This number ends in a '$' if there is a number on the other side we assume it belongs to the following number + elif "Sc" == unicodedata.category(lex.get()): + # Store the end of the number '$'. We still need to check to see if there is a number coming up + endCurrency = lex.pos + # Consume any spaces + lex.accept_run(is_space) + + # This is a number + if lex.peek().isnumeric(): + # We go back to the original number before the '$' and emit a number + lex.pos = endNumber lex.emit(ItemType.Number) + else: + # There was no following number, reset to the '$' and emit a number + lex.pos = endCurrency + lex.emit(ItemType.Text) + else: + # We go back to the original number there is no '$' + lex.pos = endNumber + lex.emit(ItemType.Number) return lex_filename @@ -350,21 +376,13 @@ def lex_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # type def lex_issue_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: # type: ignore[type-arg] # Only called when lex.input[lex.start] == "#" original_start = lex.pos - found_number = False - while True: - r = lex.get() - if is_alpha_numeric(r): - if r.isnumeric(): - found_number = True - else: - lex.backup() - break + lex.accept_run(str.isalpha) - if not found_number: + if lex.peek().isnumeric(): + return lex_number + else: lex.pos = original_start lex.emit(ItemType.Symbol) - else: - lex.emit(ItemType.IssueNumber) return lex_filename diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py index f21fcd5..52dc92e 100644 --- a/comicapi/filenameparser.py +++ b/comicapi/filenameparser.py @@ -544,19 +544,8 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # type: ign # Ensures that IG-88 gets added back to the series/title else: if p.in_something == 0: - to_series = ( - filenamelexer.ItemType.IssueNumber, - filenamelexer.ItemType.Number, - filenamelexer.ItemType.Operator, - ) - if ( - p.peek().typ in to_series - or (p.peek().typ == filenamelexer.ItemType.Space and p.peek(2).typ in to_series) - or p.peek_back().typ in to_series - or (p.peek_back().typ == filenamelexer.ItemType.Space and p.peek_back(2).typ in to_series) - ): - # Were not in something and the next or previous type is an operator or number, add it to the series - return functools.partial(parse_series, i=item) + # We're not in something add it to the series + return functools.partial(parse_series, i=item) # Number with a leading hash e.g. #003 elif item.typ == filenamelexer.ItemType.IssueNumber: @@ -1031,8 +1020,6 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty for part in p.series: p.used_items.extend(part) p.series_parts, p.title_parts = split_series(p.series) - p.filename_info["series"] = join_title(p.series_parts) - p.filename_info["title"] = join_title(p.title_parts) resolve_year(p) resolve_issue(p) @@ -1048,7 +1035,6 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty if p.series_parts: p.filename_info["series"] = join_title(p.series_parts) - p.used_items.extend(p.series_parts) else: p.filename_info["series"] = p.filename_info.get("issue", "") @@ -1056,7 +1042,6 @@ def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: # ty p.filename_info["fcbd"] = True p.filename_info["title"] = join_title(p.title_parts) - p.used_items.extend(p.title_parts) p.irrelevant.extend([x for x in p.input if x.typ in p.remove_from_remainder]) @@ -1153,11 +1138,15 @@ def parse_info_specifier(p: Parser) -> Callable[[Parser], Callable | None] | Non p.used_items.append(item) p.used_items.append(number) - # This is not for the issue number it is not in either the issue or the title, - # assume it is the volume number and count - elif p.issue_number_at != i.pos and i not in p.series_parts and i not in p.title_parts: + # This is not for the issue number + # assume it is the volume number and count, remove from series + elif p.issue_number_at != i.pos: p.filename_info["volume"] = i.val p.filename_info["volume_count"] = str(int(t2do.convert(number.val))) + for part in p.series: + if i in part: + part.remove(i) + break p.used_items.append(i) p.used_items.append(item) p.used_items.append(number) diff --git a/testing/filenames.py b/testing/filenames.py index a2325bc..fa9138c 100644 --- a/testing/filenames.py +++ b/testing/filenames.py @@ -955,6 +955,21 @@ names: list[tuple[str, str, dict[str, str | bool], tuple[bool, bool]]] = [ }, (True, True), ), + ( + "Cory Doctorow's Futuristic Tales of the Here and Now $1$2 3 #0.0.1 (2007).cbz", + "$", + { + "archive": "cbz", + "issue": "0.1", + "series": "Cory Doctorow's Futuristic Tales of the Here and Now $1 $2 3", + "title": "", + "volume": "", + "year": "2007", + "remainder": "", + "issue_count": "", + }, + (True, True), + ), ] oldfnames = []