Improve filename parsing
This commit is contained in:
parent
56d8c507e2
commit
c28dc19df6
@ -6,6 +6,7 @@ import calendar
|
||||
import os
|
||||
import unicodedata
|
||||
from enum import Enum, auto
|
||||
from itertools import chain
|
||||
from typing import Any, Callable, Protocol
|
||||
|
||||
|
||||
@ -307,21 +308,20 @@ def lex_text(lex: Lexer) -> LexerFunc:
|
||||
if is_alpha_numeric(r):
|
||||
if r.isnumeric(): # E.g. v1
|
||||
word = lex.input[lex.start : lex.pos]
|
||||
if word.casefold() in key and key[word.casefold()] == ItemType.InfoSpecifier:
|
||||
if key.get(word.casefold(), None) == ItemType.InfoSpecifier:
|
||||
lex.backup()
|
||||
lex.emit(key[word.casefold()])
|
||||
return lex_filename
|
||||
else:
|
||||
if r == "'" and lex.peek() == "s":
|
||||
if r == "'" and lex.peek().casefold() == "s":
|
||||
lex.get()
|
||||
else:
|
||||
lex.backup()
|
||||
word = lex.input[lex.start : lex.pos + 1]
|
||||
if word.casefold() == "vol" and lex.peek() == ".":
|
||||
lex.get()
|
||||
word = lex.input[lex.start : lex.pos + 1]
|
||||
|
||||
if word.casefold() in key:
|
||||
if key[word.casefold()] in (ItemType.Honorific, ItemType.InfoSpecifier):
|
||||
lex.accept(".")
|
||||
lex.emit(key[word.casefold()])
|
||||
elif cal(word):
|
||||
lex.emit(ItemType.Calendar)
|
||||
@ -332,12 +332,8 @@ def lex_text(lex: Lexer) -> LexerFunc:
|
||||
return lex_filename
|
||||
|
||||
|
||||
def cal(value: str) -> set[Any]:
    """Return the calendar indices at which *value* names a month or weekday.

    The title-cased form of *value* is compared against the entries of
    ``calendar.month_abbr``, ``calendar.month_name``, ``calendar.day_abbr``
    and ``calendar.day_name``. Every position with an equal entry is
    collected, and positions from all four tables are merged into one set.
    The set is empty when nothing matches.
    """
    wanted = value.title()
    tables = (
        calendar.month_abbr,
        calendar.month_name,
        calendar.day_abbr,
        calendar.day_name,
    )
    return {
        position
        for table in tables
        for position, label in enumerate(table)
        if label == wanted
    }
|
||||
def cal(value: str) -> bool:
    """Return True when *value* is a month or weekday name, full or abbreviated.

    The title-cased form of *value* is looked up among the entries of
    ``calendar.month_abbr``, ``calendar.month_name``, ``calendar.day_abbr``
    and ``calendar.day_name``.

    Args:
        value: Candidate word; matching ignores the caller's letter case
            because the word is title-cased before the lookup.

    Returns:
        True if the word equals one of the calendar names, otherwise False.
        An empty word is never a calendar name.
    """
    word = value.title()
    if not word:
        # month_abbr[0] and month_name[0] are empty-string placeholders, so
        # without this guard an empty word would be reported as a match.
        return False
    # Built on each call rather than cached: the calendar tables are
    # documented as reflecting the current locale.
    names = set(chain(calendar.month_abbr, calendar.month_name, calendar.day_abbr, calendar.day_name))
    return word in names
|
||||
|
||||
|
||||
def lex_number(lex: Lexer) -> LexerFunc | None:
|
||||
|
@ -417,10 +417,14 @@ class Parser:
|
||||
self.remove_from_remainder.append(filenamelexer.ItemType.FCBD)
|
||||
|
||||
self.input = lexer_result
|
||||
for i, item in enumerate(self.input):
|
||||
self.error = None
|
||||
for i, item in list(enumerate(self.input)):
|
||||
if item.typ == filenamelexer.ItemType.IssueNumber:
|
||||
self.issue_number_at = i
|
||||
self.issue_number_marked = True
|
||||
if item.typ == filenamelexer.ItemType.Error:
|
||||
self.error = item
|
||||
self.input.remove(self.error)
|
||||
|
||||
# Get returns the next Item in the input.
|
||||
def get(self) -> filenamelexer.Item:
|
||||
@ -1043,10 +1047,9 @@ def parse_finish(p: Parser) -> None:
|
||||
if item in p.title_parts:
|
||||
p.title_parts.remove(item)
|
||||
|
||||
p.filename_info["series"] = p.filename_info.get("issue", "")
|
||||
if p.series_parts:
|
||||
p.filename_info["series"] = join_title(p.series_parts)
|
||||
else:
|
||||
p.filename_info["series"] = p.filename_info.get("issue", "")
|
||||
|
||||
if "free comic book" in p.filename_info["series"].casefold():
|
||||
p.filename_info["fcbd"] = True
|
||||
@ -1092,7 +1095,6 @@ def get_remainder(p: Parser) -> str:
|
||||
elif (
|
||||
item.typ
|
||||
in [
|
||||
filenamelexer.ItemType.Space,
|
||||
filenamelexer.ItemType.RightBrace,
|
||||
filenamelexer.ItemType.RightParen,
|
||||
filenamelexer.ItemType.RightSBrace,
|
||||
@ -1111,7 +1113,7 @@ def get_remainder(p: Parser) -> str:
|
||||
|
||||
# Remove empty parentheses
|
||||
remainder = re.sub(r"[\[{(]+[]})]+", "", remainder)
|
||||
return remainder.strip()
|
||||
return remainder.strip().rstrip("[{(")
|
||||
|
||||
|
||||
def parse_info_specifier(p: Parser) -> ParserFunc:
|
||||
|
@ -162,6 +162,8 @@ def parse_filename(
|
||||
remove_publisher=remove_publisher,
|
||||
protofolius_issue_number_scheme=protofolius_issue_number_scheme,
|
||||
)
|
||||
if p.error:
|
||||
logger.info("Issue parsing filename: '%s': %s ", filename, p.error.val)
|
||||
fni = p.filename_info
|
||||
elif parser == Parser.COMICFN2DICT:
|
||||
fn2d = comicfn2dict(filename)
|
||||
|
@ -74,7 +74,7 @@ names: list[tuple[str, str, dict[str, str | bool], tuple[bool, bool]]] = [
|
||||
),
|
||||
(
|
||||
"Michel Vaillant #5 Nr. 13 aan de start",
|
||||
"Shortened word followed by a number eg No. 13, Mr. 13",
|
||||
"Shortened word followed by a number eg No. 13, Nr. 13",
|
||||
{
|
||||
"issue": "5",
|
||||
"series": "Michel Vaillant",
|
||||
@ -276,6 +276,23 @@ names: list[tuple[str, str, dict[str, str | bool], tuple[bool, bool]]] = [
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"batman #3 title (DC.cbz",
|
||||
"publisher in title",
|
||||
{
|
||||
"archive": "cbz",
|
||||
"issue": "3",
|
||||
"series": "batman",
|
||||
"title": "title",
|
||||
"publisher": "DC",
|
||||
"volume": "",
|
||||
"year": "",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, True),
|
||||
),
|
||||
(
|
||||
"ms. Marvel #3.cbz",
|
||||
"honorific and publisher in series",
|
||||
@ -293,6 +310,23 @@ names: list[tuple[str, str, dict[str, str | bool], tuple[bool, bool]]] = [
|
||||
},
|
||||
(False, False),
|
||||
),
|
||||
(
|
||||
"Dr. Doom And The Masters Of Evil #1 (2009).cbz",
|
||||
"honorific and publisher in series",
|
||||
{
|
||||
"archive": "cbz",
|
||||
"issue": "1",
|
||||
"series": "Dr. Doom And The Masters Of Evil",
|
||||
"title": "",
|
||||
"publisher": "",
|
||||
"volume": "",
|
||||
"year": "2009",
|
||||
"remainder": "",
|
||||
"issue_count": "",
|
||||
"alternate": "",
|
||||
},
|
||||
(False, False),
|
||||
),
|
||||
(
|
||||
f"action comics #{datetime.datetime.now().year}.cbz",
|
||||
"issue number is current year (digits == 4)",
|
||||
|
Loading…
Reference in New Issue
Block a user