lint and add type annotations

This commit is contained in:
AJ Slater 2024-02-24 19:40:33 -08:00
parent 0a17bbc0d9
commit 32f8cb0f22
7 changed files with 61 additions and 64 deletions

View File

@ -3,10 +3,11 @@
from argparse import ArgumentParser
from pathlib import Path
from pprint import pprint
from comicfn2dict.parse import ComicFilenameParser
def main():
def main() -> None:
"""Test parser."""
description = "Comic book archive read/write tool."
parser = ArgumentParser(description=description)
@ -23,7 +24,7 @@ def main():
cfnparser = ComicFilenameParser(name, verbose=args.verbose)
metadata = cfnparser.parse()
if args.verbose:
print("=" * 80)
print("=" * 80) # noqa:T201
pprint(metadata) # noqa:T203

View File

@ -6,4 +6,4 @@ def print_log_header(label: str) -> None:
prefix = "-" * 3 + label
suffix_len = 80 - len(prefix)
suffix = "-" * suffix_len
print(prefix + suffix)
print(prefix + suffix) # noqa: T201

View File

@ -1,10 +1,11 @@
"""Parse comic book archive names using the simple 'parse' parser."""
from pprint import pformat
from calendar import month_abbr
from copy import copy
from pathlib import Path
from pprint import pformat
from re import Match, Pattern
from sys import maxsize
from comicfn2dict.log import print_log_header
from comicfn2dict.regex import (
ALPHA_MONTH_RANGE_RE,
@ -18,8 +19,8 @@ from comicfn2dict.regex import (
ORIGINAL_FORMAT_SCAN_INFO_RE,
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
PUBLISHER_AMBIGUOUS_RE,
PUBLISHER_UNAMBIGUOUS_RE,
PUBLISHER_AMBIGUOUS_TOKEN_RE,
PUBLISHER_UNAMBIGUOUS_RE,
PUBLISHER_UNAMBIGUOUS_TOKEN_RE,
REGEX_SUBS,
REMAINING_GROUP_RE,
@ -51,10 +52,7 @@ class ComicFilenameParser:
if value not in self._path_indexes:
# XXX This is fragile, but it's difficult to calculate the original
# position at match time from the ever changing _unparsed_path.
if key == "ext":
index = self.path.rfind(value)
else:
index = self.path.find(value)
index = self.path.rfind(value) if key == "ext" else self.path.find(value)
self._path_indexes[value] = index
return self._path_indexes[value]
@ -65,8 +63,8 @@ class ComicFilenameParser:
combined = {}
for key in self.metadata:
combined[key] = (self.metadata.get(key), self.path_index(key))
print(" " + self._unparsed_path)
print(" " + pformat(combined))
print(" " + self._unparsed_path) # noqa: T201
print(" " + pformat(combined)) # noqa: T201
def _parse_ext(self) -> None:
"""Pop the extension from the pathname."""
@ -121,7 +119,7 @@ class ComicFilenameParser:
parts.append(token)
self._unparsed_path = TOKEN_DELIMETER.join(parts)
def _parse_items(
def _parse_items( # noqa: PLR0913
self,
regex: Pattern,
require_all: bool = False,
@ -244,7 +242,7 @@ class ComicFilenameParser:
self._log("After publisher")
def _is_at_title_position(self, value: str) -> bool:
"""Does the title come after series and one other token if they exist."""
"""Title is in correct position."""
title_index = self.path.find(value)
# Titles must come after series but before format and scan_info
@ -286,9 +284,8 @@ class ComicFilenameParser:
if not match:
return token
value = match.group()
if key == "title":
if not self._is_at_title_position(value):
return token
if key == "title" and not self._is_at_title_position(value):
return token
value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value)
value = self._grouping_operators_strip(value)
if value:
@ -311,7 +308,6 @@ class ComicFilenameParser:
unused_tokens.append(unused_token)
remaining_key_index += 1
print(f"{unused_tokens=}")
self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
self._log("After Series & Title")
@ -363,6 +359,6 @@ class ComicFilenameParser:
def comicfn2dict(
path: str | Path, verbose: int = 0
) -> dict[str, str | tuple[str, ...]]:
"""Simple API."""
"""Simplfily the API."""
parser = ComicFilenameParser(path, verbose=verbose)
return parser.parse()

View File

@ -1,16 +1,8 @@
"""Parsing regexes."""
import re
from re import IGNORECASE, Pattern, compile
from types import MappingProxyType
def re_compile(exp, parenthify=False):
"""Compile regex with options."""
if parenthify:
exp = r"\(" + exp + r"\)"
return re.compile(exp, flags=re.IGNORECASE)
PUBLISHERS_UNAMBIGUOUS = (
PUBLISHERS_UNAMBIGUOUS: tuple[str, ...] = (
r"Abrams ComicArts",
r"BOOM! Studios",
r"DC(\sComics)?",
@ -26,7 +18,7 @@ PUBLISHERS_UNAMBIGUOUS = (
r"SelfMadeHero",
r"Titan Comics",
)
PUBLISHERS_AMBIGUOUS = (
PUBLISHERS_AMBIGUOUS: tuple[str, ...] = (
r"Marvel",
r"Heavy Metal",
r"Epic",
@ -34,7 +26,7 @@ PUBLISHERS_AMBIGUOUS = (
r"Mirage",
)
ORIGINAL_FORMAT_PATTERNS = (
ORIGINAL_FORMAT_PATTERNS: tuple[str, ...] = (
r"Anthology",
r"(One|1)[-\s]Shot",
r"Annual",
@ -63,7 +55,7 @@ ORIGINAL_FORMAT_PATTERNS = (
r"Web([-\s]?(Comic|Rip))?",
)
MONTHS = (
MONTHS: tuple[str, ...] = (
r"Jan(uary)?",
r"Feb(ruary)?",
r"Mar(ch)?",
@ -78,7 +70,15 @@ MONTHS = (
r"Dec(ember)?",
)
TOKEN_DELIMETER = r"/"
TOKEN_DELIMETER: str = r"/"
def re_compile(exp: str, parenthify: bool = False) -> Pattern:
"""Compile regex with options."""
if parenthify:
exp = r"\(" + exp + r"\)"
return compile(exp, flags=IGNORECASE)
# CLEAN
_TOKEN_DIVIDERS_RE = re_compile(r":")
@ -87,7 +87,7 @@ _EXTRA_SPACES_RE = re_compile(r"\s\s+")
_LEFT_PAREN_EQUIVALENT_RE = re_compile(r"\[")
_RIGHT_PAREN_EQUIVALENT_RE = re_compile(r"\]")
_DOUBLE_UNDERSCORE_RE = re_compile(r"__(.*)__")
REGEX_SUBS: MappingProxyType[re.Pattern, tuple[str, int]] = MappingProxyType(
REGEX_SUBS: MappingProxyType[Pattern, tuple[str, int]] = MappingProxyType(
{
_DOUBLE_UNDERSCORE_RE: (r"(\1)", 0),
_TOKEN_DIVIDERS_RE: (TOKEN_DELIMETER, 1),
@ -104,7 +104,7 @@ _MONTH_ALPHA_RE_EXP = r"(" + "(?P<alpha_month>" + r"|".join(MONTHS) + r")\.?" r"
_MONTH_NUMERIC_RE_EXP = r"(?P<month>0?\d|1[0-2]?)"
_MONTH_RE_EXP = r"(" + _MONTH_ALPHA_RE_EXP + r"|" + _MONTH_NUMERIC_RE_EXP + r")"
_ALPHA_MONTH_RANGE = (
r"\b"
r"\b" # noqa: ISC003
+ r"("
+ r"|".join(MONTHS)
+ r")"
@ -115,7 +115,7 @@ _ALPHA_MONTH_RANGE = (
+ r")"
+ r")\b"
)
ALPHA_MONTH_RANGE_RE = re_compile(_ALPHA_MONTH_RANGE)
ALPHA_MONTH_RANGE_RE: Pattern = re_compile(_ALPHA_MONTH_RANGE)
_DAY_RE_EXP = r"(?P<day>([0-2]?\d|(3)[0-1]))"
_DATE_DELIM = r"[-\s]+"
@ -144,10 +144,10 @@ _YEAR_FIRST_DATE_RE_EXP = (
+ r"\b\)?)"
)
MONTH_FIRST_DATE_RE = re_compile(_MONTH_FIRST_DATE_RE_EXP)
YEAR_FIRST_DATE_RE = re_compile(_YEAR_FIRST_DATE_RE_EXP)
YEAR_TOKEN_RE = re_compile(_YEAR_RE_EXP, parenthify=True)
YEAR_END_RE = re_compile(_YEAR_RE_EXP + r"\/|$")
MONTH_FIRST_DATE_RE: Pattern = re_compile(_MONTH_FIRST_DATE_RE_EXP)
YEAR_FIRST_DATE_RE: Pattern = re_compile(_YEAR_FIRST_DATE_RE_EXP)
YEAR_TOKEN_RE: Pattern = re_compile(_YEAR_RE_EXP, parenthify=True)
YEAR_END_RE: Pattern = re_compile(_YEAR_RE_EXP + r"\/|$")
# PAREN GROUPS
_OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS)
@ -157,38 +157,38 @@ _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = (
_ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP # + r")?"
)
# Keep this even though comicfn2dict doesn't use it directly
ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile(
ORIGINAL_FORMAT_RE: Pattern = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
ORIGINAL_FORMAT_SCAN_INFO_RE: Pattern = re_compile(
_ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True
)
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE = re_compile(
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE: Pattern = re_compile(
r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)"
)
SCAN_INFO_SECONDARY_RE = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")
SCAN_INFO_SECONDARY_RE: Pattern = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")
# ISSUE
_ISSUE_RE_EXP = r"(?P<issue>\w*(½|\d+)[\.\d+]*\w*)"
_ISSUE_COUNT_RE_EXP = r"\(of\s*(?P<issue_count>\d+)\)"
ISSUE_NUMBER_RE = re_compile(
ISSUE_NUMBER_RE: Pattern = re_compile(
r"(\(?#" + _ISSUE_RE_EXP + r"\)?)" + r"(\W*" + _ISSUE_COUNT_RE_EXP + r")?"
)
ISSUE_WITH_COUNT_RE = re_compile(
ISSUE_WITH_COUNT_RE: Pattern = re_compile(
r"(\(?" + _ISSUE_RE_EXP + r"\)?" + r"\W*" + _ISSUE_COUNT_RE_EXP + r")"
)
ISSUE_END_RE = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
ISSUE_BEGIN_RE = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")
ISSUE_END_RE: Pattern = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
ISSUE_BEGIN_RE: Pattern = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")
# Volume
_VOLUME_COUNT_RE_EXP = r"\(of\s*(?P<volume_count>\d+)\)"
VOLUME_RE = re_compile(
r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)"
VOLUME_RE: Pattern = re_compile(
r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)" # noqa: ISC003
r"(\W*" + _VOLUME_COUNT_RE_EXP + r")?" + r")"
)
VOLUME_WITH_COUNT_RE = re_compile(
VOLUME_WITH_COUNT_RE: Pattern = re_compile(
r"(\(?" + r"(?P<volume>\d+)" + r"\)?" + r"\W*" + _VOLUME_COUNT_RE_EXP + r")"
)
BOOK_VOLUME_RE = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")
BOOK_VOLUME_RE: Pattern = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")
# Publisher
_PUBLISHER_UNAMBIGUOUS_RE_EXP = (
@ -197,15 +197,15 @@ _PUBLISHER_UNAMBIGUOUS_RE_EXP = (
_PUBLISHER_AMBIGUOUS_RE_EXP = (
r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_AMBIGUOUS) + r")\b)"
)
PUBLISHER_UNAMBIGUOUS_TOKEN_RE = re_compile(
PUBLISHER_UNAMBIGUOUS_TOKEN_RE: Pattern = re_compile(
r"(^|\/)" + _PUBLISHER_UNAMBIGUOUS_RE_EXP + r"($|\/)"
)
PUBLISHER_AMBIGUOUS_TOKEN_RE = re_compile(
PUBLISHER_AMBIGUOUS_TOKEN_RE: Pattern = re_compile(
r"(^|\/)" + _PUBLISHER_AMBIGUOUS_RE_EXP + r"($|\/)"
)
PUBLISHER_UNAMBIGUOUS_RE = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
PUBLISHER_UNAMBIGUOUS_RE: Pattern = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
PUBLISHER_AMBIGUOUS_RE = re_compile(_PUBLISHER_AMBIGUOUS_RE_EXP)
# LONG STRINGS
REMAINING_GROUP_RE = re_compile(r"^[^\(].*[^\)]")
NON_NUMBER_DOT_RE = re_compile(r"(\D)\.(\D)")
REMAINING_GROUP_RE: Pattern = re_compile(r"^[^\(].*[^\)]")
NON_NUMBER_DOT_RE: Pattern = re_compile(r"(\D)\.(\D)")

View File

@ -1,8 +1,9 @@
"""Unparse comic filenames."""
from calendar import month_abbr
from collections.abc import Callable, Mapping, Sequence
from contextlib import suppress
from calendar import month_abbr
from types import MappingProxyType
from comicfn2dict.log import print_log_header
@ -44,7 +45,7 @@ class ComicFilenameSerializer:
if not self._debug:
return
print_log_header(label)
print(fn)
print(fn) # noqa: T201
def _add_date(self) -> None:
"""Construct date from Y-m-D if they exist."""
@ -73,8 +74,7 @@ class ComicFilenameSerializer:
if val in _EMPTY_VALUES:
return ""
final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
token = final_fmt.format(val).strip()
return token
return final_fmt.format(val).strip()
def _add_remainder(self) -> str:
"""Add the remainders specially."""
@ -109,12 +109,13 @@ class ComicFilenameSerializer:
return fn
def __init__(self, metadata: Mapping, ext: bool = True, verbose: int = 0):
"""Initialize."""
self.metadata: Mapping = metadata
self._ext: bool = ext
self._debug: bool = bool(verbose)
def dict2comicfn(md: Mapping, ext: bool = True, verbose: int = 0) -> str:
"""Simple API."""
"""Simplify API."""
serializer = ComicFilenameSerializer(md, ext=ext, verbose=verbose)
return serializer.serialize()

View File

@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "comicfn2dict"
version = "0.2.0a3"
version = "0.2.0a4"
description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli."
license = "GPL-3.0-only"
authors = ["AJ Slater <aj@slater.net>"]
@ -125,7 +125,7 @@ exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*,
extend-exclude = ["typings"]
target-version = "py310"
[tool.lint.ruff]
[tool.ruff.lint]
extend-ignore = [
"S101",
"D203",

View File

@ -2,7 +2,6 @@
from types import MappingProxyType
TEST_COMIC_FIELDS = {
"series": "Long Series Name",
"issue": "001",