lint and add type annotations
commit 32f8cb0f22 (parent 0a17bbc0d9)
@@ -3,10 +3,11 @@
 from argparse import ArgumentParser
 from pathlib import Path
 from pprint import pprint
+
 from comicfn2dict.parse import ComicFilenameParser


-def main():
+def main() -> None:
     """Test parser."""
     description = "Comic book archive read/write tool."
     parser = ArgumentParser(description=description)

@@ -23,7 +24,7 @@ def main():
     cfnparser = ComicFilenameParser(name, verbose=args.verbose)
     metadata = cfnparser.parse()
     if args.verbose:
-        print("=" * 80)
+        print("=" * 80)  # noqa:T201
     pprint(metadata)  # noqa:T203

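For reference, the parser this CLI drives can be exercised directly; a minimal sketch (the sample filename is hypothetical):

    from comicfn2dict.parse import ComicFilenameParser

    # verbose=1 makes the parser print its parsing stages, as main() does
    # when args.verbose is set.
    parser = ComicFilenameParser("Long Series Name #001 (2024).cbz", verbose=1)
    metadata = parser.parse()  # dict of metadata attributes
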
@@ -6,4 +6,4 @@ def print_log_header(label: str) -> None:
     prefix = "-" * 3 + label
     suffix_len = 80 - len(prefix)
     suffix = "-" * suffix_len
-    print(prefix + suffix)
+    print(prefix + suffix)  # noqa: T201

@@ -1,10 +1,11 @@
 """Parse comic book archive names using the simple 'parse' parser."""
-from pprint import pformat
 from calendar import month_abbr
 from copy import copy
 from pathlib import Path
+from pprint import pformat
 from re import Match, Pattern
 from sys import maxsize
+
 from comicfn2dict.log import print_log_header
 from comicfn2dict.regex import (
     ALPHA_MONTH_RANGE_RE,

@@ -18,8 +19,8 @@ from comicfn2dict.regex import (
     ORIGINAL_FORMAT_SCAN_INFO_RE,
     ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
     PUBLISHER_AMBIGUOUS_RE,
-    PUBLISHER_UNAMBIGUOUS_RE,
     PUBLISHER_AMBIGUOUS_TOKEN_RE,
+    PUBLISHER_UNAMBIGUOUS_RE,
     PUBLISHER_UNAMBIGUOUS_TOKEN_RE,
     REGEX_SUBS,
     REMAINING_GROUP_RE,

@@ -51,10 +52,7 @@ class ComicFilenameParser:
         if value not in self._path_indexes:
             # XXX This is fragile, but it's difficult to calculate the original
            # position at match time from the ever changing _unparsed_path.
-            if key == "ext":
-                index = self.path.rfind(value)
-            else:
-                index = self.path.find(value)
+            index = self.path.rfind(value) if key == "ext" else self.path.find(value)
            self._path_indexes[value] = index
        return self._path_indexes[value]

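The collapsed conditional keeps the original branch semantics: the extension is searched from the right, since it sits at the end of the path, while every other value takes its first occurrence. A tiny illustration (the sample string is hypothetical):

    path = "cbz/Series #1.cbz"
    print(path.find("cbz"))   # 0  -- first occurrence, wrong for an extension
    print(path.rfind("cbz"))  # 14 -- last occurrence, the actual extension
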
@@ -65,8 +63,8 @@ class ComicFilenameParser:
         combined = {}
         for key in self.metadata:
             combined[key] = (self.metadata.get(key), self.path_index(key))
-        print(" " + self._unparsed_path)
-        print(" " + pformat(combined))
+        print(" " + self._unparsed_path)  # noqa: T201
+        print(" " + pformat(combined))  # noqa: T201

     def _parse_ext(self) -> None:
         """Pop the extension from the pathname."""

@@ -121,7 +119,7 @@ class ComicFilenameParser:
             parts.append(token)
         self._unparsed_path = TOKEN_DELIMETER.join(parts)

-    def _parse_items(
+    def _parse_items(  # noqa: PLR0913
         self,
         regex: Pattern,
         require_all: bool = False,

@@ -244,7 +242,7 @@ class ComicFilenameParser:
         self._log("After publisher")

     def _is_at_title_position(self, value: str) -> bool:
-        """Does the title come after series and one other token if they exist."""
+        """Title is in correct position."""
         title_index = self.path.find(value)

         # Titles must come after series but before format and scan_info

@@ -286,9 +284,8 @@ class ComicFilenameParser:
         if not match:
             return token
         value = match.group()
-        if key == "title":
-            if not self._is_at_title_position(value):
-                return token
+        if key == "title" and not self._is_at_title_position(value):
+            return token
         value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value)
         value = self._grouping_operators_strip(value)
         if value:

@@ -311,7 +308,6 @@ class ComicFilenameParser:
                 unused_tokens.append(unused_token)
             remaining_key_index += 1

-        print(f"{unused_tokens=}")
         self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
         self._log("After Series & Title")

@@ -363,6 +359,6 @@ class ComicFilenameParser:
 def comicfn2dict(
     path: str | Path, verbose: int = 0
 ) -> dict[str, str | tuple[str, ...]]:
-    """Simple API."""
+    """Simplify the API."""
     parser = ComicFilenameParser(path, verbose=verbose)
     return parser.parse()

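A usage sketch for the module-level convenience function above (the filename is hypothetical; the keys shown correspond to named groups defined in the regex module):

    from comicfn2dict.parse import comicfn2dict

    metadata = comicfn2dict("Long Series Name #001 (2024).cbz")
    print(metadata.get("series"), metadata.get("issue"), metadata.get("year"))
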
@@ -1,16 +1,8 @@
 """Parsing regexes."""
-import re
+from re import IGNORECASE, Pattern, compile
 from types import MappingProxyType

-
-def re_compile(exp, parenthify=False):
-    """Compile regex with options."""
-    if parenthify:
-        exp = r"\(" + exp + r"\)"
-    return re.compile(exp, flags=re.IGNORECASE)
-
-
-PUBLISHERS_UNAMBIGUOUS = (
+PUBLISHERS_UNAMBIGUOUS: tuple[str, ...] = (
     r"Abrams ComicArts",
     r"BOOM! Studios",
     r"DC(\sComics)?",

@@ -26,7 +18,7 @@ PUBLISHERS_UNAMBIGUOUS = (
     r"SelfMadeHero",
     r"Titan Comics",
 )
-PUBLISHERS_AMBIGUOUS = (
+PUBLISHERS_AMBIGUOUS: tuple[str, ...] = (
     r"Marvel",
     r"Heavy Metal",
     r"Epic",

@@ -34,7 +26,7 @@ PUBLISHERS_AMBIGUOUS = (
     r"Mirage",
 )

-ORIGINAL_FORMAT_PATTERNS = (
+ORIGINAL_FORMAT_PATTERNS: tuple[str, ...] = (
     r"Anthology",
     r"(One|1)[-\s]Shot",
     r"Annual",

@@ -63,7 +55,7 @@ ORIGINAL_FORMAT_PATTERNS = (
     r"Web([-\s]?(Comic|Rip))?",
 )

-MONTHS = (
+MONTHS: tuple[str, ...] = (
     r"Jan(uary)?",
     r"Feb(ruary)?",
     r"Mar(ch)?",

@@ -78,7 +70,15 @@ MONTHS = (
     r"Dec(ember)?",
 )

-TOKEN_DELIMETER = r"/"
+TOKEN_DELIMETER: str = r"/"
+
+
+def re_compile(exp: str, parenthify: bool = False) -> Pattern:
+    """Compile regex with options."""
+    if parenthify:
+        exp = r"\(" + exp + r"\)"
+    return compile(exp, flags=IGNORECASE)


 # CLEAN
 _TOKEN_DIVIDERS_RE = re_compile(r":")

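The relocated helper behaves as before, now with type hints and the re names imported directly: parenthify=True wraps the expression in literal parentheses before compiling case-insensitively. A quick illustration (the year pattern here is a stand-in, not the module's _YEAR_RE_EXP):

    # Only matches when the text itself is parenthesized, e.g. "(2024)".
    pattern = re_compile(r"(?P<year>\d{4})", parenthify=True)
    match = pattern.search("Series #1 (2024)")
    print(match.group("year") if match else None)  # 2024
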
@@ -87,7 +87,7 @@ _EXTRA_SPACES_RE = re_compile(r"\s\s+")
 _LEFT_PAREN_EQUIVALENT_RE = re_compile(r"\[")
 _RIGHT_PAREN_EQUIVALENT_RE = re_compile(r"\]")
 _DOUBLE_UNDERSCORE_RE = re_compile(r"__(.*)__")
-REGEX_SUBS: MappingProxyType[re.Pattern, tuple[str, int]] = MappingProxyType(
+REGEX_SUBS: MappingProxyType[Pattern, tuple[str, int]] = MappingProxyType(
     {
         _DOUBLE_UNDERSCORE_RE: (r"(\1)", 0),
         _TOKEN_DIVIDERS_RE: (TOKEN_DELIMETER, 1),

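REGEX_SUBS maps each compiled pattern to a (replacement, count) pair. A minimal sketch of how such a table would be applied, assuming the int is an re.sub count (0 meaning replace all occurrences); the helper name clean_path is hypothetical:

    def clean_path(path: str) -> str:
        # Apply each substitution in table order; count=0 replaces every match.
        for regex, (replacement, count) in REGEX_SUBS.items():
            path = regex.sub(replacement, path, count=count)
        return path
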
@@ -104,7 +104,7 @@ _MONTH_ALPHA_RE_EXP = r"(" + "(?P<alpha_month>" + r"|".join(MONTHS) + r")\.?" r"
 _MONTH_NUMERIC_RE_EXP = r"(?P<month>0?\d|1[0-2]?)"
 _MONTH_RE_EXP = r"(" + _MONTH_ALPHA_RE_EXP + r"|" + _MONTH_NUMERIC_RE_EXP + r")"
 _ALPHA_MONTH_RANGE = (
-    r"\b"
+    r"\b"  # noqa: ISC003
     + r"("
     + r"|".join(MONTHS)
     + r")"

@@ -115,7 +115,7 @@ _ALPHA_MONTH_RANGE = (
     + r")"
     + r")\b"
 )
-ALPHA_MONTH_RANGE_RE = re_compile(_ALPHA_MONTH_RANGE)
+ALPHA_MONTH_RANGE_RE: Pattern = re_compile(_ALPHA_MONTH_RANGE)

 _DAY_RE_EXP = r"(?P<day>([0-2]?\d|(3)[0-1]))"
 _DATE_DELIM = r"[-\s]+"

@@ -144,10 +144,10 @@ _YEAR_FIRST_DATE_RE_EXP = (
     + r"\b\)?)"
 )

-MONTH_FIRST_DATE_RE = re_compile(_MONTH_FIRST_DATE_RE_EXP)
-YEAR_FIRST_DATE_RE = re_compile(_YEAR_FIRST_DATE_RE_EXP)
-YEAR_TOKEN_RE = re_compile(_YEAR_RE_EXP, parenthify=True)
-YEAR_END_RE = re_compile(_YEAR_RE_EXP + r"\/|$")
+MONTH_FIRST_DATE_RE: Pattern = re_compile(_MONTH_FIRST_DATE_RE_EXP)
+YEAR_FIRST_DATE_RE: Pattern = re_compile(_YEAR_FIRST_DATE_RE_EXP)
+YEAR_TOKEN_RE: Pattern = re_compile(_YEAR_RE_EXP, parenthify=True)
+YEAR_END_RE: Pattern = re_compile(_YEAR_RE_EXP + r"\/|$")

 # PAREN GROUPS
 _OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS)

@@ -157,38 +157,38 @@ _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = (
     _ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP  # + r")?"
 )
 # Keep this even though comicfn2dict doesn't use it directly
-ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
-ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile(
+ORIGINAL_FORMAT_RE: Pattern = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
+ORIGINAL_FORMAT_SCAN_INFO_RE: Pattern = re_compile(
     _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True
 )
-ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE = re_compile(
+ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE: Pattern = re_compile(
     r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)"
 )

-SCAN_INFO_SECONDARY_RE = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")
+SCAN_INFO_SECONDARY_RE: Pattern = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")

 # ISSUE
 _ISSUE_RE_EXP = r"(?P<issue>\w*(½|\d+)[\.\d+]*\w*)"
 _ISSUE_COUNT_RE_EXP = r"\(of\s*(?P<issue_count>\d+)\)"
-ISSUE_NUMBER_RE = re_compile(
+ISSUE_NUMBER_RE: Pattern = re_compile(
     r"(\(?#" + _ISSUE_RE_EXP + r"\)?)" + r"(\W*" + _ISSUE_COUNT_RE_EXP + r")?"
 )
-ISSUE_WITH_COUNT_RE = re_compile(
+ISSUE_WITH_COUNT_RE: Pattern = re_compile(
     r"(\(?" + _ISSUE_RE_EXP + r"\)?" + r"\W*" + _ISSUE_COUNT_RE_EXP + r")"
 )
-ISSUE_END_RE = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
-ISSUE_BEGIN_RE = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")
+ISSUE_END_RE: Pattern = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
+ISSUE_BEGIN_RE: Pattern = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")

 # Volume
 _VOLUME_COUNT_RE_EXP = r"\(of\s*(?P<volume_count>\d+)\)"
-VOLUME_RE = re_compile(
-    r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)"
+VOLUME_RE: Pattern = re_compile(
+    r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)"  # noqa: ISC003
     r"(\W*" + _VOLUME_COUNT_RE_EXP + r")?" + r")"
 )
-VOLUME_WITH_COUNT_RE = re_compile(
+VOLUME_WITH_COUNT_RE: Pattern = re_compile(
     r"(\(?" + r"(?P<volume>\d+)" + r"\)?" + r"\W*" + _VOLUME_COUNT_RE_EXP + r")"
 )
-BOOK_VOLUME_RE = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")
+BOOK_VOLUME_RE: Pattern = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")

 # Publisher
 _PUBLISHER_UNAMBIGUOUS_RE_EXP = (

@@ -197,15 +197,15 @@ _PUBLISHER_UNAMBIGUOUS_RE_EXP = (
 _PUBLISHER_AMBIGUOUS_RE_EXP = (
     r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_AMBIGUOUS) + r")\b)"
 )
-PUBLISHER_UNAMBIGUOUS_TOKEN_RE = re_compile(
+PUBLISHER_UNAMBIGUOUS_TOKEN_RE: Pattern = re_compile(
     r"(^|\/)" + _PUBLISHER_UNAMBIGUOUS_RE_EXP + r"($|\/)"
 )
-PUBLISHER_AMBIGUOUS_TOKEN_RE = re_compile(
+PUBLISHER_AMBIGUOUS_TOKEN_RE: Pattern = re_compile(
     r"(^|\/)" + _PUBLISHER_AMBIGUOUS_RE_EXP + r"($|\/)"
 )
-PUBLISHER_UNAMBIGUOUS_RE = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
+PUBLISHER_UNAMBIGUOUS_RE: Pattern = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
 PUBLISHER_AMBIGUOUS_RE = re_compile(_PUBLISHER_AMBIGUOUS_RE_EXP)

 # LONG STRINGS
-REMAINING_GROUP_RE = re_compile(r"^[^\(].*[^\)]")
-NON_NUMBER_DOT_RE = re_compile(r"(\D)\.(\D)")
+REMAINING_GROUP_RE: Pattern = re_compile(r"^[^\(].*[^\)]")
+NON_NUMBER_DOT_RE: Pattern = re_compile(r"(\D)\.(\D)")

@@ -1,8 +1,9 @@
 """Unparse comic filenames."""
+from calendar import month_abbr
 from collections.abc import Callable, Mapping, Sequence
 from contextlib import suppress
-from calendar import month_abbr
 from types import MappingProxyType

 from comicfn2dict.log import print_log_header

+
@@ -44,7 +45,7 @@ class ComicFilenameSerializer:
         if not self._debug:
             return
         print_log_header(label)
-        print(fn)
+        print(fn)  # noqa: T201

     def _add_date(self) -> None:
         """Construct date from Y-m-D if they exist."""

@@ -73,8 +74,7 @@ class ComicFilenameSerializer:
         if val in _EMPTY_VALUES:
             return ""
         final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
-        token = final_fmt.format(val).strip()
-        return token
+        return final_fmt.format(val).strip()

     def _add_remainder(self) -> str:
         """Add the remainders specially."""

@@ -109,12 +109,13 @@ class ComicFilenameSerializer:
         return fn

     def __init__(self, metadata: Mapping, ext: bool = True, verbose: int = 0):
+        """Initialize."""
         self.metadata: Mapping = metadata
         self._ext: bool = ext
         self._debug: bool = bool(verbose)


 def dict2comicfn(md: Mapping, ext: bool = True, verbose: int = 0) -> str:
-    """Simple API."""
+    """Simplify API."""
     serializer = ComicFilenameSerializer(md, ext=ext, verbose=verbose)
     return serializer.serialize()

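A round-trip sketch for the serializer's convenience function above (field values are hypothetical; the module path is assumed from the file's "Unparse comic filenames" docstring):

    from comicfn2dict.unparse import dict2comicfn

    md = {"series": "Long Series Name", "issue": "001", "year": "2024", "ext": "cbz"}
    filename = dict2comicfn(md, ext=True)  # serialize metadata back to a filename
    print(filename)
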
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "comicfn2dict"
-version = "0.2.0a3"
+version = "0.2.0a4"
 description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli."
 license = "GPL-3.0-only"
 authors = ["AJ Slater <aj@slater.net>"]

@@ -125,7 +125,7 @@ exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*,
 extend-exclude = ["typings"]
 target-version = "py310"

-[tool.lint.ruff]
+[tool.ruff.lint]
 extend-ignore = [
     "S101",
     "D203",

@@ -2,7 +2,6 @@

 from types import MappingProxyType

-
 TEST_COMIC_FIELDS = {
     "series": "Long Series Name",
     "issue": "001",