lint and add type annotations

This commit is contained in:
AJ Slater 2024-02-24 19:40:33 -08:00
parent 0a17bbc0d9
commit 32f8cb0f22
7 changed files with 61 additions and 64 deletions

View File

@ -3,10 +3,11 @@
from argparse import ArgumentParser from argparse import ArgumentParser
from pathlib import Path from pathlib import Path
from pprint import pprint from pprint import pprint
from comicfn2dict.parse import ComicFilenameParser from comicfn2dict.parse import ComicFilenameParser
def main(): def main() -> None:
"""Test parser.""" """Test parser."""
description = "Comic book archive read/write tool." description = "Comic book archive read/write tool."
parser = ArgumentParser(description=description) parser = ArgumentParser(description=description)
@ -23,7 +24,7 @@ def main():
cfnparser = ComicFilenameParser(name, verbose=args.verbose) cfnparser = ComicFilenameParser(name, verbose=args.verbose)
metadata = cfnparser.parse() metadata = cfnparser.parse()
if args.verbose: if args.verbose:
print("=" * 80) print("=" * 80) # noqa:T201
pprint(metadata) # noqa:T203 pprint(metadata) # noqa:T203

View File

@ -6,4 +6,4 @@ def print_log_header(label: str) -> None:
prefix = "-" * 3 + label prefix = "-" * 3 + label
suffix_len = 80 - len(prefix) suffix_len = 80 - len(prefix)
suffix = "-" * suffix_len suffix = "-" * suffix_len
print(prefix + suffix) print(prefix + suffix) # noqa: T201

View File

@ -1,10 +1,11 @@
"""Parse comic book archive names using the simple 'parse' parser.""" """Parse comic book archive names using the simple 'parse' parser."""
from pprint import pformat
from calendar import month_abbr from calendar import month_abbr
from copy import copy from copy import copy
from pathlib import Path from pathlib import Path
from pprint import pformat
from re import Match, Pattern from re import Match, Pattern
from sys import maxsize from sys import maxsize
from comicfn2dict.log import print_log_header from comicfn2dict.log import print_log_header
from comicfn2dict.regex import ( from comicfn2dict.regex import (
ALPHA_MONTH_RANGE_RE, ALPHA_MONTH_RANGE_RE,
@ -18,8 +19,8 @@ from comicfn2dict.regex import (
ORIGINAL_FORMAT_SCAN_INFO_RE, ORIGINAL_FORMAT_SCAN_INFO_RE,
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE, ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
PUBLISHER_AMBIGUOUS_RE, PUBLISHER_AMBIGUOUS_RE,
PUBLISHER_UNAMBIGUOUS_RE,
PUBLISHER_AMBIGUOUS_TOKEN_RE, PUBLISHER_AMBIGUOUS_TOKEN_RE,
PUBLISHER_UNAMBIGUOUS_RE,
PUBLISHER_UNAMBIGUOUS_TOKEN_RE, PUBLISHER_UNAMBIGUOUS_TOKEN_RE,
REGEX_SUBS, REGEX_SUBS,
REMAINING_GROUP_RE, REMAINING_GROUP_RE,
@ -51,10 +52,7 @@ class ComicFilenameParser:
if value not in self._path_indexes: if value not in self._path_indexes:
# XXX This is fragile, but it's difficult to calculate the original # XXX This is fragile, but it's difficult to calculate the original
# position at match time from the ever changing _unparsed_path. # position at match time from the ever changing _unparsed_path.
if key == "ext": index = self.path.rfind(value) if key == "ext" else self.path.find(value)
index = self.path.rfind(value)
else:
index = self.path.find(value)
self._path_indexes[value] = index self._path_indexes[value] = index
return self._path_indexes[value] return self._path_indexes[value]
@ -65,8 +63,8 @@ class ComicFilenameParser:
combined = {} combined = {}
for key in self.metadata: for key in self.metadata:
combined[key] = (self.metadata.get(key), self.path_index(key)) combined[key] = (self.metadata.get(key), self.path_index(key))
print(" " + self._unparsed_path) print(" " + self._unparsed_path) # noqa: T201
print(" " + pformat(combined)) print(" " + pformat(combined)) # noqa: T201
def _parse_ext(self) -> None: def _parse_ext(self) -> None:
"""Pop the extension from the pathname.""" """Pop the extension from the pathname."""
@ -121,7 +119,7 @@ class ComicFilenameParser:
parts.append(token) parts.append(token)
self._unparsed_path = TOKEN_DELIMETER.join(parts) self._unparsed_path = TOKEN_DELIMETER.join(parts)
def _parse_items( def _parse_items( # noqa: PLR0913
self, self,
regex: Pattern, regex: Pattern,
require_all: bool = False, require_all: bool = False,
@ -244,7 +242,7 @@ class ComicFilenameParser:
self._log("After publisher") self._log("After publisher")
def _is_at_title_position(self, value: str) -> bool: def _is_at_title_position(self, value: str) -> bool:
"""Does the title come after series and one other token if they exist.""" """Title is in correct position."""
title_index = self.path.find(value) title_index = self.path.find(value)
# Titles must come after series but before format and scan_info # Titles must come after series but before format and scan_info
@ -286,8 +284,7 @@ class ComicFilenameParser:
if not match: if not match:
return token return token
value = match.group() value = match.group()
if key == "title": if key == "title" and not self._is_at_title_position(value):
if not self._is_at_title_position(value):
return token return token
value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value) value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value)
value = self._grouping_operators_strip(value) value = self._grouping_operators_strip(value)
@ -311,7 +308,6 @@ class ComicFilenameParser:
unused_tokens.append(unused_token) unused_tokens.append(unused_token)
remaining_key_index += 1 remaining_key_index += 1
print(f"{unused_tokens=}")
self._unparsed_path = " ".join(unused_tokens) if unused_tokens else "" self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
self._log("After Series & Title") self._log("After Series & Title")
@ -363,6 +359,6 @@ class ComicFilenameParser:
def comicfn2dict( def comicfn2dict(
path: str | Path, verbose: int = 0 path: str | Path, verbose: int = 0
) -> dict[str, str | tuple[str, ...]]: ) -> dict[str, str | tuple[str, ...]]:
"""Simple API.""" """Simplify the API."""
parser = ComicFilenameParser(path, verbose=verbose) parser = ComicFilenameParser(path, verbose=verbose)
return parser.parse() return parser.parse()

View File

@ -1,16 +1,8 @@
"""Parsing regexes.""" """Parsing regexes."""
import re from re import IGNORECASE, Pattern, compile
from types import MappingProxyType from types import MappingProxyType
PUBLISHERS_UNAMBIGUOUS: tuple[str, ...] = (
def re_compile(exp, parenthify=False):
"""Compile regex with options."""
if parenthify:
exp = r"\(" + exp + r"\)"
return re.compile(exp, flags=re.IGNORECASE)
PUBLISHERS_UNAMBIGUOUS = (
r"Abrams ComicArts", r"Abrams ComicArts",
r"BOOM! Studios", r"BOOM! Studios",
r"DC(\sComics)?", r"DC(\sComics)?",
@ -26,7 +18,7 @@ PUBLISHERS_UNAMBIGUOUS = (
r"SelfMadeHero", r"SelfMadeHero",
r"Titan Comics", r"Titan Comics",
) )
PUBLISHERS_AMBIGUOUS = ( PUBLISHERS_AMBIGUOUS: tuple[str, ...] = (
r"Marvel", r"Marvel",
r"Heavy Metal", r"Heavy Metal",
r"Epic", r"Epic",
@ -34,7 +26,7 @@ PUBLISHERS_AMBIGUOUS = (
r"Mirage", r"Mirage",
) )
ORIGINAL_FORMAT_PATTERNS = ( ORIGINAL_FORMAT_PATTERNS: tuple[str, ...] = (
r"Anthology", r"Anthology",
r"(One|1)[-\s]Shot", r"(One|1)[-\s]Shot",
r"Annual", r"Annual",
@ -63,7 +55,7 @@ ORIGINAL_FORMAT_PATTERNS = (
r"Web([-\s]?(Comic|Rip))?", r"Web([-\s]?(Comic|Rip))?",
) )
MONTHS = ( MONTHS: tuple[str, ...] = (
r"Jan(uary)?", r"Jan(uary)?",
r"Feb(ruary)?", r"Feb(ruary)?",
r"Mar(ch)?", r"Mar(ch)?",
@ -78,7 +70,15 @@ MONTHS = (
r"Dec(ember)?", r"Dec(ember)?",
) )
TOKEN_DELIMETER = r"/" TOKEN_DELIMETER: str = r"/"
def re_compile(exp: str, parenthify: bool = False) -> Pattern:
"""Compile regex with options."""
if parenthify:
exp = r"\(" + exp + r"\)"
return compile(exp, flags=IGNORECASE)
# CLEAN # CLEAN
_TOKEN_DIVIDERS_RE = re_compile(r":") _TOKEN_DIVIDERS_RE = re_compile(r":")
@ -87,7 +87,7 @@ _EXTRA_SPACES_RE = re_compile(r"\s\s+")
_LEFT_PAREN_EQUIVALENT_RE = re_compile(r"\[") _LEFT_PAREN_EQUIVALENT_RE = re_compile(r"\[")
_RIGHT_PAREN_EQUIVALENT_RE = re_compile(r"\]") _RIGHT_PAREN_EQUIVALENT_RE = re_compile(r"\]")
_DOUBLE_UNDERSCORE_RE = re_compile(r"__(.*)__") _DOUBLE_UNDERSCORE_RE = re_compile(r"__(.*)__")
REGEX_SUBS: MappingProxyType[re.Pattern, tuple[str, int]] = MappingProxyType( REGEX_SUBS: MappingProxyType[Pattern, tuple[str, int]] = MappingProxyType(
{ {
_DOUBLE_UNDERSCORE_RE: (r"(\1)", 0), _DOUBLE_UNDERSCORE_RE: (r"(\1)", 0),
_TOKEN_DIVIDERS_RE: (TOKEN_DELIMETER, 1), _TOKEN_DIVIDERS_RE: (TOKEN_DELIMETER, 1),
@ -104,7 +104,7 @@ _MONTH_ALPHA_RE_EXP = r"(" + "(?P<alpha_month>" + r"|".join(MONTHS) + r")\.?" r"
_MONTH_NUMERIC_RE_EXP = r"(?P<month>0?\d|1[0-2]?)" _MONTH_NUMERIC_RE_EXP = r"(?P<month>0?\d|1[0-2]?)"
_MONTH_RE_EXP = r"(" + _MONTH_ALPHA_RE_EXP + r"|" + _MONTH_NUMERIC_RE_EXP + r")" _MONTH_RE_EXP = r"(" + _MONTH_ALPHA_RE_EXP + r"|" + _MONTH_NUMERIC_RE_EXP + r")"
_ALPHA_MONTH_RANGE = ( _ALPHA_MONTH_RANGE = (
r"\b" r"\b" # noqa: ISC003
+ r"(" + r"("
+ r"|".join(MONTHS) + r"|".join(MONTHS)
+ r")" + r")"
@ -115,7 +115,7 @@ _ALPHA_MONTH_RANGE = (
+ r")" + r")"
+ r")\b" + r")\b"
) )
ALPHA_MONTH_RANGE_RE = re_compile(_ALPHA_MONTH_RANGE) ALPHA_MONTH_RANGE_RE: Pattern = re_compile(_ALPHA_MONTH_RANGE)
_DAY_RE_EXP = r"(?P<day>([0-2]?\d|(3)[0-1]))" _DAY_RE_EXP = r"(?P<day>([0-2]?\d|(3)[0-1]))"
_DATE_DELIM = r"[-\s]+" _DATE_DELIM = r"[-\s]+"
@ -144,10 +144,10 @@ _YEAR_FIRST_DATE_RE_EXP = (
+ r"\b\)?)" + r"\b\)?)"
) )
MONTH_FIRST_DATE_RE = re_compile(_MONTH_FIRST_DATE_RE_EXP) MONTH_FIRST_DATE_RE: Pattern = re_compile(_MONTH_FIRST_DATE_RE_EXP)
YEAR_FIRST_DATE_RE = re_compile(_YEAR_FIRST_DATE_RE_EXP) YEAR_FIRST_DATE_RE: Pattern = re_compile(_YEAR_FIRST_DATE_RE_EXP)
YEAR_TOKEN_RE = re_compile(_YEAR_RE_EXP, parenthify=True) YEAR_TOKEN_RE: Pattern = re_compile(_YEAR_RE_EXP, parenthify=True)
YEAR_END_RE = re_compile(_YEAR_RE_EXP + r"\/|$") YEAR_END_RE: Pattern = re_compile(_YEAR_RE_EXP + r"\/|$")
# PAREN GROUPS # PAREN GROUPS
_OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS) _OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS)
@ -157,38 +157,38 @@ _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = (
_ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP # + r")?" _ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP # + r")?"
) )
# Keep this even though comicfn2dict doesn't use it directly # Keep this even though comicfn2dict doesn't use it directly
ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True) ORIGINAL_FORMAT_RE: Pattern = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile( ORIGINAL_FORMAT_SCAN_INFO_RE: Pattern = re_compile(
_ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True
) )
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE = re_compile( ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE: Pattern = re_compile(
r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)" r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)"
) )
SCAN_INFO_SECONDARY_RE = re_compile(r"\b(?P<secondary_scan_info>c2c)\b") SCAN_INFO_SECONDARY_RE: Pattern = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")
# ISSUE # ISSUE
_ISSUE_RE_EXP = r"(?P<issue>\w*(½|\d+)[\.\d+]*\w*)" _ISSUE_RE_EXP = r"(?P<issue>\w*(½|\d+)[\.\d+]*\w*)"
_ISSUE_COUNT_RE_EXP = r"\(of\s*(?P<issue_count>\d+)\)" _ISSUE_COUNT_RE_EXP = r"\(of\s*(?P<issue_count>\d+)\)"
ISSUE_NUMBER_RE = re_compile( ISSUE_NUMBER_RE: Pattern = re_compile(
r"(\(?#" + _ISSUE_RE_EXP + r"\)?)" + r"(\W*" + _ISSUE_COUNT_RE_EXP + r")?" r"(\(?#" + _ISSUE_RE_EXP + r"\)?)" + r"(\W*" + _ISSUE_COUNT_RE_EXP + r")?"
) )
ISSUE_WITH_COUNT_RE = re_compile( ISSUE_WITH_COUNT_RE: Pattern = re_compile(
r"(\(?" + _ISSUE_RE_EXP + r"\)?" + r"\W*" + _ISSUE_COUNT_RE_EXP + r")" r"(\(?" + _ISSUE_RE_EXP + r"\)?" + r"\W*" + _ISSUE_COUNT_RE_EXP + r")"
) )
ISSUE_END_RE = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))") ISSUE_END_RE: Pattern = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
ISSUE_BEGIN_RE = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])") ISSUE_BEGIN_RE: Pattern = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")
# Volume # Volume
_VOLUME_COUNT_RE_EXP = r"\(of\s*(?P<volume_count>\d+)\)" _VOLUME_COUNT_RE_EXP = r"\(of\s*(?P<volume_count>\d+)\)"
VOLUME_RE = re_compile( VOLUME_RE: Pattern = re_compile(
r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)" r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)" # noqa: ISC003
r"(\W*" + _VOLUME_COUNT_RE_EXP + r")?" + r")" r"(\W*" + _VOLUME_COUNT_RE_EXP + r")?" + r")"
) )
VOLUME_WITH_COUNT_RE = re_compile( VOLUME_WITH_COUNT_RE: Pattern = re_compile(
r"(\(?" + r"(?P<volume>\d+)" + r"\)?" + r"\W*" + _VOLUME_COUNT_RE_EXP + r")" r"(\(?" + r"(?P<volume>\d+)" + r"\)?" + r"\W*" + _VOLUME_COUNT_RE_EXP + r")"
) )
BOOK_VOLUME_RE = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")") BOOK_VOLUME_RE: Pattern = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")
# Publisher # Publisher
_PUBLISHER_UNAMBIGUOUS_RE_EXP = ( _PUBLISHER_UNAMBIGUOUS_RE_EXP = (
@ -197,15 +197,15 @@ _PUBLISHER_UNAMBIGUOUS_RE_EXP = (
_PUBLISHER_AMBIGUOUS_RE_EXP = ( _PUBLISHER_AMBIGUOUS_RE_EXP = (
r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_AMBIGUOUS) + r")\b)" r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_AMBIGUOUS) + r")\b)"
) )
PUBLISHER_UNAMBIGUOUS_TOKEN_RE = re_compile( PUBLISHER_UNAMBIGUOUS_TOKEN_RE: Pattern = re_compile(
r"(^|\/)" + _PUBLISHER_UNAMBIGUOUS_RE_EXP + r"($|\/)" r"(^|\/)" + _PUBLISHER_UNAMBIGUOUS_RE_EXP + r"($|\/)"
) )
PUBLISHER_AMBIGUOUS_TOKEN_RE = re_compile( PUBLISHER_AMBIGUOUS_TOKEN_RE: Pattern = re_compile(
r"(^|\/)" + _PUBLISHER_AMBIGUOUS_RE_EXP + r"($|\/)" r"(^|\/)" + _PUBLISHER_AMBIGUOUS_RE_EXP + r"($|\/)"
) )
PUBLISHER_UNAMBIGUOUS_RE = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP) PUBLISHER_UNAMBIGUOUS_RE: Pattern = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
PUBLISHER_AMBIGUOUS_RE = re_compile(_PUBLISHER_AMBIGUOUS_RE_EXP) PUBLISHER_AMBIGUOUS_RE = re_compile(_PUBLISHER_AMBIGUOUS_RE_EXP)
# LONG STRINGS # LONG STRINGS
REMAINING_GROUP_RE = re_compile(r"^[^\(].*[^\)]") REMAINING_GROUP_RE: Pattern = re_compile(r"^[^\(].*[^\)]")
NON_NUMBER_DOT_RE = re_compile(r"(\D)\.(\D)") NON_NUMBER_DOT_RE: Pattern = re_compile(r"(\D)\.(\D)")

View File

@ -1,8 +1,9 @@
"""Unparse comic filenames.""" """Unparse comic filenames."""
from calendar import month_abbr
from collections.abc import Callable, Mapping, Sequence from collections.abc import Callable, Mapping, Sequence
from contextlib import suppress from contextlib import suppress
from calendar import month_abbr
from types import MappingProxyType from types import MappingProxyType
from comicfn2dict.log import print_log_header from comicfn2dict.log import print_log_header
@ -44,7 +45,7 @@ class ComicFilenameSerializer:
if not self._debug: if not self._debug:
return return
print_log_header(label) print_log_header(label)
print(fn) print(fn) # noqa: T201
def _add_date(self) -> None: def _add_date(self) -> None:
"""Construct date from Y-m-D if they exist.""" """Construct date from Y-m-D if they exist."""
@ -73,8 +74,7 @@ class ComicFilenameSerializer:
if val in _EMPTY_VALUES: if val in _EMPTY_VALUES:
return "" return ""
final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
token = final_fmt.format(val).strip() return final_fmt.format(val).strip()
return token
def _add_remainder(self) -> str: def _add_remainder(self) -> str:
"""Add the remainders specially.""" """Add the remainders specially."""
@ -109,12 +109,13 @@ class ComicFilenameSerializer:
return fn return fn
def __init__(self, metadata: Mapping, ext: bool = True, verbose: int = 0): def __init__(self, metadata: Mapping, ext: bool = True, verbose: int = 0):
"""Initialize."""
self.metadata: Mapping = metadata self.metadata: Mapping = metadata
self._ext: bool = ext self._ext: bool = ext
self._debug: bool = bool(verbose) self._debug: bool = bool(verbose)
def dict2comicfn(md: Mapping, ext: bool = True, verbose: int = 0) -> str: def dict2comicfn(md: Mapping, ext: bool = True, verbose: int = 0) -> str:
"""Simple API.""" """Simplify API."""
serializer = ComicFilenameSerializer(md, ext=ext, verbose=verbose) serializer = ComicFilenameSerializer(md, ext=ext, verbose=verbose)
return serializer.serialize() return serializer.serialize()

View File

@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry] [tool.poetry]
name = "comicfn2dict" name = "comicfn2dict"
version = "0.2.0a3" version = "0.2.0a4"
description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli." description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli."
license = "GPL-3.0-only" license = "GPL-3.0-only"
authors = ["AJ Slater <aj@slater.net>"] authors = ["AJ Slater <aj@slater.net>"]
@ -125,7 +125,7 @@ exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*,
extend-exclude = ["typings"] extend-exclude = ["typings"]
target-version = "py310" target-version = "py310"
[tool.lint.ruff] [tool.ruff.lint]
extend-ignore = [ extend-ignore = [
"S101", "S101",
"D203", "D203",

View File

@ -2,7 +2,6 @@
from types import MappingProxyType from types import MappingProxyType
TEST_COMIC_FIELDS = { TEST_COMIC_FIELDS = {
"series": "Long Series Name", "series": "Long Series Name",
"issue": "001", "issue": "001",