From 32f8cb0f226877193c7c5084d231b6c6e7b7001a Mon Sep 17 00:00:00 2001 From: AJ Slater Date: Sat, 24 Feb 2024 19:40:33 -0800 Subject: [PATCH] lint and add type notations --- comicfn2dict/cli.py | 5 +-- comicfn2dict/log.py | 2 +- comicfn2dict/parse.py | 26 ++++++-------- comicfn2dict/regex.py | 76 ++++++++++++++++++++-------------------- comicfn2dict/unparse.py | 11 +++--- pyproject.toml | 4 +-- tests/comic_filenames.py | 1 - 7 files changed, 61 insertions(+), 64 deletions(-) diff --git a/comicfn2dict/cli.py b/comicfn2dict/cli.py index c0a7199..e8ab8cd 100755 --- a/comicfn2dict/cli.py +++ b/comicfn2dict/cli.py @@ -3,10 +3,11 @@ from argparse import ArgumentParser from pathlib import Path from pprint import pprint + from comicfn2dict.parse import ComicFilenameParser -def main(): +def main() -> None: """Test parser.""" description = "Comic book archive read/write tool." parser = ArgumentParser(description=description) @@ -23,7 +24,7 @@ def main(): cfnparser = ComicFilenameParser(name, verbose=args.verbose) metadata = cfnparser.parse() if args.verbose: - print("=" * 80) + print("=" * 80) # noqa:T201 pprint(metadata) # noqa:T203 diff --git a/comicfn2dict/log.py b/comicfn2dict/log.py index 3265889..0626325 100644 --- a/comicfn2dict/log.py +++ b/comicfn2dict/log.py @@ -6,4 +6,4 @@ def print_log_header(label: str) -> None: prefix = "-" * 3 + label suffix_len = 80 - len(prefix) suffix = "-" * suffix_len - print(prefix + suffix) + print(prefix + suffix) # noqa: T201 diff --git a/comicfn2dict/parse.py b/comicfn2dict/parse.py index a754d22..0cca5af 100644 --- a/comicfn2dict/parse.py +++ b/comicfn2dict/parse.py @@ -1,10 +1,11 @@ """Parse comic book archive names using the simple 'parse' parser.""" -from pprint import pformat from calendar import month_abbr from copy import copy from pathlib import Path +from pprint import pformat from re import Match, Pattern from sys import maxsize + from comicfn2dict.log import print_log_header from comicfn2dict.regex import ( ALPHA_MONTH_RANGE_RE, @@ -18,8 +19,8 @@ from comicfn2dict.regex import ( ORIGINAL_FORMAT_SCAN_INFO_RE, ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE, PUBLISHER_AMBIGUOUS_RE, - PUBLISHER_UNAMBIGUOUS_RE, PUBLISHER_AMBIGUOUS_TOKEN_RE, + PUBLISHER_UNAMBIGUOUS_RE, PUBLISHER_UNAMBIGUOUS_TOKEN_RE, REGEX_SUBS, REMAINING_GROUP_RE, @@ -51,10 +52,7 @@ class ComicFilenameParser: if value not in self._path_indexes: # XXX This is fragile, but it's difficult to calculate the original # position at match time from the ever changing _unparsed_path. - if key == "ext": - index = self.path.rfind(value) - else: - index = self.path.find(value) + index = self.path.rfind(value) if key == "ext" else self.path.find(value) self._path_indexes[value] = index return self._path_indexes[value] @@ -65,8 +63,8 @@ class ComicFilenameParser: combined = {} for key in self.metadata: combined[key] = (self.metadata.get(key), self.path_index(key)) - print(" " + self._unparsed_path) - print(" " + pformat(combined)) + print(" " + self._unparsed_path) # noqa: T201 + print(" " + pformat(combined)) # noqa: T201 def _parse_ext(self) -> None: """Pop the extension from the pathname.""" @@ -121,7 +119,7 @@ class ComicFilenameParser: parts.append(token) self._unparsed_path = TOKEN_DELIMETER.join(parts) - def _parse_items( + def _parse_items( # noqa: PLR0913 self, regex: Pattern, require_all: bool = False, @@ -244,7 +242,7 @@ class ComicFilenameParser: self._log("After publisher") def _is_at_title_position(self, value: str) -> bool: - """Does the title come after series and one other token if they exist.""" + """Title is in correct position.""" title_index = self.path.find(value) # Titles must come after series but before format and scan_info @@ -286,9 +284,8 @@ class ComicFilenameParser: if not match: return token value = match.group() - if key == "title": - if not self._is_at_title_position(value): - return token + if key == "title" and not self._is_at_title_position(value): + return token value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value) value = self._grouping_operators_strip(value) if value: @@ -311,7 +308,6 @@ class ComicFilenameParser: unused_tokens.append(unused_token) remaining_key_index += 1 - print(f"{unused_tokens=}") self._unparsed_path = " ".join(unused_tokens) if unused_tokens else "" self._log("After Series & Title") @@ -363,6 +359,6 @@ class ComicFilenameParser: def comicfn2dict( path: str | Path, verbose: int = 0 ) -> dict[str, str | tuple[str, ...]]: - """Simple API.""" + """Simplfily the API.""" parser = ComicFilenameParser(path, verbose=verbose) return parser.parse() diff --git a/comicfn2dict/regex.py b/comicfn2dict/regex.py index 9168438..6daee7d 100644 --- a/comicfn2dict/regex.py +++ b/comicfn2dict/regex.py @@ -1,16 +1,8 @@ """Parsing regexes.""" -import re +from re import IGNORECASE, Pattern, compile from types import MappingProxyType - -def re_compile(exp, parenthify=False): - """Compile regex with options.""" - if parenthify: - exp = r"\(" + exp + r"\)" - return re.compile(exp, flags=re.IGNORECASE) - - -PUBLISHERS_UNAMBIGUOUS = ( +PUBLISHERS_UNAMBIGUOUS: tuple[str, ...] = ( r"Abrams ComicArts", r"BOOM! Studios", r"DC(\sComics)?", @@ -26,7 +18,7 @@ PUBLISHERS_UNAMBIGUOUS = ( r"SelfMadeHero", r"Titan Comics", ) -PUBLISHERS_AMBIGUOUS = ( +PUBLISHERS_AMBIGUOUS: tuple[str, ...] = ( r"Marvel", r"Heavy Metal", r"Epic", @@ -34,7 +26,7 @@ PUBLISHERS_AMBIGUOUS = ( r"Mirage", ) -ORIGINAL_FORMAT_PATTERNS = ( +ORIGINAL_FORMAT_PATTERNS: tuple[str, ...] = ( r"Anthology", r"(One|1)[-\s]Shot", r"Annual", @@ -63,7 +55,7 @@ ORIGINAL_FORMAT_PATTERNS = ( r"Web([-\s]?(Comic|Rip))?", ) -MONTHS = ( +MONTHS: tuple[str, ...] = ( r"Jan(uary)?", r"Feb(ruary)?", r"Mar(ch)?", @@ -78,7 +70,15 @@ MONTHS = ( r"Dec(ember)?", ) -TOKEN_DELIMETER = r"/" +TOKEN_DELIMETER: str = r"/" + + +def re_compile(exp: str, parenthify: bool = False) -> Pattern: + """Compile regex with options.""" + if parenthify: + exp = r"\(" + exp + r"\)" + return compile(exp, flags=IGNORECASE) + # CLEAN _TOKEN_DIVIDERS_RE = re_compile(r":") @@ -87,7 +87,7 @@ _EXTRA_SPACES_RE = re_compile(r"\s\s+") _LEFT_PAREN_EQUIVALENT_RE = re_compile(r"\[") _RIGHT_PAREN_EQUIVALENT_RE = re_compile(r"\]") _DOUBLE_UNDERSCORE_RE = re_compile(r"__(.*)__") -REGEX_SUBS: MappingProxyType[re.Pattern, tuple[str, int]] = MappingProxyType( +REGEX_SUBS: MappingProxyType[Pattern, tuple[str, int]] = MappingProxyType( { _DOUBLE_UNDERSCORE_RE: (r"(\1)", 0), _TOKEN_DIVIDERS_RE: (TOKEN_DELIMETER, 1), @@ -104,7 +104,7 @@ _MONTH_ALPHA_RE_EXP = r"(" + "(?P" + r"|".join(MONTHS) + r")\.?" r" _MONTH_NUMERIC_RE_EXP = r"(?P0?\d|1[0-2]?)" _MONTH_RE_EXP = r"(" + _MONTH_ALPHA_RE_EXP + r"|" + _MONTH_NUMERIC_RE_EXP + r")" _ALPHA_MONTH_RANGE = ( - r"\b" + r"\b" # noqa: ISC003 + r"(" + r"|".join(MONTHS) + r")" @@ -115,7 +115,7 @@ _ALPHA_MONTH_RANGE = ( + r")" + r")\b" ) -ALPHA_MONTH_RANGE_RE = re_compile(_ALPHA_MONTH_RANGE) +ALPHA_MONTH_RANGE_RE: Pattern = re_compile(_ALPHA_MONTH_RANGE) _DAY_RE_EXP = r"(?P([0-2]?\d|(3)[0-1]))" _DATE_DELIM = r"[-\s]+" @@ -144,10 +144,10 @@ _YEAR_FIRST_DATE_RE_EXP = ( + r"\b\)?)" ) -MONTH_FIRST_DATE_RE = re_compile(_MONTH_FIRST_DATE_RE_EXP) -YEAR_FIRST_DATE_RE = re_compile(_YEAR_FIRST_DATE_RE_EXP) -YEAR_TOKEN_RE = re_compile(_YEAR_RE_EXP, parenthify=True) -YEAR_END_RE = re_compile(_YEAR_RE_EXP + r"\/|$") +MONTH_FIRST_DATE_RE: Pattern = re_compile(_MONTH_FIRST_DATE_RE_EXP) +YEAR_FIRST_DATE_RE: Pattern = re_compile(_YEAR_FIRST_DATE_RE_EXP) +YEAR_TOKEN_RE: Pattern = re_compile(_YEAR_RE_EXP, parenthify=True) +YEAR_END_RE: Pattern = re_compile(_YEAR_RE_EXP + r"\/|$") # PAREN GROUPS _OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS) @@ -157,38 +157,38 @@ _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = ( _ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP # + r")?" ) # Keep this even though comicfn2dict doesn't use it directly -ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True) -ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile( +ORIGINAL_FORMAT_RE: Pattern = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True) +ORIGINAL_FORMAT_SCAN_INFO_RE: Pattern = re_compile( _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True ) -ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE = re_compile( +ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE: Pattern = re_compile( r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)" ) -SCAN_INFO_SECONDARY_RE = re_compile(r"\b(?Pc2c)\b") +SCAN_INFO_SECONDARY_RE: Pattern = re_compile(r"\b(?Pc2c)\b") # ISSUE _ISSUE_RE_EXP = r"(?P\w*(½|\d+)[\.\d+]*\w*)" _ISSUE_COUNT_RE_EXP = r"\(of\s*(?P\d+)\)" -ISSUE_NUMBER_RE = re_compile( +ISSUE_NUMBER_RE: Pattern = re_compile( r"(\(?#" + _ISSUE_RE_EXP + r"\)?)" + r"(\W*" + _ISSUE_COUNT_RE_EXP + r")?" ) -ISSUE_WITH_COUNT_RE = re_compile( +ISSUE_WITH_COUNT_RE: Pattern = re_compile( r"(\(?" + _ISSUE_RE_EXP + r"\)?" + r"\W*" + _ISSUE_COUNT_RE_EXP + r")" ) -ISSUE_END_RE = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))") -ISSUE_BEGIN_RE = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])") +ISSUE_END_RE: Pattern = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))") +ISSUE_BEGIN_RE: Pattern = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])") # Volume _VOLUME_COUNT_RE_EXP = r"\(of\s*(?P\d+)\)" -VOLUME_RE = re_compile( - r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P\d+)" +VOLUME_RE: Pattern = re_compile( + r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P\d+)" # noqa: ISC003 r"(\W*" + _VOLUME_COUNT_RE_EXP + r")?" + r")" ) -VOLUME_WITH_COUNT_RE = re_compile( +VOLUME_WITH_COUNT_RE: Pattern = re_compile( r"(\(?" + r"(?P\d+)" + r"\)?" + r"\W*" + _VOLUME_COUNT_RE_EXP + r")" ) -BOOK_VOLUME_RE = re_compile(r"(?P" + r"book\s*(?P<volume>\d+)" + r")") +BOOK_VOLUME_RE: Pattern = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")") # Publisher _PUBLISHER_UNAMBIGUOUS_RE_EXP = ( @@ -197,15 +197,15 @@ _PUBLISHER_UNAMBIGUOUS_RE_EXP = ( _PUBLISHER_AMBIGUOUS_RE_EXP = ( r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_AMBIGUOUS) + r")\b)" ) -PUBLISHER_UNAMBIGUOUS_TOKEN_RE = re_compile( +PUBLISHER_UNAMBIGUOUS_TOKEN_RE: Pattern = re_compile( r"(^|\/)" + _PUBLISHER_UNAMBIGUOUS_RE_EXP + r"($|\/)" ) -PUBLISHER_AMBIGUOUS_TOKEN_RE = re_compile( +PUBLISHER_AMBIGUOUS_TOKEN_RE: Pattern = re_compile( r"(^|\/)" + _PUBLISHER_AMBIGUOUS_RE_EXP + r"($|\/)" ) -PUBLISHER_UNAMBIGUOUS_RE = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP) +PUBLISHER_UNAMBIGUOUS_RE: Pattern = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP) PUBLISHER_AMBIGUOUS_RE = re_compile(_PUBLISHER_AMBIGUOUS_RE_EXP) # LONG STRINGS -REMAINING_GROUP_RE = re_compile(r"^[^\(].*[^\)]") -NON_NUMBER_DOT_RE = re_compile(r"(\D)\.(\D)") +REMAINING_GROUP_RE: Pattern = re_compile(r"^[^\(].*[^\)]") +NON_NUMBER_DOT_RE: Pattern = re_compile(r"(\D)\.(\D)") diff --git a/comicfn2dict/unparse.py b/comicfn2dict/unparse.py index 7907113..2b454b1 100644 --- a/comicfn2dict/unparse.py +++ b/comicfn2dict/unparse.py @@ -1,8 +1,9 @@ """Unparse comic filenames.""" +from calendar import month_abbr from collections.abc import Callable, Mapping, Sequence from contextlib import suppress -from calendar import month_abbr from types import MappingProxyType + from comicfn2dict.log import print_log_header @@ -44,7 +45,7 @@ class ComicFilenameSerializer: if not self._debug: return print_log_header(label) - print(fn) + print(fn) # noqa: T201 def _add_date(self) -> None: """Construct date from Y-m-D if they exist.""" @@ -73,8 +74,7 @@ class ComicFilenameSerializer: if val in _EMPTY_VALUES: return "" final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt - token = final_fmt.format(val).strip() - return token + return final_fmt.format(val).strip() def _add_remainder(self) -> str: """Add the remainders specially.""" @@ -109,12 +109,13 @@ class ComicFilenameSerializer: return fn def __init__(self, metadata: Mapping, ext: bool = True, verbose: int = 0): + """Initialize.""" self.metadata: Mapping = metadata self._ext: bool = ext self._debug: bool = bool(verbose) def dict2comicfn(md: Mapping, ext: bool = True, verbose: int = 0) -> str: - """Simple API.""" + """Simplify API.""" serializer = ComicFilenameSerializer(md, ext=ext, verbose=verbose) return serializer.serialize() diff --git a/pyproject.toml b/pyproject.toml index 3d63bad..0fc18b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "comicfn2dict" -version = "0.2.0a3" +version = "0.2.0a4" description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli." license = "GPL-3.0-only" authors = ["AJ Slater <aj@slater.net>"] @@ -125,7 +125,7 @@ exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*, extend-exclude = ["typings"] target-version = "py310" -[tool.lint.ruff] +[tool.ruff.lint] extend-ignore = [ "S101", "D203", diff --git a/tests/comic_filenames.py b/tests/comic_filenames.py index b52edf9..3d00ddd 100644 --- a/tests/comic_filenames.py +++ b/tests/comic_filenames.py @@ -2,7 +2,6 @@ from types import MappingProxyType - TEST_COMIC_FIELDS = { "series": "Long Series Name", "issue": "001",