lint and add type annotations
This commit is contained in:
parent
0a17bbc0d9
commit
32f8cb0f22
@ -3,10 +3,11 @@
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
from pprint import pprint
|
||||
|
||||
from comicfn2dict.parse import ComicFilenameParser
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> None:
|
||||
"""Test parser."""
|
||||
description = "Comic book archive read/write tool."
|
||||
parser = ArgumentParser(description=description)
|
||||
@ -23,7 +24,7 @@ def main():
|
||||
cfnparser = ComicFilenameParser(name, verbose=args.verbose)
|
||||
metadata = cfnparser.parse()
|
||||
if args.verbose:
|
||||
print("=" * 80)
|
||||
print("=" * 80) # noqa:T201
|
||||
pprint(metadata) # noqa:T203
|
||||
|
||||
|
||||
|
@ -6,4 +6,4 @@ def print_log_header(label: str) -> None:
|
||||
prefix = "-" * 3 + label
|
||||
suffix_len = 80 - len(prefix)
|
||||
suffix = "-" * suffix_len
|
||||
print(prefix + suffix)
|
||||
print(prefix + suffix) # noqa: T201
|
||||
|
@ -1,10 +1,11 @@
|
||||
"""Parse comic book archive names using the simple 'parse' parser."""
|
||||
from pprint import pformat
|
||||
from calendar import month_abbr
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
from pprint import pformat
|
||||
from re import Match, Pattern
|
||||
from sys import maxsize
|
||||
|
||||
from comicfn2dict.log import print_log_header
|
||||
from comicfn2dict.regex import (
|
||||
ALPHA_MONTH_RANGE_RE,
|
||||
@ -18,8 +19,8 @@ from comicfn2dict.regex import (
|
||||
ORIGINAL_FORMAT_SCAN_INFO_RE,
|
||||
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
|
||||
PUBLISHER_AMBIGUOUS_RE,
|
||||
PUBLISHER_UNAMBIGUOUS_RE,
|
||||
PUBLISHER_AMBIGUOUS_TOKEN_RE,
|
||||
PUBLISHER_UNAMBIGUOUS_RE,
|
||||
PUBLISHER_UNAMBIGUOUS_TOKEN_RE,
|
||||
REGEX_SUBS,
|
||||
REMAINING_GROUP_RE,
|
||||
@ -51,10 +52,7 @@ class ComicFilenameParser:
|
||||
if value not in self._path_indexes:
|
||||
# XXX This is fragile, but it's difficult to calculate the original
|
||||
# position at match time from the ever changing _unparsed_path.
|
||||
if key == "ext":
|
||||
index = self.path.rfind(value)
|
||||
else:
|
||||
index = self.path.find(value)
|
||||
index = self.path.rfind(value) if key == "ext" else self.path.find(value)
|
||||
self._path_indexes[value] = index
|
||||
return self._path_indexes[value]
|
||||
|
||||
@ -65,8 +63,8 @@ class ComicFilenameParser:
|
||||
combined = {}
|
||||
for key in self.metadata:
|
||||
combined[key] = (self.metadata.get(key), self.path_index(key))
|
||||
print(" " + self._unparsed_path)
|
||||
print(" " + pformat(combined))
|
||||
print(" " + self._unparsed_path) # noqa: T201
|
||||
print(" " + pformat(combined)) # noqa: T201
|
||||
|
||||
def _parse_ext(self) -> None:
|
||||
"""Pop the extension from the pathname."""
|
||||
@ -121,7 +119,7 @@ class ComicFilenameParser:
|
||||
parts.append(token)
|
||||
self._unparsed_path = TOKEN_DELIMETER.join(parts)
|
||||
|
||||
def _parse_items(
|
||||
def _parse_items( # noqa: PLR0913
|
||||
self,
|
||||
regex: Pattern,
|
||||
require_all: bool = False,
|
||||
@ -244,7 +242,7 @@ class ComicFilenameParser:
|
||||
self._log("After publisher")
|
||||
|
||||
def _is_at_title_position(self, value: str) -> bool:
|
||||
"""Does the title come after series and one other token if they exist."""
|
||||
"""Title is in correct position."""
|
||||
title_index = self.path.find(value)
|
||||
|
||||
# Titles must come after series but before format and scan_info
|
||||
@ -286,9 +284,8 @@ class ComicFilenameParser:
|
||||
if not match:
|
||||
return token
|
||||
value = match.group()
|
||||
if key == "title":
|
||||
if not self._is_at_title_position(value):
|
||||
return token
|
||||
if key == "title" and not self._is_at_title_position(value):
|
||||
return token
|
||||
value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value)
|
||||
value = self._grouping_operators_strip(value)
|
||||
if value:
|
||||
@ -311,7 +308,6 @@ class ComicFilenameParser:
|
||||
unused_tokens.append(unused_token)
|
||||
remaining_key_index += 1
|
||||
|
||||
print(f"{unused_tokens=}")
|
||||
self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
|
||||
self._log("After Series & Title")
|
||||
|
||||
@ -363,6 +359,6 @@ class ComicFilenameParser:
|
||||
def comicfn2dict(
|
||||
path: str | Path, verbose: int = 0
|
||||
) -> dict[str, str | tuple[str, ...]]:
|
||||
"""Simple API."""
|
||||
"""Simplify the API."""
|
||||
parser = ComicFilenameParser(path, verbose=verbose)
|
||||
return parser.parse()
|
||||
|
@ -1,16 +1,8 @@
|
||||
"""Parsing regexes."""
|
||||
import re
|
||||
from re import IGNORECASE, Pattern, compile
|
||||
from types import MappingProxyType
|
||||
|
||||
|
||||
def re_compile(exp, parenthify=False):
|
||||
"""Compile regex with options."""
|
||||
if parenthify:
|
||||
exp = r"\(" + exp + r"\)"
|
||||
return re.compile(exp, flags=re.IGNORECASE)
|
||||
|
||||
|
||||
PUBLISHERS_UNAMBIGUOUS = (
|
||||
PUBLISHERS_UNAMBIGUOUS: tuple[str, ...] = (
|
||||
r"Abrams ComicArts",
|
||||
r"BOOM! Studios",
|
||||
r"DC(\sComics)?",
|
||||
@ -26,7 +18,7 @@ PUBLISHERS_UNAMBIGUOUS = (
|
||||
r"SelfMadeHero",
|
||||
r"Titan Comics",
|
||||
)
|
||||
PUBLISHERS_AMBIGUOUS = (
|
||||
PUBLISHERS_AMBIGUOUS: tuple[str, ...] = (
|
||||
r"Marvel",
|
||||
r"Heavy Metal",
|
||||
r"Epic",
|
||||
@ -34,7 +26,7 @@ PUBLISHERS_AMBIGUOUS = (
|
||||
r"Mirage",
|
||||
)
|
||||
|
||||
ORIGINAL_FORMAT_PATTERNS = (
|
||||
ORIGINAL_FORMAT_PATTERNS: tuple[str, ...] = (
|
||||
r"Anthology",
|
||||
r"(One|1)[-\s]Shot",
|
||||
r"Annual",
|
||||
@ -63,7 +55,7 @@ ORIGINAL_FORMAT_PATTERNS = (
|
||||
r"Web([-\s]?(Comic|Rip))?",
|
||||
)
|
||||
|
||||
MONTHS = (
|
||||
MONTHS: tuple[str, ...] = (
|
||||
r"Jan(uary)?",
|
||||
r"Feb(ruary)?",
|
||||
r"Mar(ch)?",
|
||||
@ -78,7 +70,15 @@ MONTHS = (
|
||||
r"Dec(ember)?",
|
||||
)
|
||||
|
||||
TOKEN_DELIMETER = r"/"
|
||||
TOKEN_DELIMETER: str = r"/"
|
||||
|
||||
|
||||
def re_compile(exp: str, parenthify: bool = False) -> Pattern:
|
||||
"""Compile regex with options."""
|
||||
if parenthify:
|
||||
exp = r"\(" + exp + r"\)"
|
||||
return compile(exp, flags=IGNORECASE)
|
||||
|
||||
|
||||
# CLEAN
|
||||
_TOKEN_DIVIDERS_RE = re_compile(r":")
|
||||
@ -87,7 +87,7 @@ _EXTRA_SPACES_RE = re_compile(r"\s\s+")
|
||||
_LEFT_PAREN_EQUIVALENT_RE = re_compile(r"\[")
|
||||
_RIGHT_PAREN_EQUIVALENT_RE = re_compile(r"\]")
|
||||
_DOUBLE_UNDERSCORE_RE = re_compile(r"__(.*)__")
|
||||
REGEX_SUBS: MappingProxyType[re.Pattern, tuple[str, int]] = MappingProxyType(
|
||||
REGEX_SUBS: MappingProxyType[Pattern, tuple[str, int]] = MappingProxyType(
|
||||
{
|
||||
_DOUBLE_UNDERSCORE_RE: (r"(\1)", 0),
|
||||
_TOKEN_DIVIDERS_RE: (TOKEN_DELIMETER, 1),
|
||||
@ -104,7 +104,7 @@ _MONTH_ALPHA_RE_EXP = r"(" + "(?P<alpha_month>" + r"|".join(MONTHS) + r")\.?" r"
|
||||
_MONTH_NUMERIC_RE_EXP = r"(?P<month>0?\d|1[0-2]?)"
|
||||
_MONTH_RE_EXP = r"(" + _MONTH_ALPHA_RE_EXP + r"|" + _MONTH_NUMERIC_RE_EXP + r")"
|
||||
_ALPHA_MONTH_RANGE = (
|
||||
r"\b"
|
||||
r"\b" # noqa: ISC003
|
||||
+ r"("
|
||||
+ r"|".join(MONTHS)
|
||||
+ r")"
|
||||
@ -115,7 +115,7 @@ _ALPHA_MONTH_RANGE = (
|
||||
+ r")"
|
||||
+ r")\b"
|
||||
)
|
||||
ALPHA_MONTH_RANGE_RE = re_compile(_ALPHA_MONTH_RANGE)
|
||||
ALPHA_MONTH_RANGE_RE: Pattern = re_compile(_ALPHA_MONTH_RANGE)
|
||||
|
||||
_DAY_RE_EXP = r"(?P<day>([0-2]?\d|(3)[0-1]))"
|
||||
_DATE_DELIM = r"[-\s]+"
|
||||
@ -144,10 +144,10 @@ _YEAR_FIRST_DATE_RE_EXP = (
|
||||
+ r"\b\)?)"
|
||||
)
|
||||
|
||||
MONTH_FIRST_DATE_RE = re_compile(_MONTH_FIRST_DATE_RE_EXP)
|
||||
YEAR_FIRST_DATE_RE = re_compile(_YEAR_FIRST_DATE_RE_EXP)
|
||||
YEAR_TOKEN_RE = re_compile(_YEAR_RE_EXP, parenthify=True)
|
||||
YEAR_END_RE = re_compile(_YEAR_RE_EXP + r"\/|$")
|
||||
MONTH_FIRST_DATE_RE: Pattern = re_compile(_MONTH_FIRST_DATE_RE_EXP)
|
||||
YEAR_FIRST_DATE_RE: Pattern = re_compile(_YEAR_FIRST_DATE_RE_EXP)
|
||||
YEAR_TOKEN_RE: Pattern = re_compile(_YEAR_RE_EXP, parenthify=True)
|
||||
YEAR_END_RE: Pattern = re_compile(_YEAR_RE_EXP + r"\/|$")
|
||||
|
||||
# PAREN GROUPS
|
||||
_OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS)
|
||||
@ -157,38 +157,38 @@ _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = (
|
||||
_ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP # + r")?"
|
||||
)
|
||||
# Keep this even though comicfn2dict doesn't use it directly
|
||||
ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
|
||||
ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile(
|
||||
ORIGINAL_FORMAT_RE: Pattern = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
|
||||
ORIGINAL_FORMAT_SCAN_INFO_RE: Pattern = re_compile(
|
||||
_ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True
|
||||
)
|
||||
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE = re_compile(
|
||||
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE: Pattern = re_compile(
|
||||
r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)"
|
||||
)
|
||||
|
||||
SCAN_INFO_SECONDARY_RE = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")
|
||||
SCAN_INFO_SECONDARY_RE: Pattern = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")
|
||||
|
||||
# ISSUE
|
||||
_ISSUE_RE_EXP = r"(?P<issue>\w*(½|\d+)[\.\d+]*\w*)"
|
||||
_ISSUE_COUNT_RE_EXP = r"\(of\s*(?P<issue_count>\d+)\)"
|
||||
ISSUE_NUMBER_RE = re_compile(
|
||||
ISSUE_NUMBER_RE: Pattern = re_compile(
|
||||
r"(\(?#" + _ISSUE_RE_EXP + r"\)?)" + r"(\W*" + _ISSUE_COUNT_RE_EXP + r")?"
|
||||
)
|
||||
ISSUE_WITH_COUNT_RE = re_compile(
|
||||
ISSUE_WITH_COUNT_RE: Pattern = re_compile(
|
||||
r"(\(?" + _ISSUE_RE_EXP + r"\)?" + r"\W*" + _ISSUE_COUNT_RE_EXP + r")"
|
||||
)
|
||||
ISSUE_END_RE = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
|
||||
ISSUE_BEGIN_RE = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")
|
||||
ISSUE_END_RE: Pattern = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
|
||||
ISSUE_BEGIN_RE: Pattern = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")
|
||||
|
||||
# Volume
|
||||
_VOLUME_COUNT_RE_EXP = r"\(of\s*(?P<volume_count>\d+)\)"
|
||||
VOLUME_RE = re_compile(
|
||||
r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)"
|
||||
VOLUME_RE: Pattern = re_compile(
|
||||
r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)" # noqa: ISC003
|
||||
r"(\W*" + _VOLUME_COUNT_RE_EXP + r")?" + r")"
|
||||
)
|
||||
VOLUME_WITH_COUNT_RE = re_compile(
|
||||
VOLUME_WITH_COUNT_RE: Pattern = re_compile(
|
||||
r"(\(?" + r"(?P<volume>\d+)" + r"\)?" + r"\W*" + _VOLUME_COUNT_RE_EXP + r")"
|
||||
)
|
||||
BOOK_VOLUME_RE = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")
|
||||
BOOK_VOLUME_RE: Pattern = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")
|
||||
|
||||
# Publisher
|
||||
_PUBLISHER_UNAMBIGUOUS_RE_EXP = (
|
||||
@ -197,15 +197,15 @@ _PUBLISHER_UNAMBIGUOUS_RE_EXP = (
|
||||
_PUBLISHER_AMBIGUOUS_RE_EXP = (
|
||||
r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_AMBIGUOUS) + r")\b)"
|
||||
)
|
||||
PUBLISHER_UNAMBIGUOUS_TOKEN_RE = re_compile(
|
||||
PUBLISHER_UNAMBIGUOUS_TOKEN_RE: Pattern = re_compile(
|
||||
r"(^|\/)" + _PUBLISHER_UNAMBIGUOUS_RE_EXP + r"($|\/)"
|
||||
)
|
||||
PUBLISHER_AMBIGUOUS_TOKEN_RE = re_compile(
|
||||
PUBLISHER_AMBIGUOUS_TOKEN_RE: Pattern = re_compile(
|
||||
r"(^|\/)" + _PUBLISHER_AMBIGUOUS_RE_EXP + r"($|\/)"
|
||||
)
|
||||
PUBLISHER_UNAMBIGUOUS_RE = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
|
||||
PUBLISHER_UNAMBIGUOUS_RE: Pattern = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
|
||||
PUBLISHER_AMBIGUOUS_RE = re_compile(_PUBLISHER_AMBIGUOUS_RE_EXP)
|
||||
|
||||
# LONG STRINGS
|
||||
REMAINING_GROUP_RE = re_compile(r"^[^\(].*[^\)]")
|
||||
NON_NUMBER_DOT_RE = re_compile(r"(\D)\.(\D)")
|
||||
REMAINING_GROUP_RE: Pattern = re_compile(r"^[^\(].*[^\)]")
|
||||
NON_NUMBER_DOT_RE: Pattern = re_compile(r"(\D)\.(\D)")
|
||||
|
@ -1,8 +1,9 @@
|
||||
"""Unparse comic filenames."""
|
||||
from calendar import month_abbr
|
||||
from collections.abc import Callable, Mapping, Sequence
|
||||
from contextlib import suppress
|
||||
from calendar import month_abbr
|
||||
from types import MappingProxyType
|
||||
|
||||
from comicfn2dict.log import print_log_header
|
||||
|
||||
|
||||
@ -44,7 +45,7 @@ class ComicFilenameSerializer:
|
||||
if not self._debug:
|
||||
return
|
||||
print_log_header(label)
|
||||
print(fn)
|
||||
print(fn) # noqa: T201
|
||||
|
||||
def _add_date(self) -> None:
|
||||
"""Construct date from Y-m-D if they exist."""
|
||||
@ -73,8 +74,7 @@ class ComicFilenameSerializer:
|
||||
if val in _EMPTY_VALUES:
|
||||
return ""
|
||||
final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
|
||||
token = final_fmt.format(val).strip()
|
||||
return token
|
||||
return final_fmt.format(val).strip()
|
||||
|
||||
def _add_remainder(self) -> str:
|
||||
"""Add the remainders specially."""
|
||||
@ -109,12 +109,13 @@ class ComicFilenameSerializer:
|
||||
return fn
|
||||
|
||||
def __init__(self, metadata: Mapping, ext: bool = True, verbose: int = 0):
|
||||
"""Initialize."""
|
||||
self.metadata: Mapping = metadata
|
||||
self._ext: bool = ext
|
||||
self._debug: bool = bool(verbose)
|
||||
|
||||
|
||||
def dict2comicfn(md: Mapping, ext: bool = True, verbose: int = 0) -> str:
|
||||
"""Simple API."""
|
||||
"""Simplify API."""
|
||||
serializer = ComicFilenameSerializer(md, ext=ext, verbose=verbose)
|
||||
return serializer.serialize()
|
||||
|
@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "comicfn2dict"
|
||||
version = "0.2.0a3"
|
||||
version = "0.2.0a4"
|
||||
description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli."
|
||||
license = "GPL-3.0-only"
|
||||
authors = ["AJ Slater <aj@slater.net>"]
|
||||
@ -125,7 +125,7 @@ exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*,
|
||||
extend-exclude = ["typings"]
|
||||
target-version = "py310"
|
||||
|
||||
[tool.lint.ruff]
|
||||
[tool.ruff.lint]
|
||||
extend-ignore = [
|
||||
"S101",
|
||||
"D203",
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
from types import MappingProxyType
|
||||
|
||||
|
||||
TEST_COMIC_FIELDS = {
|
||||
"series": "Long Series Name",
|
||||
"issue": "001",
|
||||
|
Loading…
Reference in New Issue
Block a user