commit
624b64d6ca
60
.circleci/config.yml
Normal file
60
.circleci/config.yml
Normal file
@ -0,0 +1,60 @@
|
||||
jobs:
|
||||
build:
|
||||
machine:
|
||||
image: ubuntu-2204:current
|
||||
environment:
|
||||
DOCKER_CLI_EXPERIMENTAL: enabled
|
||||
DOCKER_BUILDKIT: 1
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
command: docker compose build comicfn2dict-builder
|
||||
name: Build Builder
|
||||
- run:
|
||||
command: ./bin/docker-compose-exit.sh comicfn2dict-lint
|
||||
name: comicfn2dict Lint
|
||||
- run:
|
||||
command: ./bin/docker-compose-exit.sh comicfn2dict-test
|
||||
name: comicfn2dict Test
|
||||
- store_test_results:
|
||||
path: test-results/pytest
|
||||
- store_artifacts:
|
||||
path: test-results/coverage
|
||||
- run:
|
||||
command: ./bin/docker-compose-exit.sh comicfn2dict-build
|
||||
name: Build comicfn2dict Dist
|
||||
- persist_to_workspace:
|
||||
paths:
|
||||
- ./README.md
|
||||
- ./bin
|
||||
- ./dist
|
||||
- ./pyproject.toml
|
||||
root: .
|
||||
deploy:
|
||||
docker:
|
||||
- image: cimg/python:3.12.1
|
||||
steps:
|
||||
- attach_workspace:
|
||||
at: .
|
||||
- run:
|
||||
command: ./bin/publish-pypi.sh
|
||||
version: 2.1
|
||||
workflows:
|
||||
main:
|
||||
jobs:
|
||||
- build:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- develop
|
||||
- pre-release
|
||||
- main
|
||||
- deploy:
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- pre-release
|
||||
- main
|
||||
requires:
|
||||
- build
|
||||
version: 2.1
|
13
.eslintignore
Normal file
13
.eslintignore
Normal file
@ -0,0 +1,13 @@
|
||||
!.circleci
|
||||
**/__pycache__
|
||||
*test-results*
|
||||
*~
|
||||
.git
|
||||
.mypy_cache
|
||||
.pytest_cache
|
||||
.ruff_cache
|
||||
.venv
|
||||
dist
|
||||
node_modules
|
||||
package-lock.json
|
||||
typings
|
101
.eslintrc.cjs
101
.eslintrc.cjs
@ -1,101 +0,0 @@
|
||||
module.exports = {
|
||||
root: true,
|
||||
env: {
|
||||
browser: true,
|
||||
es2022: true,
|
||||
node: true,
|
||||
},
|
||||
extends: [
|
||||
"eslint:recommended",
|
||||
// LANGS
|
||||
"plugin:json/recommended",
|
||||
"plugin:mdx/recommended",
|
||||
"plugin:yaml/recommended",
|
||||
// CODE QUALITY
|
||||
"plugin:sonarjs/recommended",
|
||||
"plugin:unicorn/all",
|
||||
// PRACTICES
|
||||
"plugin:array-func/recommended",
|
||||
"plugin:eslint-comments/recommended",
|
||||
"plugin:no-use-extend-native/recommended",
|
||||
"plugin:optimize-regex/all",
|
||||
"plugin:promise/recommended",
|
||||
"plugin:import/recommended",
|
||||
"plugin:switch-case/recommended",
|
||||
// PRETTIER
|
||||
"plugin:prettier/recommended",
|
||||
"prettier", // prettier-config
|
||||
// SECURITY
|
||||
"plugin:no-unsanitized/DOM",
|
||||
"plugin:security/recommended-legacy",
|
||||
],
|
||||
overrides: [
|
||||
{
|
||||
files: ["*.md"],
|
||||
rules: {
|
||||
"prettier/prettier": ["warn", { parser: "markdown" }],
|
||||
},
|
||||
},
|
||||
],
|
||||
parserOptions: {
|
||||
ecmaVersion: "latest",
|
||||
ecmaFeatures: {
|
||||
impliedStrict: true,
|
||||
},
|
||||
},
|
||||
plugins: [
|
||||
"array-func",
|
||||
"eslint-comments",
|
||||
"json",
|
||||
"import",
|
||||
"no-constructor-bind",
|
||||
"no-secrets",
|
||||
"no-unsanitized",
|
||||
"no-use-extend-native",
|
||||
"optimize-regex",
|
||||
"prettier",
|
||||
"promise",
|
||||
"simple-import-sort",
|
||||
"switch-case",
|
||||
"security",
|
||||
"sonarjs",
|
||||
"unicorn",
|
||||
"yaml",
|
||||
],
|
||||
rules: {
|
||||
"array-func/prefer-array-from": "off", // for modern browsers the spread operator, as preferred by unicorn, works fine.
|
||||
"max-params": ["warn", 4],
|
||||
"no-console": process.env.NODE_ENV === "production" ? "warn" : "off",
|
||||
"no-debugger": process.env.NODE_ENV === "production" ? "warn" : "off",
|
||||
"no-constructor-bind/no-constructor-bind": "error",
|
||||
"no-constructor-bind/no-constructor-state": "error",
|
||||
"no-secrets/no-secrets": "error",
|
||||
"eslint-comments/no-unused-disable": 1,
|
||||
"prettier/prettier": "warn",
|
||||
"security/detect-object-injection": "off",
|
||||
"simple-import-sort/exports": "warn",
|
||||
"simple-import-sort/imports": "warn",
|
||||
"space-before-function-paren": "off",
|
||||
"switch-case/newline-between-switch-case": "off", // Malfunctioning
|
||||
"unicorn/switch-case-braces": ["warn", "avoid"],
|
||||
"unicorn/prefer-node-protocol": 0,
|
||||
"unicorn/prevent-abbreviations": "off",
|
||||
"unicorn/filename-case": [
|
||||
"error",
|
||||
{ case: "kebabCase", ignore: [".*.md"] },
|
||||
],
|
||||
},
|
||||
ignorePatterns: [
|
||||
"*~",
|
||||
"**/__pycache__",
|
||||
".git",
|
||||
"!.circleci",
|
||||
".mypy_cache",
|
||||
".pytest_cache",
|
||||
".venv*",
|
||||
"dist",
|
||||
"package-lock.json",
|
||||
"test-results",
|
||||
"typings",
|
||||
],
|
||||
};
|
20
Dockerfile
Normal file
20
Dockerfile
Normal file
@ -0,0 +1,20 @@
|
||||
FROM python:3.12.1-bookworm
|
||||
LABEL maintainer="AJ Slater <aj@slater.net>"
|
||||
|
||||
COPY debian.sources /etc/apt/sources.list.d/
|
||||
# hadolint ignore=DL3008
|
||||
RUN apt-get clean \
|
||||
&& apt-get update \
|
||||
&& apt-get install --no-install-recommends -y \
|
||||
bash \
|
||||
npm \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY bin ./bin
|
||||
COPY package.json package-lock.json pyproject.toml poetry.lock Makefile ./
|
||||
RUN make install-all
|
||||
|
||||
COPY . .
|
6
Makefile
6
Makefile
@ -1,28 +1,28 @@
|
||||
.PHONY: install-deps
|
||||
## Update pip and install poetry
|
||||
## @category Install
|
||||
install-deps:
|
||||
pip install --upgrade pip
|
||||
pip install --upgrade poetry
|
||||
npm install
|
||||
|
||||
# The .PHONY name must match the target it guards (install-prod).
.PHONY: install-prod
|
||||
## Install for production
|
||||
## @category Install
|
||||
install-prod: install-deps
|
||||
poetry install --no-root --only-root
|
||||
npm install
|
||||
|
||||
.PHONY: install-dev
|
||||
## Install dev requirements
|
||||
## @category Install
|
||||
install-dev: install-deps
|
||||
poetry install --no-root --only-root --with dev
|
||||
npm install
|
||||
|
||||
.PHONY: install-all
|
||||
## Install with all extras
|
||||
## @category Install
|
||||
install-all: install-deps
|
||||
poetry install --no-root --all-extras
|
||||
npm install
|
||||
|
||||
.PHONY: clean
|
||||
## Clean pycaches
|
||||
|
19
NEWS.md
19
NEWS.md
@ -1,5 +1,24 @@
|
||||
# 📰 comicfn2dict News
|
||||
|
||||
## v0.2.0
|
||||
|
||||
- Titles are now parsed only if they occur after the series token AND after
|
||||
either issue, year or volume.
|
||||
- A more sophisticated date parser.
|
||||
- Issue numbers that lead with a '#' character may start with alphabetical
|
||||
characters.
|
||||
- If volume is parsed, but issue number is not, the issue number is copied from
|
||||
the volume number.
|
||||
- ComicFilenameParser and ComicFilenameSerializer classes are available as well
|
||||
as the old function API.
|
||||
- New test cases thanks to @lordwelch & @bpepple
|
||||
- Titles must come after series and one other token, but before format and scan
|
||||
info.
|
||||
|
||||
## v0.1.4
|
||||
|
||||
- Require Python 3.10
|
||||
|
||||
## v0.1.3
|
||||
|
||||
- Fix README
|
||||
|
16
README.md
16
README.md
@ -4,16 +4,30 @@ An API and CLI for extracting structured comic metadata from filenames.
|
||||
|
||||
## Install
|
||||
|
||||
<!-- eslint-skip -->
|
||||
|
||||
```sh
|
||||
pip install comicfn2dict
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
look at `comicfn2dict/comicfn2dict.py`
|
||||
<!-- eslint-skip -->
|
||||
|
||||
```python
|
||||
from comicfn2dict import comicfn2dict, dict2comicfn
|
||||
|
||||
path = "Comic Series #001 Title (2024).cbz"
|
||||
|
||||
metadata: dict[str, str| tuple[str,...]] = comicfn2dict(path, verbose=0)
|
||||
|
||||
filename: str = dict2comicfn(metadata, bool=True, verbose=0)
|
||||
```
|
||||
|
||||
## CLI
|
||||
|
||||
<!-- eslint-skip -->
|
||||
|
||||
```sh
|
||||
comicfn2dict "Series Name #01 - Title (2023).cbz"
|
||||
{'ext': 'cbz',
|
||||
|
6
bin/docker-compose-exit.sh
Executable file
6
bin/docker-compose-exit.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Run a docker compose service and return its exit code.
# Usage: docker-compose-exit.sh SERVICE
set -euo pipefail
# Fail with a usage hint instead of a bare "unbound variable" error.
SERVICE="${1:?usage: $0 SERVICE}"
# --exit-code-from makes `up` exit with the status of the named service.
docker compose up --exit-code-from "$SERVICE" "$SERVICE"
|
7
bin/publish-pypi.sh
Executable file
7
bin/publish-pypi.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Publish the created package.
# Requires PYPI_USER and PYPI_PASS to be set in the environment.
set -euo pipefail
cd "$(dirname "$0")/.."
pip3 install --upgrade pip
pip3 install --upgrade poetry
# Hand the credentials to poetry through its environment-variable
# configuration instead of -u/-p so the password never appears in argv.
export POETRY_HTTP_BASIC_PYPI_USERNAME="${PYPI_USER:?PYPI_USER must be set}"
export POETRY_HTTP_BASIC_PYPI_PASSWORD="${PYPI_PASS:?PYPI_PASS must be set}"
poetry publish
|
@ -1,3 +1,3 @@
|
||||
"""Comic Filename to Dict parser and unparser."""
|
||||
from .parse import comicfn2dict # noqa: F401
|
||||
from .unparse import dict2comicfn # noqa: F401
|
||||
from .parse import ComicFilenameParser, comicfn2dict # noqa: F401
|
||||
from .unparse import ComicFilenameSerializer, dict2comicfn # noqa: F401
|
||||
|
@ -4,17 +4,27 @@ from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
from pprint import pprint
|
||||
|
||||
from comicfn2dict.parse import comicfn2dict
|
||||
from comicfn2dict.parse import ComicFilenameParser
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> None:
|
||||
"""Test parser."""
|
||||
description = "Comic book archive read/write tool."
|
||||
parser = ArgumentParser(description=description)
|
||||
parser.add_argument("path", help="Path of comic filename to parse", type=Path)
|
||||
parser.add_argument(
|
||||
"-v",
|
||||
"--verbose",
|
||||
default=0,
|
||||
action="count",
|
||||
help="Display intermediate parsing steps. Good for debugging.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
name = args.path.name
|
||||
metadata = comicfn2dict(name)
|
||||
cfnparser = ComicFilenameParser(name, verbose=args.verbose)
|
||||
metadata = cfnparser.parse()
|
||||
if args.verbose:
|
||||
print("=" * 80) # noqa:T201
|
||||
pprint(metadata) # noqa:T203
|
||||
|
||||
|
||||
|
@ -1,3 +0,0 @@
|
||||
"""API import source."""
|
||||
from comicfn2dict.parse import comicfn2dict # noqa: F401
|
||||
from comicfn2dict.unparse import dict2comicfn # noqa: F401
|
9
comicfn2dict/log.py
Normal file
9
comicfn2dict/log.py
Normal file
@ -0,0 +1,9 @@
|
||||
"""Print log header."""
|
||||
|
||||
|
||||
def print_log_header(label: str) -> None:
    """Print ``label`` as a dash-padded header line.

    The label is prefixed with three dashes and padded on the right with
    dashes up to 80 columns. A header that is already 80 characters or
    longer is printed without any padding.
    """
    print(f"---{label}".ljust(80, "-"))  # noqa: T201
|
@ -1,226 +1,364 @@
|
||||
"""Parse comic book archive names using the simple 'parse' parser."""
|
||||
import re
|
||||
from calendar import month_abbr
|
||||
from copy import copy
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
from pprint import pformat
|
||||
from re import Match, Pattern
|
||||
from sys import maxsize
|
||||
|
||||
from comicfn2dict.log import print_log_header
|
||||
from comicfn2dict.regex import (
|
||||
DASH_SPLIT_RE,
|
||||
EXTRA_SPACES_RE,
|
||||
ISSUE_ANYWHERE_RE,
|
||||
ALPHA_MONTH_RANGE_RE,
|
||||
BOOK_VOLUME_RE,
|
||||
ISSUE_BEGIN_RE,
|
||||
ISSUE_COUNT_RE,
|
||||
ISSUE_END_RE,
|
||||
ISSUE_NUMBER_RE,
|
||||
ISSUE_TOKEN_RE,
|
||||
NON_SPACE_DIVIDER_RE,
|
||||
ORIGINAL_FORMAT_RE,
|
||||
ISSUE_WITH_COUNT_RE,
|
||||
MONTH_FIRST_DATE_RE,
|
||||
NON_NUMBER_DOT_RE,
|
||||
ORIGINAL_FORMAT_SCAN_INFO_RE,
|
||||
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
|
||||
PUBLISHER_AMBIGUOUS_RE,
|
||||
PUBLISHER_AMBIGUOUS_TOKEN_RE,
|
||||
PUBLISHER_UNAMBIGUOUS_RE,
|
||||
PUBLISHER_UNAMBIGUOUS_TOKEN_RE,
|
||||
REGEX_SUBS,
|
||||
REMAINING_GROUP_RE,
|
||||
SCAN_INFO_RE,
|
||||
SCAN_INFO_SECONDARY_RE,
|
||||
TOKEN_DELIMETER,
|
||||
VOLUME_RE,
|
||||
YEAR_BEGIN_RE,
|
||||
VOLUME_WITH_COUNT_RE,
|
||||
YEAR_END_RE,
|
||||
YEAR_FIRST_DATE_RE,
|
||||
YEAR_TOKEN_RE,
|
||||
)
|
||||
|
||||
_DATE_KEYS = frozenset({"year", "month", "day"})
|
||||
_REMAINING_GROUP_KEYS = ("series", "title")
|
||||
# Ordered by commonness.
|
||||
_TITLE_PRECEDING_KEYS = ("issue", "year", "volume", "month")
|
||||
|
||||
|
||||
def _parse_ext(name, suffix, metadata):
|
||||
class ComicFilenameParser:
|
||||
"""Parse a filename metadata into a dict."""
|
||||
|
||||
def path_index(self, key: str, default: int = -1) -> int:
|
||||
"""Lazily retrieve and memoize the key's location in the path."""
|
||||
if key == "remainders":
|
||||
return default
|
||||
value: str = self.metadata.get(key, "") # type: ignore
|
||||
if not value:
|
||||
return default
|
||||
if value not in self._path_indexes:
|
||||
# XXX This is fragile, but it's difficult to calculate the original
|
||||
# position at match time from the ever changing _unparsed_path.
|
||||
index = self.path.rfind(value) if key == "ext" else self.path.find(value)
|
||||
self._path_indexes[value] = index
|
||||
return self._path_indexes[value]
|
||||
|
||||
def _log(self, label: str) -> None:
|
||||
if not self._debug:
|
||||
return
|
||||
print_log_header(label)
|
||||
combined = {}
|
||||
for key in self.metadata:
|
||||
combined[key] = (self.metadata.get(key), self.path_index(key))
|
||||
print(" " + self._unparsed_path) # noqa: T201
|
||||
print(" " + pformat(combined)) # noqa: T201
|
||||
|
||||
def _parse_ext(self) -> None:
|
||||
"""Pop the extension from the pathname."""
|
||||
data = name.removesuffix(suffix)
|
||||
path = Path(self._unparsed_path)
|
||||
suffix = path.suffix
|
||||
if not suffix:
|
||||
return
|
||||
|
||||
data = path.name.removesuffix(suffix)
|
||||
ext = suffix.lstrip(".")
|
||||
if ext:
|
||||
metadata["ext"] = ext
|
||||
return data
|
||||
self.metadata["ext"] = ext
|
||||
self._unparsed_path = data
|
||||
|
||||
|
||||
def _clean_dividers(data):
|
||||
def _clean_dividers(self) -> None:
|
||||
"""Replace non space dividers and clean extra spaces out of string."""
|
||||
data = NON_SPACE_DIVIDER_RE.sub(" ", data)
|
||||
return EXTRA_SPACES_RE.sub(" ", data)
|
||||
data = self._unparsed_path
|
||||
|
||||
# Simple substitutions
|
||||
for regex, pair in REGEX_SUBS.items():
|
||||
replacement, count = pair
|
||||
data = regex.sub(replacement, data, count=count).strip()
|
||||
self._unparsed_path = data.strip()
|
||||
self._log("After Clean Path")
|
||||
|
||||
def _get_data_list(path, metadata):
|
||||
"""Prepare data list from a path or string."""
|
||||
def _parse_items_update_metadata(
|
||||
self, matches: Match, exclude: str, require_all: bool, first_only: bool
|
||||
) -> bool:
|
||||
"""Update Metadata."""
|
||||
matched_metadata = {}
|
||||
for key, value in matches.groupdict().items():
|
||||
if value == exclude:
|
||||
continue
|
||||
if not value:
|
||||
if require_all:
|
||||
return False
|
||||
continue
|
||||
matched_metadata[key] = value
|
||||
if first_only:
|
||||
break
|
||||
if not matched_metadata:
|
||||
return False
|
||||
self.metadata.update(matched_metadata)
|
||||
return True
|
||||
|
||||
def _parse_items_pop_tokens(self, regex: Pattern, first_only: bool) -> None:
|
||||
"""Pop tokens from unparsed path."""
|
||||
count = 1 if first_only else 0
|
||||
marked_str = regex.sub(TOKEN_DELIMETER, self._unparsed_path, count=count)
|
||||
parts = []
|
||||
for part in marked_str.split(TOKEN_DELIMETER):
|
||||
if token := part.strip():
|
||||
parts.append(token)
|
||||
self._unparsed_path = TOKEN_DELIMETER.join(parts)
|
||||
|
||||
def _parse_items( # noqa: PLR0913
|
||||
self,
|
||||
regex: Pattern,
|
||||
require_all: bool = False,
|
||||
exclude: str = "",
|
||||
first_only: bool = False,
|
||||
pop: bool = True,
|
||||
) -> None:
|
||||
"""Parse a value from the data list into metadata and alter the data list."""
|
||||
# Match
|
||||
matches = regex.search(self._unparsed_path)
|
||||
if not matches:
|
||||
return
|
||||
|
||||
if not self._parse_items_update_metadata(
|
||||
matches, exclude, require_all, first_only
|
||||
):
|
||||
return
|
||||
|
||||
if pop:
|
||||
self._parse_items_pop_tokens(regex, first_only)
|
||||
|
||||
def _parse_issue(self) -> None:
|
||||
"""Parse Issue."""
|
||||
self._parse_items(ISSUE_NUMBER_RE)
|
||||
if "issue" not in self.metadata:
|
||||
self._parse_items(ISSUE_WITH_COUNT_RE)
|
||||
self._log("After Issue")
|
||||
|
||||
def _parse_volume(self) -> None:
|
||||
"""Parse Volume."""
|
||||
self._parse_items(VOLUME_RE)
|
||||
if "volume" not in self.metadata:
|
||||
self._parse_items(VOLUME_WITH_COUNT_RE)
|
||||
self._log("After Volume")
|
||||
|
||||
def _alpha_month_to_numeric(self) -> None:
|
||||
"""Translate alpha_month to numeric month."""
|
||||
if alpha_month := self.metadata.pop("alpha_month", ""):
|
||||
alpha_month = alpha_month.capitalize() # type: ignore
|
||||
for index, abbr in enumerate(month_abbr):
|
||||
if abbr and alpha_month.startswith(abbr):
|
||||
month = f"{index:02d}"
|
||||
self.metadata["month"] = month
|
||||
break
|
||||
|
||||
def _parse_dates(self) -> None:
|
||||
"""Parse date schemes."""
|
||||
# Discard second month of alpha month ranges.
|
||||
self._unparsed_path = ALPHA_MONTH_RANGE_RE.sub(r"\1", self._unparsed_path)
|
||||
|
||||
# Month first date
|
||||
self._parse_items(MONTH_FIRST_DATE_RE)
|
||||
self._alpha_month_to_numeric()
|
||||
|
||||
# Year first date
|
||||
if _DATE_KEYS - self.metadata.keys():
|
||||
self._parse_items(YEAR_FIRST_DATE_RE)
|
||||
self._alpha_month_to_numeric()
|
||||
|
||||
if "year" not in self.metadata:
|
||||
self._parse_items(YEAR_TOKEN_RE, first_only=True)
|
||||
if "volume" in self.metadata:
|
||||
return
|
||||
# A second year will be the real year.
|
||||
# Move the first year to volume
|
||||
if volume := self.metadata.get("year", ""):
|
||||
self._parse_items(YEAR_TOKEN_RE)
|
||||
if self.metadata.get("year", "") != volume:
|
||||
self.metadata["volume"] = volume
|
||||
self._log("After Date")
|
||||
|
||||
def _parse_format_and_scan_info(self) -> None:
|
||||
"""Format & Scan Info."""
|
||||
self._parse_items(
|
||||
ORIGINAL_FORMAT_SCAN_INFO_RE,
|
||||
require_all=True,
|
||||
)
|
||||
if "original_format" not in self.metadata:
|
||||
self._parse_items(
|
||||
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
|
||||
)
|
||||
self._parse_items(SCAN_INFO_SECONDARY_RE)
|
||||
if (
|
||||
scan_info_secondary := self.metadata.pop("secondary_scan_info", "")
|
||||
) and "scan_info" not in self.metadata:
|
||||
self.metadata["scan_info"] = scan_info_secondary # type: ignore
|
||||
self._log("After original_format & scan_info")
|
||||
|
||||
def _parse_ends_of_remaining_tokens(self):
|
||||
# Volume left on the end of string tokens
|
||||
if "volume" not in self.metadata:
|
||||
self._parse_items(BOOK_VOLUME_RE)
|
||||
self._log("After Volume on end of token")
|
||||
|
||||
# Years left on the end of string tokens
|
||||
year_end_matched = False
|
||||
if "year" not in self.metadata:
|
||||
self._parse_items(YEAR_END_RE, pop=False)
|
||||
year_end_matched = "year" in self.metadata
|
||||
self._log("After Year on end of token")
|
||||
|
||||
# Issue left on the end of string tokens
|
||||
if "issue" not in self.metadata and not year_end_matched:
|
||||
exclude: str = self.metadata.get("year", "") # type: ignore
|
||||
self._parse_items(ISSUE_END_RE, exclude=exclude)
|
||||
if "issue" not in self.metadata:
|
||||
self._parse_items(ISSUE_BEGIN_RE)
|
||||
self._log("After Issue on ends of tokens")
|
||||
|
||||
def _parse_publisher(self) -> None:
|
||||
"""Parse Publisher."""
|
||||
# Pop single tokens so they don't end up titles.
|
||||
self._parse_items(PUBLISHER_UNAMBIGUOUS_TOKEN_RE, first_only=True)
|
||||
if "publisher" not in self.metadata:
|
||||
self._parse_items(PUBLISHER_AMBIGUOUS_TOKEN_RE, first_only=True)
|
||||
if "publisher" not in self.metadata:
|
||||
self._parse_items(PUBLISHER_UNAMBIGUOUS_RE, pop=False, first_only=True)
|
||||
if "publisher" not in self.metadata:
|
||||
self._parse_items(PUBLISHER_AMBIGUOUS_RE, pop=False, first_only=True)
|
||||
self._log("After publisher")
|
||||
|
||||
def _is_at_title_position(self, value: str) -> bool:
|
||||
"""Title is in correct position."""
|
||||
title_index = self.path.find(value)
|
||||
|
||||
# Titles must come after series but before format and scan_info
|
||||
if (
|
||||
title_index < self.path_index("series")
|
||||
or title_index > self.path_index("original_format", maxsize)
|
||||
or title_index > self.path_index("scan_info", maxsize)
|
||||
):
|
||||
return False
|
||||
|
||||
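# Titles must be after the series and one other token.
# NOTE(review): path_index() returns -1 for keys that were not parsed, so a
# missing key satisfies the comparison below, and other_tokens_exist is set
# on every iteration - confirm this matches the intended rule.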
|
||||
title_ok = False
|
||||
other_tokens_exist = False
|
||||
for preceding_key in _TITLE_PRECEDING_KEYS:
|
||||
other_tokens_exist = True
|
||||
if title_index > self.path_index(preceding_key):
|
||||
title_ok = True
|
||||
break
|
||||
return title_ok or not other_tokens_exist
|
||||
|
||||
def _grouping_operators_strip(self, value: str) -> str:
|
||||
"""Strip spaces and parens."""
|
||||
value = value.strip()
|
||||
value = value.strip("()").strip()
|
||||
value = value.strip("-").strip()
|
||||
value = value.strip(",").strip()
|
||||
value = value.strip("'").strip()
|
||||
return value.strip('"').strip()
|
||||
|
||||
def _parse_series_and_title_token(
|
||||
self, remaining_key_index: int, tokens: list[str]
|
||||
) -> str:
|
||||
"""Parse one series or title token."""
|
||||
key = _REMAINING_GROUP_KEYS[remaining_key_index]
|
||||
if key in self.metadata:
|
||||
return ""
|
||||
token = tokens.pop(0)
|
||||
match = REMAINING_GROUP_RE.search(token)
|
||||
if not match:
|
||||
return token
|
||||
value = match.group()
|
||||
if key == "title" and not self._is_at_title_position(value):
|
||||
return token
|
||||
value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value)
|
||||
value = self._grouping_operators_strip(value)
|
||||
if value:
|
||||
self.metadata[key] = value
|
||||
return ""
|
||||
|
||||
def _parse_series_and_title(self) -> None:
|
||||
"""Assign series and title."""
|
||||
if not self._unparsed_path:
|
||||
return
|
||||
|
||||
remaining_key_index = 0
|
||||
unused_tokens = []
|
||||
tokens = self._unparsed_path.split(TOKEN_DELIMETER)
|
||||
while tokens and remaining_key_index < len(_REMAINING_GROUP_KEYS):
|
||||
unused_token = self._parse_series_and_title_token(
|
||||
remaining_key_index, tokens
|
||||
)
|
||||
if unused_token:
|
||||
unused_tokens.append(unused_token)
|
||||
remaining_key_index += 1
|
||||
|
||||
self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
|
||||
self._log("After Series & Title")
|
||||
|
||||
def _add_remainders(self) -> None:
    """Store non-blank leftover tokens under the ``remainders`` key.

    The unparsed path is split on TOKEN_DELIMETER and each piece is
    stripped. Nothing is written when no non-empty piece remains.
    """
    pieces = (chunk.strip() for chunk in self._unparsed_path.split(TOKEN_DELIMETER))
    leftovers = tuple(piece for piece in pieces if piece)
    if leftovers:
        self.metadata["remainders"] = leftovers
|
||||
|
||||
def parse(self) -> dict[str, str | tuple[str, ...]]:
|
||||
"""Parse the filename with a hierarchy of regexes."""
|
||||
self._log("Init")
|
||||
self._parse_ext()
|
||||
self._clean_dividers()
|
||||
self._parse_issue()
|
||||
self._parse_volume()
|
||||
self._parse_dates()
|
||||
self._parse_format_and_scan_info()
|
||||
self._parse_ends_of_remaining_tokens()
|
||||
self._parse_publisher()
|
||||
self._parse_series_and_title()
|
||||
|
||||
# Copy volume into issue if it's all we have.
|
||||
if "issue" not in self.metadata and "volume" in self.metadata:
|
||||
self.metadata["issue"] = self.metadata["volume"]
|
||||
self._log("After issue can be volume")
|
||||
|
||||
self._add_remainders()
|
||||
|
||||
return self.metadata
|
||||
|
||||
def __init__(self, path: str | Path, verbose: int = 0):
|
||||
"""Initialize."""
|
||||
self._debug: bool = verbose > 0
|
||||
# munge path
|
||||
if isinstance(path, str):
|
||||
path = path.strip()
|
||||
path = Path(path)
|
||||
data = _parse_ext(path.name, path.suffix, metadata)
|
||||
data = _clean_dividers(data)
|
||||
return DASH_SPLIT_RE.split(data)
|
||||
p_path = Path(path)
|
||||
self.path = str(p_path.name).strip()
|
||||
self.metadata: dict[str, str | tuple[str, ...]] = {}
|
||||
self._unparsed_path = copy(self.path)
|
||||
self._path_indexes: dict[str, int] = {}
|
||||
|
||||
|
||||
def _paren_strip(value: str):
|
||||
"""Strip spaces and parens."""
|
||||
return value.strip().strip("()").strip()
|
||||
|
||||
|
||||
def _splicey_dicey(data_list, index, match, match_group: Union[int, str] = 0):
|
||||
"""Replace a string token from a list with two strings and the value removed.
|
||||
|
||||
And return the value.
|
||||
"""
|
||||
value = match.group(match_group)
|
||||
data = data_list.pop(index)
|
||||
data_ends = []
|
||||
if data_before := data[: match.start()].strip():
|
||||
data_ends.append(data_before)
|
||||
if data_after := data[match.end() :].strip():
|
||||
data_ends.append(data_after)
|
||||
data_list[index:index] = data_ends
|
||||
return _paren_strip(value)
|
||||
|
||||
|
||||
def _parse_original_format_and_scan_info(data_list, metadata):
|
||||
"""Parse (ORIGINAL_FORMAT-SCAN_INFO)."""
|
||||
original_format = None
|
||||
scan_info = None
|
||||
index = 0
|
||||
match = None
|
||||
for data in data_list:
|
||||
match = ORIGINAL_FORMAT_SCAN_INFO_RE.search(data)
|
||||
if match:
|
||||
original_format = match.group("original_format")
|
||||
try:
|
||||
scan_info = match.group("scan_info")
|
||||
except IndexError:
|
||||
scan_info = None
|
||||
break
|
||||
index += 1
|
||||
if original_format:
|
||||
metadata["original_format"] = _paren_strip(original_format)
|
||||
match_group = 1
|
||||
if scan_info:
|
||||
metadata["scan_info"] = _paren_strip(scan_info)
|
||||
match_group = 0
|
||||
_splicey_dicey(data_list, index, match, match_group=match_group)
|
||||
else:
|
||||
index = 0
|
||||
return index
|
||||
|
||||
|
||||
def _pop_value_from_token(
|
||||
data_list: list,
|
||||
metadata: dict,
|
||||
regex: re.Pattern,
|
||||
key: str,
|
||||
index: int = 0,
|
||||
):
|
||||
"""Search token for value, splice and assign to metadata."""
|
||||
data = data_list[index]
|
||||
match = regex.search(data)
|
||||
if match:
|
||||
value = _splicey_dicey(data_list, index, match, key)
|
||||
metadata[key] = value
|
||||
return match
|
||||
|
||||
|
||||
def _parse_item(
|
||||
data_list,
|
||||
metadata,
|
||||
regex,
|
||||
key,
|
||||
start_index: int = 0,
|
||||
):
|
||||
"""Parse a value from the data list into metadata and alter the data list."""
|
||||
index = start_index
|
||||
dl_len = end_index = len(data_list)
|
||||
if index >= end_index:
|
||||
index = 0
|
||||
while index < end_index:
|
||||
match = _pop_value_from_token(data_list, metadata, regex, key, index)
|
||||
if match:
|
||||
break
|
||||
index += 1
|
||||
if index > dl_len and start_index > 0:
|
||||
index = 0
|
||||
end_index = start_index
|
||||
return index
|
||||
|
||||
|
||||
def _pop_issue_from_text_fields(data_list, metadata, index):
|
||||
"""Search issue from ends of text fields."""
|
||||
if "issue" not in metadata:
|
||||
_pop_value_from_token(data_list, metadata, ISSUE_END_RE, "issue", index=index)
|
||||
if "issue" not in metadata:
|
||||
_pop_value_from_token(data_list, metadata, ISSUE_BEGIN_RE, "issue", index=index)
|
||||
return data_list.pop(index)
|
||||
|
||||
|
||||
def _assign_remaining_groups(data_list, metadata):
|
||||
"""Assign series and title."""
|
||||
index = 0
|
||||
for key in _REMAINING_GROUP_KEYS:
|
||||
try:
|
||||
data = data_list[index]
|
||||
except (IndexError, TypeError):
|
||||
break
|
||||
match = REMAINING_GROUP_RE.search(data) if data else None
|
||||
if match:
|
||||
value = _pop_issue_from_text_fields(data_list, metadata, index)
|
||||
value = _paren_strip(value)
|
||||
if value:
|
||||
metadata[key] = value
|
||||
else:
|
||||
index += 1
|
||||
|
||||
|
||||
def _pickup_issue(remainders, metadata):
|
||||
"""Get issue from remaining tokens or anywhere in a pinch."""
|
||||
if "issue" in metadata:
|
||||
return
|
||||
_parse_item(remainders, metadata, ISSUE_TOKEN_RE, "issue")
|
||||
if "issue" in metadata:
|
||||
return
|
||||
_parse_item(remainders, metadata, ISSUE_ANYWHERE_RE, "issue")
|
||||
|
||||
|
||||
def comicfn2dict(path):
|
||||
"""Parse the filename with a hierarchy of regexes."""
|
||||
metadata = {}
|
||||
data_list = _get_data_list(path, metadata)
|
||||
|
||||
# Parse paren tokens
|
||||
_parse_item(data_list, metadata, ISSUE_COUNT_RE, "issue_count")
|
||||
_parse_item(data_list, metadata, YEAR_TOKEN_RE, "year")
|
||||
of_index = _parse_original_format_and_scan_info(data_list, metadata)
|
||||
if "original_format" not in metadata:
|
||||
of_index = _parse_item(
|
||||
data_list, metadata, ORIGINAL_FORMAT_RE, "original_format"
|
||||
)
|
||||
if "scan_info" not in metadata:
|
||||
# Start searching for scan_info after original format.
|
||||
_parse_item(
|
||||
data_list,
|
||||
metadata,
|
||||
SCAN_INFO_RE,
|
||||
"scan_info",
|
||||
start_index=of_index + 1,
|
||||
)
|
||||
|
||||
# Parse regular tokens
|
||||
_parse_item(data_list, metadata, VOLUME_RE, "volume")
|
||||
_parse_item(data_list, metadata, ISSUE_NUMBER_RE, "issue")
|
||||
|
||||
# Pickup year if not gotten.
|
||||
if "year" not in metadata:
|
||||
_parse_item(data_list, metadata, YEAR_BEGIN_RE, "year")
|
||||
if "year" not in metadata:
|
||||
_parse_item(data_list, metadata, YEAR_END_RE, "year")
|
||||
|
||||
# Pickup issue if it's a standalone token
|
||||
if "issue" not in metadata:
|
||||
_parse_item(data_list, metadata, ISSUE_TOKEN_RE, "issue")
|
||||
|
||||
# Series and Title. Also looks for issue.
|
||||
_assign_remaining_groups(data_list, metadata)
|
||||
|
||||
# Final try for issue number.
|
||||
_pickup_issue(data_list, metadata)
|
||||
|
||||
# Add Remainders
|
||||
if data_list:
|
||||
metadata["remainders"] = tuple(data_list)
|
||||
|
||||
return metadata
|
||||
def comicfn2dict(
    path: str | Path, verbose: int = 0
) -> dict[str, str | tuple[str, ...]]:
    """Simplify the API: parse ``path`` in a single call.

    Builds a ComicFilenameParser for ``path`` with the given ``verbose``
    level and returns the result of its ``parse()`` method.
    """
    return ComicFilenameParser(path, verbose=verbose).parse()
|
||||
|
@ -1,15 +1,32 @@
|
||||
"""Parsing regexes."""
|
||||
import re
|
||||
from re import IGNORECASE, Pattern, compile
|
||||
from types import MappingProxyType
|
||||
|
||||
PUBLISHERS_UNAMBIGUOUS: tuple[str, ...] = (
|
||||
r"Abrams ComicArts",
|
||||
r"BOOM! Studios",
|
||||
r"DC(\sComics)?",
|
||||
r"Dark Horse Comics",
|
||||
r"Drawn & Quarterly",
|
||||
r"Dynamite Entertainment",
|
||||
r"IDW Publishing",
|
||||
r"Icon Comics",
|
||||
r"Kodansha",
|
||||
r"Oni Press",
|
||||
r"Pantheon Books",
|
||||
r"SLG Publishing",
|
||||
r"SelfMadeHero",
|
||||
r"Titan Comics",
|
||||
)
|
||||
PUBLISHERS_AMBIGUOUS: tuple[str, ...] = (
|
||||
r"(?<!Capt\.\s)(?<!Capt\s)(?<!Captain\s)Marvel",
|
||||
r"Heavy Metal",
|
||||
r"Epic",
|
||||
r"Image",
|
||||
r"Mirage",
|
||||
)
|
||||
|
||||
def re_compile(exp, parenthify=False):
|
||||
"""Compile regex with options."""
|
||||
if parenthify:
|
||||
exp = r"\(" + exp + r"\)"
|
||||
return re.compile(exp, flags=re.IGNORECASE)
|
||||
|
||||
|
||||
ORIGINAL_FORMAT_PATTERNS = (
|
||||
ORIGINAL_FORMAT_PATTERNS: tuple[str, ...] = (
|
||||
r"Anthology",
|
||||
r"(One|1)[-\s]Shot",
|
||||
r"Annual",
|
||||
@ -35,41 +52,160 @@ ORIGINAL_FORMAT_PATTERNS = (
|
||||
r"Sketch",
|
||||
r"TPB",
|
||||
r"Trade[-\s]Paper[-\s]?Back",
|
||||
r"Web([-\s]?Comic)?",
|
||||
r"Web([-\s]?(Comic|Rip))?",
|
||||
)
|
||||
|
||||
MONTHS: tuple[str, ...] = (
|
||||
r"Jan(uary)?",
|
||||
r"Feb(ruary)?",
|
||||
r"Mar(ch)?",
|
||||
r"Apr(il)?",
|
||||
r"May",
|
||||
r"Jun(e)?",
|
||||
r"Jul(y)?",
|
||||
r"Aug(ust)?",
|
||||
r"Sep(tember)?",
|
||||
r"Oct(ober)?",
|
||||
r"Nov(ember)?",
|
||||
r"Dec(ember)?",
|
||||
)
|
||||
|
||||
TOKEN_DELIMETER: str = r"/"
|
||||
|
||||
|
||||
def re_compile(exp: str, parenthify: bool = False) -> Pattern:
|
||||
"""Compile regex with options."""
|
||||
if parenthify:
|
||||
exp = r"\(" + exp + r"\)"
|
||||
return compile(exp, flags=IGNORECASE)
|
||||
|
||||
|
||||
# CLEAN
|
||||
NON_SPACE_DIVIDER_RE = re_compile(r"[_\+]")
|
||||
DASH_SPLIT_RE = re_compile(r"\s-\s")
|
||||
EXTRA_SPACES_RE = re_compile(r"\s\s+")
|
||||
_TOKEN_DIVIDERS_RE = re_compile(r":")
|
||||
_SPACE_EQUIVALENT_RE = re_compile(r"_")
|
||||
_EXTRA_SPACES_RE = re_compile(r"\s\s+")
|
||||
_LEFT_PAREN_EQUIVALENT_RE = re_compile(r"\[")
|
||||
_RIGHT_PAREN_EQUIVALENT_RE = re_compile(r"\]")
|
||||
_DOUBLE_UNDERSCORE_RE = re_compile(r"__(.*)__")
|
||||
REGEX_SUBS: MappingProxyType[Pattern, tuple[str, int]] = MappingProxyType(
|
||||
{
|
||||
_DOUBLE_UNDERSCORE_RE: (r"(\1)", 0),
|
||||
_TOKEN_DIVIDERS_RE: (TOKEN_DELIMETER, 1),
|
||||
_SPACE_EQUIVALENT_RE: (r" ", 0),
|
||||
_EXTRA_SPACES_RE: (r" ", 0),
|
||||
_LEFT_PAREN_EQUIVALENT_RE: (r"(", 0),
|
||||
_RIGHT_PAREN_EQUIVALENT_RE: (r")", 0),
|
||||
}
|
||||
)
|
||||
|
||||
### DATES
|
||||
_YEAR_RE_EXP = r"(?P<year>[12]\d{3})"
|
||||
_MONTH_ALPHA_RE_EXP = r"(" + "(?P<alpha_month>" + r"|".join(MONTHS) + r")\.?" r")"
|
||||
_MONTH_NUMERIC_RE_EXP = r"(?P<month>0?\d|1[0-2]?)"
|
||||
_MONTH_RE_EXP = r"(" + _MONTH_ALPHA_RE_EXP + r"|" + _MONTH_NUMERIC_RE_EXP + r")"
|
||||
_ALPHA_MONTH_RANGE = (
|
||||
r"\b" # noqa: ISC003
|
||||
+ r"("
|
||||
+ r"|".join(MONTHS)
|
||||
+ r")"
|
||||
+ r"("
|
||||
+ r"\.?-"
|
||||
+ r"("
|
||||
+ r"|".join(MONTHS)
|
||||
+ r")"
|
||||
+ r")\b"
|
||||
)
|
||||
ALPHA_MONTH_RANGE_RE: Pattern = re_compile(_ALPHA_MONTH_RANGE)
|
||||
|
||||
_DAY_RE_EXP = r"(?P<day>([0-2]?\d|(3)[0-1]))"
|
||||
_DATE_DELIM = r"[-\s]+"
|
||||
_MONTH_FIRST_DATE_RE_EXP = (
|
||||
r"((\b|\(?)"
|
||||
# Month
|
||||
+ _MONTH_RE_EXP
|
||||
# Day
|
||||
+ r"("
|
||||
+ _DATE_DELIM
|
||||
+ _DAY_RE_EXP
|
||||
+ r")?"
|
||||
# Year
|
||||
+ r"[,]?"
|
||||
+ _DATE_DELIM
|
||||
+ _YEAR_RE_EXP
|
||||
+ r"(\)?|\b))"
|
||||
)
|
||||
_YEAR_FIRST_DATE_RE_EXP = (
|
||||
r"(\b\(?"
|
||||
+ _YEAR_RE_EXP
|
||||
+ _DATE_DELIM
|
||||
+ _MONTH_RE_EXP
|
||||
+ _DATE_DELIM
|
||||
+ _DAY_RE_EXP
|
||||
+ r"\b\)?)"
|
||||
)
|
||||
|
||||
MONTH_FIRST_DATE_RE: Pattern = re_compile(_MONTH_FIRST_DATE_RE_EXP)
|
||||
YEAR_FIRST_DATE_RE: Pattern = re_compile(_YEAR_FIRST_DATE_RE_EXP)
|
||||
YEAR_TOKEN_RE: Pattern = re_compile(_YEAR_RE_EXP, parenthify=True)
|
||||
YEAR_END_RE: Pattern = re_compile(_YEAR_RE_EXP + r"\/|$")
|
||||
|
||||
# PAREN GROUPS
|
||||
ISSUE_COUNT_RE = re_compile(r"of\s*(?P<issue_count>\d+)", parenthify=True)
|
||||
_YEAR_RE_EXP = r"(?P<year>[12]\d{3})"
|
||||
YEAR_TOKEN_RE = re_compile(_YEAR_RE_EXP, parenthify=True)
|
||||
YEAR_BEGIN_RE = re_compile(r"^" + _YEAR_RE_EXP + r"\b")
|
||||
YEAR_END_RE = re_compile(r"\b" + _YEAR_RE_EXP + r"$")
|
||||
_OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS)
|
||||
_ORIGINAL_FORMAT_RE_EXP = r"(?P<original_format>" + _OF_PATTERNS + r")"
|
||||
ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
|
||||
_SCAN_INFO_RE_EXP = r"(?P<scan_info>[^()]+?)"
|
||||
SCAN_INFO_RE = re_compile(_SCAN_INFO_RE_EXP, parenthify=True)
|
||||
_SCAN_INFO_RE_EXP = r"(?P<scan_info>[^()]*)"
|
||||
_ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = (
|
||||
_ORIGINAL_FORMAT_RE_EXP + r"(?:-" + _SCAN_INFO_RE_EXP + r")?"
|
||||
_ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP # + r")?"
|
||||
)
|
||||
ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile(
|
||||
# Keep this even though comicfn2dict doesn't use it directly
|
||||
ORIGINAL_FORMAT_RE: Pattern = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
|
||||
ORIGINAL_FORMAT_SCAN_INFO_RE: Pattern = re_compile(
|
||||
_ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True
|
||||
)
|
||||
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE: Pattern = re_compile(
|
||||
r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)"
|
||||
)
|
||||
|
||||
# REGULAR TOKENS
|
||||
VOLUME_RE = re_compile(r"((?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+))")
|
||||
_ISSUE_RE_EXP = r"(?P<issue>[\d½]+\.?\d*\w*)"
|
||||
ISSUE_NUMBER_RE = re_compile(r"(#" + _ISSUE_RE_EXP + r")")
|
||||
ISSUE_TOKEN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")$")
|
||||
ISSUE_END_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")$")
|
||||
ISSUE_BEGIN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")\b")
|
||||
ISSUE_ANYWHERE_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")\b")
|
||||
SCAN_INFO_SECONDARY_RE: Pattern = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")
|
||||
|
||||
# ISSUE
|
||||
_ISSUE_RE_EXP = r"(?P<issue>\w*(½|\d+)[\.\d+]*\w*)"
|
||||
_ISSUE_COUNT_RE_EXP = r"\(of\s*(?P<issue_count>\d+)\)"
|
||||
ISSUE_NUMBER_RE: Pattern = re_compile(
|
||||
r"(\(?#" + _ISSUE_RE_EXP + r"\)?)" + r"(\W*" + _ISSUE_COUNT_RE_EXP + r")?"
|
||||
)
|
||||
ISSUE_WITH_COUNT_RE: Pattern = re_compile(
|
||||
r"(\(?" + _ISSUE_RE_EXP + r"\)?" + r"\W*" + _ISSUE_COUNT_RE_EXP + r")"
|
||||
)
|
||||
ISSUE_END_RE: Pattern = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
|
||||
ISSUE_BEGIN_RE: Pattern = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")
|
||||
|
||||
# Volume
|
||||
_VOLUME_COUNT_RE_EXP = r"\(of\s*(?P<volume_count>\d+)\)"
|
||||
VOLUME_RE: Pattern = re_compile(
|
||||
r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)" # noqa: ISC003
|
||||
r"(\W*" + _VOLUME_COUNT_RE_EXP + r")?" + r")"
|
||||
)
|
||||
VOLUME_WITH_COUNT_RE: Pattern = re_compile(
|
||||
r"(\(?" + r"(?P<volume>\d+)" + r"\)?" + r"\W*" + _VOLUME_COUNT_RE_EXP + r")"
|
||||
)
|
||||
BOOK_VOLUME_RE: Pattern = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")
|
||||
|
||||
# Publisher
|
||||
_PUBLISHER_UNAMBIGUOUS_RE_EXP = (
|
||||
r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_UNAMBIGUOUS) + r")\b)"
|
||||
)
|
||||
_PUBLISHER_AMBIGUOUS_RE_EXP = (
|
||||
r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_AMBIGUOUS) + r")\b)"
|
||||
)
|
||||
PUBLISHER_UNAMBIGUOUS_TOKEN_RE: Pattern = re_compile(
|
||||
r"(^|\/)" + _PUBLISHER_UNAMBIGUOUS_RE_EXP + r"($|\/)"
|
||||
)
|
||||
PUBLISHER_AMBIGUOUS_TOKEN_RE: Pattern = re_compile(
|
||||
r"(^|\/)" + _PUBLISHER_AMBIGUOUS_RE_EXP + r"($|\/)"
|
||||
)
|
||||
PUBLISHER_UNAMBIGUOUS_RE: Pattern = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
|
||||
PUBLISHER_AMBIGUOUS_RE = re_compile(_PUBLISHER_AMBIGUOUS_RE_EXP)
|
||||
|
||||
# LONG STRINGS
|
||||
REMAINING_GROUP_RE = re_compile(r"^[\w].*[^\)]")
|
||||
REMAINING_GROUP_RE: Pattern = re_compile(r"^[^\(].*[^\)]")
|
||||
NON_NUMBER_DOT_RE: Pattern = re_compile(r"(\D)\.(\D)")
|
||||
|
@ -1,8 +1,13 @@
|
||||
"""Unparse comic filenames."""
|
||||
from typing import Callable
|
||||
from calendar import month_abbr
|
||||
from collections.abc import Callable, Mapping, Sequence
|
||||
from contextlib import suppress
|
||||
from types import MappingProxyType
|
||||
|
||||
from comicfn2dict.log import print_log_header
|
||||
|
||||
|
||||
def issue_formatter(issue):
|
||||
def issue_formatter(issue: str) -> str:
|
||||
"""Formatter to zero pad issues."""
|
||||
i = 0
|
||||
issue = issue.lstrip("0")
|
||||
@ -14,37 +19,103 @@ def issue_formatter(issue):
|
||||
return "#{:0>" + str(pad) + "}"
|
||||
|
||||
|
||||
_PAREN_FMT = "({})"
|
||||
_FILENAME_FORMAT_TAGS = (
|
||||
_PAREN_FMT: str = "({})"
|
||||
_FILENAME_FORMAT_TAGS: tuple[tuple[str, str | Callable], ...] = (
|
||||
("series", "{}"),
|
||||
("volume", "v{}"),
|
||||
("volume_count", "(of {:03})"),
|
||||
("issue", issue_formatter),
|
||||
("issue_count", "(of {:03})"),
|
||||
("year", _PAREN_FMT),
|
||||
("date", _PAREN_FMT),
|
||||
("title", "{}"),
|
||||
("publisher", _PAREN_FMT),
|
||||
("original_format", _PAREN_FMT),
|
||||
("scan_info", _PAREN_FMT),
|
||||
)
|
||||
_EMPTY_VALUES = (None, "")
|
||||
_EMPTY_VALUES: tuple[None, str] = (None, "")
|
||||
_DEFAULT_EXT = "cbz"
|
||||
_DATE_KEYS = ("year", "month", "day")
|
||||
|
||||
|
||||
def dict2comicfn(md, ext=True):
|
||||
class ComicFilenameSerializer:
|
||||
"""Serialize Comic Filenames from dict."""
|
||||
|
||||
def _log(self, label: str, fn: str) -> None:
|
||||
"""Log progress."""
|
||||
if not self._debug:
|
||||
return
|
||||
print_log_header(label)
|
||||
print(fn) # noqa: T201
|
||||
|
||||
def _add_date(self) -> None:
|
||||
"""Construct date from Y-m-D if they exist."""
|
||||
if "date" in self.metadata:
|
||||
return
|
||||
parts = []
|
||||
for key in _DATE_KEYS:
|
||||
if part := self.metadata.get(key):
|
||||
if key == "month" and not parts:
|
||||
with suppress(TypeError):
|
||||
part = month_abbr[int(part)]
|
||||
|
||||
parts.append(part)
|
||||
if key == "month" and not parts:
|
||||
# noop if only day.
|
||||
break
|
||||
if parts:
|
||||
parts = (str(part) for part in parts)
|
||||
date = "-".join(parts)
|
||||
self._log("After date", date)
|
||||
self.metadata = MappingProxyType({**self.metadata, "date": date})
|
||||
|
||||
def _tokenize_tag(self, tag: str, fmt: str | Callable) -> str:
|
||||
"""Add tags to the string."""
|
||||
val = self.metadata.get(tag)
|
||||
if val in _EMPTY_VALUES:
|
||||
return ""
|
||||
final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
|
||||
return final_fmt.format(val).strip()
|
||||
|
||||
def _add_remainder(self) -> str:
|
||||
"""Add the remainders specially."""
|
||||
if remainders := self.metadata.get("remainders"):
|
||||
if isinstance(remainders, Sequence):
|
||||
remainders = (str(remainder) for remainder in remainders)
|
||||
remainder = " ".join(remainders)
|
||||
else:
|
||||
remainder = str(remainders)
|
||||
return f"[{remainder}]"
|
||||
return ""
|
||||
|
||||
def serialize(self) -> str:
|
||||
"""Get our preferred basename from a metadata dict."""
|
||||
if not md:
|
||||
return None
|
||||
self._add_date()
|
||||
|
||||
tokens = []
|
||||
for tag, fmt in _FILENAME_FORMAT_TAGS:
|
||||
val = md.get(tag)
|
||||
if val in _EMPTY_VALUES:
|
||||
continue
|
||||
final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
|
||||
token = final_fmt.format(val).strip()
|
||||
if token:
|
||||
if token := self._tokenize_tag(tag, fmt):
|
||||
tokens.append(token)
|
||||
self._log(f"After {tag}", str(tokens))
|
||||
fn = " ".join(tokens)
|
||||
if remainders := md.get("remainders"):
|
||||
remainder = " ".join(remainders)
|
||||
fn += f" - {remainder}"
|
||||
if ext:
|
||||
fn += "." + md.get("ext", "cbz")
|
||||
|
||||
fn += self._add_remainder()
|
||||
self._log("After remainder", fn)
|
||||
|
||||
if self._ext:
|
||||
ext = self.metadata.get("ext", _DEFAULT_EXT)
|
||||
fn += f".{ext}"
|
||||
self._log("After ext", fn)
|
||||
|
||||
return fn
|
||||
|
||||
def __init__(self, metadata: Mapping, ext: bool = True, verbose: int = 0):
|
||||
"""Initialize."""
|
||||
self.metadata: Mapping = metadata
|
||||
self._ext: bool = ext
|
||||
self._debug: bool = bool(verbose)
|
||||
|
||||
|
||||
def dict2comicfn(md: Mapping, ext: bool = True, verbose: int = 0) -> str:
|
||||
"""Simplify API."""
|
||||
serializer = ComicFilenameSerializer(md, ext=ext, verbose=verbose)
|
||||
return serializer.serialize()
|
||||
|
11
debian.sources
Normal file
11
debian.sources
Normal file
@ -0,0 +1,11 @@
|
||||
Types: deb
|
||||
URIs: http://deb.debian.org/debian
|
||||
Suites: bookworm bookworm-updates
|
||||
Components: main contrib non-free
|
||||
Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg
|
||||
|
||||
Types: deb
|
||||
URIs: http://deb.debian.org/debian-security
|
||||
Suites: bookworm-security
|
||||
Components: main contrib non-free
|
||||
Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg
|
21
docker-compose.yaml
Normal file
21
docker-compose.yaml
Normal file
@ -0,0 +1,21 @@
|
||||
services:
|
||||
comicfn2dict-builder:
|
||||
build: .
|
||||
image: comicfn2dict-builder
|
||||
container_name: comicfn2dict-builder
|
||||
comicfn2dict-lint:
|
||||
image: comicfn2dict-builder
|
||||
container_name: comicfn2dict-lint
|
||||
command: make lint
|
||||
comicfn2dict-test:
|
||||
image: comicfn2dict-builder
|
||||
container_name: comicfn2dict-test
|
||||
command: make test
|
||||
volumes:
|
||||
- ./test-results/:/app/test-results/
|
||||
comicfn2dict-build:
|
||||
image: comicfn2dict-builder
|
||||
container_name: comicfn2dict-build
|
||||
volumes:
|
||||
- ./dist/:/app/dist/
|
||||
command: poetry build
|
186
eslint.config.js
Normal file
186
eslint.config.js
Normal file
@ -0,0 +1,186 @@
|
||||
import { FlatCompat } from "@eslint/eslintrc";
|
||||
import js from "@eslint/js";
|
||||
import arrayFunc from "eslint-plugin-array-func";
|
||||
// import plugin broken for flat config
|
||||
// https://github.com/import-js/eslint-plugin-import/issues/2556
|
||||
// import importPlugin from "eslint-plugin-import";
|
||||
import eslintPluginPrettierRecommended from "eslint-plugin-prettier/recommended";
|
||||
import pluginSecurity from "eslint-plugin-security";
|
||||
import eslintPluginUnicorn from "eslint-plugin-unicorn";
|
||||
import globals from "globals";
|
||||
|
||||
const compat = new FlatCompat();
|
||||
|
||||
export default [
|
||||
{
|
||||
languageOptions: {
|
||||
globals: {
|
||||
...globals.node,
|
||||
...globals.browser,
|
||||
},
|
||||
},
|
||||
linterOptions: {
|
||||
reportUnusedDisableDirectives: "warn",
|
||||
},
|
||||
plugins: {
|
||||
// import: importPlugin,
|
||||
unicorn: eslintPluginUnicorn,
|
||||
},
|
||||
rules: {
|
||||
"array-func/prefer-array-from": "off", // for modern browsers the spread operator, as preferred by unicorn, works fine.
|
||||
"max-params": ["warn", 4],
|
||||
"no-console": "warn",
|
||||
"no-debugger": "warn",
|
||||
"no-constructor-bind/no-constructor-bind": "error",
|
||||
"no-constructor-bind/no-constructor-state": "error",
|
||||
"no-secrets/no-secrets": "error",
|
||||
"prettier/prettier": "warn",
|
||||
"security/detect-object-injection": "off",
|
||||
"space-before-function-paren": "off",
|
||||
"unicorn/switch-case-braces": ["warn", "avoid"],
|
||||
"unicorn/prefer-node-protocol": 0,
|
||||
"unicorn/prevent-abbreviations": "off",
|
||||
"unicorn/filename-case": [
|
||||
"error",
|
||||
{ case: "kebabCase", ignore: [".*.md"] },
|
||||
],
|
||||
/*
|
||||
...importPlugin.configs["recommended"].rules,
|
||||
"import/no-unresolved": [
|
||||
"error",
|
||||
{
|
||||
ignore: ["^[@]"],
|
||||
},
|
||||
],
|
||||
*/
|
||||
},
|
||||
/*
|
||||
settings: {
|
||||
"import/parsers": {
|
||||
espree: [".js", ".cjs", ".mjs", ".jsx"],
|
||||
"@typescript-eslint/parser": [".ts"],
|
||||
},
|
||||
"import/resolver": {
|
||||
typescript: true,
|
||||
node: true,
|
||||
},
|
||||
},
|
||||
*/
|
||||
},
|
||||
js.configs.recommended,
|
||||
arrayFunc.configs.all,
|
||||
pluginSecurity.configs.recommended,
|
||||
eslintPluginPrettierRecommended,
|
||||
...compat.config({
|
||||
root: true,
|
||||
env: {
|
||||
browser: true,
|
||||
es2024: true,
|
||||
node: true,
|
||||
},
|
||||
extends: [
|
||||
// LANGS
|
||||
"plugin:jsonc/recommended-with-jsonc",
|
||||
"plugin:markdown/recommended",
|
||||
"plugin:toml/recommended",
|
||||
"plugin:yml/standard",
|
||||
"plugin:yml/prettier",
|
||||
// CODE QUALITY
|
||||
"plugin:sonarjs/recommended",
|
||||
// PRACTICES
|
||||
"plugin:eslint-comments/recommended",
|
||||
// "plugin:import/recommended",
|
||||
"plugin:no-use-extend-native/recommended",
|
||||
"plugin:optimize-regex/all",
|
||||
"plugin:promise/recommended",
|
||||
"plugin:switch-case/recommended",
|
||||
// SECURITY
|
||||
"plugin:no-unsanitized/DOM",
|
||||
],
|
||||
overrides: [
|
||||
{
|
||||
files: ["**/*.md"],
|
||||
processor: "markdown/markdown",
|
||||
rules: {
|
||||
"prettier/prettier": ["warn", { parser: "markdown" }],
|
||||
},
|
||||
},
|
||||
{
|
||||
files: ["**/*.md/*.js"], // Will match js code inside *.md files
|
||||
rules: {
|
||||
"no-unused-vars": "off",
|
||||
"no-undef": "off",
|
||||
},
|
||||
},
|
||||
{
|
||||
files: ["**/*.md/*.sh"],
|
||||
rules: {
|
||||
"prettier/prettier": ["error", { parser: "sh" }],
|
||||
},
|
||||
},
|
||||
{
|
||||
files: ["*.yaml", "*.yml"],
|
||||
//parser: "yaml-eslint-parser",
|
||||
rules: {
|
||||
"unicorn/filename-case": "off",
|
||||
},
|
||||
},
|
||||
{
|
||||
files: ["*.toml"],
|
||||
//parser: "toml-eslint-parser",
|
||||
rules: {
|
||||
"prettier/prettier": ["error", { parser: "toml" }],
|
||||
},
|
||||
},
|
||||
{
|
||||
files: ["*.json", "*.json5", "*.jsonc"],
|
||||
//parser: "jsonc-eslint-parser",
|
||||
},
|
||||
],
|
||||
parserOptions: {
|
||||
ecmaFeatures: {
|
||||
impliedStrict: true,
|
||||
},
|
||||
ecmaVersion: "latest",
|
||||
},
|
||||
plugins: [
|
||||
"eslint-comments",
|
||||
//"import",
|
||||
"markdown",
|
||||
"no-constructor-bind",
|
||||
"no-secrets",
|
||||
"no-unsanitized",
|
||||
"no-use-extend-native",
|
||||
"optimize-regex",
|
||||
"promise",
|
||||
"simple-import-sort",
|
||||
"sonarjs",
|
||||
"switch-case",
|
||||
"unicorn",
|
||||
],
|
||||
rules: {
|
||||
"no-constructor-bind/no-constructor-bind": "error",
|
||||
"no-constructor-bind/no-constructor-state": "error",
|
||||
"no-secrets/no-secrets": "error",
|
||||
"eslint-comments/no-unused-disable": 1,
|
||||
"simple-import-sort/exports": "warn",
|
||||
"simple-import-sort/imports": "warn",
|
||||
"switch-case/newline-between-switch-case": "off", // Malfunctioning
|
||||
},
|
||||
ignorePatterns: [
|
||||
"*~",
|
||||
"**/__pycache__",
|
||||
".git",
|
||||
"!.circleci",
|
||||
".mypy_cache",
|
||||
".ruff_cache",
|
||||
".pytest_cache",
|
||||
".venv*",
|
||||
"dist",
|
||||
"node_modules",
|
||||
"package-lock.json",
|
||||
"test-results",
|
||||
"typings",
|
||||
],
|
||||
}),
|
||||
];
|
6335
package-lock.json
generated
6335
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
37
package.json
37
package.json
@ -1,10 +1,10 @@
|
||||
{
|
||||
"version": "0.1.0",
|
||||
"description": "linting",
|
||||
"version": "0.2.0",
|
||||
"description": "comicfn2dict linting",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"fix": "eslint_d --cache --fix --ignore-pattern frontend --ext .cjs,.mjs,.js,.json,.yaml,.md . && prettier --write .",
|
||||
"lint": "eslint_d --cache --ignore-pattern frontend --ext .cjs,.mjs,.js,.json,.yaml,.md . && prettier --check .",
|
||||
"fix": "eslint --cache --fix . && prettier --write .",
|
||||
"lint": "eslint --cache . && prettier --check .",
|
||||
"remark-check": "remark .",
|
||||
"remark-fix": "remark . --output"
|
||||
},
|
||||
@ -13,12 +13,13 @@
|
||||
"@prettier/plugin-xml",
|
||||
"prettier-plugin-nginx",
|
||||
"prettier-plugin-packagejson",
|
||||
"prettier-plugin-sh"
|
||||
"prettier-plugin-sh",
|
||||
"prettier-plugin-toml"
|
||||
],
|
||||
"overrides": [
|
||||
{
|
||||
"files": [
|
||||
"*.md"
|
||||
"**/*.md"
|
||||
],
|
||||
"options": {
|
||||
"proseWrap": "always"
|
||||
@ -28,6 +29,7 @@
|
||||
},
|
||||
"remarkConfig": {
|
||||
"plugins": [
|
||||
"gfm",
|
||||
"preset-lint-consistent",
|
||||
"preset-lint-recommended",
|
||||
"preset-lint-markdown-style-guide",
|
||||
@ -42,36 +44,37 @@
|
||||
"@prettier/plugin-xml": "^3.0.0",
|
||||
"eslint": "^8.34.0",
|
||||
"eslint-config-prettier": "^9.0.0",
|
||||
"eslint-plugin-array-func": "^4.0.0",
|
||||
"eslint-plugin-array-func": "^5.0.1",
|
||||
"eslint-plugin-eslint-comments": "^3.2.0",
|
||||
"eslint-plugin-import": "^2.25.4",
|
||||
"eslint-plugin-json": "^3.1.0",
|
||||
"eslint-plugin-mdx": "^3.0.0",
|
||||
"eslint-plugin-jsonc": "^2.13.0",
|
||||
"eslint-plugin-markdown": "^3.0.0",
|
||||
"eslint-plugin-no-constructor-bind": "^2.0.4",
|
||||
"eslint-plugin-no-secrets": "^0.8.9",
|
||||
"eslint-plugin-no-unsanitized": "^4.0.0",
|
||||
"eslint-plugin-no-use-extend-native": "^0.5.0",
|
||||
"eslint-plugin-only-warn": "^1.0.2",
|
||||
"eslint-plugin-optimize-regex": "^1.2.0",
|
||||
"eslint-plugin-prettier": "^5.0.0-alpha.2",
|
||||
"eslint-plugin-promise": "^6.0.0",
|
||||
"eslint-plugin-scanjs-rules": "^0.2.1",
|
||||
"eslint-plugin-security": "^2.1.0",
|
||||
"eslint-plugin-simple-import-sort": "^10.0.0",
|
||||
"eslint-plugin-sonarjs": "^0.23.0",
|
||||
"eslint-plugin-simple-import-sort": "^12.0.0",
|
||||
"eslint-plugin-sonarjs": "^0.24.0",
|
||||
"eslint-plugin-switch-case": "^1.1.2",
|
||||
"eslint-plugin-unicorn": "^50.0.1",
|
||||
"eslint-plugin-yaml": "^0.5.0",
|
||||
"eslint-plugin-toml": "^0.9.2",
|
||||
"eslint-plugin-unicorn": "^51.0.1",
|
||||
"eslint-plugin-yml": "^1.12.2",
|
||||
"eslint_d": "^13.0.0",
|
||||
"prettier": "^3.0.0",
|
||||
"prettier-plugin-nginx": "^1.0.3",
|
||||
"prettier-plugin-packagejson": "^2.4.4",
|
||||
"prettier-plugin-sh": "^0.13.0",
|
||||
"prettier-plugin-sh": "^0.14.0",
|
||||
"prettier-plugin-toml": "^2.0.1",
|
||||
"remark-cli": "^12.0.0",
|
||||
"remark-gfm": "^4.0.0",
|
||||
"remark-preset-lint-consistent": "^5.1.1",
|
||||
"remark-preset-lint-markdown-style-guide": "^5.1.2",
|
||||
"remark-preset-lint-recommended": "^6.1.2",
|
||||
"remark-preset-prettier": "^2.0.1",
|
||||
"toml": "^3.0.0"
|
||||
"remark-preset-prettier": "^2.0.1"
|
||||
}
|
||||
}
|
||||
|
942
poetry.lock
generated
942
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "comicfn2dict"
|
||||
version = "0.1.3"
|
||||
version = "0.2.0a4"
|
||||
description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli."
|
||||
license = "GPL-3.0-only"
|
||||
authors = ["AJ Slater <aj@slater.net>"]
|
||||
@ -20,12 +20,15 @@ classifiers = [
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
]
|
||||
packages = [{ include = "comicfn2dict" }, { include = "tests", format = "sdist" }]
|
||||
packages = [
|
||||
{ include = "comicfn2dict" },
|
||||
{ include = "tests", format = "sdist" },
|
||||
]
|
||||
exclude = ["*/**/*~"]
|
||||
include = []
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.9"
|
||||
python = "^3.10"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
neovim = "^0.3.1"
|
||||
@ -42,7 +45,7 @@ pytest-gitignore = "^1.3"
|
||||
codespell = "^2.1.0"
|
||||
pyright = "^1.1.232"
|
||||
radon = { version = "^6.0.1", extras = ["toml"] }
|
||||
ruff = "^0.1.2"
|
||||
ruff = "^0.2.1"
|
||||
types-python-dateutil = "^2.8.19"
|
||||
vulture = "^2.3"
|
||||
|
||||
@ -77,7 +80,7 @@ omit = [
|
||||
"dist/*",
|
||||
"node_modules/*",
|
||||
"test-results/*",
|
||||
"typings/*"
|
||||
"typings/*",
|
||||
]
|
||||
|
||||
[tool.pyright]
|
||||
@ -98,12 +101,10 @@ exclude = [
|
||||
useLibraryCodeForTypes = true
|
||||
reportMissingImports = true
|
||||
reportImportCycles = true
|
||||
pythonVersion = "3.9"
|
||||
pythonVersion = "3.10"
|
||||
pythonPlatform = "All"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
junit_family = "xunit2"
|
||||
# --black
|
||||
addopts = """
|
||||
--junit-xml=test-results/pytest/results.xml
|
||||
-ra
|
||||
@ -113,21 +114,38 @@ addopts = """
|
||||
--cov-append
|
||||
--cov-report=html
|
||||
--cov-report=term
|
||||
--ignore=.git
|
||||
--ignore=cache
|
||||
--ignore=frontend
|
||||
--ignore=typings
|
||||
"""
|
||||
junit_family = "xunit2"
|
||||
testpaths = "tests"
|
||||
|
||||
[tool.radon]
|
||||
exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*,dist/*,node_modules/*,test-results/*,typings/*"
|
||||
|
||||
[tool.ruff]
|
||||
extend-exclude = ["typings"]
|
||||
extend-ignore = ["S101", "D203", "D213",
|
||||
target-version = "py310"
|
||||
|
||||
[tool.ruff.lint]
|
||||
extend-ignore = [
|
||||
"S101",
|
||||
"D203",
|
||||
"D213",
|
||||
# Format ignores
|
||||
"W191", "E501", "E111", "E114", "E117", "D206", "D300", "Q000", "Q001",
|
||||
"Q002", "Q003", "COM812", "COM819", "ISC001", "ISC002"
|
||||
"W191",
|
||||
"E501",
|
||||
"E111",
|
||||
"E114",
|
||||
"E117",
|
||||
"D206",
|
||||
"D300",
|
||||
"Q000",
|
||||
"Q001",
|
||||
"Q002",
|
||||
"Q003",
|
||||
"COM812",
|
||||
"COM819",
|
||||
"ISC001",
|
||||
"ISC002",
|
||||
]
|
||||
extend-select = [
|
||||
"A",
|
||||
@ -168,19 +186,16 @@ extend-select = [
|
||||
"TRY",
|
||||
"UP",
|
||||
"W",
|
||||
"YTT"
|
||||
"YTT",
|
||||
# "ANN", "ERA", "COM"
|
||||
]
|
||||
external = ["V101"]
|
||||
# format = "grouped"
|
||||
# show-source = true
|
||||
target-version = "py39"
|
||||
task-tags = ["TODO", "FIXME", "XXX", "http", "HACK"]
|
||||
|
||||
[tool.ruff.per-file-ignores]
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"tests/*" = ["SLF001", "T201", "T203"]
|
||||
|
||||
[tool.ruff.pycodestyle]
|
||||
[tool.ruff.lint.pycodestyle]
|
||||
ignore-overlong-task-comments = true
|
||||
|
||||
[tool.vulture]
|
||||
|
@ -1,5 +1,7 @@
|
||||
"""Test filenames with human parsed correct results."""
|
||||
|
||||
from types import MappingProxyType
|
||||
|
||||
TEST_COMIC_FIELDS = {
|
||||
"series": "Long Series Name",
|
||||
"issue": "001",
|
||||
@ -22,6 +24,7 @@ TEST_COMIC_FIELDS_VOL = {
|
||||
TEST_COMIC_VOL_ONLY = {
|
||||
"series": "Long Series Name",
|
||||
"volume": "1",
|
||||
"issue": "1",
|
||||
"title": "Title",
|
||||
"original_format": "TPB",
|
||||
"year": "2000",
|
||||
@ -29,6 +32,7 @@ TEST_COMIC_VOL_ONLY = {
|
||||
"ext": "cbr",
|
||||
}
|
||||
|
||||
# Tests for 0.1.0
|
||||
FNS = {
|
||||
"Night of 1000 Wolves 001 (2013).cbz": {
|
||||
"series": "Night of 1000 Wolves",
|
||||
@ -51,11 +55,6 @@ FNS = {
|
||||
"Long Series Name #001 (2000) Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS,
|
||||
"Long Series Name (2000) 001 Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS,
|
||||
"Long Series Name (2000) #001 Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS,
|
||||
"Long Series Name v1 (2000) #001 "
|
||||
"Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS_VOL,
|
||||
"Long Series Name 001 (2000) (TPB-Releaser) Title.cbz": TEST_COMIC_FIELDS,
|
||||
"Long Series Name Vol 1 "
|
||||
"(2000) (TPB) (Releaser & Releaser-Releaser) Title.cbr": TEST_COMIC_VOL_ONLY,
|
||||
"Ultimate Craziness (2019) (Digital) (Friends-of-Bill).cbr": {
|
||||
"series": "Ultimate Craziness",
|
||||
"year": "2019",
|
||||
@ -73,26 +72,17 @@ FNS = {
|
||||
"Arkenstone Vol. 01 - The Smell of Burnt Toast (2020) (digital) (My-brother).cbr": {
|
||||
"series": "Arkenstone",
|
||||
"volume": "01",
|
||||
"issue": "01",
|
||||
"year": "2020",
|
||||
"ext": "cbr",
|
||||
"scan_info": "My-brother",
|
||||
"title": "The Smell of Burnt Toast",
|
||||
"original_format": "digital",
|
||||
},
|
||||
"Bardude - The Last Thing I Remember.cbz": {
|
||||
"series": "Bardude",
|
||||
"title": "The Last Thing I Remember",
|
||||
"ext": "cbz",
|
||||
},
|
||||
"Drunkguy - The Man Without Fear - 01.cbz": {
|
||||
"series": "Drunkguy",
|
||||
"title": "The Man Without Fear",
|
||||
"issue": "01",
|
||||
"ext": "cbz",
|
||||
},
|
||||
"The_Arkenstone_v03_(2002)_(Digital)_(DR_&_Quenya-Elves).cbr": {
|
||||
"series": "The Arkenstone",
|
||||
"volume": "03",
|
||||
"issue": "03",
|
||||
"year": "2002",
|
||||
"ext": "cbr",
|
||||
"scan_info": "DR & Quenya-Elves",
|
||||
@ -111,6 +101,7 @@ FNS = {
|
||||
"Kartalk Library Edition v01 (1992) (digital) (Son of Ultron-Empire).cbr": {
|
||||
"series": "Kartalk Library Edition",
|
||||
"volume": "01",
|
||||
"issue": "01",
|
||||
"year": "1992",
|
||||
"ext": "cbr",
|
||||
"original_format": "digital",
|
||||
@ -119,15 +110,15 @@ FNS = {
|
||||
"Kind of Deadly v02 - Last Bullet (2006) (Digital) (Zone-Empire).cbr": {
|
||||
"series": "Kind of Deadly",
|
||||
"volume": "02",
|
||||
"issue": "02",
|
||||
"year": "2006",
|
||||
"ext": "cbr",
|
||||
"original_format": "Digital",
|
||||
"scan_info": "Zone-Empire",
|
||||
"title": "Last Bullet",
|
||||
},
|
||||
"Jeremy John - A Big Long Title (2017) (digital-Minutement).cbz": {
|
||||
"series": "Jeremy John",
|
||||
"title": "A Big Long Title",
|
||||
"Jeremy John - Not A Title (2017) (digital-Minutement).cbz": {
|
||||
"series": "Jeremy John - Not A Title",
|
||||
"year": "2017",
|
||||
"ext": "cbz",
|
||||
"original_format": "digital",
|
||||
@ -139,8 +130,7 @@ FNS = {
|
||||
"year": "2006",
|
||||
"ext": "cbz",
|
||||
"scan_info": "Minutemen-Faessla",
|
||||
# "original_format": "digital",
|
||||
"remainders": ("(digital",),
|
||||
"original_format": "digital",
|
||||
},
|
||||
"Jeremy John 003 (2007) (4 covers) (digital) (Minutemen-Faessla).cbz": {
|
||||
"series": "Jeremy John",
|
||||
@ -154,6 +144,7 @@ FNS = {
|
||||
"Jeremy John v01 - Uninterested! (2007) (Digital) (Asgard-Empire).cbr": {
|
||||
"series": "Jeremy John",
|
||||
"volume": "01",
|
||||
"issue": "01",
|
||||
"year": "2007",
|
||||
"ext": "cbr",
|
||||
"original_format": "Digital",
|
||||
@ -180,6 +171,7 @@ FNS = {
|
||||
"Darkwad by Carlos Zemo v01 - Knuckle Fight (2009) (Digital) (Zone-Empire).cbr": {
|
||||
"series": "Darkwad by Carlos Zemo",
|
||||
"volume": "01",
|
||||
"issue": "01",
|
||||
"year": "2009",
|
||||
"ext": "cbr",
|
||||
"title": "Knuckle Fight",
|
||||
@ -243,3 +235,273 @@ FNS = {
|
||||
"ext": "cbz",
|
||||
},
|
||||
}
|
||||
|
||||
# Tests for 0.2.0
|
||||
FNS.update(
|
||||
{
|
||||
# Philosophy change regarding dashes.
|
||||
"Bardude - The Last Thing I Remember.cbz": {
|
||||
"series": "Bardude - The Last Thing I Remember",
|
||||
"ext": "cbz",
|
||||
},
|
||||
"Drunkguy - The Man Without Fear - 01.cbz": {
|
||||
"series": "Drunkguy - The Man Without Fear",
|
||||
"issue": "01",
|
||||
"ext": "cbz",
|
||||
},
|
||||
# BIG Change. title after token. more stripping.
|
||||
"'Batman - Superman - World's Finest 022 (2024) (Webrip) (The Last Kryptonian-DCP).cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "022",
|
||||
"original_format": "Webrip",
|
||||
"series": "Batman - Superman - World's Finest",
|
||||
"scan_info": "The Last Kryptonian-DCP",
|
||||
"year": "2024",
|
||||
},
|
||||
# Issue number starting with a letter requested in https://github.com/comictagger/comictagger/issues/543
|
||||
# word characters now allowed to lead issue numbers only if preceded by a # marker
|
||||
"batman #B01 title.cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "B01",
|
||||
"series": "batman",
|
||||
"title": "title",
|
||||
},
|
||||
"Monster_Island_v1_#2__repaired__c2c.cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "2",
|
||||
"series": "Monster Island",
|
||||
"volume": "1",
|
||||
"scan_info": "c2c",
|
||||
"remainders": ("(repaired)",),
|
||||
},
|
||||
# Extra - in the series
|
||||
" X-Men-V1-#067.cbr": {
|
||||
"ext": "cbr",
|
||||
"issue": "067",
|
||||
"series": "X-Men",
|
||||
"volume": "1",
|
||||
"remainders": ("-",),
|
||||
},
|
||||
"Aquaman - Green Arrow - Deep Target #01 (of 07) (2021).cbr": {
|
||||
"ext": "cbr",
|
||||
"issue": "01",
|
||||
"series": "Aquaman - Green Arrow - Deep Target",
|
||||
"year": "2021",
|
||||
"issue_count": "07",
|
||||
},
|
||||
# CT only separates this into a title if the '-' is attached to the previous word, e.g. 'aquaman- Green Arrow'. @bpepple already opened a ticket for this: https://github.com/ajslater/comicfn2dict/issues/1
|
||||
"Batman_-_Superman_#020_(2021).cbr": {
|
||||
"ext": "cbr",
|
||||
"issue": "020",
|
||||
"series": "Batman - Superman",
|
||||
"year": "2021",
|
||||
},
|
||||
# Publishers like to re-print some of their annuals using this format for the year
|
||||
"Batman '89 (2021) .cbr": {
|
||||
"ext": "cbr",
|
||||
"series": "Batman '89",
|
||||
"year": "2021",
|
||||
},
|
||||
# This made the parser in CT much more complicated. It's understandable that this isn't parsed on the first few iterations of this project
|
||||
"Star Wars - War of the Bounty Hunters - IG-88 (2021).cbz": {
|
||||
"ext": "cbz",
|
||||
"series": "Star Wars - War of the Bounty Hunters - IG-88",
|
||||
"year": "2021",
|
||||
}, # The addition of the '#1' turns this into the same as 'Aquaman - Green Arrow - Deep Target' above
|
||||
"Star Wars - War of the Bounty Hunters - IG-88 #1 (2021).cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "1",
|
||||
"series": "Star Wars - War of the Bounty Hunters - IG-88",
|
||||
"year": "2021",
|
||||
},
|
||||
"Free Comic Book Day - Avengers.Hulk (2021).cbz": {
|
||||
"ext": "cbz",
|
||||
"series": "Free Comic Book Day - Avengers Hulk",
|
||||
"year": "2021",
|
||||
},
|
||||
# CT assumes the volume is also the issue number if it can't find an issue number
|
||||
"Avengers By Brian Michael Bendis volume 03 (2013).cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "03",
|
||||
"series": "Avengers By Brian Michael Bendis",
|
||||
"volume": "03",
|
||||
"year": "2013",
|
||||
},
|
||||
# CT catches the year
|
||||
"Marvel Previews #002 (January 2022).cbr": {
|
||||
"ext": "cbr",
|
||||
"issue": "002",
|
||||
"series": "Marvel Previews",
|
||||
"publisher": "Marvel",
|
||||
"month": "01",
|
||||
"year": "2022",
|
||||
},
|
||||
"Test Numeric Year #2 2001-02-24.cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "2",
|
||||
"series": "Test Numeric Year",
|
||||
"year": "2001",
|
||||
"month": "02",
|
||||
"day": "24",
|
||||
},
|
||||
"Test Month First Date 02-24-2001.cbz": {
|
||||
"ext": "cbz",
|
||||
"series": "Test Month First Date",
|
||||
"year": "2001",
|
||||
"month": "02",
|
||||
"day": "24",
|
||||
},
|
||||
# CT notices that this is a full date. CT doesn't actually return the month or day, though; it just removes them.
|
||||
"X-Men, 2021-08-04 (#02).cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "02",
|
||||
"series": "X-Men",
|
||||
"year": "2021",
|
||||
"month": "08",
|
||||
"day": "04",
|
||||
},
|
||||
# 4 digit issue number
|
||||
# should this be an issue number if year DONE?.
|
||||
"action comics 1024.cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "1024",
|
||||
"series": "action comics",
|
||||
},
|
||||
# This is a contrived test case. I've never seen this; I just wanted to handle it with my parser.
|
||||
"Cory Doctorow's Futuristic Tales of the Here and Now #0.0.1 (2007).cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "0.0.1",
|
||||
"series": "Cory Doctorow's Futuristic Tales of the Here and Now",
|
||||
"year": "2007",
|
||||
},
|
||||
# CT treats ':' the same as '-' but here the ':' is attached to 'Now' which CT sees as a title separation
|
||||
"Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "001",
|
||||
"series": "Cory Doctorow's Futuristic Tales of the Here and Now",
|
||||
"title": "Anda's Game",
|
||||
"year": "2007",
|
||||
},
|
||||
# If a title ends in a year, it's not an issue (and is a year if no year)
|
||||
"Blade Runner Free Comic Book Day 2021 (2021).cbr": {
|
||||
"ext": "cbr",
|
||||
"series": "Blade Runner Free Comic Book Day 2021",
|
||||
"year": "2021",
|
||||
},
|
||||
# If a year occurs after another year, and no volume, do volume / year
|
||||
"Super Strange Yarns (1957) #92 (1969).cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "92",
|
||||
"series": "Super Strange Yarns",
|
||||
"volume": "1957",
|
||||
"year": "1969",
|
||||
},
|
||||
# CT checks for the following '(of 06)' after the '03' and marks it as the volume
|
||||
"Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021).cbr": {
|
||||
"ext": "cbr",
|
||||
"issue": "008",
|
||||
"series": "Elephantmen 2259",
|
||||
"title": "Simple Truth",
|
||||
"volume": "03",
|
||||
"year": "2021",
|
||||
"volume_count": "06",
|
||||
},
|
||||
# CT treats book like 'v' but also adds it as the title (matches ComicVine for this particular series)
|
||||
"Bloodshot Book 03 (2020).cbr": {
|
||||
"ext": "cbr",
|
||||
"issue": "03",
|
||||
"series": "Bloodshot",
|
||||
"title": "Book 03",
|
||||
"volume": "03",
|
||||
"year": "2020",
|
||||
},
|
||||
# c2c aka "cover to cover" is fairly common and CT moves it to scan_info/remainder
|
||||
"Marvel Two In One V1 #090 c2c.cbr": {
|
||||
"ext": "cbr",
|
||||
"issue": "090",
|
||||
"series": "Marvel Two In One",
|
||||
"publisher": "Marvel",
|
||||
"volume": "1",
|
||||
"scan_info": "c2c",
|
||||
},
|
||||
# CT treats '[]' as equivalent to '()', catches DC as a publisher and 'Sep-Oct 1951' as dates, and removes them. CT doesn't catch the 'digital', though, so that could be better, but I blame whoever made this atrocious filename.
|
||||
"Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "49",
|
||||
"series": "Wonder Woman",
|
||||
"publisher": "DC",
|
||||
"year": "1951",
|
||||
"month": "09",
|
||||
"remainders": (
|
||||
"digital (downsized, lightened, 4 missing story pages "
|
||||
"restored) (Shadowcat-Empire)",
|
||||
),
|
||||
},
|
||||
"Captain Science #001 (1950) The Beginning - nothing.cbz": {
|
||||
"ext": "cbz",
|
||||
"issue": "001",
|
||||
"title": "The Beginning - nothing",
|
||||
"series": "Captain Science",
|
||||
"year": "1950",
|
||||
},
|
||||
"Captain Science #001-cix-cbi.cbr": {
|
||||
"ext": "cbr",
|
||||
"issue": "001",
|
||||
"series": "Captain Science",
|
||||
"title": "cix-cbi",
|
||||
},
|
||||
"Long Series Name v1 (2000) #001 "
|
||||
"Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS_VOL,
|
||||
"Long Series Name 001 (2000) (TPB-Releaser) Title.cbz": {
|
||||
"series": "Long Series Name",
|
||||
"issue": "001",
|
||||
"year": "2000",
|
||||
"original_format": "TPB",
|
||||
"scan_info": "Releaser",
|
||||
"remainders": ("Title",),
|
||||
"ext": "cbz",
|
||||
},
|
||||
"Long Series Name Vol 1 "
|
||||
"(2000) (TPB) (Releaser & Releaser-Releaser) Title.cbr": {
|
||||
"series": "Long Series Name",
|
||||
"volume": "1",
|
||||
"issue": "1",
|
||||
"remainders": ("Title",),
|
||||
"original_format": "TPB",
|
||||
"year": "2000",
|
||||
"scan_info": "Releaser & Releaser-Releaser",
|
||||
"ext": "cbr",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# first_key, first_val = NEW.popitem()
|
||||
# FNS[first_key] = first_val
|
||||
PARSE_FNS = MappingProxyType(FNS)
|
||||
|
||||
SERIALIZE_FNS = MappingProxyType(
|
||||
{
|
||||
"Long Series Name #001 (2000) Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS,
|
||||
"Long Series Name v1 #001 "
|
||||
"(2000) Title (TPB) (Releaser & Releaser-Releaser).cbr": TEST_COMIC_VOL_ONLY,
|
||||
"Series Name (2000-12-31).cbz": {
|
||||
"series": "Series Name",
|
||||
"year": "2000",
|
||||
"month": "12",
|
||||
"day": "31",
|
||||
"ext": "cbz",
|
||||
},
|
||||
"Series Name (2000-12).cbz": {
|
||||
"series": "Series Name",
|
||||
"year": "2000",
|
||||
"month": "12",
|
||||
"ext": "cbz",
|
||||
},
|
||||
"Series Name (Dec-31).cbz": {
|
||||
"series": "Series Name",
|
||||
"month": "12",
|
||||
"day": "31",
|
||||
"ext": "cbz",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
@ -1,22 +1,18 @@
|
||||
"""Tests for filename parsing."""
|
||||
from pprint import pprint
|
||||
from types import MappingProxyType
|
||||
|
||||
import pytest
|
||||
from deepdiff.diff import DeepDiff
|
||||
|
||||
from comicfn2dict import comicfn2dict
|
||||
from tests.comic_filenames import FNS
|
||||
|
||||
ALL_FIELDS = frozenset({"series", "volume", "issue", "issue_count", "year", "ext"})
|
||||
FIELD_SCHEMA = MappingProxyType({key: None for key in ALL_FIELDS})
|
||||
from comicfn2dict import ComicFilenameParser
|
||||
from tests.comic_filenames import PARSE_FNS
|
||||
|
||||
|
||||
@pytest.mark.parametrize("item", FNS.items())
|
||||
@pytest.mark.parametrize("item", PARSE_FNS.items())
|
||||
def test_parse_filename(item):
|
||||
"""Test filename parsing."""
|
||||
fn, defined_fields = item
|
||||
md = comicfn2dict(fn)
|
||||
md = ComicFilenameParser(fn, verbose=1).parse()
|
||||
diff = DeepDiff(defined_fields, md, ignore_order=True)
|
||||
print(fn)
|
||||
pprint(defined_fields)
|
||||
|
13
tests/test_dict2comicfn.py
Normal file
13
tests/test_dict2comicfn.py
Normal file
@ -0,0 +1,13 @@
|
||||
"""Tests for filename serialization."""
|
||||
import pytest
|
||||
|
||||
from comicfn2dict import ComicFilenameSerializer
|
||||
from tests.comic_filenames import SERIALIZE_FNS
|
||||
|
||||
|
||||
@pytest.mark.parametrize("item", SERIALIZE_FNS.items())
|
||||
def test_serialize_dict(item):
|
||||
"""Test metadata serialization."""
|
||||
test_fn, md = item
|
||||
fn = ComicFilenameSerializer(md).serialize()
|
||||
assert test_fn == fn
|
Loading…
Reference in New Issue
Block a user