commit 624b64d6ca
.circleci/config.yml (new file, +60)
@@ -0,0 +1,60 @@
+jobs:
+  build:
+    machine:
+      image: ubuntu-2204:current
+    environment:
+      DOCKER_CLI_EXPERIMENTAL: enabled
+      DOCKER_BUILDKIT: 1
+    steps:
+      - checkout
+      - run:
+          command: docker compose build comicfn2dict-builder
+          name: Build Builder
+      - run:
+          command: ./bin/docker-compose-exit.sh comicfn2dict-lint
+          name: comicfn2dict Lint
+      - run:
+          command: ./bin/docker-compose-exit.sh comicfn2dict-test
+          name: comicfn2dict Test
+      - store_test_results:
+          path: test-results/pytest
+      - store_artifacts:
+          path: test-results/coverage
+      - run:
+          command: ./bin/docker-compose-exit.sh comicfn2dict-build
+          name: Build comicfn2dict Dist
+      - persist_to_workspace:
+          paths:
+            - ./README.md
+            - ./bin
+            - ./dist
+            - ./pyproject.toml
+          root: .
+  deploy:
+    docker:
+      - image: cimg/python:3.12.1
+    steps:
+      - attach_workspace:
+          at: .
+      - run:
+          command: ./bin/publish-pypi.sh
+version: 2.1
+workflows:
+  main:
+    jobs:
+      - build:
+          filters:
+            branches:
+              only:
+                - develop
+                - pre-release
+                - main
+      - deploy:
+          filters:
+            branches:
+              only:
+                - pre-release
+                - main
+          requires:
+            - build
+  version: 2.1
.eslintignore (new file, +13)
@@ -0,0 +1,13 @@
+!.circleci
+**/__pycache__
+*test-results*
+*~
+.git
+.mypy_cache
+.pytest_cache
+.ruff_cache
+.venv
+dist
+node_modules
+package-lock.json
+typings
.eslintrc.cjs (deleted, -101)
@@ -1,101 +0,0 @@
-module.exports = {
-  root: true,
-  env: {
-    browser: true,
-    es2022: true,
-    node: true,
-  },
-  extends: [
-    "eslint:recommended",
-    // LANGS
-    "plugin:json/recommended",
-    "plugin:mdx/recommended",
-    "plugin:yaml/recommended",
-    // CODE QUALITY
-    "plugin:sonarjs/recommended",
-    "plugin:unicorn/all",
-    // PRACTICES
-    "plugin:array-func/recommended",
-    "plugin:eslint-comments/recommended",
-    "plugin:no-use-extend-native/recommended",
-    "plugin:optimize-regex/all",
-    "plugin:promise/recommended",
-    "plugin:import/recommended",
-    "plugin:switch-case/recommended",
-    // PRETTIER
-    "plugin:prettier/recommended",
-    "prettier", // prettier-config
-    // SECURITY
-    "plugin:no-unsanitized/DOM",
-    "plugin:security/recommended-legacy",
-  ],
-  overrides: [
-    {
-      files: ["*.md"],
-      rules: {
-        "prettier/prettier": ["warn", { parser: "markdown" }],
-      },
-    },
-  ],
-  parserOptions: {
-    ecmaVersion: "latest",
-    ecmaFeatures: {
-      impliedStrict: true,
-    },
-  },
-  plugins: [
-    "array-func",
-    "eslint-comments",
-    "json",
-    "import",
-    "no-constructor-bind",
-    "no-secrets",
-    "no-unsanitized",
-    "no-use-extend-native",
-    "optimize-regex",
-    "prettier",
-    "promise",
-    "simple-import-sort",
-    "switch-case",
-    "security",
-    "sonarjs",
-    "unicorn",
-    "yaml",
-  ],
-  rules: {
-    "array-func/prefer-array-from": "off", // for modern browsers the spread operator, as preferred by unicorn, works fine.
-    "max-params": ["warn", 4],
-    "no-console": process.env.NODE_ENV === "production" ? "warn" : "off",
-    "no-debugger": process.env.NODE_ENV === "production" ? "warn" : "off",
-    "no-constructor-bind/no-constructor-bind": "error",
-    "no-constructor-bind/no-constructor-state": "error",
-    "no-secrets/no-secrets": "error",
-    "eslint-comments/no-unused-disable": 1,
-    "prettier/prettier": "warn",
-    "security/detect-object-injection": "off",
-    "simple-import-sort/exports": "warn",
-    "simple-import-sort/imports": "warn",
-    "space-before-function-paren": "off",
-    "switch-case/newline-between-switch-case": "off", // Malfunctioning
-    "unicorn/switch-case-braces": ["warn", "avoid"],
-    "unicorn/prefer-node-protocol": 0,
-    "unicorn/prevent-abbreviations": "off",
-    "unicorn/filename-case": [
-      "error",
-      { case: "kebabCase", ignore: [".*.md"] },
-    ],
-  },
-  ignorePatterns: [
-    "*~",
-    "**/__pycache__",
-    ".git",
-    "!.circleci",
-    ".mypy_cache",
-    ".pytest_cache",
-    ".venv*",
-    "dist",
-    "package-lock.json",
-    "test-results",
-    "typings",
-  ],
-};
Dockerfile (new file, +20)
@@ -0,0 +1,20 @@
+FROM python:3.12.1-bookworm
+LABEL maintainer="AJ Slater <aj@slater.net>"
+
+COPY debian.sources /etc/apt/sources.list.d/
+# hadolint ignore=DL3008
+RUN apt-get clean \
+    && apt-get update \
+    && apt-get install --no-install-recommends -y \
+        bash \
+        npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY bin ./bin
+COPY package.json package-lock.json pyproject.toml poetry.lock Makefile ./
+RUN make install-all
+
+COPY . .
Makefile (6 changes)
@@ -1,28 +1,28 @@
 .PHONY: install-deps
 ## Update pip and install poetry
+## @category Install
+install-deps:
 	pip install --upgrade pip
 	pip install --upgrade poetry
+	npm install

 .PHONY: install
 ## Install for production
 ## @category Install
 install-prod: install-deps
 	poetry install --no-root --only-root
-	npm install

 .PHONY: install-dev
 ## Install dev requirements
 ## @category Install
 install-dev: install-deps
 	poetry install --no-root --only-root --with dev
-	npm install

 .PHONY: install-all
 ## Install with all extras
 ## @category Install
 install-all: install-deps
 	poetry install --no-root --all-extras
-	npm install

 .PHONY: clean
 ## Clean pycaches
NEWS.md (19 changes)
@@ -1,5 +1,24 @@
 # 📰 comicfn2dict News

+## v0.2.0
+
+- Titles are now parsed only if they occur after the series token AND after
+  either issue, year or volume.
+- A more sophisticated date parser.
+- Issue numbers that lead with a '#' character may start with alphabetical
+  characters.
+- If volume is parsed, but issue number is not, the issue number is copied from
+  the volume number.
+- ComicFilenameParser and ComicFilenameSerializer classes are available as well
+  as the old function API.
+- New test cases thanks to @lordwelch & @bpepple
+- Titles must come after series and one other token, but before format and scan
+  info.
+
+## v0.1.4
+
+- Require Python 3.10
+
 ## v0.1.3

 - Fix README
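
The v0.2.0 behaviors listed above are all exposed through the new ComicFilenameParser and ComicFilenameSerializer classes that this commit adds. A minimal sketch of the new class API (it assumes comicfn2dict 0.2.0 is installed; the filename and the exact keys returned are illustrative):

```python
from comicfn2dict import ComicFilenameParser, ComicFilenameSerializer

# The title parses because it follows the series token and an issue/year token.
metadata = ComicFilenameParser("Series Name #A01 (2023) Title.cbz").parse()

# Round-trip: serialize the metadata dict back into a filename.
filename = ComicFilenameSerializer(metadata).serialize()
```

The old `comicfn2dict()` / `dict2comicfn()` functions remain as thin wrappers around these classes.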
README.md (16 changes)
@@ -4,16 +4,30 @@ An API and CLI for extracting structured comic metadata from filenames.

 ## Install

+<!-- eslint-skip -->
+
 ```sh
 pip install comicfn2dict
 ```

 ## API

-look at `comicfn2dict/comicfn2dict.py`
+<!-- eslint-skip -->
+
+```python
+from comicfn2dict import comicfn2dict, dict2comicfn
+
+path = "Comic Series #001 Title (2024).cbz"
+
+metadata: dict[str, str | tuple[str, ...]] = comicfn2dict(path, verbose=0)
+
+filename: str = dict2comicfn(metadata, ext=True, verbose=0)
+```

 ## CLI

+<!-- eslint-skip -->
+
 ```sh
 comicfn2dict "Series Name #01 - Title (2023).cbz"
 {'ext': 'cbz',
bin/docker-compose-exit.sh (new executable file, +6)
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Run a docker compose service and return its exit code
+set -euo pipefail
+SERVICE=$1
+# docker compose without the dash doesn't have the exit-code-from param
+docker compose up --exit-code-from "$SERVICE" "$SERVICE"
bin/publish-pypi.sh (new executable file, +7)
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Publish the created package
+set -euo pipefail
+cd "$(dirname "$0")/.."
+pip3 install --upgrade pip
+pip3 install --upgrade poetry
+poetry publish -u "$PYPI_USER" -p "$PYPI_PASS"
comicfn2dict/__init__.py
@@ -1,3 +1,3 @@
 """Comic Filename to Dict parser and unparser."""
-from .parse import comicfn2dict  # noqa: F401
-from .unparse import dict2comicfn  # noqa: F401
+from .parse import ComicFilenameParser, comicfn2dict  # noqa: F401
+from .unparse import ComicFilenameSerializer, dict2comicfn  # noqa: F401
comicfn2dict/cli.py
@@ -4,17 +4,27 @@ from argparse import ArgumentParser
 from pathlib import Path
 from pprint import pprint

-from comicfn2dict.parse import comicfn2dict
+from comicfn2dict.parse import ComicFilenameParser


-def main():
+def main() -> None:
     """Test parser."""
     description = "Comic book archive read/write tool."
     parser = ArgumentParser(description=description)
     parser.add_argument("path", help="Path of comic filename to parse", type=Path)
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        default=0,
+        action="count",
+        help="Display intermediate parsing steps. Good for debugging.",
+    )
     args = parser.parse_args()
     name = args.path.name
-    metadata = comicfn2dict(name)
+    cfnparser = ComicFilenameParser(name, verbose=args.verbose)
+    metadata = cfnparser.parse()
+    if args.verbose:
+        print("=" * 80)  # noqa:T201
     pprint(metadata)  # noqa:T203
comicfn2dict/comicfn2dict.py (deleted, -3)
@@ -1,3 +0,0 @@
-"""API import source."""
-from comicfn2dict.parse import comicfn2dict  # noqa: F401
-from comicfn2dict.unparse import dict2comicfn  # noqa: F401
comicfn2dict/log.py (new file, +9)
@@ -0,0 +1,9 @@
+"""Print log header."""
+
+
+def print_log_header(label: str) -> None:
+    """Print log header."""
+    prefix = "-" * 3 + label
+    suffix_len = 80 - len(prefix)
+    suffix = "-" * suffix_len
+    print(prefix + suffix)  # noqa: T201
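
This helper pads each label with dashes out to an 80-column rule; the parser and serializer use it below to delimit their verbose stages. A quick sketch of what it prints:

```python
from comicfn2dict.log import print_log_header

print_log_header("After Issue")
# prints "---After Issue---...---" padded with dashes to 80 columns
```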
comicfn2dict/parse.py
@@ -1,226 +1,364 @@
 """Parse comic book archive names using the simple 'parse' parser."""
-import re
+from calendar import month_abbr
+from copy import copy
 from pathlib import Path
-from typing import Union
+from pprint import pformat
+from re import Match, Pattern
+from sys import maxsize

+from comicfn2dict.log import print_log_header
 from comicfn2dict.regex import (
-    DASH_SPLIT_RE,
-    EXTRA_SPACES_RE,
-    ISSUE_ANYWHERE_RE,
+    ALPHA_MONTH_RANGE_RE,
+    BOOK_VOLUME_RE,
     ISSUE_BEGIN_RE,
-    ISSUE_COUNT_RE,
     ISSUE_END_RE,
     ISSUE_NUMBER_RE,
-    ISSUE_TOKEN_RE,
-    NON_SPACE_DIVIDER_RE,
-    ORIGINAL_FORMAT_RE,
+    ISSUE_WITH_COUNT_RE,
+    MONTH_FIRST_DATE_RE,
+    NON_NUMBER_DOT_RE,
     ORIGINAL_FORMAT_SCAN_INFO_RE,
+    ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
+    PUBLISHER_AMBIGUOUS_RE,
+    PUBLISHER_AMBIGUOUS_TOKEN_RE,
+    PUBLISHER_UNAMBIGUOUS_RE,
+    PUBLISHER_UNAMBIGUOUS_TOKEN_RE,
+    REGEX_SUBS,
     REMAINING_GROUP_RE,
-    SCAN_INFO_RE,
+    SCAN_INFO_SECONDARY_RE,
+    TOKEN_DELIMETER,
     VOLUME_RE,
-    YEAR_BEGIN_RE,
+    VOLUME_WITH_COUNT_RE,
     YEAR_END_RE,
+    YEAR_FIRST_DATE_RE,
     YEAR_TOKEN_RE,
 )

+_DATE_KEYS = frozenset({"year", "month", "day"})
 _REMAINING_GROUP_KEYS = ("series", "title")
+# Ordered by commonness.
+_TITLE_PRECEDING_KEYS = ("issue", "year", "volume", "month")


-def _parse_ext(name, suffix, metadata):
-    """Pop the extension from the pathname."""
-    data = name.removesuffix(suffix)
-    ext = suffix.lstrip(".")
-    if ext:
-        metadata["ext"] = ext
-    return data
-
-
-def _clean_dividers(data):
-    """Replace non space dividers and clean extra spaces out of string."""
-    data = NON_SPACE_DIVIDER_RE.sub(" ", data)
-    return EXTRA_SPACES_RE.sub(" ", data)
-
-
-def _get_data_list(path, metadata):
-    """Prepare data list from a path or string."""
-    if isinstance(path, str):
-        path = path.strip()
-    path = Path(path)
-    data = _parse_ext(path.name, path.suffix, metadata)
-    data = _clean_dividers(data)
-    return DASH_SPLIT_RE.split(data)
-
-
-def _paren_strip(value: str):
-    """Strip spaces and parens."""
-    return value.strip().strip("()").strip()
-
-
-def _splicey_dicey(data_list, index, match, match_group: Union[int, str] = 0):
-    """Replace a string token from a list with two strings and the value removed.
-
-    And return the value.
-    """
-    value = match.group(match_group)
-    data = data_list.pop(index)
-    data_ends = []
-    if data_before := data[: match.start()].strip():
-        data_ends.append(data_before)
-    if data_after := data[match.end() :].strip():
-        data_ends.append(data_after)
-    data_list[index:index] = data_ends
-    return _paren_strip(value)
-
-
-def _parse_original_format_and_scan_info(data_list, metadata):
-    """Parse (ORIGINAL_FORMAT-SCAN_INFO)."""
-    original_format = None
-    scan_info = None
-    index = 0
-    match = None
-    for data in data_list:
-        match = ORIGINAL_FORMAT_SCAN_INFO_RE.search(data)
-        if match:
-            original_format = match.group("original_format")
-            try:
-                scan_info = match.group("scan_info")
-            except IndexError:
-                scan_info = None
-            break
-        index += 1
-    if original_format:
-        metadata["original_format"] = _paren_strip(original_format)
-        match_group = 1
-        if scan_info:
-            metadata["scan_info"] = _paren_strip(scan_info)
-            match_group = 0
-        _splicey_dicey(data_list, index, match, match_group=match_group)
-    else:
-        index = 0
-    return index
-
-
-def _pop_value_from_token(
-    data_list: list,
-    metadata: dict,
-    regex: re.Pattern,
-    key: str,
-    index: int = 0,
-):
-    """Search token for value, splice and assign to metadata."""
-    data = data_list[index]
-    match = regex.search(data)
-    if match:
-        value = _splicey_dicey(data_list, index, match, key)
-        metadata[key] = value
-    return match
-
-
-def _parse_item(
-    data_list,
-    metadata,
-    regex,
-    key,
-    start_index: int = 0,
-):
-    """Parse a value from the data list into metadata and alter the data list."""
-    index = start_index
-    dl_len = end_index = len(data_list)
-    if index >= end_index:
-        index = 0
-    while index < end_index:
-        match = _pop_value_from_token(data_list, metadata, regex, key, index)
-        if match:
-            break
-        index += 1
-    if index > dl_len and start_index > 0:
-        index = 0
-        end_index = start_index
-    return index
-
-
-def _pop_issue_from_text_fields(data_list, metadata, index):
-    """Search issue from ends of text fields."""
-    if "issue" not in metadata:
-        _pop_value_from_token(data_list, metadata, ISSUE_END_RE, "issue", index=index)
-    if "issue" not in metadata:
-        _pop_value_from_token(data_list, metadata, ISSUE_BEGIN_RE, "issue", index=index)
-    return data_list.pop(index)
-
-
-def _assign_remaining_groups(data_list, metadata):
-    """Assign series and title."""
-    index = 0
-    for key in _REMAINING_GROUP_KEYS:
-        try:
-            data = data_list[index]
-        except (IndexError, TypeError):
-            break
-        match = REMAINING_GROUP_RE.search(data) if data else None
-        if match:
-            value = _pop_issue_from_text_fields(data_list, metadata, index)
-            value = _paren_strip(value)
-            if value:
-                metadata[key] = value
-        else:
-            index += 1
-
-
-def _pickup_issue(remainders, metadata):
-    """Get issue from remaining tokens or anywhere in a pinch."""
-    if "issue" in metadata:
-        return
-    _parse_item(remainders, metadata, ISSUE_TOKEN_RE, "issue")
-    if "issue" in metadata:
-        return
-    _parse_item(remainders, metadata, ISSUE_ANYWHERE_RE, "issue")
-
-
-def comicfn2dict(path):
-    """Parse the filename with a hierarchy of regexes."""
-    metadata = {}
-    data_list = _get_data_list(path, metadata)
-
-    # Parse paren tokens
-    _parse_item(data_list, metadata, ISSUE_COUNT_RE, "issue_count")
-    _parse_item(data_list, metadata, YEAR_TOKEN_RE, "year")
-    of_index = _parse_original_format_and_scan_info(data_list, metadata)
-    if "original_format" not in metadata:
-        of_index = _parse_item(
-            data_list, metadata, ORIGINAL_FORMAT_RE, "original_format"
-        )
-    if "scan_info" not in metadata:
-        # Start searching for scan_info after original format.
-        _parse_item(
-            data_list,
-            metadata,
-            SCAN_INFO_RE,
-            "scan_info",
-            start_index=of_index + 1,
-        )
-
-    # Parse regular tokens
-    _parse_item(data_list, metadata, VOLUME_RE, "volume")
-    _parse_item(data_list, metadata, ISSUE_NUMBER_RE, "issue")
-
-    # Pickup year if not gotten.
-    if "year" not in metadata:
-        _parse_item(data_list, metadata, YEAR_BEGIN_RE, "year")
-    if "year" not in metadata:
-        _parse_item(data_list, metadata, YEAR_END_RE, "year")
-
-    # Pickup issue if it's a standalone token
-    if "issue" not in metadata:
-        _parse_item(data_list, metadata, ISSUE_TOKEN_RE, "issue")
-
-    # Series and Title. Also looks for issue.
-    _assign_remaining_groups(data_list, metadata)
-
-    # Final try for issue number.
-    _pickup_issue(data_list, metadata)
-
-    # Add Remainders
-    if data_list:
-        metadata["remainders"] = tuple(data_list)
-
-    return metadata
+class ComicFilenameParser:
+    """Parse a filename metadata into a dict."""
+
+    def path_index(self, key: str, default: int = -1) -> int:
+        """Lazily retrieve and memoize the key's location in the path."""
+        if key == "remainders":
+            return default
+        value: str = self.metadata.get(key, "")  # type: ignore
+        if not value:
+            return default
+        if value not in self._path_indexes:
+            # XXX This is fragile, but it's difficult to calculate the original
+            # position at match time from the ever changing _unparsed_path.
+            index = self.path.rfind(value) if key == "ext" else self.path.find(value)
+            self._path_indexes[value] = index
+        return self._path_indexes[value]
+
+    def _log(self, label: str) -> None:
+        if not self._debug:
+            return
+        print_log_header(label)
+        combined = {}
+        for key in self.metadata:
+            combined[key] = (self.metadata.get(key), self.path_index(key))
+        print(" " + self._unparsed_path)  # noqa: T201
+        print(" " + pformat(combined))  # noqa: T201
+
+    def _parse_ext(self) -> None:
+        """Pop the extension from the pathname."""
+        path = Path(self._unparsed_path)
+        suffix = path.suffix
+        if not suffix:
+            return
+
+        data = path.name.removesuffix(suffix)
+        ext = suffix.lstrip(".")
+        self.metadata["ext"] = ext
+        self._unparsed_path = data
+
+    def _clean_dividers(self) -> None:
+        """Replace non space dividers and clean extra spaces out of string."""
+        data = self._unparsed_path
+
+        # Simple substitutions
+        for regex, pair in REGEX_SUBS.items():
+            replacement, count = pair
+            data = regex.sub(replacement, data, count=count).strip()
+        self._unparsed_path = data.strip()
+        self._log("After Clean Path")
+
+    def _parse_items_update_metadata(
+        self, matches: Match, exclude: str, require_all: bool, first_only: bool
+    ) -> bool:
+        """Update Metadata."""
+        matched_metadata = {}
+        for key, value in matches.groupdict().items():
+            if value == exclude:
+                continue
+            if not value:
+                if require_all:
+                    return False
+                continue
+            matched_metadata[key] = value
+            if first_only:
+                break
+        if not matched_metadata:
+            return False
+        self.metadata.update(matched_metadata)
+        return True
+
+    def _parse_items_pop_tokens(self, regex: Pattern, first_only: bool) -> None:
+        """Pop tokens from unparsed path."""
+        count = 1 if first_only else 0
+        marked_str = regex.sub(TOKEN_DELIMETER, self._unparsed_path, count=count)
+        parts = []
+        for part in marked_str.split(TOKEN_DELIMETER):
+            if token := part.strip():
+                parts.append(token)
+        self._unparsed_path = TOKEN_DELIMETER.join(parts)
+
+    def _parse_items(  # noqa: PLR0913
+        self,
+        regex: Pattern,
+        require_all: bool = False,
+        exclude: str = "",
+        first_only: bool = False,
+        pop: bool = True,
+    ) -> None:
+        """Parse a value from the data list into metadata and alter the data list."""
+        # Match
+        matches = regex.search(self._unparsed_path)
+        if not matches:
+            return
+
+        if not self._parse_items_update_metadata(
+            matches, exclude, require_all, first_only
+        ):
+            return
+
+        if pop:
+            self._parse_items_pop_tokens(regex, first_only)
+
+    def _parse_issue(self) -> None:
+        """Parse Issue."""
+        self._parse_items(ISSUE_NUMBER_RE)
+        if "issue" not in self.metadata:
+            self._parse_items(ISSUE_WITH_COUNT_RE)
+        self._log("After Issue")
+
+    def _parse_volume(self) -> None:
+        """Parse Volume."""
+        self._parse_items(VOLUME_RE)
+        if "volume" not in self.metadata:
+            self._parse_items(VOLUME_WITH_COUNT_RE)
+        self._log("After Volume")
+
+    def _alpha_month_to_numeric(self) -> None:
+        """Translate alpha_month to numeric month."""
+        if alpha_month := self.metadata.pop("alpha_month", ""):
+            alpha_month = alpha_month.capitalize()  # type: ignore
+            for index, abbr in enumerate(month_abbr):
+                if abbr and alpha_month.startswith(abbr):
+                    month = f"{index:02d}"
+                    self.metadata["month"] = month
+                    break
+
+    def _parse_dates(self) -> None:
+        """Parse date schemes."""
+        # Discard second month of alpha month ranges.
+        self._unparsed_path = ALPHA_MONTH_RANGE_RE.sub(r"\1", self._unparsed_path)
+
+        # Month first date
+        self._parse_items(MONTH_FIRST_DATE_RE)
+        self._alpha_month_to_numeric()
+
+        # Year first date
+        if _DATE_KEYS - self.metadata.keys():
+            self._parse_items(YEAR_FIRST_DATE_RE)
+            self._alpha_month_to_numeric()
+
+        if "year" not in self.metadata:
+            self._parse_items(YEAR_TOKEN_RE, first_only=True)
+            if "volume" in self.metadata:
+                return
+            # A second year will be the real year.
+            # Move the first year to volume
+            if volume := self.metadata.get("year", ""):
+                self._parse_items(YEAR_TOKEN_RE)
+                if self.metadata.get("year", "") != volume:
+                    self.metadata["volume"] = volume
+        self._log("After Date")
+
+    def _parse_format_and_scan_info(self) -> None:
+        """Format & Scan Info."""
+        self._parse_items(
+            ORIGINAL_FORMAT_SCAN_INFO_RE,
+            require_all=True,
+        )
+        if "original_format" not in self.metadata:
+            self._parse_items(
+                ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
+            )
+        self._parse_items(SCAN_INFO_SECONDARY_RE)
+        if (
+            scan_info_secondary := self.metadata.pop("secondary_scan_info", "")
+        ) and "scan_info" not in self.metadata:
+            self.metadata["scan_info"] = scan_info_secondary  # type: ignore
+        self._log("After original_format & scan_info")
+
+    def _parse_ends_of_remaining_tokens(self):
+        # Volume left on the end of string tokens
+        if "volume" not in self.metadata:
+            self._parse_items(BOOK_VOLUME_RE)
+            self._log("After original_format & scan_info")
+
+        # Years left on the end of string tokens
+        year_end_matched = False
+        if "year" not in self.metadata:
+            self._parse_items(YEAR_END_RE, pop=False)
+            year_end_matched = "year" in self.metadata
+        self._log("After Year on end of token")
+
+        # Issue left on the end of string tokens
+        if "issue" not in self.metadata and not year_end_matched:
+            exclude: str = self.metadata.get("year", "")  # type: ignore
+            self._parse_items(ISSUE_END_RE, exclude=exclude)
+        if "issue" not in self.metadata:
+            self._parse_items(ISSUE_BEGIN_RE)
+        self._log("After Issue on ends of tokens")
+
+    def _parse_publisher(self) -> None:
+        """Parse Publisher."""
+        # Pop single tokens so they don't end up titles.
+        self._parse_items(PUBLISHER_UNAMBIGUOUS_TOKEN_RE, first_only=True)
+        if "publisher" not in self.metadata:
+            self._parse_items(PUBLISHER_AMBIGUOUS_TOKEN_RE, first_only=True)
+        if "publisher" not in self.metadata:
+            self._parse_items(PUBLISHER_UNAMBIGUOUS_RE, pop=False, first_only=True)
+        if "publisher" not in self.metadata:
+            self._parse_items(PUBLISHER_AMBIGUOUS_RE, pop=False, first_only=True)
+        self._log("After publisher")
+
+    def _is_at_title_position(self, value: str) -> bool:
+        """Title is in correct position."""
+        title_index = self.path.find(value)
+
+        # Titles must come after series but before format and scan_info
+        if (
+            title_index < self.path_index("series")
+            or title_index > self.path_index("original_format", maxsize)
+            or title_index > self.path_index("scan_info", maxsize)
+        ):
+            return False
+
+        # Titles must be after the series and one other token.
+        title_ok = False
+        other_tokens_exist = False
+        for preceding_key in _TITLE_PRECEDING_KEYS:
+            other_tokens_exist = True
+            if title_index > self.path_index(preceding_key):
+                title_ok = True
+                break
+        return title_ok or not other_tokens_exist
+
+    def _grouping_operators_strip(self, value: str) -> str:
+        """Strip spaces and parens."""
+        value = value.strip()
+        value = value.strip("()").strip()
+        value = value.strip("-").strip()
+        value = value.strip(",").strip()
+        value = value.strip("'").strip()
+        return value.strip('"').strip()
+
+    def _parse_series_and_title_token(
+        self, remaining_key_index: int, tokens: list[str]
+    ) -> str:
+        """Parse one series or title token."""
+        key = _REMAINING_GROUP_KEYS[remaining_key_index]
+        if key in self.metadata:
+            return ""
+        token = tokens.pop(0)
+        match = REMAINING_GROUP_RE.search(token)
+        if not match:
+            return token
+        value = match.group()
+        if key == "title" and not self._is_at_title_position(value):
+            return token
+        value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value)
+        value = self._grouping_operators_strip(value)
+        if value:
+            self.metadata[key] = value
+        return ""
+
+    def _parse_series_and_title(self) -> None:
+        """Assign series and title."""
+        if not self._unparsed_path:
+            return
+
+        remaining_key_index = 0
+        unused_tokens = []
+        tokens = self._unparsed_path.split(TOKEN_DELIMETER)
+        while tokens and remaining_key_index < len(_REMAINING_GROUP_KEYS):
+            unused_token = self._parse_series_and_title_token(
+                remaining_key_index, tokens
+            )
+            if unused_token:
+                unused_tokens.append(unused_token)
+            remaining_key_index += 1
+
+        self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
+        self._log("After Series & Title")
+
+    def _add_remainders(self) -> None:
+        """Add Remainders."""
+        remainders = []
+        for token in self._unparsed_path.split(TOKEN_DELIMETER):
+            if remainder := token.strip():
+                remainders.append(remainder)
+
+        if remainders:
+            self.metadata["remainders"] = tuple(remainders)
+
+    def parse(self) -> dict[str, str | tuple[str, ...]]:
+        """Parse the filename with a hierarchy of regexes."""
+        self._log("Init")
+        self._parse_ext()
+        self._clean_dividers()
+        self._parse_issue()
+        self._parse_volume()
+        self._parse_dates()
+        self._parse_format_and_scan_info()
+        self._parse_ends_of_remaining_tokens()
+        self._parse_publisher()
+        self._parse_series_and_title()
+
+        # Copy volume into issue if it's all we have.
+        if "issue" not in self.metadata and "volume" in self.metadata:
+            self.metadata["issue"] = self.metadata["volume"]
+            self._log("After issue can be volume")
+
+        self._add_remainders()
+
+        return self.metadata
+
+    def __init__(self, path: str | Path, verbose: int = 0):
+        """Initialize."""
+        self._debug: bool = verbose > 0
+        # munge path
+        if isinstance(path, str):
+            path = path.strip()
+        p_path = Path(path)
+        self.path = str(p_path.name).strip()
+        self.metadata: dict[str, str | tuple[str, ...]] = {}
+        self._unparsed_path = copy(self.path)
+        self._path_indexes: dict[str, int] = {}
+
+
+def comicfn2dict(
+    path: str | Path, verbose: int = 0
+) -> dict[str, str | tuple[str, ...]]:
+    """Simplfily the API."""
+    parser = ComicFilenameParser(path, verbose=verbose)
+    return parser.parse()
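
As the rewritten `parse()` pipeline shows, each `_parse_*` stage consumes tokens from `_unparsed_path` and records matches in `self.metadata`, while `path_index()` memoizes where each matched value sat in the original path so `_is_at_title_position()` can order series, title, format, and scan-info tokens. A small usage sketch (the filename is illustrative; it assumes this v0.2.0 module is installed):

```python
from comicfn2dict.parse import ComicFilenameParser

# verbose > 0 makes each stage print its label, the remaining unparsed
# path, and a {key: (value, path_index)} map via _log().
parser = ComicFilenameParser("Series Name v1 #2 (2023) (TPB) (c2c).cbz", verbose=1)
metadata = parser.parse()

# After parsing, path_index() reports the original position of a match.
series_pos = parser.path_index("series")
```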
comicfn2dict/regex.py
@@ -1,15 +1,32 @@
 """Parsing regexes."""
-import re
+from re import IGNORECASE, Pattern, compile
+from types import MappingProxyType
+
+PUBLISHERS_UNAMBIGUOUS: tuple[str, ...] = (
+    r"Abrams ComicArts",
+    r"BOOM! Studios",
+    r"DC(\sComics)?",
+    r"Dark Horse Comics",
+    r"Drawn & Quarterly",
+    r"Dynamite Entertainment",
+    r"IDW Publishing",
+    r"Icon Comics",
+    r"Kodansha",
+    r"Oni Press",
+    r"Pantheon Books",
+    r"SLG Publishing",
+    r"SelfMadeHero",
+    r"Titan Comics",
+)
+PUBLISHERS_AMBIGUOUS: tuple[str, ...] = (
+    r"(?<!Capt\.\s)(?<!Capt\s)(?<!Captain\s)Marvel",
+    r"Heavy Metal",
+    r"Epic",
+    r"Image",
+    r"Mirage",
+)

-def re_compile(exp, parenthify=False):
-    """Compile regex with options."""
-    if parenthify:
-        exp = r"\(" + exp + r"\)"
-    return re.compile(exp, flags=re.IGNORECASE)
-
-
-ORIGINAL_FORMAT_PATTERNS = (
+ORIGINAL_FORMAT_PATTERNS: tuple[str, ...] = (
     r"Anthology",
     r"(One|1)[-\s]Shot",
     r"Annual",
@@ -35,41 +52,160 @@ ORIGINAL_FORMAT_PATTERNS = (
     r"Sketch",
     r"TPB",
     r"Trade[-\s]Paper[-\s]?Back",
-    r"Web([-\s]?Comic)?",
+    r"Web([-\s]?(Comic|Rip))?",
 )

+MONTHS: tuple[str, ...] = (
+    r"Jan(uary)?",
+    r"Feb(ruary)?",
+    r"Mar(ch)?",
+    r"Apr(il)?",
+    r"May",
+    r"Jun(e)?",
+    r"Jul(y)?",
+    r"Aug(ust)?",
+    r"Sep(tember)?",
+    r"Oct(ober)?",
+    r"Nov(ember)?",
+    r"Dec(ember)?",
+)
+
+TOKEN_DELIMETER: str = r"/"
+
+
+def re_compile(exp: str, parenthify: bool = False) -> Pattern:
+    """Compile regex with options."""
+    if parenthify:
+        exp = r"\(" + exp + r"\)"
+    return compile(exp, flags=IGNORECASE)
+
+
 # CLEAN
-NON_SPACE_DIVIDER_RE = re_compile(r"[_\+]")
-DASH_SPLIT_RE = re_compile(r"\s-\s")
-EXTRA_SPACES_RE = re_compile(r"\s\s+")
+_TOKEN_DIVIDERS_RE = re_compile(r":")
+_SPACE_EQUIVALENT_RE = re_compile(r"_")
+_EXTRA_SPACES_RE = re_compile(r"\s\s+")
+_LEFT_PAREN_EQUIVALENT_RE = re_compile(r"\[")
+_RIGHT_PAREN_EQUIVALENT_RE = re_compile(r"\]")
+_DOUBLE_UNDERSCORE_RE = re_compile(r"__(.*)__")
+REGEX_SUBS: MappingProxyType[Pattern, tuple[str, int]] = MappingProxyType(
+    {
+        _DOUBLE_UNDERSCORE_RE: (r"(\1)", 0),
+        _TOKEN_DIVIDERS_RE: (TOKEN_DELIMETER, 1),
+        _SPACE_EQUIVALENT_RE: (r" ", 0),
+        _EXTRA_SPACES_RE: (r" ", 0),
+        _LEFT_PAREN_EQUIVALENT_RE: (r"(", 0),
+        _RIGHT_PAREN_EQUIVALENT_RE: (r")", 0),
+    }
+)
+
+### DATES
+_YEAR_RE_EXP = r"(?P<year>[12]\d{3})"
+_MONTH_ALPHA_RE_EXP = r"(" + "(?P<alpha_month>" + r"|".join(MONTHS) + r")\.?" r")"
+_MONTH_NUMERIC_RE_EXP = r"(?P<month>0?\d|1[0-2]?)"
+_MONTH_RE_EXP = r"(" + _MONTH_ALPHA_RE_EXP + r"|" + _MONTH_NUMERIC_RE_EXP + r")"
+_ALPHA_MONTH_RANGE = (
+    r"\b"  # noqa: ISC003
+    + r"("
+    + r"|".join(MONTHS)
+    + r")"
+    + r"("
+    + r"\.?-"
+    + r"("
+    + r"|".join(MONTHS)
+    + r")"
+    + r")\b"
+)
+ALPHA_MONTH_RANGE_RE: Pattern = re_compile(_ALPHA_MONTH_RANGE)
+
+_DAY_RE_EXP = r"(?P<day>([0-2]?\d|(3)[0-1]))"
+_DATE_DELIM = r"[-\s]+"
+_MONTH_FIRST_DATE_RE_EXP = (
+    r"((\b|\(?)"
+    # Month
+    + _MONTH_RE_EXP
+    # Day
+    + r"("
+    + _DATE_DELIM
+    + _DAY_RE_EXP
+    + r")?"
+    # Year
+    + r"[,]?"
+    + _DATE_DELIM
+    + _YEAR_RE_EXP
+    + r"(\)?|\b))"
+)
+_YEAR_FIRST_DATE_RE_EXP = (
+    r"(\b\(?"
+    + _YEAR_RE_EXP
+    + _DATE_DELIM
+    + _MONTH_RE_EXP
+    + _DATE_DELIM
+    + _DAY_RE_EXP
+    + r"\b\)?)"
+)
+
+MONTH_FIRST_DATE_RE: Pattern = re_compile(_MONTH_FIRST_DATE_RE_EXP)
+YEAR_FIRST_DATE_RE: Pattern = re_compile(_YEAR_FIRST_DATE_RE_EXP)
+YEAR_TOKEN_RE: Pattern = re_compile(_YEAR_RE_EXP, parenthify=True)
+YEAR_END_RE: Pattern = re_compile(_YEAR_RE_EXP + r"\/|$")
+
 # PAREN GROUPS
-ISSUE_COUNT_RE = re_compile(r"of\s*(?P<issue_count>\d+)", parenthify=True)
-_YEAR_RE_EXP = r"(?P<year>[12]\d{3})"
-YEAR_TOKEN_RE = re_compile(_YEAR_RE_EXP, parenthify=True)
-YEAR_BEGIN_RE = re_compile(r"^" + _YEAR_RE_EXP + r"\b")
-YEAR_END_RE = re_compile(r"\b" + _YEAR_RE_EXP + r"$")
 _OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS)
 _ORIGINAL_FORMAT_RE_EXP = r"(?P<original_format>" + _OF_PATTERNS + r")"
-ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
-_SCAN_INFO_RE_EXP = r"(?P<scan_info>[^()]+?)"
-SCAN_INFO_RE = re_compile(_SCAN_INFO_RE_EXP, parenthify=True)
+_SCAN_INFO_RE_EXP = r"(?P<scan_info>[^()]*)"
 _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = (
-    _ORIGINAL_FORMAT_RE_EXP + r"(?:-" + _SCAN_INFO_RE_EXP + r")?"
+    _ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP  # + r")?"
 )
-ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile(
+# Keep this even though comicfn2dict doesn't use it directly
+ORIGINAL_FORMAT_RE: Pattern = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
+ORIGINAL_FORMAT_SCAN_INFO_RE: Pattern = re_compile(
     _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True
 )
+ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE: Pattern = re_compile(
+    r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)"
+)
+SCAN_INFO_SECONDARY_RE: Pattern = re_compile(r"\b(?P<secondary_scan_info>c2c)\b")

-# REGULAR TOKENS
-VOLUME_RE = re_compile(r"((?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+))")
-_ISSUE_RE_EXP = r"(?P<issue>[\d½]+\.?\d*\w*)"
-ISSUE_NUMBER_RE = re_compile(r"(#" + _ISSUE_RE_EXP + r")")
-ISSUE_TOKEN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")$")
-ISSUE_END_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")$")
-ISSUE_BEGIN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")\b")
-ISSUE_ANYWHERE_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")\b")
+# ISSUE
+_ISSUE_RE_EXP = r"(?P<issue>\w*(½|\d+)[\.\d+]*\w*)"
+_ISSUE_COUNT_RE_EXP = r"\(of\s*(?P<issue_count>\d+)\)"
+ISSUE_NUMBER_RE: Pattern = re_compile(
+    r"(\(?#" + _ISSUE_RE_EXP + r"\)?)" + r"(\W*" + _ISSUE_COUNT_RE_EXP + r")?"
+)
+ISSUE_WITH_COUNT_RE: Pattern = re_compile(
+    r"(\(?" + _ISSUE_RE_EXP + r"\)?" + r"\W*" + _ISSUE_COUNT_RE_EXP + r")"
+)
+ISSUE_END_RE: Pattern = re_compile(r"([\/\s]\(?" + _ISSUE_RE_EXP + r"\)?(\/|$))")
+ISSUE_BEGIN_RE: Pattern = re_compile(r"((^|\/)\(?" + _ISSUE_RE_EXP + r"\)?[\/|\s])")
+
+# Volume
+_VOLUME_COUNT_RE_EXP = r"\(of\s*(?P<volume_count>\d+)\)"
+VOLUME_RE: Pattern = re_compile(
+    r"(" + r"(?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+)"  # noqa: ISC003
+    r"(\W*" + _VOLUME_COUNT_RE_EXP + r")?" + r")"
+)
+VOLUME_WITH_COUNT_RE: Pattern = re_compile(
+    r"(\(?" + r"(?P<volume>\d+)" + r"\)?" + r"\W*" + _VOLUME_COUNT_RE_EXP + r")"
+)
+BOOK_VOLUME_RE: Pattern = re_compile(r"(?P<title>" + r"book\s*(?P<volume>\d+)" + r")")
+
+# Publisher
+_PUBLISHER_UNAMBIGUOUS_RE_EXP = (
+    r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_UNAMBIGUOUS) + r")\b)"
+)
+_PUBLISHER_AMBIGUOUS_RE_EXP = (
+    r"(\b(?P<publisher>" + r"|".join(PUBLISHERS_AMBIGUOUS) + r")\b)"
+)
+PUBLISHER_UNAMBIGUOUS_TOKEN_RE: Pattern = re_compile(
+    r"(^|\/)" + _PUBLISHER_UNAMBIGUOUS_RE_EXP + r"($|\/)"
+)
+PUBLISHER_AMBIGUOUS_TOKEN_RE: Pattern = re_compile(
+    r"(^|\/)" + _PUBLISHER_AMBIGUOUS_RE_EXP + r"($|\/)"
+)
+PUBLISHER_UNAMBIGUOUS_RE: Pattern = re_compile(_PUBLISHER_UNAMBIGUOUS_RE_EXP)
+PUBLISHER_AMBIGUOUS_RE = re_compile(_PUBLISHER_AMBIGUOUS_RE_EXP)

 # LONG STRINGS
-REMAINING_GROUP_RE = re_compile(r"^[\w].*[^\)]")
+REMAINING_GROUP_RE: Pattern = re_compile(r"^[^\(].*[^\)]")
+NON_NUMBER_DOT_RE: Pattern = re_compile(r"(\D)\.(\D)")
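
`REGEX_SUBS` is what drives `ComicFilenameParser._clean_dividers`: each pattern is applied in insertion order with its substitution count, and `TOKEN_DELIMETER` (`/`) marks token boundaries for the later popping passes. A sketch of the cleaning pass on a made-up name, applying the substitutions the same way the parser does:

```python
from comicfn2dict.regex import REGEX_SUBS, TOKEN_DELIMETER

name = "Series_Name: Title [2023]"
for regex, (replacement, count) in REGEX_SUBS.items():
    name = regex.sub(replacement, name, count=count).strip()

# "_" became a space, the first ":" became the TOKEN_DELIMETER "/",
# and square brackets were normalized to parens:
assert name == "Series Name/ Title (2023)"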
comicfn2dict/unparse.py
@@ -1,8 +1,13 @@
 """Unparse comic filenames."""
-from typing import Callable
+from calendar import month_abbr
+from collections.abc import Callable, Mapping, Sequence
+from contextlib import suppress
+from types import MappingProxyType
+
+from comicfn2dict.log import print_log_header


-def issue_formatter(issue):
+def issue_formatter(issue: str) -> str:
     """Formatter to zero pad issues."""
     i = 0
     issue = issue.lstrip("0")
@@ -14,37 +19,103 @@ def issue_formatter(issue):
     return "#{:0>" + str(pad) + "}"


-_PAREN_FMT = "({})"
-_FILENAME_FORMAT_TAGS = (
+_PAREN_FMT: str = "({})"
+_FILENAME_FORMAT_TAGS: tuple[tuple[str, str | Callable], ...] = (
     ("series", "{}"),
     ("volume", "v{}"),
+    ("volume_count", "(of {:03})"),
     ("issue", issue_formatter),
     ("issue_count", "(of {:03})"),
-    ("year", _PAREN_FMT),
+    ("date", _PAREN_FMT),
     ("title", "{}"),
+    ("publisher", _PAREN_FMT),
     ("original_format", _PAREN_FMT),
     ("scan_info", _PAREN_FMT),
 )
-_EMPTY_VALUES = (None, "")
+_EMPTY_VALUES: tuple[None, str] = (None, "")
+_DEFAULT_EXT = "cbz"
+_DATE_KEYS = ("year", "month", "day")


-def dict2comicfn(md, ext=True):
-    """Get our preferred basename from a metadata dict."""
-    if not md:
-        return None
-    tokens = []
-    for tag, fmt in _FILENAME_FORMAT_TAGS:
-        val = md.get(tag)
-        if val in _EMPTY_VALUES:
-            continue
-        final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
-        token = final_fmt.format(val).strip()
-        if token:
-            tokens.append(token)
-    fn = " ".join(tokens)
-    if remainders := md.get("remainders"):
-        remainder = " ".join(remainders)
-        fn += f" - {remainder}"
-    if ext:
-        fn += "." + md.get("ext", "cbz")
-    return fn
+class ComicFilenameSerializer:
+    """Serialize Comic Filenames from dict."""
+
+    def _log(self, label: str, fn: str) -> None:
+        """Log progress."""
+        if not self._debug:
+            return
+        print_log_header(label)
+        print(fn)  # noqa: T201
+
+    def _add_date(self) -> None:
+        """Construct date from Y-m-D if they exist."""
+        if "date" in self.metadata:
+            return
+        parts = []
+        for key in _DATE_KEYS:
+            if part := self.metadata.get(key):
+                if key == "month" and not parts:
+                    with suppress(TypeError):
+                        part = month_abbr[int(part)]
+
+                parts.append(part)
+            if key == "month" and not parts:
+                # noop if only day.
+                break
+        if parts:
+            parts = (str(part) for part in parts)
+            date = "-".join(parts)
+            self._log("After date", date)
+            self.metadata = MappingProxyType({**self.metadata, "date": date})
+
+    def _tokenize_tag(self, tag: str, fmt: str | Callable) -> str:
+        """Add tags to the string."""
+        val = self.metadata.get(tag)
+        if val in _EMPTY_VALUES:
+            return ""
+        final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
+        return final_fmt.format(val).strip()
+
+    def _add_remainder(self) -> str:
+        """Add the remainders specially."""
+        if remainders := self.metadata.get("remainders"):
+            if isinstance(remainders, Sequence):
+                remainders = (str(remainder) for remainder in remainders)
+                remainder = " ".join(remainders)
+            else:
+                remainder = str(remainders)
+            return f"[{remainder}]"
+        return ""
+
+    def serialize(self) -> str:
+        """Get our preferred basename from a metadata dict."""
+        self._add_date()
+
+        tokens = []
+        for tag, fmt in _FILENAME_FORMAT_TAGS:
+            if token := self._tokenize_tag(tag, fmt):
+                tokens.append(token)
+            self._log(f"After {tag}", str(tokens))
+        fn = " ".join(tokens)
+
+        fn += self._add_remainder()
+        self._log("After remainder", fn)
+
+        if self._ext:
+            ext = self.metadata.get("ext", _DEFAULT_EXT)
+            fn += f".{ext}"
+            self._log("After ext", fn)
+
+        return fn
+
+    def __init__(self, metadata: Mapping, ext: bool = True, verbose: int = 0):
+        """Initialize."""
+        self.metadata: Mapping = metadata
+        self._ext: bool = ext
+        self._debug: bool = bool(verbose)
+
+
+def dict2comicfn(md: Mapping, ext: bool = True, verbose: int = 0) -> str:
+    """Simplify API."""
+    serializer = ComicFilenameSerializer(md, ext=ext, verbose=verbose)
+    return serializer.serialize()
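
The serializer now folds `year`/`month`/`day` into a single `(date)` token via `_add_date()` before formatting, which is why the `("date", _PAREN_FMT)` entry replaces the old bare `("year", _PAREN_FMT)` group. A short sketch of the convenience wrapper (assumes this v0.2.0 module is installed; the printed result is approximate since issue padding depends on `issue_formatter`):

```python
from comicfn2dict.unparse import dict2comicfn

md = {"series": "Series Name", "issue": "1", "year": "2023", "month": "4", "day": "7"}
# _add_date() joins the parts into "2023-4-7", _PAREN_FMT wraps it in
# parens, and ext defaults to "cbz".
print(dict2comicfn(md))  # -> something like "Series Name #001 (2023-4-7).cbz"
```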
debian.sources (new file, +11)
@@ -0,0 +1,11 @@
+Types: deb
+URIs: http://deb.debian.org/debian
+Suites: bookworm bookworm-updates
+Components: main contrib non-free
+Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg
+
+Types: deb
+URIs: http://deb.debian.org/debian-security
+Suites: bookworm-security
+Components: main contrib non-free
+Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg
docker-compose.yaml (new file, +21)
@@ -0,0 +1,21 @@
+services:
+  comicfn2dict-builder:
+    build: .
+    image: comicfn2dict-builder
+    container_name: comicfn2dict-builder
+  comicfn2dict-lint:
+    image: comicfn2dict-builder
+    container_name: comicfn2dict-lint
+    command: make lint
+  comicfn2dict-test:
+    image: comicfn2dict-builder
+    container_name: comicfn2dict-test
+    command: make test
+    volumes:
+      - ./test-results/:/app/test-results/
+  comicfn2dict-build:
+    image: comicfn2dict-builder
+    container_name: comicfn2dict-build
+    volumes:
+      - ./dist/:/app/dist/
+    command: poetry build
eslint.config.js (new file, +186)
@@ -0,0 +1,186 @@
+import { FlatCompat } from "@eslint/eslintrc";
+import js from "@eslint/js";
+import arrayFunc from "eslint-plugin-array-func";
+// import plugin broken for flag config
+// https://github.com/import-js/eslint-plugin-import/issues/2556
+// import importPlugin from "eslint-plugin-import";
+import eslintPluginPrettierRecommended from "eslint-plugin-prettier/recommended";
+import pluginSecurity from "eslint-plugin-security";
+import eslintPluginUnicorn from "eslint-plugin-unicorn";
+import globals from "globals";
+
+const compat = new FlatCompat();
+
+export default [
+  {
+    languageOptions: {
+      globals: {
+        ...globals.node,
+        ...globals.browser,
+      },
+    },
+    linterOptions: {
+      reportUnusedDisableDirectives: "warn",
+    },
+    plugins: {
+      // import: importPlugin,
+      unicorn: eslintPluginUnicorn,
+    },
+    rules: {
+      "array-func/prefer-array-from": "off", // for modern browsers the spread operator, as preferred by unicorn, works fine.
+      "max-params": ["warn", 4],
+      "no-console": "warn",
+      "no-debugger": "warn",
+      "no-constructor-bind/no-constructor-bind": "error",
+      "no-constructor-bind/no-constructor-state": "error",
+      "no-secrets/no-secrets": "error",
+      "prettier/prettier": "warn",
+      "security/detect-object-injection": "off",
+      "space-before-function-paren": "off",
+      "unicorn/switch-case-braces": ["warn", "avoid"],
+      "unicorn/prefer-node-protocol": 0,
+      "unicorn/prevent-abbreviations": "off",
+      "unicorn/filename-case": [
+        "error",
+        { case: "kebabCase", ignore: [".*.md"] },
+      ],
+      /*
+      ...importPlugin.configs["recommended"].rules,
+      "import/no-unresolved": [
+        "error",
+        {
+          ignore: ["^[@]"],
+        },
+      ],
+      */
+    },
+    /*
+    settings: {
+      "import/parsers": {
+        espree: [".js", ".cjs", ".mjs", ".jsx"],
+        "@typescript-eslint/parser": [".ts"],
+      },
+      "import/resolver": {
+        typescript: true,
+        node: true,
+      },
+    },
+    */
+  },
+  js.configs.recommended,
+  arrayFunc.configs.all,
+  pluginSecurity.configs.recommended,
+  eslintPluginPrettierRecommended,
+  ...compat.config({
+    root: true,
+    env: {
+      browser: true,
+      es2024: true,
+      node: true,
+    },
+    extends: [
+      // LANGS
+      "plugin:jsonc/recommended-with-jsonc",
+      "plugin:markdown/recommended",
+      "plugin:toml/recommended",
+      "plugin:yml/standard",
+      "plugin:yml/prettier",
+      // CODE QUALITY
+      "plugin:sonarjs/recommended",
+      // PRACTICES
+      "plugin:eslint-comments/recommended",
+      // "plugin:import/recommended",
+      "plugin:no-use-extend-native/recommended",
+      "plugin:optimize-regex/all",
+      "plugin:promise/recommended",
+      "plugin:switch-case/recommended",
+      // SECURITY
+      "plugin:no-unsanitized/DOM",
+    ],
+    overrides: [
+      {
+        files: ["**/*.md"],
+        processor: "markdown/markdown",
+        rules: {
+          "prettier/prettier": ["warn", { parser: "markdown" }],
+        },
+      },
+      {
+        files: ["**/*.md/*.js"], // Will match js code inside *.md files
+        rules: {
+          "no-unused-vars": "off",
+          "no-undef": "off",
+        },
+      },
+      {
+        files: ["**/*.md/*.sh"],
+        rules: {
+          "prettier/prettier": ["error", { parser: "sh" }],
+        },
+      },
+      {
+        files: ["*.yaml", "*.yml"],
+        //parser: "yaml-eslint-parser",
+        rules: {
+          "unicorn/filename-case": "off",
+        },
+      },
+      {
+        files: ["*.toml"],
+        //parser: "toml-eslint-parser",
+        rules: {
+          "prettier/prettier": ["error", { parser: "toml" }],
+        },
+      },
+      {
+        files: ["*.json", "*.json5", "*.jsonc"],
+        //parser: "jsonc-eslint-parser",
+      },
+    ],
+    parserOptions: {
+      ecmaFeatures: {
+        impliedStrict: true,
+      },
+      ecmaVersion: "latest",
+    },
+    plugins: [
+      "eslint-comments",
+      //"import",
+      "markdown",
+      "no-constructor-bind",
+      "no-secrets",
+      "no-unsanitized",
+      "no-use-extend-native",
+      "optimize-regex",
+      "promise",
+      "simple-import-sort",
+      "sonarjs",
+      "switch-case",
+      "unicorn",
+    ],
+    rules: {
+      "no-constructor-bind/no-constructor-bind": "error",
+      "no-constructor-bind/no-constructor-state": "error",
+      "no-secrets/no-secrets": "error",
+      "eslint-comments/no-unused-disable": 1,
+      "simple-import-sort/exports": "warn",
+      "simple-import-sort/imports": "warn",
+      "switch-case/newline-between-switch-case": "off", // Malfunctioning
+    },
+    ignorePatterns: [
+      "*~",
+      "**/__pycache__",
+      ".git",
+      "!.circleci",
+      ".mypy_cache",
+      ".ruff_cache",
+      ".pytest_cache",
+      ".venv*",
+      "dist",
+      "node_modules",
+      "package-lock.json",
+      "test-results",
+      "typings",
+    ],
+  }),
+];
package-lock.json (generated, 6339 changes)
File diff suppressed because it is too large.
37
package.json
@@ -1,10 +1,10 @@
 {
-  "version": "0.1.0",
+  "version": "0.2.0",
-  "description": "linting",
+  "description": "comicfn2dict linting",
   "type": "module",
   "scripts": {
-    "fix": "eslint_d --cache --fix --ignore-pattern frontend --ext .cjs,.mjs,.js,.json,.yaml,.md . && prettier --write .",
+    "fix": "eslint --cache --fix . && prettier --write .",
-    "lint": "eslint_d --cache --ignore-pattern frontend --ext .cjs,.mjs,.js,.json,.yaml,.md . && prettier --check .",
+    "lint": "eslint --cache . && prettier --check .",
     "remark-check": "remark .",
     "remark-fix": "remark . --output"
   },
@@ -13,12 +13,13 @@
     "@prettier/plugin-xml",
     "prettier-plugin-nginx",
     "prettier-plugin-packagejson",
-    "prettier-plugin-sh"
+    "prettier-plugin-sh",
+    "prettier-plugin-toml"
   ],
   "overrides": [
     {
       "files": [
-        "*.md"
+        "**/*.md"
       ],
       "options": {
         "proseWrap": "always"
@@ -28,6 +29,7 @@
   },
   "remarkConfig": {
     "plugins": [
+      "gfm",
       "preset-lint-consistent",
       "preset-lint-recommended",
       "preset-lint-markdown-style-guide",
@@ -42,36 +44,37 @@
     "@prettier/plugin-xml": "^3.0.0",
     "eslint": "^8.34.0",
     "eslint-config-prettier": "^9.0.0",
-    "eslint-plugin-array-func": "^4.0.0",
+    "eslint-plugin-array-func": "^5.0.1",
     "eslint-plugin-eslint-comments": "^3.2.0",
     "eslint-plugin-import": "^2.25.4",
-    "eslint-plugin-json": "^3.1.0",
+    "eslint-plugin-jsonc": "^2.13.0",
-    "eslint-plugin-mdx": "^3.0.0",
+    "eslint-plugin-markdown": "^3.0.0",
     "eslint-plugin-no-constructor-bind": "^2.0.4",
     "eslint-plugin-no-secrets": "^0.8.9",
     "eslint-plugin-no-unsanitized": "^4.0.0",
     "eslint-plugin-no-use-extend-native": "^0.5.0",
-    "eslint-plugin-only-warn": "^1.0.2",
     "eslint-plugin-optimize-regex": "^1.2.0",
     "eslint-plugin-prettier": "^5.0.0-alpha.2",
     "eslint-plugin-promise": "^6.0.0",
     "eslint-plugin-scanjs-rules": "^0.2.1",
     "eslint-plugin-security": "^2.1.0",
-    "eslint-plugin-simple-import-sort": "^10.0.0",
+    "eslint-plugin-simple-import-sort": "^12.0.0",
-    "eslint-plugin-sonarjs": "^0.23.0",
+    "eslint-plugin-sonarjs": "^0.24.0",
     "eslint-plugin-switch-case": "^1.1.2",
-    "eslint-plugin-unicorn": "^50.0.1",
-    "eslint-plugin-yaml": "^0.5.0",
+    "eslint-plugin-toml": "^0.9.2",
+    "eslint-plugin-unicorn": "^51.0.1",
+    "eslint-plugin-yml": "^1.12.2",
     "eslint_d": "^13.0.0",
     "prettier": "^3.0.0",
     "prettier-plugin-nginx": "^1.0.3",
     "prettier-plugin-packagejson": "^2.4.4",
-    "prettier-plugin-sh": "^0.13.0",
+    "prettier-plugin-sh": "^0.14.0",
+    "prettier-plugin-toml": "^2.0.1",
     "remark-cli": "^12.0.0",
+    "remark-gfm": "^4.0.0",
     "remark-preset-lint-consistent": "^5.1.1",
     "remark-preset-lint-markdown-style-guide": "^5.1.2",
     "remark-preset-lint-recommended": "^6.1.2",
-    "remark-preset-prettier": "^2.0.1",
-    "toml": "^3.0.0"
+    "remark-preset-prettier": "^2.0.1"
   }
 }
942
poetry.lock
generated
File diff suppressed because it is too large
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "comicfn2dict"
-version = "0.1.3"
+version = "0.2.0a4"
 description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli."
 license = "GPL-3.0-only"
 authors = ["AJ Slater <aj@slater.net>"]
@@ -20,12 +20,15 @@ classifiers = [
   "Operating System :: OS Independent",
   "Programming Language :: Python :: 3 :: Only",
 ]
-packages = [{ include = "comicfn2dict" }, { include = "tests", format = "sdist" }]
+packages = [
+  { include = "comicfn2dict" },
+  { include = "tests", format = "sdist" },
+]
 exclude = ["*/**/*~"]
 include = []
 
 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.10"
 
 [tool.poetry.group.dev.dependencies]
 neovim = "^0.3.1"
@@ -42,7 +45,7 @@ pytest-gitignore = "^1.3"
 codespell = "^2.1.0"
 pyright = "^1.1.232"
 radon = { version = "^6.0.1", extras = ["toml"] }
-ruff = "^0.1.2"
+ruff = "^0.2.1"
 types-python-dateutil = "^2.8.19"
 vulture = "^2.3"
 
@@ -77,7 +80,7 @@ omit = [
   "dist/*",
   "node_modules/*",
   "test-results/*",
-  "typings/*"
+  "typings/*",
 ]
 
 [tool.pyright]
@@ -98,12 +101,10 @@ exclude = [
 useLibraryCodeForTypes = true
 reportMissingImports = true
 reportImportCycles = true
-pythonVersion = "3.9"
+pythonVersion = "3.10"
 pythonPlatform = "All"
 
 [tool.pytest.ini_options]
-junit_family = "xunit2"
-# --black
 addopts = """
 --junit-xml=test-results/pytest/results.xml
 -ra
@@ -113,21 +114,38 @@ addopts = """
 --cov-append
 --cov-report=html
 --cov-report=term
---ignore=.git
---ignore=cache
---ignore=frontend
---ignore=typings
 """
+junit_family = "xunit2"
+testpaths = "tests"
 
 [tool.radon]
 exclude = "*~,.git/*,.mypy_cache/*,.pytest_cache/*,.venv*,__pycache__/*,cache/*,dist/*,node_modules/*,test-results/*,typings/*"
 
 [tool.ruff]
 extend-exclude = ["typings"]
-extend-ignore = ["S101", "D203", "D213",
+target-version = "py310"
+
+[tool.ruff.lint]
+extend-ignore = [
+  "S101",
+  "D203",
+  "D213",
   # Format ignores
-  "W191", "E501", "E111", "E114", "E117", "D206", "D300", "Q000", "Q001",
-  "Q002", "Q003", "COM812", "COM819", "ISC001", "ISC002"
+  "W191",
+  "E501",
+  "E111",
+  "E114",
+  "E117",
+  "D206",
+  "D300",
+  "Q000",
+  "Q001",
+  "Q002",
+  "Q003",
+  "COM812",
+  "COM819",
+  "ISC001",
+  "ISC002",
 ]
 extend-select = [
   "A",
@@ -168,19 +186,16 @@ extend-select = [
   "TRY",
   "UP",
   "W",
-  "YTT"
+  "YTT",
   # "ANN", "ERA", "COM"
 ]
 external = ["V101"]
-# format = "grouped"
-# show-source = true
-target-version = "py39"
 task-tags = ["TODO", "FIXME", "XXX", "http", "HACK"]
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "tests/*" = ["SLF001", "T201", "T203"]
 
-[tool.ruff.pycodestyle]
+[tool.ruff.lint.pycodestyle]
 ignore-overlong-task-comments = true
 
 [tool.vulture]
tests/comic_filenames.py
@@ -1,5 +1,7 @@
 """Test filenames with human parsed correct results."""
 
+from types import MappingProxyType
+
 TEST_COMIC_FIELDS = {
     "series": "Long Series Name",
     "issue": "001",
@@ -22,6 +24,7 @@ TEST_COMIC_FIELDS_VOL = {
 TEST_COMIC_VOL_ONLY = {
     "series": "Long Series Name",
     "volume": "1",
+    "issue": "1",
     "title": "Title",
     "original_format": "TPB",
     "year": "2000",
@@ -29,6 +32,7 @@ TEST_COMIC_VOL_ONLY = {
     "ext": "cbr",
 }
 
+# Tests for 0.1.0
 FNS = {
     "Night of 1000 Wolves 001 (2013).cbz": {
         "series": "Night of 1000 Wolves",
@@ -51,11 +55,6 @@ FNS = {
     "Long Series Name #001 (2000) Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS,
     "Long Series Name (2000) 001 Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS,
     "Long Series Name (2000) #001 Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS,
-    "Long Series Name v1 (2000) #001 "
-    "Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS_VOL,
-    "Long Series Name 001 (2000) (TPB-Releaser) Title.cbz": TEST_COMIC_FIELDS,
-    "Long Series Name Vol 1 "
-    "(2000) (TPB) (Releaser & Releaser-Releaser) Title.cbr": TEST_COMIC_VOL_ONLY,
     "Ultimate Craziness (2019) (Digital) (Friends-of-Bill).cbr": {
         "series": "Ultimate Craziness",
         "year": "2019",
@@ -73,26 +72,17 @@ FNS = {
     "Arkenstone Vol. 01 - The Smell of Burnt Toast (2020) (digital) (My-brother).cbr": {
         "series": "Arkenstone",
         "volume": "01",
+        "issue": "01",
         "year": "2020",
         "ext": "cbr",
         "scan_info": "My-brother",
         "title": "The Smell of Burnt Toast",
         "original_format": "digital",
     },
-    "Bardude - The Last Thing I Remember.cbz": {
-        "series": "Bardude",
-        "title": "The Last Thing I Remember",
-        "ext": "cbz",
-    },
-    "Drunkguy - The Man Without Fear - 01.cbz": {
-        "series": "Drunkguy",
-        "title": "The Man Without Fear",
-        "issue": "01",
-        "ext": "cbz",
-    },
     "The_Arkenstone_v03_(2002)_(Digital)_(DR_&_Quenya-Elves).cbr": {
         "series": "The Arkenstone",
         "volume": "03",
+        "issue": "03",
         "year": "2002",
         "ext": "cbr",
         "scan_info": "DR & Quenya-Elves",
@@ -111,6 +101,7 @@ FNS = {
     "Kartalk Library Edition v01 (1992) (digital) (Son of Ultron-Empire).cbr": {
         "series": "Kartalk Library Edition",
         "volume": "01",
+        "issue": "01",
         "year": "1992",
         "ext": "cbr",
         "original_format": "digital",
@@ -119,15 +110,15 @@ FNS = {
     "Kind of Deadly v02 - Last Bullet (2006) (Digital) (Zone-Empire).cbr": {
         "series": "Kind of Deadly",
         "volume": "02",
+        "issue": "02",
         "year": "2006",
         "ext": "cbr",
         "original_format": "Digital",
         "scan_info": "Zone-Empire",
         "title": "Last Bullet",
     },
-    "Jeremy John - A Big Long Title (2017) (digital-Minutement).cbz": {
-        "series": "Jeremy John",
-        "title": "A Big Long Title",
+    "Jeremy John - Not A Title (2017) (digital-Minutement).cbz": {
+        "series": "Jeremy John - Not A Title",
         "year": "2017",
         "ext": "cbz",
         "original_format": "digital",
@@ -139,8 +130,7 @@ FNS = {
         "year": "2006",
         "ext": "cbz",
         "scan_info": "Minutemen-Faessla",
-        # "original_format": "digital",
-        "remainders": ("(digital",),
+        "original_format": "digital",
     },
     "Jeremy John 003 (2007) (4 covers) (digital) (Minutemen-Faessla).cbz": {
         "series": "Jeremy John",
@@ -154,6 +144,7 @@ FNS = {
     "Jeremy John v01 - Uninterested! (2007) (Digital) (Asgard-Empire).cbr": {
         "series": "Jeremy John",
         "volume": "01",
+        "issue": "01",
         "year": "2007",
         "ext": "cbr",
         "original_format": "Digital",
@@ -180,6 +171,7 @@ FNS = {
     "Darkwad by Carlos Zemo v01 - Knuckle Fight (2009) (Digital) (Zone-Empire).cbr": {
         "series": "Darkwad by Carlos Zemo",
         "volume": "01",
+        "issue": "01",
         "year": "2009",
         "ext": "cbr",
         "title": "Knuckle Fight",
@@ -243,3 +235,273 @@ FNS = {
         "ext": "cbz",
     },
 }
 
+# Tests for 0.2.0
+FNS.update(
+    {
+        # Philosophy change regarding dashes.
+        "Bardude - The Last Thing I Remember.cbz": {
+            "series": "Bardude - The Last Thing I Remember",
+            "ext": "cbz",
+        },
+        "Drunkguy - The Man Without Fear - 01.cbz": {
+            "series": "Drunkguy - The Man Without Fear",
+            "issue": "01",
+            "ext": "cbz",
+        },
+        # BIG Change. title after token. more stripping.
+        "'Batman - Superman - World's Finest 022 (2024) (Webrip) (The Last Kryptonian-DCP).cbz": {
+            "ext": "cbz",
+            "issue": "022",
+            "original_format": "Webrip",
+            "series": "Batman - Superman - World's Finest",
+            "scan_info": "The Last Kryptonian-DCP",
+            "year": "2024",
+        },
+        # Issue number starting with a letter requested in https://github.com/comictagger/comictagger/issues/543
+        # word characters now allowed to lead issue numbers only if preceded by a # marker
+        "batman #B01 title.cbz": {
+            "ext": "cbz",
+            "issue": "B01",
+            "series": "batman",
+            "title": "title",
+        },
+        "Monster_Island_v1_#2__repaired__c2c.cbz": {
+            "ext": "cbz",
+            "issue": "2",
+            "series": "Monster Island",
+            "volume": "1",
+            "scan_info": "c2c",
+            "remainders": ("(repaired)",),
+        },
+        # Extra - in the series
+        " X-Men-V1-#067.cbr": {
+            "ext": "cbr",
+            "issue": "067",
+            "series": "X-Men",
+            "volume": "1",
+            "remainders": ("-",),
+        },
+        "Aquaman - Green Arrow - Deep Target #01 (of 07) (2021).cbr": {
+            "ext": "cbr",
+            "issue": "01",
+            "series": "Aquaman - Green Arrow - Deep Target",
+            "year": "2021",
+            "issue_count": "07",
+        },
+        # CT only separates this into a title if the '-' is attached to the previous word eg 'aquaman- Green Arrow'. @bpepple opened a ticket for this https://github.com/ajslater/comicfn2dict/issues/1 already
+        "Batman_-_Superman_#020_(2021).cbr": {
+            "ext": "cbr",
+            "issue": "020",
+            "series": "Batman - Superman",
+            "year": "2021",
+        },
+        # Publishers like to re-print some of their annuals using this format for the year
+        "Batman '89 (2021) .cbr": {
+            "ext": "cbr",
+            "series": "Batman '89",
+            "year": "2021",
+        },
+        # This made the parser in CT much more complicated. It's understandable that this isn't parsed on the first few iterations of this project
+        "Star Wars - War of the Bounty Hunters - IG-88 (2021).cbz": {
+            "ext": "cbz",
+            "series": "Star Wars - War of the Bounty Hunters - IG-88",
+            "year": "2021",
+        },
+        # The addition of the '#1' turns this into the same as 'Aquaman - Green Arrow - Deep Target' above
+        "Star Wars - War of the Bounty Hunters - IG-88 #1 (2021).cbz": {
+            "ext": "cbz",
+            "issue": "1",
+            "series": "Star Wars - War of the Bounty Hunters - IG-88",
+            "year": "2021",
+        },
+        "Free Comic Book Day - Avengers.Hulk (2021).cbz": {
+            "ext": "cbz",
+            "series": "Free Comic Book Day - Avengers Hulk",
+            "year": "2021",
+        },
+        # CT assumes the volume is also the issue number if it can't find an issue number
+        "Avengers By Brian Michael Bendis volume 03 (2013).cbz": {
+            "ext": "cbz",
+            "issue": "03",
+            "series": "Avengers By Brian Michael Bendis",
+            "volume": "03",
+            "year": "2013",
+        },
+        # CT catches the year
+        "Marvel Previews #002 (January 2022).cbr": {
+            "ext": "cbr",
+            "issue": "002",
+            "series": "Marvel Previews",
+            "publisher": "Marvel",
+            "month": "01",
+            "year": "2022",
+        },
+        "Test Numeric Year #2 2001-02-24.cbz": {
+            "ext": "cbz",
+            "issue": "2",
+            "series": "Test Numeric Year",
+            "year": "2001",
+            "month": "02",
+            "day": "24",
+        },
+        "Test Month First Date 02-24-2001.cbz": {
+            "ext": "cbz",
+            "series": "Test Month First Date",
+            "year": "2001",
+            "month": "02",
+            "day": "24",
+        },
+        # CT notices that this is a full date, CT doesn't actually return the month or day though just removes it
+        "X-Men, 2021-08-04 (#02).cbz": {
+            "ext": "cbz",
+            "issue": "02",
+            "series": "X-Men",
+            "year": "2021",
+            "month": "08",
+            "day": "04",
+        },
+        # 4 digit issue number
+        # should this be an issue number if year DONE?.
+        "action comics 1024.cbz": {
+            "ext": "cbz",
+            "issue": "1024",
+            "series": "action comics",
+        },
+        # This is a contrived test case. I've never seen this I just wanted to handle it with my parser
+        "Cory Doctorow's Futuristic Tales of the Here and Now #0.0.1 (2007).cbz": {
+            "ext": "cbz",
+            "issue": "0.0.1",
+            "series": "Cory Doctorow's Futuristic Tales of the Here and Now",
+            "year": "2007",
+        },
+        # CT treats ':' the same as '-' but here the ':' is attached to 'Now' which CT sees as a title separation
+        "Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz": {
+            "ext": "cbz",
+            "issue": "001",
+            "series": "Cory Doctorow's Futuristic Tales of the Here and Now",
+            "title": "Anda's Game",
+            "year": "2007",
+        },
+        # If a title ends in a year, it's not an issue (and is a year if no year)
+        "Blade Runner Free Comic Book Day 2021 (2021).cbr": {
+            "ext": "cbr",
+            "series": "Blade Runner Free Comic Book Day 2021",
+            "year": "2021",
+        },
+        # If a year occurs after another year, and no volume, do volume / year
+        "Super Strange Yarns (1957) #92 (1969).cbz": {
+            "ext": "cbz",
+            "issue": "92",
+            "series": "Super Strange Yarns",
+            "volume": "1957",
+            "year": "1969",
+        },
+        # CT checks for the following '(of 06)' after the '03' and marks it as the volume
+        "Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021).cbr": {
+            "ext": "cbr",
+            "issue": "008",
+            "series": "Elephantmen 2259",
+            "title": "Simple Truth",
+            "volume": "03",
+            "year": "2021",
+            "volume_count": "06",
+        },
+        # CT treats book like 'v' but also adds it as the title (matches ComicVine for this particular series)
+        "Bloodshot Book 03 (2020).cbr": {
+            "ext": "cbr",
+            "issue": "03",
+            "series": "Bloodshot",
+            "title": "Book 03",
+            "volume": "03",
+            "year": "2020",
+        },
+        # c2c aka "cover to cover" is fairly common and CT moves it to scan_info/remainder
+        "Marvel Two In One V1 #090 c2c.cbr": {
+            "ext": "cbr",
+            "issue": "090",
+            "series": "Marvel Two In One",
+            "publisher": "Marvel",
+            "volume": "1",
+            "scan_info": "c2c",
+        },
+        # CT treats '[]' as equivalent to '()', catches DC as a publisher and 'Sep-Oct 1951' as dates and removes them. CT doesn't catch the digital though so that could be better but I blame whoever made this atrocious filename
+        "Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz": {
+            "ext": "cbz",
+            "issue": "49",
+            "series": "Wonder Woman",
+            "publisher": "DC",
+            "year": "1951",
+            "month": "09",
+            "remainders": (
+                "digital (downsized, lightened, 4 missing story pages "
+                "restored) (Shadowcat-Empire)",
+            ),
+        },
+        "Captain Science #001 (1950) The Beginning - nothing.cbz": {
+            "ext": "cbz",
+            "issue": "001",
+            "title": "The Beginning - nothing",
+            "series": "Captain Science",
+            "year": "1950",
+        },
+        "Captain Science #001-cix-cbi.cbr": {
+            "ext": "cbr",
+            "issue": "001",
+            "series": "Captain Science",
+            "title": "cix-cbi",
+        },
+        "Long Series Name v1 (2000) #001 "
+        "Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS_VOL,
+        "Long Series Name 001 (2000) (TPB-Releaser) Title.cbz": {
+            "series": "Long Series Name",
+            "issue": "001",
+            "year": "2000",
+            "original_format": "TPB",
+            "scan_info": "Releaser",
+            "remainders": ("Title",),
+            "ext": "cbz",
+        },
+        "Long Series Name Vol 1 "
+        "(2000) (TPB) (Releaser & Releaser-Releaser) Title.cbr": {
+            "series": "Long Series Name",
+            "volume": "1",
+            "issue": "1",
+            "remainders": ("Title",),
+            "original_format": "TPB",
+            "year": "2000",
+            "scan_info": "Releaser & Releaser-Releaser",
+            "ext": "cbr",
+        },
+    }
+)
+
+# first_key, first_val = NEW.popitem()
+# FNS[first_key] = first_val
+PARSE_FNS = MappingProxyType(FNS)
+
+SERIALIZE_FNS = MappingProxyType(
+    {
+        "Long Series Name #001 (2000) Title (TPB) (Releaser).cbz": TEST_COMIC_FIELDS,
+        "Long Series Name v1 #001 "
+        "(2000) Title (TPB) (Releaser & Releaser-Releaser).cbr": TEST_COMIC_VOL_ONLY,
+        "Series Name (2000-12-31).cbz": {
+            "series": "Series Name",
+            "year": "2000",
+            "month": "12",
+            "day": "31",
+            "ext": "cbz",
+        },
+        "Series Name (2000-12).cbz": {
+            "series": "Series Name",
+            "year": "2000",
+            "month": "12",
+            "ext": "cbz",
+        },
+        "Series Name (Dec-31).cbz": {
+            "series": "Series Name",
+            "month": "12",
+            "day": "31",
+            "ext": "cbz",
+        },
+    }
+)
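Note: two behavior changes are worth pulling out of the fixture changes above. First, the headline 0.2.0 "philosophy change regarding dashes": a bare " - " no longer splits a title off the series name. Second, volume now implies issue: fixtures with a volume but no explicit issue number gained an "issue" equal to the volume. The fixtures are also exposed through types.MappingProxyType, a read-only mapping view, so tests cannot mutate shared state. A minimal sketch, assuming the ComicFilenameParser API the test diffs below introduce (including its default non-verbose constructor); expected values are taken straight from the fixtures above:

from types import MappingProxyType

from comicfn2dict import ComicFilenameParser

# Dash philosophy: 0.1.x expected series="Bardude", title="The Last Thing I
# Remember"; the 0.2.0 fixture keeps the dash inside the series name.
md = ComicFilenameParser("Bardude - The Last Thing I Remember.cbz").parse()
print(md["series"])  # "Bardude - The Last Thing I Remember"

# Volume implies issue: no explicit issue number, so the volume stands in.
md = ComicFilenameParser(
    "Kartalk Library Edition v01 (1992) (digital) (Son of Ultron-Empire).cbr"
).parse()
print(md["volume"], md["issue"])  # "01" "01"

# Read-only fixtures: any write through a mappingproxy raises TypeError.
fns = {"Some Series #1.cbz": {"series": "Some Series", "issue": "1", "ext": "cbz"}}
parse_fns = MappingProxyType(fns)
try:
    parse_fns["oops.cbz"] = {}
except TypeError as exc:
    print(exc)  # 'mappingproxy' object does not support item assignment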
@@ -1,22 +1,18 @@
 """Tests for filename parsing."""
 from pprint import pprint
-from types import MappingProxyType
 
 import pytest
 from deepdiff.diff import DeepDiff
 
-from comicfn2dict import comicfn2dict
+from comicfn2dict import ComicFilenameParser
-from tests.comic_filenames import FNS
+from tests.comic_filenames import PARSE_FNS
 
-ALL_FIELDS = frozenset({"series", "volume", "issue", "issue_count", "year", "ext"})
-FIELD_SCHEMA = MappingProxyType({key: None for key in ALL_FIELDS})
 
 
-@pytest.mark.parametrize("item", FNS.items())
+@pytest.mark.parametrize("item", PARSE_FNS.items())
 def test_parse_filename(item):
     """Test filename parsing."""
     fn, defined_fields = item
-    md = comicfn2dict(fn)
+    md = ComicFilenameParser(fn, verbose=1).parse()
     diff = DeepDiff(defined_fields, md, ignore_order=True)
     print(fn)
     pprint(defined_fields)
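Note: this hunk is the API migration in miniature: the 0.1.x functional entry point comicfn2dict(fn) gives way to a parser class. A hedged usage sketch, assuming only what the diff shows (the verbose keyword and the parse() method); the sample filename comes from the fixtures above:

from comicfn2dict import ComicFilenameParser

fn = "Night of 1000 Wolves 001 (2013).cbz"
# 0.1.x style, removed by this commit:
#     md = comicfn2dict(fn)
# 0.2.0 style; verbose=1 prints parser stages, as the test above uses it.
md = ComicFilenameParser(fn, verbose=1).parse()
print(md.get("series"), md.get("issue"), md.get("year"))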
13
tests/test_dict2comicfn.py
Normal file
@@ -0,0 +1,13 @@
"""Tests for filename serialization."""
import pytest

from comicfn2dict import ComicFilenameSerializer
from tests.comic_filenames import SERIALIZE_FNS


@pytest.mark.parametrize("item", SERIALIZE_FNS.items())
def test_serialize_dict(item):
    """Test metadata serialization."""
    test_fn, md = item
    fn = ComicFilenameSerializer(md).serialize()
    assert test_fn == fn
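Note: a hedged round-trip sketch combining the two classes this commit introduces. The metadata fields and expected filename come from the "Series Name (2000-12-31).cbz" fixture in SERIALIZE_FNS above; the default (non-verbose) parser constructor is an assumption:

from comicfn2dict import ComicFilenameParser, ComicFilenameSerializer

md = {"series": "Series Name", "year": "2000", "month": "12", "day": "31", "ext": "cbz"}
fn = ComicFilenameSerializer(md).serialize()
print(fn)  # expected per the fixture: "Series Name (2000-12-31).cbz"

# Parsing the serialized name back should recover the date fields.
parsed = ComicFilenameParser(fn).parse()
print(parsed.get("year"), parsed.get("month"), parsed.get("day"))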