Compare commits

...

13 Commits
1.4.5 ... 1.5.0

Author SHA1 Message Date
904561fb8e Merge branch 'pyicu' into develop 2022-09-10 21:48:04 -07:00
be6b71dec7 Put unix specific commands in OS specific blocks 2022-09-10 21:11:48 -07:00
63b654a173 Update ci to install pyicu 2022-09-10 19:51:26 -07:00
bc25acde9f Fix sorting
Switch natsort to use os_sorted
Remove directories when returning a list of files in a comic
Update tests to account for '!cover.jpg'
2022-09-10 19:48:50 -07:00
03677ce4b8 Fix renaming
Make ComicArchive.path always absolute
Fix unique_file not preserving the extension
Fix incorrect output when renaming in CLI mode
Fix handling of platform when renaming
2022-08-19 20:20:37 -07:00
535afcb4c6 Fix replacements 2022-08-19 19:59:58 -07:00
06255f7848 Perform replacements on literal text and format values 2022-08-18 13:48:23 -07:00
00e649bb4c Move colon handling when renaming to the MetadataFormatter class
Fixes #356
2022-08-17 16:16:38 -07:00
078f569ec6 Fix codeblock in README.md 2022-08-14 10:51:08 -07:00
315cf7d920 Merge pull request #355 from Xav83/patch-1
Adds the Chocolatey package as a way to install ComicTagger
2022-08-14 10:47:24 -07:00
e9cc6a16a8 Note that @Xav83 is the maintainer of the chocolatey package
Co-authored-by: Xavier Jouvenot <x.jouvenot@gmail.com>
2022-08-14 10:45:51 -07:00
26eb6985fe Adds the Chocolatey package as a way to install ComicTagger
Adds the Chocolatey package in the list of possibilities to install ComicTagger
2022-08-13 11:52:09 +02:00
be983c61bc Fix #353
The two primary cases fixed are:
Ms. Marvel
spider-man/deadpool

The first issue removed 'Ms.' which is a problem as many comics have
series that the only difference in the title is the
designation/honorific.

The second issue is that the '/' was removed and not replaced with
anything causing a search for 'mandeadpool' which will not show useful
results.

Consequently all designations/honorifics are now untouched
All punctuation is replaced with a space
2022-08-12 07:10:36 -07:00
13 changed files with 119 additions and 37 deletions

View File

@ -80,6 +80,20 @@ jobs:
run: |
choco install -y zip
if: runner.os == 'Windows'
- name: Install macos dependencies
run: |
brew install icu4c pkg-config
export PKG_CONFIG_PATH="/usr/local/opt/icu4c/lib/pkgconfig";
export PATH="/usr/local/opt/icu4c/bin:/usr/local/opt/icu4c/sbin:$PATH"
python -m pip install --no-binary=:pyicu: pyicu
if: runner.os == 'macOS'
- name: Install linux dependencies
run: |
sudo apt-get install pkg-config libicu-dev
export PKG_CONFIG_PATH="/usr/local/opt/icu4c/lib/pkgconfig";
export PATH="/usr/local/opt/icu4c/bin:/usr/local/opt/icu4c/sbin:$PATH"
python -m pip install --no-binary=:pyicu: pyicu
if: runner.os == 'Linux'
- name: Build and install PyPi packages
run: |

View File

@ -41,12 +41,25 @@ jobs:
run: |
choco install -y zip
if: runner.os == 'Windows'
- name: Install macos dependencies
run: |
brew install icu4c pkg-config
export PKG_CONFIG_PATH="/usr/local/opt/icu4c/lib/pkgconfig";
export PATH="/usr/local/opt/icu4c/bin:/usr/local/opt/icu4c/sbin:$PATH"
python -m pip install --no-binary=:pyicu: pyicu
if: runner.os == 'macOS'
- name: Install linux dependencies
run: |
sudo apt-get install pkg-config libicu-dev
export PKG_CONFIG_PATH="/usr/local/opt/icu4c/lib/pkgconfig";
export PATH="/usr/local/opt/icu4c/bin:/usr/local/opt/icu4c/sbin:$PATH"
python -m pip install --no-binary=:pyicu: pyicu
if: runner.os == 'Linux'
- name: Build, Install and Test PyPi packages
run: |
make clean pydist
python -m pip install "dist/$(python setup.py --fullname)-py3-none-any.whl[all]"
echo "CT_FULL_NAME=$(python setup.py --fullname)" >> $GITHUB_ENV
python -m flake8
python -m pytest
@ -69,4 +82,4 @@ jobs:
draft: false
files: |
dist/!(*Linux).zip
dist/*.whl
dist/*${{ fromJSON('["never", ""]')[runner.os == 'Linux'] }}.whl

View File

@ -2,6 +2,7 @@
[![GitHub release (latest by date)](https://img.shields.io/github/downloads/comictagger/comictagger/latest/total)](https://github.com/comictagger/comictagger/releases/latest)
[![PyPI](https://img.shields.io/pypi/v/comictagger)](https://pypi.org/project/comictagger/)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/comictagger)](https://pypistats.org/packages/comictagger)
[![Chocolatey package](https://img.shields.io/chocolatey/dt/comictagger?color=blue&label=chocolatey)](https://community.chocolatey.org/packages/comictagger)
[![PyPI - License](https://img.shields.io/pypi/l/comictagger)](https://opensource.org/licenses/Apache-2.0)
[![GitHub Discussions](https://img.shields.io/github/discussions/comictagger/comictagger)](https://github.com/comictagger/comictagger/discussions)
@ -48,6 +49,12 @@ A pip package is provided, you can install it with:
There are two optional dependencies GUI and CBR. You can install the optional dependencies by specifying one or more of `GUI`,`CBR` or `all` in braces e.g. `comictagger[CBR,GUI]`
### Chocolatey installation (Windows only)
A [Chocolatey package](https://community.chocolatey.org/packages/comictagger), maintained by @Xav83, is provided, you can install it with:
```powershell
choco install comictagger
```
### From source
1. Ensure you have python 3.9 installed

View File

@ -148,7 +148,7 @@ class SevenZipArchiver(UnknownArchiver):
def get_filename_list(self) -> list[str]:
try:
with py7zr.SevenZipFile(self.path, "r") as zf:
namelist: list[str] = zf.getnames()
namelist: list[str] = [file.filename for file in zf.list() if not file.is_directory]
return namelist
except (py7zr.Bad7zFile, OSError) as e:
@ -248,7 +248,7 @@ class ZipArchiver(UnknownArchiver):
def get_filename_list(self) -> list[str]:
try:
with zipfile.ZipFile(self.path, mode="r") as zf:
namelist = zf.namelist()
namelist = [file.filename for file in zf.infolist() if not file.is_dir()]
return namelist
except (zipfile.BadZipfile, OSError) as e:
logger.error("Error listing files in zip archive [%s]: %s", e, self.path)
@ -683,7 +683,7 @@ class ComicArchive:
self._has_cbi: bool | None = None
self._has_cix: bool | None = None
self._has_comet: bool | None = None
self.path = pathlib.Path(path)
self.path = pathlib.Path(path).absolute()
self.page_count: int | None = None
self.page_list: list[str] = []
@ -934,7 +934,7 @@ class ComicArchive:
# seems like some archive creators are on Windows, and don't know about case-sensitivity!
if sort_list:
files = cast(list[str], natsort.natsorted(files, alg=natsort.ns.IC | natsort.ns.I | natsort.ns.U))
files = cast(list[str], natsort.os_sorted(files))
# make a sub-list of image files
self.page_list = []

View File

@ -121,13 +121,9 @@ def remove_articles(text: str) -> str:
"the",
"the",
"with",
"ms",
"mrs",
"mr",
"dr",
]
new_text = ""
for word in text.split(" "):
for word in text.split():
if word not in articles:
new_text += word + " "
@ -139,19 +135,16 @@ def remove_articles(text: str) -> str:
def sanitize_title(text: str, basic: bool = False) -> str:
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 12 not 1/2
text = unicodedata.normalize("NFKD", text).casefold()
if basic:
# comicvine keeps apostrophes a part of the word
text = text.replace("'", "")
text = text.replace('"', "")
else:
# comicvine keeps apostrophes a part of the word
text = text.replace("'", "")
text = text.replace('"', "")
if not basic:
# comicvine ignores punctuation and accents
# remove all characters that are not a letter, separator (space) or number
# replace any "dash punctuation" with a space
# makes sure that batman-superman and self-proclaimed stay separate words
text = "".join(
c if not unicodedata.category(c) in ("Pd",) else " "
for c in text
if unicodedata.category(c)[0] in "LZN" or unicodedata.category(c) in ("Pd",)
c if unicodedata.category(c)[0] not in "P" else " " for c in text if unicodedata.category(c)[0] in "LZNP"
)
# remove extra space and articles and all lower case
text = remove_articles(text).strip()
@ -174,12 +167,12 @@ def titles_match(search_title: str, record_title: str, threshold: int = 90) -> b
def unique_file(file_name: pathlib.Path) -> pathlib.Path:
name = file_name.name
name = file_name.stem
counter = 1
while True:
if not file_name.exists():
return file_name
file_name = file_name.with_name(name + " (" + str(counter) + ")")
file_name = file_name.with_stem(name + " (" + str(counter) + ")")
counter += 1

View File

@ -472,7 +472,7 @@ def process_file_cli(
match_results.good_matches.append(str(ca.path.absolute()))
elif opts.rename:
original_path = ca.path
msg_hdr = ""
if batch_mode:
msg_hdr = f"{ca.path}: "
@ -529,7 +529,7 @@ def process_file_cli(
else:
suffix = " (dry-run, no change)"
print(f"renamed '{os.path.basename(ca.path)}' -> '{new_name}' {suffix}")
print(f"renamed '{original_path.name}' -> '{new_name}' {suffix}")
elif opts.export_to_zip:
msg_hdr = ""

View File

@ -20,10 +20,9 @@ import logging
import os
import pathlib
import string
import sys
from typing import Any, cast
from typing import Any, NamedTuple, cast
from pathvalidate import sanitize_filename
from pathvalidate import Platform, normalize_platform, sanitize_filename
from comicapi.comicarchive import ComicArchive
from comicapi.genericmetadata import GenericMetadata
@ -32,6 +31,16 @@ from comicapi.issuestring import IssueString
logger = logging.getLogger(__name__)
class Replacements(NamedTuple):
literal_text: list[tuple[str, str]]
format_value: list[tuple[str, str]]
REPLACEMENTS = Replacements(
literal_text=[(": ", " - "), (":", "-")], format_value=[(": ", " - "), (":", "-"), ("/", "-"), ("\\", "-")]
)
def get_rename_dir(ca: ComicArchive, rename_dir: str | pathlib.Path | None) -> pathlib.Path:
folder = ca.path.parent.absolute()
if rename_dir is not None:
@ -42,16 +51,24 @@ def get_rename_dir(ca: ComicArchive, rename_dir: str | pathlib.Path | None) -> p
class MetadataFormatter(string.Formatter):
def __init__(self, smart_cleanup: bool = False, platform: str = "auto") -> None:
def __init__(
self, smart_cleanup: bool = False, platform: str = "auto", replacements: Replacements = REPLACEMENTS
) -> None:
super().__init__()
self.smart_cleanup = smart_cleanup
self.platform = platform
self.platform = normalize_platform(platform)
self.replacements = replacements
def format_field(self, value: Any, format_spec: str) -> str:
if value is None or value == "":
return ""
return cast(str, super().format_field(value, format_spec))
def handle_replacements(self, string: str, replacements: list[tuple[str, str]]) -> str:
for f, r in replacements:
string = string.replace(f, r)
return string
def _vformat(
self,
format_string: str,
@ -72,6 +89,8 @@ class MetadataFormatter(string.Formatter):
if lstrip:
literal_text = literal_text.lstrip("-_)}]#")
if self.smart_cleanup:
if self.platform in [Platform.UNIVERSAL, Platform.WINDOWS]:
literal_text = self.handle_replacements(literal_text, self.replacements.literal_text)
lspace = literal_text[0].isspace() if literal_text else False
rspace = literal_text[-1].isspace() if literal_text else False
literal_text = " ".join(literal_text.split())
@ -117,6 +136,9 @@ class MetadataFormatter(string.Formatter):
result[-1], _, _ = result[-1].rstrip().rpartition(" ")
result[-1] = result[-1].rstrip("-_({[#")
if self.smart_cleanup:
if self.platform in [Platform.UNIVERSAL, Platform.WINDOWS]:
# colons and slashes get special treatment
fmt_obj = self.handle_replacements(fmt_obj, self.replacements.format_value)
fmt_obj = " ".join(fmt_obj.split())
fmt_obj = str(sanitize_filename(fmt_obj, platform=self.platform))
result.append(fmt_obj)
@ -179,13 +201,6 @@ class FileRenamer:
new_basename = ""
for component in pathlib.PureWindowsPath(template).parts:
if (
self.platform.casefold() in ["universal", "windows"] or sys.platform.casefold() in ["windows"]
) and self.smart_cleanup:
# colons get special treatment
component = component.replace(": ", " - ")
component = component.replace(":", "-")
new_basename = str(
sanitize_filename(fmt.vformat(component, args=[], kwargs=Default(md_dict)), platform=self.platform)
).strip()

View File

@ -5,6 +5,7 @@ pathvalidate
pillow>=9.1.0
py7zr
pycountry
pyicu; sys_platform == 'linux' or sys_platform == 'darwin'
requests==2.*
text2digits
thefuzz>=0.19.0

Binary file not shown.

View File

@ -757,6 +757,13 @@ rnames = [
"Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz",
does_not_raise(),
),
(
"{series} #{issue} - {title} {volume:02} ({year})", # Ensure format specifier works
False,
"universal",
"Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game 01 (2007).cbz",
does_not_raise(),
),
(
"{series} #{issue} - {title} ({year})({price})", # price should be none, test no space between ')('
False,
@ -778,6 +785,20 @@ rnames = [
"Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz",
does_not_raise(),
),
(
"{title} {web_link}", # Ensure colon is replaced in metadata
False,
"universal",
"Anda's Game https---comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
does_not_raise(),
),
(
"{series}:{title} #{issue} ({year})", # on windows the ':' is replaced
False,
"universal",
"Cory Doctorow's Futuristic Tales of the Here and Now-Anda's Game #001 (2007).cbz",
does_not_raise(),
),
(
"{series}: {title} #{issue} ({year})", # on windows the ':' is replaced
False,

View File

@ -15,6 +15,8 @@ def test_getPageNameList():
pageNameList = c.get_page_name_list()
assert pageNameList == [
"!cover.jpg",
"00.jpg",
"page0.jpg",
"Page1.jpeg",
"Page2.png",

View File

@ -77,11 +77,11 @@ def test_get_language(value, result):
def test_unique_file(tmp_path):
file = tmp_path / "test"
file = tmp_path / "test.cbz"
assert file == comicapi.utils.unique_file(file)
file.mkdir()
assert (tmp_path / "test (1)") == comicapi.utils.unique_file(file)
assert (tmp_path / "test (1).cbz") == comicapi.utils.unique_file(file)
def test_add_to_path(monkeypatch):
@ -108,3 +108,19 @@ titles = [
@pytest.mark.parametrize("value, result", titles)
def test_titles_match(value, result):
assert comicapi.utils.titles_match(value[0], value[1]) == result
titles_2 = [
("", ""),
("鋼の錬金術師", "鋼の錬金術師"),
("Conan el Bárbaro", "Conan el Barbaro"),
("The Batman's Grave", "batmans grave"),
("A+X", "ax"),
("ms. marvel", "ms marvel"),
("spider-man/deadpool", "spider man deadpool"),
]
@pytest.mark.parametrize("value, result", titles_2)
def test_sanitize_title(value, result):
assert comicapi.utils.sanitize_title(value) == result.casefold()