Move colon handling when renaming to the MetadataFormatter class

Fixes #356
Fix codeblock in README.md
2022-08-17 16:16:38 -07:00 · 2022-08-14 10:51:08 -07:00 · 2022-08-14 10:47:24 -07:00 · 2022-08-14 10:45:51 -07:00 · 2022-08-13 11:52:09 +02:00 · 2022-08-12 07:10:36 -07:00
17 changed files with 75 additions and 50 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -1,7 +1,7 @@
 exclude: ^scripts
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.2.0
+    rev: v4.3.0
    hooks:
    -   id: trailing-whitespace
    -   id: end-of-file-fixer
@ -10,7 +10,7 @@ repos:
    -   id: name-tests-test
    -   id: requirements-txt-fixer
 -   repo: https://github.com/asottile/setup-cfg-fmt
-    rev: v1.20.1
+    rev: v2.0.0
    hooks:
    -   id: setup-cfg-fmt
 -   repo: https://github.com/PyCQA/isort
@ -19,12 +19,12 @@ repos:
    -   id: isort
        args: [--af,--add-import, 'from __future__ import annotations']
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.32.1
+    rev: v2.37.3
    hooks:
    -   id: pyupgrade
        args: [--py39-plus]
 -   repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 22.6.0
    hooks:
    -   id: black
 -   repo: https://github.com/PyCQA/autoflake
@ -33,12 +33,12 @@ repos:
    -   id: autoflake
        args: [-i]
 -   repo: https://github.com/PyCQA/flake8
-    rev: 4.0.1
+    rev: 5.0.4
    hooks:
    -   id: flake8
-        additional_dependencies: [flake8-encodings, flake8-warnings, flake8-builtins, flake8-eradicate, flake8-length, flake8-print]
+        additional_dependencies: [flake8-encodings, flake8-warnings, flake8-builtins, flake8-length, flake8-print]
 -   repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.960
+    rev: v0.971
    hooks:
    -   id: mypy
        additional_dependencies: [types-setuptools, types-requests]
--- a/README.md
+++ b/README.md
@ -2,6 +2,7 @@
 [![GitHub release (latest by date)](https://img.shields.io/github/downloads/comictagger/comictagger/latest/total)](https://github.com/comictagger/comictagger/releases/latest)
 [![PyPI](https://img.shields.io/pypi/v/comictagger)](https://pypi.org/project/comictagger/)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/comictagger)](https://pypistats.org/packages/comictagger)
+[![Chocolatey package](https://img.shields.io/chocolatey/dt/comictagger?color=blue&label=chocolatey)](https://community.chocolatey.org/packages/comictagger)
 [![PyPI - License](https://img.shields.io/pypi/l/comictagger)](https://opensource.org/licenses/Apache-2.0)

 [![GitHub Discussions](https://img.shields.io/github/discussions/comictagger/comictagger)](https://github.com/comictagger/comictagger/discussions)
@ -48,6 +49,12 @@ A pip package is provided, you can install it with:

 There are two optional dependencies GUI and CBR. You can install the optional dependencies by specifying one or more of `GUI`,`CBR` or `all` in braces e.g. `comictagger[CBR,GUI]`

+### Chocolatey installation (Windows only)
+
+A [Chocolatey package](https://community.chocolatey.org/packages/comictagger), maintained by @Xav83, is provided. You can install it with:
+```powershell
+choco install comictagger
+```
 ### From source

 1. Ensure you have Python 3.9 installed
--- a/comicapi/comicarchive.py
+++ b/comicapi/comicarchive.py
@ -114,7 +114,7 @@ class SevenZipArchiver(UnknownArchiver):
        return False

    def read_file(self, archive_file: str) -> bytes:
-        data = bytes()
+        data = b""
        try:
            with py7zr.SevenZipFile(self.path, "r") as zf:
                data = zf.read(archive_file)[archive_file].read()
@ -422,7 +422,7 @@ class RarArchiver(UnknownArchiver):

        rarc = self.get_rar_obj()
        if rarc is None:
-            return bytes()
+            return b""

        tries = 0
        while tries < 7:
@ -665,7 +665,7 @@ class FolderArchiver(UnknownArchiver):


 class ComicArchive:
-    logo_data = bytes()
+    logo_data = b""

    class ArchiveType:
        SevenZip, Zip, Rar, Folder, Pdf, Unknown = list(range(6))
@ -853,13 +853,13 @@ class ComicArchive:
        return retcode

    def get_page(self, index: int) -> bytes:
-        image_data = bytes()
+        image_data = b""

        filename = self.get_page_name(index)

        if filename:
            try:
-                image_data = self.archiver.read_file(filename) or bytes()
+                image_data = self.archiver.read_file(filename) or b""
            except Exception:
                logger.error("Error reading in page %d. Substituting logo page.", index)
                image_data = ComicArchive.logo_data
@ -1033,7 +1033,7 @@ class ComicArchive:
            raw_cix = self.archiver.read_file(self.ci_xml_filename) or b""
        except Exception as e:
            logger.error("Error reading in raw CIX! for %s: %s", self.path, e)
-            raw_cix = bytes()
+            raw_cix = b""
        return raw_cix

    def write_cix(self, metadata: GenericMetadata) -> bool:
--- a/comicapi/filenameparser.py
+++ b/comicapi/filenameparser.py
@ -24,7 +24,8 @@ import logging
 import os
 import re
 from operator import itemgetter
-from typing import Callable, Match, TypedDict
+from re import Match
+from typing import Callable, TypedDict
 from urllib.parse import unquote

 from text2digits import text2digits
--- a/comicapi/utils.py
+++ b/comicapi/utils.py
@ -21,8 +21,9 @@ import os
 import pathlib
 import unicodedata
 from collections import defaultdict
+from collections.abc import Mapping
 from shutil import which  # noqa: F401
-from typing import Any, Mapping
+from typing import Any

 import pycountry
 import thefuzz.fuzz
@ -120,13 +121,9 @@ def remove_articles(text: str) -> str:
        "the",
        "the",
        "with",
-        "ms",
-        "mrs",
-        "mr",
-        "dr",
    ]
    new_text = ""
-    for word in text.split(" "):
+    for word in text.split():
        if word not in articles:
            new_text += word + " "

@ -138,19 +135,16 @@ def remove_articles(text: str) -> str:
 def sanitize_title(text: str, basic: bool = False) -> str:
    # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
    text = unicodedata.normalize("NFKD", text).casefold()
-    if basic:
-        # comicvine keeps apostrophes a part of the word
-        text = text.replace("'", "")
-        text = text.replace('"', "")
-    else:
+    # comicvine keeps apostrophes a part of the word
+    text = text.replace("'", "")
+    text = text.replace('"', "")
+    if not basic:
        # comicvine ignores punctuation and accents
        # remove all characters that are not a letter, separator (space) or number
        # replace any "dash punctuation" with a space
        # makes sure that batman-superman and self-proclaimed stay separate words
        text = "".join(
-            c if not unicodedata.category(c) in ("Pd",) else " "
-            for c in text
-            if unicodedata.category(c)[0] in "LZN" or unicodedata.category(c) in ("Pd",)
+            c if unicodedata.category(c)[0] not in "P" else " " for c in text if unicodedata.category(c)[0] in "LZNP"
        )
        # remove extra space and articles and all lower case
        text = remove_articles(text).strip()
--- a/comictaggerlib/comicvinetalker.py
+++ b/comictaggerlib/comicvinetalker.py
@ -740,7 +740,7 @@ class ComicVineTalker:
        )

        self.nam.finished.connect(self.async_fetch_issue_cover_url_complete)
-        self.nam.get(QtNetwork.QNetworkRequest(QtCore.QUrl(issue_url)))
+        self.nam.get(QtNetwork.QNetworkRequest(QtCore.QUrl(issue_url.geturl())))

    def async_fetch_issue_cover_url_complete(self, reply: QtNetwork.QNetworkReply) -> None:
        # read in the response
--- a/comictaggerlib/coverimagewidget.py
+++ b/comictaggerlib/coverimagewidget.py
@ -113,7 +113,7 @@ class CoverImageWidget(QtWidgets.QWidget):
        self.page_loader = None
        self.imageIndex = -1
        self.imageCount = 1
-        self.imageData = bytes()
+        self.imageData = b""

        self.btnLeft.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("left.png")))
        self.btnRight.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("right.png")))
@ -136,7 +136,7 @@ class CoverImageWidget(QtWidgets.QWidget):
        self.page_loader = None
        self.imageIndex = -1
        self.imageCount = 1
-        self.imageData = bytes()
+        self.imageData = b""

    def clear(self) -> None:
        self.reset_widget()
--- a/comictaggerlib/filerenamer.py
+++ b/comictaggerlib/filerenamer.py
@ -45,7 +45,7 @@ class MetadataFormatter(string.Formatter):
    def __init__(self, smart_cleanup: bool = False, platform: str = "auto") -> None:
        super().__init__()
        self.smart_cleanup = smart_cleanup
-        self.platform = platform
+        self.platform = platform.casefold()

    def format_field(self, value: Any, format_spec: str) -> str:
        if value is None or value == "":
@ -72,6 +72,10 @@ class MetadataFormatter(string.Formatter):
                if lstrip:
                    literal_text = literal_text.lstrip("-_)}]#")
                if self.smart_cleanup:
+                    if self.platform in ["universal", "windows"] or sys.platform.casefold() in ["windows"]:
+                        # colons get special treatment
+                        literal_text = literal_text.replace(": ", " - ")
+                        literal_text = literal_text.replace(":", "-")
                    lspace = literal_text[0].isspace() if literal_text else False
                    rspace = literal_text[-1].isspace() if literal_text else False
                    literal_text = " ".join(literal_text.split())
@ -179,13 +183,6 @@ class FileRenamer:

        new_basename = ""
        for component in pathlib.PureWindowsPath(template).parts:
-            if (
-                self.platform.casefold() in ["universal", "windows"] or sys.platform.casefold() in ["windows"]
-            ) and self.smart_cleanup:
-                # colons get special treatment
-                component = component.replace(": ", " - ")
-                component = component.replace(":", "-")
-
            new_basename = str(
                sanitize_filename(fmt.vformat(component, args=[], kwargs=Default(md_dict)), platform=self.platform)
            ).strip()
--- a/comictaggerlib/imagefetcher.py
+++ b/comictaggerlib/imagefetcher.py
@ -97,14 +97,14 @@ class ImageFetcher:
            # if we found it, just emit the signal asap
            if image_data:
                ImageFetcher.image_fetch_complete(QtCore.QByteArray(image_data))
-                return bytes()
+                return b""

            # didn't find it.  look online
            self.nam.finished.connect(self.finish_request)
            self.nam.get(QtNetwork.QNetworkRequest(QtCore.QUrl(url)))

            # we'll get called back when done...
-        return bytes()
+        return b""

    def finish_request(self, reply: QtNetwork.QNetworkReply) -> None:
        # read in the image data
@ -159,10 +159,10 @@ class ImageFetcher:
            row = cur.fetchone()

            if row is None:
-                return bytes()
+                return b""

            filename = row[0]
-            image_data = bytes()
+            image_data = b""

            try:
                with open(filename, "rb") as f:
--- a/comictaggerlib/issueidentifier.py
+++ b/comictaggerlib/issueidentifier.py
@ -157,7 +157,7 @@ class IssueIdentifier:
            cropped_im = im.crop((int(w / 2), 0, w, h))
        except Exception:
            logger.exception("cropCover() error")
-            return bytes()
+            return b""

        output = io.BytesIO()
        cropped_im.save(output, format="PNG")
--- a/comictaggerlib/settings.py
+++ b/comictaggerlib/settings.py
@ -22,7 +22,8 @@ import pathlib
 import platform
 import sys
 import uuid
-from typing import Iterator, TextIO, no_type_check
+from collections.abc import Iterator
+from typing import TextIO, no_type_check

 from comicapi import utils

--- a/comictaggerlib/taggerwindow.py
+++ b/comictaggerlib/taggerwindow.py
@ -26,7 +26,8 @@ import pprint
 import re
 import sys
 import webbrowser
-from typing import Any, Callable, Iterable, cast
+from collections.abc import Iterable
+from typing import Any, Callable, cast
 from urllib.parse import urlparse

 import natsort
@ -1854,7 +1855,7 @@ Have fun!
                logger.error("Failed to load metadata for %s: %s", ca.path, e)
            image_data = ca.get_page(cover_idx)
            self.atprogdialog.set_archive_image(image_data)
-            self.atprogdialog.set_test_image(bytes())
+            self.atprogdialog.set_test_image(b"")

            QtCore.QCoreApplication.processEvents()
            if self.atprogdialog.isdone:
--- a/requirements-speedup.txt
+++ b/requirements-speedup.txt
@ -1 +1 @@
-thefuzz[speedup]>=0.19.0
+thefuzz[speedup]>=0.19.0
--- a/requirements.txt
+++ b/requirements.txt
@ -2,7 +2,7 @@ beautifulsoup4 >= 4.1
 importlib_metadata
 natsort>=8.1.0
 pathvalidate
-pillow>=4.3.0
+pillow>=9.1.0
 py7zr
 pycountry
 requests==2.*
--- a/testing/filenames.py
+++ b/testing/filenames.py
@ -757,6 +757,13 @@ rnames = [
        "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz",
        does_not_raise(),
    ),
+    (
+        "{series} #{issue} - {title} {volume:02} ({year})",  # Ensure format specifier works
+        False,
+        "universal",
+        "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game 01 (2007).cbz",
+        does_not_raise(),
+    ),
    (
        "{series} #{issue} - {title} ({year})({price})",  # price should be none, test no  space between ')('
        False,
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -5,7 +5,8 @@ import datetime
 import io
 import shutil
 import unittest.mock
-from typing import Any, Generator
+from collections.abc import Generator
+from typing import Any

 import pytest
 import requests
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@ -108,3 +108,19 @@ titles = [
@pytest.mark.parametrize("value, result", titles)
 def test_titles_match(value, result):
    assert comicapi.utils.titles_match(value[0], value[1]) == result
+
+
+titles_2 = [
+    ("", ""),
+    ("鋼の錬金術師", "鋼の錬金術師"),
+    ("Conan el Bárbaro", "Conan el Barbaro"),
+    ("The Batman's Grave", "batmans grave"),
+    ("A+X", "ax"),
+    ("ms. marvel", "ms marvel"),
+    ("spider-man/deadpool", "spider man deadpool"),
+]
+
+
+@pytest.mark.parametrize("value, result", titles_2)
+def test_sanitize_title(value, result):
+    assert comicapi.utils.sanitize_title(value) == result.casefold()
Author	SHA1	Message	Date
Timmy Welch	00e649bb4c	Move colon handling when renaming to the MetadataFormatter class Fixes #356	2022-08-17 16:16:38 -07:00
Timmy Welch	078f569ec6	Fix codeblock in README.md	2022-08-14 10:51:08 -07:00
Timmy Welch	315cf7d920	Merge pull request #355 from Xav83/patch-1 Adds the Chocolatey package as a way to install ComicTagger	2022-08-14 10:47:24 -07:00
Timmy Welch	e9cc6a16a8	Note that @Xav83 is the maintainer of the chocolatey package Co-authored-by: Xavier Jouvenot <x.jouvenot@gmail.com>	2022-08-14 10:45:51 -07:00
Xavier Jouvenot	26eb6985fe	Adds the Chocolatey package as a way to install ComicTagger Adds the Chocolatey package in the list of possibilities to install ComicTagger	2022-08-13 11:52:09 +02:00
Timmy Welch	be983c61bc	Fix #353. The two primary cases fixed are 'Ms. Marvel' and 'spider-man/deadpool'. The first issue removed 'Ms.', which is a problem because many comics have series whose titles differ only in the designation/honorific. The second issue is that the '/' was removed and not replaced with anything, causing a search for 'mandeadpool', which will not show useful results. Consequently, all designations/honorifics are now left untouched, and all punctuation is replaced with a space.	2022-08-12 07:10:36 -07:00
Timmy Welch	77a53a6834	Update dependencies Includes changes from pyupgrade	2022-08-10 20:55:46 -07:00
Timmy Welch	860a3147d2	Construct URL correctly	2022-08-10 16:33:40 -07:00