Compare commits

..

32 Commits

Author SHA1 Message Date
b1a9b0b016 Only upgrade icu4c and pkg-config 2024-03-09 14:47:47 -08:00
0929a6678b Update icu4c paths and upgrade packages on macOS 2024-03-09 14:45:49 -08:00
69824412ce Update GH Actions 2024-03-09 14:07:11 -08:00
0d9756f8b0 Pin minimum version for comicinfoxml 2024-03-09 13:51:35 -08:00
244cd9101d Remove commented code 2024-03-09 13:46:51 -08:00
3df263858d Merge branch 'web-links' into develop 2024-03-09 13:42:29 -08:00
b45c39043b Merge branch 'comicfn2dict' into develop 2024-03-09 13:10:27 -08:00
9eae71fb62 Disable checkboxes when the complicated parser is not used 2024-03-09 13:07:49 -08:00
9a95adf47d Bump comicfn2dict 2024-03-09 13:02:02 -08:00
956c383e5f Fix py7zr 2024-03-05 15:13:03 -08:00
5155762711 Add comicfn2dict as an alternative filename parser 2024-03-03 21:47:31 -08:00
ea43eccd78 Merge branch 'ii-rework' into develop 2024-03-01 15:39:01 -08:00
ff2547e7f2 Disable buttons for add/remove weblink 2024-03-01 15:26:56 -08:00
163cf44751 Open the editor when adding a new web link 2024-02-26 19:04:33 -08:00
14ce8a759f Mark all QTextEdit's as plain text only 2024-02-26 15:57:00 -08:00
22d92e1ded Move result determination out of _cover_matching 2024-02-26 15:38:13 -08:00
3c3700838b Select item on add and set the dirty flag on change 2024-02-25 08:26:29 -08:00
05423c8270 Use a QListWidget for web_links
Fix web_link in md_attributes
2024-02-24 22:31:45 -08:00
d277eb332b Add an option to disable prompt on save Fixes #422 2024-02-24 19:56:32 -08:00
dcad32ade0 Fix settngs generation 2024-02-24 19:55:28 -08:00
dd0b637566 Bump settngs 2024-02-24 19:01:10 -08:00
bad8b85874 Fix tests 2024-02-24 18:30:41 -08:00
938f760a37 Remove IssueIdentifier.search 2024-02-23 20:50:17 -08:00
f382c2f814 Update Tests 2024-02-23 20:47:22 -08:00
4e75731024 Re-write IssueIdentifier.search as IssueIdentifier.identify 2024-02-23 20:47:04 -08:00
920a0ed1af Implement better migration of changed settings should fix #609 2024-02-23 15:45:18 -08:00
9eb50da744 Fix setting rar info in the settings window Fixes #596
Look in all drive letters for rar executable
2024-02-23 15:45:18 -08:00
2e2d886cb2 Bump settngs 2024-02-22 14:52:26 -08:00
5738433c2b Fix fileselectionlist
Remove the custom widgetitem
Set a minimum size for the columns
Use a space " " and nbsp "\xa0" for the check column to allow sorting
2024-02-22 14:30:15 -08:00
4a33dbde46 Fix PyInstaller packaging 2024-02-22 14:30:15 -08:00
b2d3869488 Update filerenaming for web_links
Ensure the j specifier in MetadataFormatter converts to str before joining
Add a web_link variable to the filerenamer
2024-02-17 17:42:07 -08:00
44e9a47a8b Support multiple web_links 2024-02-17 17:42:07 -08:00
38 changed files with 1596 additions and 678 deletions

View File

@ -1,7 +1,7 @@
name: CI
env:
PKG_CONFIG_PATH: /usr/local/opt/icu4c/lib/pkgconfig
PKG_CONFIG_PATH: /usr/local/opt/icu4c/lib/pkgconfig:/opt/homebrew/opt/icu4c/lib/pkgconfig
LC_COLLATE: en_US.UTF-8
on:
pull_request:
@ -23,12 +23,12 @@ jobs:
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
@ -51,12 +51,12 @@ jobs:
os: [ubuntu-latest, macos-11, windows-latest]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
@ -66,7 +66,7 @@ jobs:
- name: Install macos dependencies
run: |
brew install icu4c pkg-config
brew upgrade icu4c pkg-config || brew install icu4c pkg-config
# export PKG_CONFIG_PATH="/usr/local/opt/icu4c/lib/pkgconfig";
# export PATH="/usr/local/opt/icu4c/bin:/usr/local/opt/icu4c/sbin:$PATH"
if: runner.os == 'macOS'

View File

@ -23,7 +23,7 @@ jobs:
use_username: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0

View File

@ -1,7 +1,7 @@
name: Package
env:
PKG_CONFIG_PATH: /usr/local/opt/icu4c/lib/pkgconfig
PKG_CONFIG_PATH: /usr/local/opt/icu4c/lib/pkgconfig:/opt/homebrew/opt/icu4c/lib/pkgconfig
LC_COLLATE: en_US.UTF-8
on:
push:
@ -18,12 +18,12 @@ jobs:
os: [ubuntu-latest, macos-11, windows-latest]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
@ -33,7 +33,7 @@ jobs:
- name: Install macos dependencies
run: |
brew install icu4c pkg-config
brew upgrade && brew install icu4c pkg-config
# export PKG_CONFIG_PATH="/usr/local/opt/icu4c/lib/pkgconfig";
# export PATH="/usr/local/opt/icu4c/bin:/usr/local/opt/icu4c/sbin:$PATH"
if: runner.os == 'macOS'
@ -61,7 +61,7 @@ jobs:
echo "release_name=$(git tag -l --format "%(refname:strip=2): %(contents:lines=1)" ${{ github.ref_name }})" >> $GITHUB_ENV
- name: Release
uses: softprops/action-gh-release@v1
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
with:
name: "${{ env.release_name }}"

View File

@ -41,6 +41,6 @@ repos:
rev: v1.8.0
hooks:
- id: mypy
additional_dependencies: [types-setuptools, types-requests, settngs>=0.9.1]
additional_dependencies: [types-setuptools, types-requests, settngs>=0.10.0]
ci:
skip: [mypy]

View File

@ -11,7 +11,11 @@ def generate() -> str:
app = comictaggerlib.main.App()
app.load_plugins(app.initial_arg_parser.parse_known_args()[0])
app.register_settings()
return settngs.generate_ns(app.manager.definitions)
imports, types = settngs.generate_dict(app.manager.definitions)
imports2, types2 = settngs.generate_ns(app.manager.definitions)
i = imports.splitlines()
i.extend(set(imports2.splitlines()) - set(i))
return "\n\n".join(("\n".join(i), types2, types))
if __name__ == "__main__":

468
comicapi/_url.py Normal file
View File

@ -0,0 +1,468 @@
# mypy: disable-error-code="no-redef"
from __future__ import annotations
try:
from urllib3.exceptions import HTTPError, LocationParseError, LocationValueError
from urllib3.util import Url, parse_url
except ImportError:
import re
import typing
class HTTPError(Exception):
    """Root of the exception hierarchy for this module."""


class LocationValueError(ValueError, HTTPError):
    """Signals an invalid URL value supplied by the caller."""


class LocationParseError(LocationValueError):
    """Signals that a URL string could not be parsed.

    The offending input is kept on ``self.location`` so callers can
    report or inspect it.
    """

    def __init__(self, location: str) -> None:
        # The message format mirrors urllib3 so behaviour matches the
        # real dependency when it is installed.
        message = f"Failed to parse: {location}"
        super().__init__(message)
        self.location = location
def to_str(x: str | bytes, encoding: str | None = None, errors: str | None = None) -> str:
    """Coerce *x* to ``str``, decoding bytes with *encoding*/*errors*.

    ``str`` input is returned untouched; anything that is neither
    ``str`` nor ``bytes`` raises ``TypeError``.
    """
    if isinstance(x, str):
        return x
    if not isinstance(x, bytes):
        raise TypeError(f"not expecting type {type(x).__name__}")
    if not (encoding or errors):
        # Plain decode keeps the historical default (utf-8, strict).
        return x.decode()
    return x.decode(encoding or "utf-8", errors=errors or "strict")
# We only want to normalize urls with an HTTP(S) scheme.
# urllib3 infers URLs without a scheme (None) to be http.
_NORMALIZABLE_SCHEMES = ("http", "https", None)
# Almost all of these patterns were derived from the
# 'rfc3986' module: https://github.com/python-hyper/rfc3986
_PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
_SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
_URI_RE = re.compile(
r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?" r"(?://([^\\/?#]*))?" r"([^?#]*)" r"(?:\?([^#]*))?" r"(?:#(.*))?$",
re.UNICODE | re.DOTALL,
)
_IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
_HEX_PAT = "[0-9A-Fa-f]{1,4}"
_LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=_HEX_PAT, ipv4=_IPV4_PAT)
_subs = {"hex": _HEX_PAT, "ls32": _LS32_PAT}
_variations = [
# 6( h16 ":" ) ls32
"(?:%(hex)s:){6}%(ls32)s",
# "::" 5( h16 ":" ) ls32
"::(?:%(hex)s:){5}%(ls32)s",
# [ h16 ] "::" 4( h16 ":" ) ls32
"(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
# [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
"(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
# [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
"(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
# [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
"(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
# [ *4( h16 ":" ) h16 ] "::" ls32
"(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
# [ *5( h16 ":" ) h16 ] "::" h16
"(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
# [ *6( h16 ":" ) h16 ] "::"
"(?:(?:%(hex)s:){0,6}%(hex)s)?::",
]
_UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
_IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
_ZONE_ID_PAT = "(?:%25|%)(?:[" + _UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
_IPV6_ADDRZ_PAT = r"\[" + _IPV6_PAT + r"(?:" + _ZONE_ID_PAT + r")?\]"
_REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
_TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")
_IPV4_RE = re.compile("^" + _IPV4_PAT + "$")
_IPV6_RE = re.compile("^" + _IPV6_PAT + "$")
_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT + "$")
_BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT[2:-2] + "$")
_ZONE_ID_RE = re.compile("(" + _ZONE_ID_PAT + r")\]$")
_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
_REG_NAME_PAT,
_IPV4_PAT,
_IPV6_ADDRZ_PAT,
)
_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)
_UNRESERVED_CHARS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~")
_SUB_DELIM_CHARS = set("!$&'()*+,;=")
_USERINFO_CHARS = _UNRESERVED_CHARS | _SUB_DELIM_CHARS | {":"}
_PATH_CHARS = _USERINFO_CHARS | {"@", "/"}
_QUERY_CHARS = _FRAGMENT_CHARS = _PATH_CHARS | {"?"}
class Url(
    typing.NamedTuple(
        "Url",
        [
            ("scheme", typing.Optional[str]),
            ("auth", typing.Optional[str]),
            ("host", typing.Optional[str]),
            ("port", typing.Optional[int]),
            ("path", typing.Optional[str]),
            ("query", typing.Optional[str]),
            ("fragment", typing.Optional[str]),
        ],
    )
):
    """
    Data structure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`. Both the scheme and host are normalized as they are
    both case-insensitive according to RFC 3986.
    """

    def __new__(  # type: ignore[no-untyped-def]
        cls,
        scheme: str | None = None,
        auth: str | None = None,
        host: str | None = None,
        port: int | None = None,
        path: str | None = None,
        query: str | None = None,
        fragment: str | None = None,
    ):
        # Normalise before the immutable tuple is created: a relative
        # path gains a leading '/', and the scheme is lower-cased.
        if path and not path.startswith("/"):
            path = "/" + path
        if scheme is not None:
            scheme = scheme.lower()
        return super().__new__(cls, scheme, auth, host, port, path, query, fragment)

    @property
    def hostname(self) -> str | None:
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self) -> str:
        """Absolute path including the query string."""
        uri = self.path or "/"

        if self.query is not None:
            uri += "?" + self.query

        return uri

    @property
    def authority(self) -> str | None:
        """
        Authority component as defined in RFC 3986 3.2.
        This includes userinfo (auth), host and port.

        i.e.
            userinfo@host:port
        """
        userinfo = self.auth
        netloc = self.netloc
        # If there is no netloc there is nothing to attach userinfo to.
        if netloc is None or userinfo is None:
            return netloc
        else:
            return f"{userinfo}@{netloc}"

    @property
    def netloc(self) -> str | None:
        """
        Network location including host and port.

        If you need the equivalent of urllib.parse's ``netloc``,
        use the ``authority`` property instead.
        """
        if self.host is None:
            return None
        # Note: a falsy port (None or 0) is omitted entirely.
        if self.port:
            return f"{self.host}:{self.port}"
        return self.host

    @property
    def url(self) -> str:
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`. The
        returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example:

        .. code-block:: python

            import urllib3

            U = urllib3.util.parse_url("https://google.com/mail/")

            print(U.url)
            # "https://google.com/mail/"

            print( urllib3.util.Url("https", "username:password",
                                    "host.com", 80, "/path", "query", "fragment"
                                    ).url
                )
            # "https://username:password@host.com:80/path?query#fragment"
        """
        scheme, auth, host, port, path, query, fragment = self
        url = ""

        # We use "is not None" we want things to happen with empty strings (or 0 port)
        if scheme is not None:
            url += scheme + "://"
        if auth is not None:
            url += auth + "@"
        if host is not None:
            url += host
        if port is not None:
            url += ":" + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += "?" + query
        if fragment is not None:
            url += "#" + fragment

        return url

    def __str__(self) -> str:
        return self.url
@typing.overload
def _encode_invalid_chars(component: str, allowed_chars: typing.Container[str]) -> str:  # Abstract
    ...


@typing.overload
def _encode_invalid_chars(component: None, allowed_chars: typing.Container[str]) -> None:  # Abstract
    ...


def _encode_invalid_chars(component: str | None, allowed_chars: typing.Container[str]) -> str | None:
    """Percent-encode a URI component without re-encoding existing escapes.

    Already percent-encoded input is detected so '%' signs are not
    double-escaped; every other disallowed byte is emitted as %XX.
    """
    if component is None:
        return None

    component = to_str(component)

    # Upper-case any %xx escapes that are already present and count them.
    component, existing_escapes = _PERCENT_RE.subn(lambda m: m.group(0).upper(), component)

    raw = component.encode("utf-8", "surrogatepass")
    # If every '%' byte belongs to an escape sequence, keep them verbatim.
    keep_percent = existing_escapes == raw.count(b"%")

    out = bytearray()
    for code in raw:
        # 0x25 is '%'; only ASCII bytes may be passed through unescaped.
        if (keep_percent and code == 0x25) or (code < 128 and chr(code) in allowed_chars):
            out.append(code)
        else:
            out.extend(b"%%%02X" % code)
    return out.decode()
def _remove_path_dot_segments(path: str) -> str:
# See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
segments = path.split("/") # Turn the path into a list of segments
output = [] # Initialize the variable to use to store output
for segment in segments:
# '.' is the current directory, so ignore it, it is superfluous
if segment == ".":
continue
# Anything other than '..', should be appended to the output
if segment != "..":
output.append(segment)
# In this case segment == '..', if we can, we should pop the last
# element
elif output:
output.pop()
# If the path starts with '/' and the output is empty or the first string
# is non-empty
if path.startswith("/") and (not output or output[0]):
output.insert(0, "")
# If the path starts with '/.' or '/..' ensure we add one more empty
# string to add a trailing '/'
if path.endswith(("/.", "/..")):
output.append("")
return "/".join(output)
@typing.overload
def _normalize_host(host: None, scheme: str | None) -> None: ...


@typing.overload
def _normalize_host(host: str, scheme: str | None) -> str: ...


def _normalize_host(host: str | None, scheme: str | None) -> str | None:
    # Lower-case / IDNA-encode a hostname and unquote IPv6 zone IDs.
    # Hosts for non-HTTP(S) schemes are returned unchanged.
    if host:
        if scheme in _NORMALIZABLE_SCHEMES:
            is_ipv6 = _IPV6_ADDRZ_RE.match(host)
            if is_ipv6:
                # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as
                # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID
                # separator as necessary to return a valid RFC 4007 scoped IP.
                match = _ZONE_ID_RE.search(host)
                if match:
                    start, end = match.span(1)
                    zone_id = host[start:end]

                    # Strip the '%25' (quoted) or '%' (bare) separator so
                    # only the zone identifier itself is re-encoded.
                    if zone_id.startswith("%25") and zone_id != "%25":
                        zone_id = zone_id[3:]
                    else:
                        zone_id = zone_id[1:]
                    zone_id = _encode_invalid_chars(zone_id, _UNRESERVED_CHARS)
                    return f"{host[:start].lower()}%{zone_id}{host[end:]}"
                else:
                    return host.lower()
            elif not _IPV4_RE.match(host):
                # Registered names may need IDNA encoding, label by label.
                return to_str(
                    b".".join([_idna_encode(label) for label in host.split(".")]),
                    "ascii",
                )
    return host
def _idna_encode(name: str) -> bytes:
if not name.isascii():
try:
import idna
except ImportError:
raise LocationParseError("Unable to parse URL without the 'idna' module") from None
try:
return idna.encode(name.lower(), strict=True, std3_rules=True)
except idna.IDNAError:
raise LocationParseError(f"Name '{name}' is not a valid IDNA label") from None
return name.lower().encode("ascii")
def _encode_target(target: str) -> str:
    """Percent-encode a request target (path plus optional query).

    Pre-condition: *target* starts with '/', in which case _TARGET_RE
    is guaranteed to match.
    """
    matched = _TARGET_RE.match(target)
    if not matched:  # Defensive:
        raise LocationParseError(f"{target!r} is not a valid request URI")

    path, query = matched.groups()
    encoded = _encode_invalid_chars(path, _PATH_CHARS)
    if query is None:
        return encoded
    return encoded + "?" + _encode_invalid_chars(query, _QUERY_CHARS)
def parse_url(url: str) -> Url:
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.
    This parser is RFC 3986 and RFC 6874 compliant.

    The parser logic and helper functions are based heavily on
    work done in the ``rfc3986`` module.

    :param str url: URL to parse into a :class:`.Url` namedtuple.

    Partly backwards-compatible with :mod:`urllib.parse`.

    Example:

    .. code-block:: python

        import urllib3

        print( urllib3.util.parse_url('http://google.com/mail/'))
        # Url(scheme='http', host='google.com', port=None, path='/mail/', ...)

        print( urllib3.util.parse_url('google.com:80'))
        # Url(scheme=None, host='google.com', port=80, path=None, ...)

        print( urllib3.util.parse_url('/foo?bar'))
        # Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """
    if not url:
        # Empty
        return Url()

    source_url = url
    # Scheme-less input is treated as a network location, so that e.g.
    # 'google.com:80' parses as host + port rather than scheme + path.
    if not _SCHEME_RE.search(url):
        url = "//" + url

    scheme: str | None
    authority: str | None
    auth: str | None
    host: str | None
    port: str | None
    port_int: int | None
    path: str | None
    query: str | None
    fragment: str | None

    try:
        scheme, authority, path, query, fragment = _URI_RE.match(url).groups()  # type: ignore[union-attr]
        # Only http(s) and scheme-less URLs get percent-encoding
        # normalisation applied below.
        normalize_uri = scheme is None or scheme.lower() in _NORMALIZABLE_SCHEMES

        if scheme:
            scheme = scheme.lower()

        if authority:
            auth, _, host_port = authority.rpartition("@")
            auth = auth or None
            host, port = _HOST_PORT_RE.match(host_port).groups()  # type: ignore[union-attr]
            if auth and normalize_uri:
                auth = _encode_invalid_chars(auth, _USERINFO_CHARS)
            # An empty port (e.g. 'host:') is equivalent to no port.
            if port == "":
                port = None
        else:
            auth, host, port = None, None, None

        if port is not None:
            port_int = int(port)
            if not (0 <= port_int <= 65535):
                raise LocationParseError(url)
        else:
            port_int = None

        host = _normalize_host(host, scheme)

        if normalize_uri and path:
            path = _remove_path_dot_segments(path)
            path = _encode_invalid_chars(path, _PATH_CHARS)
        if normalize_uri and query:
            query = _encode_invalid_chars(query, _QUERY_CHARS)
        if normalize_uri and fragment:
            fragment = _encode_invalid_chars(fragment, _FRAGMENT_CHARS)

    except (ValueError, AttributeError) as e:
        # Any failure inside the parse is surfaced with the original input.
        raise LocationParseError(source_url) from e

    # For the sake of backwards compatibility we put empty
    # string values for path if there are any defined values
    # beyond the path in the URL.
    # TODO: Remove this when we break backwards compatibility.
    if not path:
        if query is not None or fragment is not None:
            path = ""
        else:
            path = None

    return Url(
        scheme=scheme,
        auth=auth,
        host=host,
        port=port_int,
        path=path,
        query=query,
        fragment=fragment,
    )
__all__ = ("Url", "parse_url", "HTTPError", "LocationParseError", "LocationValueError")

View File

@ -37,7 +37,7 @@ class SevenZipArchiver(Archiver):
data = b""
try:
with py7zr.SevenZipFile(self.path, "r") as zf:
data = zf.read(archive_file)[archive_file].read()
data = zf.read([archive_file])[archive_file].read()
except (py7zr.Bad7zFile, OSError) as e:
logger.error("Error reading 7zip archive [%s]: %s :: %s", e, self.path, archive_file)
raise

View File

@ -364,7 +364,7 @@ class ComicArchive:
def metadata_from_filename(
self,
complicated_parser: bool = False,
parser: utils.Parser = utils.Parser.ORIGINAL,
remove_c2c: bool = False,
remove_fcbd: bool = False,
remove_publisher: bool = False,
@ -376,7 +376,7 @@ class ComicArchive:
filename_info = utils.parse_filename(
self.path.name,
complicated_parser=complicated_parser,
parser=parser,
remove_c2c=remove_c2c,
remove_fcbd=remove_fcbd,
remove_publisher=remove_publisher,

View File

@ -31,6 +31,8 @@ from typing_extensions import NamedTuple, Required
from comicapi import utils
from ._url import Url, parse_url
logger = logging.getLogger(__name__)
@ -133,7 +135,7 @@ class GenericMetadata:
year: int | None = None
language: str | None = None # 2 letter iso code
country: str | None = None
web_link: str | None = None
web_links: list[Url] = dataclasses.field(default_factory=list)
format: str | None = None
manga: str | None = None
black_and_white: bool | None = None
@ -253,7 +255,7 @@ class GenericMetadata:
assign("year", new_md.year)
assign("language", new_md.language)
assign("country", new_md.country)
assign("web_link", new_md.web_link)
assign("web_links", new_md.web_links)
assign("format", new_md.format)
assign("manga", new_md.manga)
assign("black_and_white", new_md.black_and_white)
@ -487,7 +489,9 @@ md_test: GenericMetadata = GenericMetadata(
alternate_count=7,
imprint="craphound.com",
notes="Tagged with ComicTagger 1.3.2a5 using info from Comic Vine on 2022-04-16 15:52:26. [Issue ID 140529]",
web_link="https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/",
web_links=[
parse_url("https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/")
],
format="Series",
manga="No",
black_and_white=None,
@ -551,3 +555,15 @@ md_test: GenericMetadata = GenericMetadata(
last_mark=None,
_cover_image=None,
)
__all__ = (
"Url",
"parse_url",
"PageType",
"ImageMetadata",
"Credit",
"ComicSeries",
"TagOrigin",
"GenericMetadata",
)

View File

@ -57,7 +57,7 @@ class ComicRack(Metadata):
"month",
"year",
"language",
"web_link",
"web_links",
"format",
"manga",
"black_and_white",
@ -229,7 +229,7 @@ class ComicRack(Metadata):
assign("Month", md.month)
assign("Year", md.year)
assign("LanguageISO", md.language)
assign("Web", md.web_link)
assign("Web", " ".join(u.url for u in md.web_links))
assign("Format", md.format)
assign("Manga", md.manga)
assign("BlackAndWhite", "Yes" if md.black_and_white else None)
@ -313,7 +313,7 @@ class ComicRack(Metadata):
md.month = utils.xlate_int(get("Month"))
md.year = utils.xlate_int(get("Year"))
md.language = utils.xlate(get("LanguageISO"))
md.web_link = utils.xlate(get("Web"))
md.web_links = utils.split_urls(utils.xlate(get("Web")))
md.format = utils.xlate(get("Format"))
md.manga = utils.xlate(get("Manga"))
md.maturity_rating = utils.xlate(get("AgeRating"))

View File

@ -20,15 +20,21 @@ import logging
import os
import pathlib
import platform
import sys
import unicodedata
from collections import defaultdict
from collections.abc import Iterable, Mapping
from enum import Enum, auto
from shutil import which # noqa: F401
from typing import Any, TypeVar, cast
from comicfn2dict import comicfn2dict
import comicapi.data
from comicapi import filenamelexer, filenameparser
from ._url import Url, parse_url
try:
import icu
@ -37,9 +43,55 @@ try:
except ImportError:
icu_available = False
if sys.version_info < (3, 11):
class StrEnum(str, Enum):
"""
Enum where members are also (and must be) strings
"""
def __new__(cls, *values: Any) -> Any:
"values must already be of type `str`"
if len(values) > 3:
raise TypeError(f"too many arguments for str(): {values!r}")
if len(values) == 1:
# it must be a string
if not isinstance(values[0], str):
raise TypeError(f"{values[0]!r} is not a string")
if len(values) >= 2:
# check that encoding argument is a string
if not isinstance(values[1], str):
raise TypeError(f"encoding must be a string, not {values[1]!r}")
if len(values) == 3:
# check that errors argument is a string
if not isinstance(values[2], str):
raise TypeError("errors must be a string, not %r" % (values[2]))
value = str(*values)
member = str.__new__(cls, value)
member._value_ = value
return member
@staticmethod
def _generate_next_value_(name: str, start: int, count: int, last_values: Any) -> str:
"""
Return the lower-cased version of the member name.
"""
return name.lower()
else:
from enum import StrEnum
logger = logging.getLogger(__name__)
class Parser(StrEnum):
ORIGINAL = auto()
COMPLICATED = auto()
COMICFN2DICT = auto()
def _custom_key(tup: Any) -> Any:
import natsort
@ -67,7 +119,7 @@ def os_sorted(lst: Iterable[T]) -> Iterable[T]:
def parse_filename(
filename: str,
complicated_parser: bool = False,
parser: Parser = Parser.ORIGINAL,
remove_c2c: bool = False,
remove_fcbd: bool = False,
remove_publisher: bool = False,
@ -99,7 +151,25 @@ def parse_filename(
filename, ext = os.path.splitext(filename)
filename = " ".join(wordninja.split(filename)) + ext
if complicated_parser:
fni = filenameparser.FilenameInfo(
alternate="",
annual=False,
archive="",
c2c=False,
fcbd=False,
format="",
issue="",
issue_count="",
publisher="",
remainder="",
series="",
title="",
volume="",
volume_count="",
year="",
)
if parser == Parser.COMPLICATED:
lex = filenamelexer.Lex(filename, allow_issue_start_with_letter)
p = filenameparser.Parse(
lex.items,
@ -108,7 +178,26 @@ def parse_filename(
remove_publisher=remove_publisher,
protofolius_issue_number_scheme=protofolius_issue_number_scheme,
)
return p.filename_info
fni = p.filename_info
elif parser == Parser.COMICFN2DICT:
fn2d = comicfn2dict(filename)
fni = filenameparser.FilenameInfo(
alternate="",
annual=False,
archive=fn2d.get("ext", ""),
c2c=False,
fcbd=False,
issue=fn2d.get("issue", ""),
issue_count=fn2d.get("issue_count", ""),
publisher=fn2d.get("publisher", ""),
remainder=fn2d.get("scan_info", ""),
series=fn2d.get("series", ""),
title=fn2d.get("title", ""),
volume=fn2d.get("volume", ""),
volume_count=fn2d.get("volume_count", ""),
year=fn2d.get("year", ""),
format=fn2d.get("original_format", ""),
)
else:
fnp = filenameparser.FileNameParser()
fnp.parse_filename(filename)
@ -129,7 +218,7 @@ def parse_filename(
year=fnp.year,
format="",
)
return fni
return fni
def combine_notes(existing_notes: str | None, new_notes: str | None, split: str) -> str:
@ -283,6 +372,24 @@ def split(s: str | None, c: str) -> list[str]:
return []
def split_urls(s: str | None) -> list[Url]:
    """Split a space-separated string of URLs into parsed ``Url`` objects.

    ComicRack stores multiple web links in one space-separated field;
    each chunk after the first is assumed to have lost its leading
    'http' to the split and gets it restored before parsing.
    Returns an empty list for ``None`` input.
    """
    if s is None:
        return []
    # Only attempt a split when there are clearly multiple occurrences
    # of 'http' separated by a space; otherwise the whole string is
    # treated as a single URL.
    if s.count("http") > 1 and s.count(" http") >= 1:
        urls = []
        # Split urls out
        url_strings = split(s, " http")
        # Restore the scheme 'http' consumed by the split, then parse.
        for url_string in url_strings:
            if not url_string.startswith("http"):
                url_string = "http" + url_string
            urls.append(parse_url(url_string))
        return urls
    return [parse_url(s)]
def remove_articles(text: str) -> str:
text = text.casefold()
articles = [

View File

@ -1,7 +1,8 @@
from __future__ import annotations
from PyInstaller.utils.hooks import collect_data_files, collect_entry_point
from PyInstaller.utils.hooks import collect_data_files, collect_entry_point, collect_submodules
datas, hiddenimports = collect_entry_point("comictagger.talker")
hiddenimports += collect_submodules("comictaggerlib")
datas += collect_data_files("comictaggerlib.ui")
datas += collect_data_files("comictaggerlib.graphics")

View File

@ -233,7 +233,7 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
md = ca.read_metadata(self.config.internal__load_data_style)
if md.is_empty:
md = ca.metadata_from_filename(
self.config.Filename_Parsing__complicated_parser,
self.config.Filename_Parsing__filename_parser,
self.config.Filename_Parsing__remove_c2c,
self.config.Filename_Parsing__remove_fcbd,
self.config.Filename_Parsing__remove_publisher,

View File

@ -78,12 +78,13 @@ class CBLTransformer:
self.metadata.description += self.metadata.notes
if self.config.Comic_Book_Lover__copy_weblink_to_comments:
if self.metadata.web_link is not None:
if self.metadata.description is None:
self.metadata.description = ""
for web_link in self.metadata.web_links:
temp_desc = self.metadata.description
if temp_desc is None:
temp_desc = ""
else:
self.metadata.description += "\n\n"
if self.metadata.web_link not in self.metadata.description:
self.metadata.description += self.metadata.web_link
temp_desc += "\n\n"
if web_link.url and web_link.url not in temp_desc:
self.metadata.description = temp_desc + web_link.url
return self.metadata

View File

@ -250,7 +250,7 @@ class CLI:
# now, overlay the parsed filename info
if self.config.Runtime_Options__parse_filename:
f_md = ca.metadata_from_filename(
self.config.Filename_Parsing__complicated_parser,
self.config.Filename_Parsing__filename_parser,
self.config.Filename_Parsing__remove_c2c,
self.config.Filename_Parsing__remove_fcbd,
self.config.Filename_Parsing__remove_publisher,
@ -458,32 +458,27 @@ class CLI:
if self.config.Runtime_Options__verbose:
self.output(text)
# use our overlaid MD struct to search
ii.set_additional_metadata(md)
ii.only_use_additional_meta_data = True
ii.set_output_function(functools.partial(self.output, already_logged=True))
ii.cover_page_index = md.get_cover_page_index_list()[0]
matches = ii.search()
result = ii.search_result
# use our overlaid MD to search
result, matches = ii.identify(ca, md)
found_match = False
choices = False
low_confidence = False
if result == ii.result_no_matches:
if result == IssueIdentifier.result_no_matches:
pass
elif result == ii.result_found_match_but_bad_cover_score:
elif result == IssueIdentifier.result_found_match_but_bad_cover_score:
low_confidence = True
found_match = True
elif result == ii.result_found_match_but_not_first_page:
elif result == IssueIdentifier.result_found_match_but_not_first_page:
found_match = True
elif result == ii.result_multiple_matches_with_bad_image_scores:
elif result == IssueIdentifier.result_multiple_matches_with_bad_image_scores:
low_confidence = True
choices = True
elif result == ii.result_one_good_match:
elif result == IssueIdentifier.result_one_good_match:
found_match = True
elif result == ii.result_multiple_good_matches:
elif result == IssueIdentifier.result_multiple_good_matches:
choices = True
if choices:

View File

@ -14,7 +14,7 @@ from comictaggerlib.ctsettings.commandline import (
)
from comictaggerlib.ctsettings.file import register_file_settings, validate_file_settings
from comictaggerlib.ctsettings.plugin import group_for_plugin, register_plugin_settings, validate_plugin_settings
from comictaggerlib.ctsettings.settngs_namespace import settngs_namespace as ct_ns
from comictaggerlib.ctsettings.settngs_namespace import SettngsNS as ct_ns
from comictaggerlib.ctsettings.types import ComicTaggerPaths
from comictalker import ComicTalker

View File

@ -29,7 +29,7 @@ from comicapi import utils
from comicapi.comicarchive import metadata_styles
from comicapi.genericmetadata import GenericMetadata
from comictaggerlib import ctversion
from comictaggerlib.ctsettings.settngs_namespace import settngs_namespace as ct_ns
from comictaggerlib.ctsettings.settngs_namespace import SettngsNS as ct_ns
from comictaggerlib.ctsettings.types import (
ComicTaggerPaths,
metadata_type,
@ -308,9 +308,12 @@ def validate_commandline_settings(config: settngs.Config[ct_ns], parser: settngs
# take a crack at finding rar exe if it's not in the path
if not utils.which("rar"):
if platform.system() == "Windows":
# look in some likely places for Windows machines
utils.add_to_path(r"C:\Program Files\WinRAR")
utils.add_to_path(r"C:\Program Files (x86)\WinRAR")
letters = ["C"]
letters.extend({f"{d}" for d in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" if os.path.exists(f"{d}:\\")} - {"C"})
for letter in letters:
# look in some likely places for Windows machines
utils.add_to_path(rf"{letters}:\Program Files\WinRAR")
utils.add_to_path(rf"{letters}:\Program Files (x86)\WinRAR")
else:
if platform.system() == "Darwin":
result = subprocess.run(("/usr/libexec/path_helper", "-s"), capture_output=True)

View File

@ -5,7 +5,8 @@ import uuid
import settngs
from comictaggerlib.ctsettings.settngs_namespace import settngs_namespace as ct_ns
from comicapi import utils
from comictaggerlib.ctsettings.settngs_namespace import SettngsNS as ct_ns
from comictaggerlib.defaults import DEFAULT_REPLACEMENTS, Replacement, Replacements
@ -19,6 +20,12 @@ def general(parser: settngs.Manager) -> None:
help="Disable the ComicRack metadata type",
)
parser.add_setting("use_short_metadata_names", default=False, action=argparse.BooleanOptionalAction, cmdline=False)
parser.add_setting(
"--prompt-on-save",
default=True,
action=argparse.BooleanOptionalAction,
help="Prompts the user to confirm saving tags when using the GUI.",
)
def internal(parser: settngs.Manager) -> None:
@ -96,10 +103,12 @@ def dialog(parser: settngs.Manager) -> None:
def filename(parser: settngs.Manager) -> None:
# filename parsing settings
parser.add_setting(
"--complicated-parser",
default=False,
action=argparse.BooleanOptionalAction,
help="Enables the new parser which tries to extract more information from filenames",
"--filename-parser",
default=utils.Parser.ORIGINAL,
metavar=f"{{{','.join(utils.Parser)}}}",
type=utils.Parser,
choices=[p.value for p in utils.Parser],
help="Select the filename parser, defaults to original",
)
parser.add_setting(
"--remove-c2c",
@ -254,12 +263,24 @@ def parse_filter(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]:
return config
def migrate_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]:
original_types = ("cbi", "cr", "comet")
save_style = config[0].internal__save_data_style
if not isinstance(save_style, list):
if isinstance(save_style, int) and save_style in (0, 1, 2):
config[0].internal__save_data_style = [original_types[save_style]]
elif isinstance(save_style, str):
config[0].internal__save_data_style = [save_style]
else:
config[0].internal__save_data_style = ["cbi"]
return config
def validate_file_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]:
config = parse_filter(config)
# TODO Remove this conversion check at a later date
if isinstance(config[0].internal__save_data_style, str):
config[0].internal__save_data_style = [config[0].internal__save_data_style]
config = migrate_settings(config)
if config[0].Filename_Parsing__protofolius_issue_number_scheme:
config[0].Filename_Parsing__allow_issue_start_with_letter = True

View File

@ -10,16 +10,16 @@ import comicapi.comicarchive
import comicapi.utils
import comictaggerlib.ctsettings
from comicapi.comicarchive import Archiver
from comictaggerlib.ctsettings.settngs_namespace import settngs_namespace as ct_ns
from comictaggerlib.ctsettings.settngs_namespace import SettngsNS as ct_ns
from comictalker.comictalker import ComicTalker
logger = logging.getLogger("comictagger")
def group_for_plugin(plugin: Archiver | ComicTalker) -> str:
def group_for_plugin(plugin: Archiver | ComicTalker | type[Archiver]) -> str:
if isinstance(plugin, ComicTalker):
return f"Source {plugin.id}"
if isinstance(plugin, Archiver):
if isinstance(plugin, Archiver) or plugin == Archiver:
return "Archive"
raise NotImplementedError(f"Invalid plugin received: {plugin=}")

View File

@ -1,14 +1,17 @@
from __future__ import annotations
import typing
import settngs
import comicapi.genericmetadata
import comicapi.utils
import comictaggerlib.ctsettings.types
import comictaggerlib.defaults
import comictaggerlib.resulttypes
class settngs_namespace(settngs.TypedNS):
class SettngsNS(settngs.TypedNS):
Commands__version: bool
Commands__command: comictaggerlib.resulttypes.Action
Commands__copy: str
@ -59,7 +62,7 @@ class settngs_namespace(settngs.TypedNS):
Issue_Identifier__exact_series_matches_first: bool
Issue_Identifier__always_use_publisher_filter: bool
Filename_Parsing__complicated_parser: bool
Filename_Parsing__filename_parser: comicapi.utils.Parser
Filename_Parsing__remove_c2c: bool
Filename_Parsing__remove_fcbd: bool
Filename_Parsing__remove_publisher: bool
@ -98,6 +101,7 @@ class settngs_namespace(settngs.TypedNS):
General__check_for_new_version: bool
General__disable_cr: bool
General__use_short_metadata_names: bool
General__prompt_on_save: bool
Dialog_Flags__show_disclaimer: bool
Dialog_Flags__dont_notify_about_this_version: str
@ -108,3 +112,150 @@ class settngs_namespace(settngs.TypedNS):
Source_comicvine__comicvine_key: str
Source_comicvine__comicvine_url: str
Source_comicvine__cv_use_series_start_as_volume: bool
class Commands(typing.TypedDict):
version: bool
command: comictaggerlib.resulttypes.Action
copy: str
class Runtime_Options(typing.TypedDict):
config: comictaggerlib.ctsettings.types.ComicTaggerPaths
verbose: int
abort_on_conflict: bool
delete_original: bool
parse_filename: bool
issue_id: str
online: bool
metadata: comicapi.genericmetadata.GenericMetadata
interactive: bool
abort_on_low_confidence: bool
summary: bool
raw: bool
recursive: bool
dryrun: bool
darkmode: bool
glob: bool
quiet: bool
json: bool
type: list[str]
overwrite: bool
no_gui: bool
files: list[str]
class internal(typing.TypedDict):
install_id: str
save_data_style: list[str]
load_data_style: str
last_opened_folder: str
window_width: int
window_height: int
window_x: int
window_y: int
form_width: int
list_width: int
sort_column: int
sort_direction: int
class Issue_Identifier(typing.TypedDict):
series_match_identify_thresh: int
border_crop_percent: int
publisher_filter: list[str]
series_match_search_thresh: int
clear_metadata: bool
auto_imprint: bool
sort_series_by_year: bool
exact_series_matches_first: bool
always_use_publisher_filter: bool
class Filename_Parsing(typing.TypedDict):
filename_parser: comicapi.utils.Parser
remove_c2c: bool
remove_fcbd: bool
remove_publisher: bool
split_words: bool
protofolius_issue_number_scheme: bool
allow_issue_start_with_letter: bool
class Sources(typing.TypedDict):
source: str
remove_html_tables: bool
class Comic_Book_Lover(typing.TypedDict):
assume_lone_credit_is_primary: bool
copy_characters_to_tags: bool
copy_teams_to_tags: bool
copy_locations_to_tags: bool
copy_storyarcs_to_tags: bool
copy_notes_to_comments: bool
copy_weblink_to_comments: bool
apply_transform_on_import: bool
apply_transform_on_bulk_operation: bool
class File_Rename(typing.TypedDict):
template: str
issue_number_padding: int
use_smart_string_cleanup: bool
auto_extension: bool
dir: str
move_to_dir: bool
strict: bool
replacements: comictaggerlib.defaults.Replacements
class Auto_Tag(typing.TypedDict):
save_on_low_confidence: bool
dont_use_year_when_identifying: bool
assume_issue_one: bool
ignore_leading_numbers_in_filename: bool
remove_archive_after_successful_match: bool
class General(typing.TypedDict):
check_for_new_version: bool
disable_cr: bool
use_short_metadata_names: bool
prompt_on_save: bool
class Dialog_Flags(typing.TypedDict):
show_disclaimer: bool
dont_notify_about_this_version: str
ask_about_usage_stats: bool
class Archive(typing.TypedDict):
rar: str
class Source_comicvine(typing.TypedDict):
comicvine_key: str
comicvine_url: str
cv_use_series_start_as_volume: bool
SettngsDict = typing.TypedDict(
"SettngsDict",
{
"Commands": Commands,
"Runtime Options": Runtime_Options,
"internal": internal,
"Issue Identifier": Issue_Identifier,
"Filename Parsing": Filename_Parsing,
"Sources": Sources,
"Comic Book Lover": Comic_Book_Lover,
"File Rename": File_Rename,
"Auto-Tag": Auto_Tag,
"General": General,
"Dialog Flags": Dialog_Flags,
"Archive": Archive,
"Source comicvine": Source_comicvine,
},
)

View File

@ -69,7 +69,7 @@ class MetadataFormatter(string.Formatter):
if conversion == "t":
return str(value).title()
if conversion == "j":
return ", ".join(list(value))
return ", ".join(list(str(v) for v in value))
return cast(str, super().convert_field(value, conversion))
def handle_replacements(self, string: str, replacements: list[Replacement]) -> str:
@ -218,6 +218,10 @@ class FileRenamer:
fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform, replacements=self.replacements)
md_dict = vars(md)
md_dict["web_link"] = ""
if md.web_links:
md_dict["web_link"] = md.web_links[0]
md_dict["issue"] = IssueString(md.issue).as_string(pad=self.issue_zero_padding)
for role in ["writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor"]:
md_dict[role] = md.get_primary_credit(role)

View File

@ -35,11 +35,6 @@ from comictaggerlib.ui.qtutils import center_window_on_parent, reduce_widget_fon
logger = logging.getLogger(__name__)
class FileTableWidgetItem(QtWidgets.QTableWidgetItem):
def __lt__(self, other: object) -> bool:
return self.data(QtCore.Qt.ItemDataRole.UserRole) < other.data(QtCore.Qt.ItemDataRole.UserRole) # type: ignore
class FileSelectionList(QtWidgets.QWidget):
selectionChanged = QtCore.pyqtSignal(QtCore.QVariant)
listCleared = QtCore.pyqtSignal()
@ -63,7 +58,7 @@ class FileSelectionList(QtWidgets.QWidget):
reduce_widget_font_size(self.twList)
self.twList.setColumnCount(6)
self.twList.horizontalHeader().setMinimumSectionSize(50)
self.twList.currentItemChanged.connect(self.current_item_changed_cb)
self.currentItem = None
@ -278,8 +273,8 @@ class FileSelectionList(QtWidgets.QWidget):
filename_item = QtWidgets.QTableWidgetItem()
folder_item = QtWidgets.QTableWidgetItem()
md_item = FileTableWidgetItem()
readonly_item = FileTableWidgetItem()
md_item = QtWidgets.QTableWidgetItem()
readonly_item = QtWidgets.QTableWidgetItem()
type_item = QtWidgets.QTableWidgetItem()
filename_item.setFlags(QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEnabled)
@ -333,9 +328,12 @@ class FileSelectionList(QtWidgets.QWidget):
if not ca.is_writable():
readonly_item.setCheckState(QtCore.Qt.CheckState.Checked)
readonly_item.setData(QtCore.Qt.ItemDataRole.UserRole, True)
readonly_item.setText(" ")
else:
readonly_item.setData(QtCore.Qt.ItemDataRole.UserRole, False)
readonly_item.setCheckState(QtCore.Qt.CheckState.Unchecked)
# This is a nbsp it sorts after a space ' '
readonly_item.setText("\xa0")
def get_selected_archive_list(self) -> list[ComicArchive]:
ca_list: list[ComicArchive] = []

View File

@ -34,13 +34,19 @@ logger = logging.getLogger(__name__)
class ImageHasher:
def __init__(self, path: str | None = None, data: bytes = b"", width: int = 8, height: int = 8) -> None:
def __init__(
self, path: str | None = None, image: Image | None = None, data: bytes = b"", width: int = 8, height: int = 8
) -> None:
self.width = width
self.height = height
if path is None and not data:
if path is None and not data and not image:
raise OSError
if image is not None:
self.image = image
return
try:
if path is not None:
self.image = Image.open(path)

View File

@ -24,8 +24,8 @@ from typing import Any, Callable
from typing_extensions import NotRequired, TypedDict
from comicapi import utils
from comicapi.comicarchive import ComicArchive, metadata_styles
from comicapi.genericmetadata import GenericMetadata
from comicapi.comicarchive import ComicArchive
from comicapi.genericmetadata import ComicSeries, GenericMetadata
from comicapi.issuestring import IssueString
from comictaggerlib.ctsettings import ct_ns
from comictaggerlib.imagefetcher import ImageFetcher, ImageFetcherException
@ -44,17 +44,23 @@ except ImportError:
class SearchKeys(TypedDict):
series: str | None
issue_number: str | None
series: str
issue_number: str
alternate_number: str | None
month: int | None
year: int | None
issue_count: int | None
alternate_count: int | None
publisher: str | None
imprint: str | None
class Score(TypedDict):
score: NotRequired[int]
url: str
hash: int
remote_hash: int
local_hash_name: str
local_hash: int
class IssueIdentifierNetworkError(Exception): ...
@ -71,10 +77,17 @@ class IssueIdentifier:
result_one_good_match = 4
result_multiple_good_matches = 5
def __init__(self, comic_archive: ComicArchive, config: ct_ns, talker: ComicTalker) -> None:
def __init__(
self,
comic_archive: ComicArchive,
config: ct_ns,
talker: ComicTalker,
metadata: GenericMetadata = GenericMetadata(),
) -> None:
self.config = config
self.talker = talker
self.comic_archive: ComicArchive = comic_archive
self.md = metadata
self.image_hasher = 1
self.only_use_additional_meta_data = False
@ -139,35 +152,21 @@ class IssueIdentifier:
return ImageHasher(data=image_data).average_hash()
def get_aspect_ratio(self, image_data: bytes) -> float:
try:
im = Image.open(io.BytesIO(image_data))
w, h = im.size
return float(h) / float(w)
except Exception:
return 1.5
def crop_cover(self, image_data: bytes) -> bytes:
im = Image.open(io.BytesIO(image_data))
def _crop_double_page(self, im: Image.Image) -> Image.Image | None:
w, h = im.size
try:
cropped_im = im.crop((int(w / 2), 0, w, h))
except Exception:
logger.exception("cropCover() error")
return b""
return None
output = io.BytesIO()
cropped_im.convert("RGB").save(output, format="PNG")
cropped_image_data = output.getvalue()
output.close()
return cropped_image_data
return cropped_im
# Adapted from https://stackoverflow.com/a/10616717/20629671
def crop_border(self, image_data: bytes, ratio: int) -> bytes | None:
im = Image.open(io.BytesIO(image_data))
def _crop_border(self, im: Image.Image, ratio: int) -> Image.Image | None:
assert Image
assert ImageChops
# RGBA doesn't work????
tmp = im.convert("RGB")
@ -199,11 +198,7 @@ class IssueIdentifier:
# If there is a difference return the image otherwise return None
if width_percent > ratio or height_percent > ratio:
output = io.BytesIO()
im.crop(bbox).save(output, format="PNG")
cropped_image_data = output.getvalue()
output.close()
return cropped_image_data
return im.crop(bbox)
return None
def set_progress_callback(self, cb_func: Callable[[int, int], None]) -> None:
@ -212,57 +207,6 @@ class IssueIdentifier:
def set_cover_url_callback(self, cb_func: Callable[[bytes], None]) -> None:
self.cover_url_callback = cb_func
def get_search_keys(self) -> SearchKeys:
ca = self.comic_archive
search_keys: SearchKeys
if self.only_use_additional_meta_data:
search_keys = SearchKeys(
series=self.additional_metadata.series,
issue_number=self.additional_metadata.issue,
year=self.additional_metadata.year,
month=self.additional_metadata.month,
issue_count=self.additional_metadata.issue_count,
)
return search_keys
# see if the archive has any useful meta data for searching with
try:
for style in metadata_styles:
internal_metadata = ca.read_metadata(style)
if not internal_metadata.is_empty:
break
except Exception as e:
internal_metadata = GenericMetadata()
logger.error("Failed to load metadata for %s: %s", ca.path, e)
# try to get some metadata from filename
md_from_filename = ca.metadata_from_filename(
self.config.Filename_Parsing__complicated_parser,
self.config.Filename_Parsing__remove_c2c,
self.config.Filename_Parsing__remove_fcbd,
self.config.Filename_Parsing__remove_publisher,
)
working_md = md_from_filename.copy()
working_md.overlay(internal_metadata)
working_md.overlay(self.additional_metadata)
# preference order:
# 1. Additional metadata
# 1. Internal metadata
# 1. Filename metadata
search_keys = SearchKeys(
series=working_md.series,
issue_number=working_md.issue,
year=working_md.year,
month=working_md.month,
issue_count=working_md.issue_count,
)
return search_keys
def log_msg(self, msg: Any) -> None:
msg = str(msg)
for handler in logging.getLogger().handlers:
@ -291,70 +235,62 @@ class IssueIdentifier:
# default output is stdout
self.output_function(*args, **kwargs)
def get_issue_cover_match_score(
def _get_remote_hashes(self, urls: list[str]) -> list[tuple[str, int]]:
remote_hashes: list[tuple[str, int]] = []
for url in urls:
try:
alt_url_image_data = ImageFetcher(self.config.Runtime_Options__config.user_cache_dir).fetch(
url, blocking=True
)
except ImageFetcherException as e:
self.log_msg(f"Network issue while fetching alt. cover image from {self.talker.name}. Aborting...")
raise IssueIdentifierNetworkError from e
self._user_canceled(self.cover_url_callback, alt_url_image_data)
remote_hashes.append((url, self.calculate_hash(alt_url_image_data)))
if self.cancel:
raise IssueIdentifierCancelled
return remote_hashes
def _get_issue_cover_match_score(
self,
primary_img_url: str,
alt_urls: list[str],
local_cover_hash_list: list[int],
use_remote_alternates: bool = False,
local_hashes: list[tuple[str, int]],
use_alt_urls: bool = False,
) -> Score:
# local_cover_hash_list is a list of pre-calculated hashes.
# use_remote_alternates - indicates to use alternate covers from CV
# local_hashes is a list of pre-calculated hashes.
# use_alt_urls - indicates to use alternate covers from CV
# If there is no URL return 100
if not primary_img_url:
return Score(score=100, url="", hash=0)
return Score(score=100, url="", remote_hash=0)
try:
url_image_data = ImageFetcher(self.config.Runtime_Options__config.user_cache_dir).fetch(
primary_img_url, blocking=True
)
except ImageFetcherException as e:
self.log_msg(f"Network issue while fetching cover image from {self.talker.name}. Aborting...")
raise IssueIdentifierNetworkError from e
self._user_canceled()
if self.cancel:
raise IssueIdentifierCancelled
urls = [primary_img_url]
if use_alt_urls:
urls.extend(alt_urls)
self.log_msg(f"[{len(alt_urls)} alt. covers]")
# alert the GUI, if needed
if self.cover_url_callback is not None:
self.cover_url_callback(url_image_data)
remote_cover_list = [Score(url=primary_img_url, hash=self.calculate_hash(url_image_data))]
if self.cancel:
raise IssueIdentifierCancelled
if use_remote_alternates:
for alt_url in alt_urls:
try:
alt_url_image_data = ImageFetcher(self.config.Runtime_Options__config.user_cache_dir).fetch(
alt_url, blocking=True
)
except ImageFetcherException as e:
self.log_msg(f"Network issue while fetching alt. cover image from {self.talker.name}. Aborting...")
raise IssueIdentifierNetworkError from e
if self.cancel:
raise IssueIdentifierCancelled
# alert the GUI, if needed
if self.cover_url_callback is not None:
self.cover_url_callback(alt_url_image_data)
remote_cover_list.append(Score(url=alt_url, hash=self.calculate_hash(alt_url_image_data)))
if self.cancel:
raise IssueIdentifierCancelled
self.log_msg(f"[{len(remote_cover_list) - 1} alt. covers]")
remote_hashes = self._get_remote_hashes(urls)
score_list = []
done = False
for local_cover_hash in local_cover_hash_list:
for remote_cover_item in remote_cover_list:
score = ImageHasher.hamming_distance(local_cover_hash, remote_cover_item["hash"])
score_list.append(Score(score=score, url=remote_cover_item["url"], hash=remote_cover_item["hash"]))
for local_hash in local_hashes:
for remote_hash in remote_hashes:
score = ImageHasher.hamming_distance(local_hash[1], remote_hash[1])
score_list.append(
Score(
score=score,
url=remote_hash[0],
remote_hash=remote_hash[1],
local_hash_name=local_hash[0],
local_hash=local_hash[1],
)
)
self.log_msg(f" - {score:03}")
@ -369,167 +305,181 @@ class IssueIdentifier:
return best_score_item
def search(self) -> list[IssueResult]:
ca = self.comic_archive
self.match_list = []
self.cancel = False
self.search_result = self.result_no_matches
def _check_requirements(self, ca: ComicArchive) -> bool:
if not pil_available:
self.log_msg("Python Imaging Library (PIL) is not available and is needed for issue identification.")
return self.match_list
return False
if not ca.seems_to_be_a_comic_archive():
self.log_msg(f"Sorry, but {ca.path} is not a comic archive!")
return self.match_list
return False
return True
cover_image_data = ca.get_page(self.cover_page_index)
cover_hash = self.calculate_hash(cover_image_data)
def _process_cover(self, name: str, image_data: bytes) -> list[tuple[str, Image.Image]]:
assert Image
cover_image = Image.open(io.BytesIO(image_data))
images = [(name, cover_image)]
# check the aspect ratio
# if it's wider than it is high, it's probably a two page spread
# if it's wider than it is high, it's probably a two page spread (back_cover, front_cover)
# if so, crop it and calculate a second hash
narrow_cover_hash = None
aspect_ratio = self.get_aspect_ratio(cover_image_data)
aspect_ratio = float(cover_image.height) / float(cover_image.width)
if aspect_ratio < 1.0:
right_side_image_data = self.crop_cover(cover_image_data)
if right_side_image_data is not None:
narrow_cover_hash = self.calculate_hash(right_side_image_data)
im = self._crop_double_page(cover_image)
if im is not None:
images.append(("double page", im))
keys = self.get_search_keys()
# normalize the issue number, None will return as ""
keys["issue_number"] = IssueString(keys["issue_number"]).as_string()
# Check and remove black borders. Helps in identifying comics with an excessive black border like https://comicvine.gamespot.com/marvel-graphic-novel-1-the-death-of-captain-marvel/4000-21782/
cropped = self._crop_border(cover_image, self.config.Issue_Identifier__border_crop_percent)
if cropped is not None:
images.append(("black border cropped", cropped))
# we need, at minimum, a series and issue number
if not (keys["series"] and keys["issue_number"]):
self.log_msg("Not enough info for a search!")
return []
return images
def _get_images(self, ca: ComicArchive, md: GenericMetadata) -> list[tuple[str, Image.Image]]:
covers: list[tuple[str, Image.Image]] = []
for cover_index in md.get_cover_page_index_list():
image_data = ca.get_page(cover_index)
covers.extend(self._process_cover(f"{cover_index}", image_data))
return covers
def _get_extra_images(self, ca: ComicArchive, md: GenericMetadata) -> list[tuple[str, Image.Image]]:
assert md
covers: list[tuple[str, Image.Image]] = []
for cover_index in range(1, min(3, ca.get_number_of_pages())):
image_data = ca.get_page(cover_index)
covers.extend(self._process_cover(f"{cover_index}", image_data))
return covers
def _get_search_keys(self, md: GenericMetadata) -> Any:
search_keys = SearchKeys(
series=md.series,
issue_number=IssueString(md.issue).as_string(),
alternate_number=IssueString(md.alternate_number).as_string(),
month=md.month,
year=md.year,
issue_count=md.issue_count,
alternate_count=md.alternate_count,
publisher=md.publisher,
imprint=md.imprint,
)
return search_keys
def _get_search_terms(
self, ca: ComicArchive, md: GenericMetadata
) -> tuple[SearchKeys, list[tuple[str, Image.Image]], list[tuple[str, Image.Image]]]:
return self._get_search_keys(md), self._get_images(ca, md), self._get_extra_images(ca, md)
def _user_canceled(self, callback: Callable[..., Any] | None = None, *args: Any) -> Any:
if self.cancel:
raise IssueIdentifierCancelled
if callback is not None:
return callback(*args)
def _print_terms(self, keys: SearchKeys, images: list[tuple[str, Image.Image]]) -> None:
assert keys["series"]
assert keys["issue_number"]
self.log_msg(f"Using {self.talker.name} to search for:")
self.log_msg("\tSeries: " + keys["series"])
self.log_msg("\tIssue: " + keys["issue_number"])
if keys["issue_count"] is not None:
self.log_msg("\tCount: " + str(keys["issue_count"]))
if keys["year"] is not None:
self.log_msg("\tYear: " + str(keys["year"]))
# if keys["alternate_number"] is not None:
# self.log_msg("\tAlternate Issue: " + str(keys["alternate_number"]))
if keys["month"] is not None:
self.log_msg("\tMonth: " + str(keys["month"]))
if keys["year"] is not None:
self.log_msg("\tYear: " + str(keys["year"]))
if keys["issue_count"] is not None:
self.log_msg("\tCount: " + str(keys["issue_count"]))
# if keys["alternate_count"] is not None:
# self.log_msg("\tAlternate Count: " + str(keys["alternate_count"]))
# if keys["publisher"] is not None:
# self.log_msg("\tPublisher: " + str(keys["publisher"]))
# if keys["imprint"] is not None:
# self.log_msg("\tImprint: " + str(keys["imprint"]))
for name, _ in images:
self.log_msg("Cover: " + name)
self.log_msg(f"Searching for {keys['series']} #{keys['issue_number']} ...")
try:
ct_search_results = self.talker.search_for_series(keys["series"])
except TalkerError as e:
self.log_msg(f"Error searching for series.\n{e}")
return []
if self.cancel:
return []
def _filter_series(self, terms: SearchKeys, search_results: list[ComicSeries]) -> list[ComicSeries]:
assert terms["series"]
if ct_search_results is None:
return []
series_second_round_list = []
for item in ct_search_results:
filtered_results = []
for item in search_results:
length_approved = False
publisher_approved = True
date_approved = True
# remove any series that starts after the issue year
if keys["year"] is not None and item.start_year is not None:
if keys["year"] < item.start_year:
if terms["year"] is not None and item.start_year is not None:
if terms["year"] < item.start_year:
date_approved = False
for name in [item.name, *item.aliases]:
if utils.titles_match(keys["series"], name, self.series_match_thresh):
if utils.titles_match(terms["series"], name, self.series_match_thresh):
length_approved = True
break
# remove any series from publishers on the filter
if item.publisher is not None:
publisher = item.publisher
if publisher is not None and publisher.casefold() in self.publisher_filter:
if item.publisher is not None and item.publisher.casefold() in self.publisher_filter:
publisher_approved = False
if length_approved and publisher_approved and date_approved:
series_second_round_list.append(item)
self.log_msg("Searching in " + str(len(series_second_round_list)) + " series")
if self.progress_callback is not None:
self.progress_callback(0, len(series_second_round_list))
# now sort the list by name length
series_second_round_list.sort(key=lambda x: len(x.name), reverse=False)
series_by_id = {series.id: series for series in series_second_round_list}
issue_list = None
try:
if len(series_by_id) > 0:
issue_list = self.talker.fetch_issues_by_series_issue_num_and_year(
list(series_by_id.keys()), keys["issue_number"], keys["year"]
filtered_results.append(item)
else:
logger.debug(
"Filtered out series: '%s' length approved: '%s', publisher approved: '%s', date approved: '%s'",
item.name,
length_approved,
publisher_approved,
date_approved,
)
except TalkerError as e:
self.log_msg(f"Issue with while searching for series details. Aborting...\n{e}")
return []
return filtered_results
if issue_list is None:
return []
def _calculate_hashes(self, images: list[tuple[str, Image.Image]]) -> list[tuple[str, int]]:
hashes = []
for name, image in images:
hashes.append((name, ImageHasher(image=image).average_hash()))
return hashes
shortlist = []
# now re-associate the issues and series
# is this really needed?
for issue in issue_list:
if issue.series_id in series_by_id:
shortlist.append((series_by_id[issue.series_id], issue))
if keys["year"] is None:
self.log_msg(f"Found {len(shortlist)} series that have an issue #{keys['issue_number']}")
else:
self.log_msg(
f"Found {len(shortlist)} series that have an issue #{keys['issue_number']} from {keys['year']}"
)
# now we have a shortlist of series with the desired issue number
# Do first round of cover matching
counter = len(shortlist)
for series, issue in shortlist:
if self.progress_callback is not None:
self.progress_callback(counter, len(shortlist) * 3)
counter += 1
def _match_covers(
self,
terms: SearchKeys,
images: list[tuple[str, Image.Image]],
issues: list[tuple[ComicSeries, GenericMetadata]],
use_alternates: bool,
) -> list[IssueResult]:
assert terms["issue_number"]
match_results: list[IssueResult] = []
hashes = self._calculate_hashes(images)
counter = 0
alternate = ""
if use_alternates:
alternate = " Alternate"
for series, issue in issues:
self._user_canceled(self.progress_callback, counter, len(issues))
counter += 1
self.log_msg(
f"Examining covers for ID: {series.id} {series.name} ({series.start_year}):",
f"Examining{alternate} covers for Series ID: {series.id} {series.name} ({series.start_year}):",
)
# Now check the cover match against the primary image
hash_list = [cover_hash]
if narrow_cover_hash is not None:
hash_list.append(narrow_cover_hash)
cropped_border = self.crop_border(cover_image_data, self.config.Issue_Identifier__border_crop_percent)
if cropped_border is not None:
hash_list.append(self.calculate_hash(cropped_border))
logger.info("Adding cropped cover to the hashlist")
try:
image_url = issue._cover_image or ""
alt_urls = issue._alternate_images
score_item = self.get_issue_cover_match_score(
image_url, alt_urls, hash_list, use_remote_alternates=False
)
score_item = self._get_issue_cover_match_score(image_url, alt_urls, hashes, use_alt_urls=use_alternates)
except Exception:
logger.exception("Scoring series failed")
self.match_list = []
return self.match_list
logger.exception(f"Scoring series{alternate} covers failed")
return []
match = IssueResult(
series=f"{series.name} ({series.start_year})",
distance=score_item["score"],
issue_number=keys["issue_number"],
cv_issue_count=series.count_of_issues,
url_image_hash=score_item["hash"],
issue_number=terms["issue_number"],
issue_count=series.count_of_issues,
url_image_hash=score_item["remote_hash"],
issue_title=issue.title or "",
issue_id=issue.issue_id or "",
series_id=series.id,
@ -543,142 +493,188 @@ class IssueIdentifier:
if series.publisher is not None:
match.publisher = series.publisher
self.match_list.append(match)
match_results.append(match)
self.log_msg(f"best score {match.distance:03}")
self.log_msg("")
return match_results
if len(self.match_list) == 0:
def _print_match(self, item: IssueResult) -> None:
self.log_msg(
"-----> {} #{} {} ({}/{}) -- score: {}".format(
item.series,
item.issue_number,
item.issue_title,
item.month,
item.year,
item.distance,
)
)
def _search_for_issues(self, terms: SearchKeys) -> list[tuple[ComicSeries, GenericMetadata]]:
try:
search_results = self.talker.search_for_series(
terms["series"],
callback=lambda x, y: self._user_canceled(self.progress_callback, x, y),
series_match_thresh=self.config.Issue_Identifier__series_match_search_thresh,
)
except TalkerError as e:
self.log_msg(f"Error searching for series.\n{e}")
return []
# except IssueIdentifierCancelled:
# return []
if not search_results:
return []
filtered_series = self._filter_series(terms, search_results)
if not filtered_series:
return []
self.log_msg(f"Searching in {len(filtered_series)} series")
self._user_canceled(self.progress_callback, 0, len(filtered_series))
series_by_id = {series.id: series for series in filtered_series}
try:
talker_result = self.talker.fetch_issues_by_series_issue_num_and_year(
list(series_by_id.keys()), terms["issue_number"], terms["year"]
)
except TalkerError as e:
self.log_msg(f"Issue with while searching for series details. Aborting...\n{e}")
return []
# except IssueIdentifierCancelled:
# return []
if not talker_result:
return []
self._user_canceled(self.progress_callback, 0, 0)
issues: list[tuple[ComicSeries, GenericMetadata]] = []
# now re-associate the issues and series
for issue in talker_result:
if issue.series_id in series_by_id:
issues.append((series_by_id[issue.series_id], issue))
else:
logger.warning("Talker '%s' is returning arbitrary series when searching by id", self.talker.id)
return issues
def _cover_matching(
self,
terms: SearchKeys,
images: list[tuple[str, Image.Image]],
extra_images: list[tuple[str, Image.Image]],
issues: list[tuple[ComicSeries, GenericMetadata]],
) -> list[IssueResult]:
cover_matching_1 = self._match_covers(terms, images, issues, use_alternates=False)
if len(cover_matching_1) == 0:
self.log_msg(":-( no matches!")
self.search_result = self.result_no_matches
return self.match_list
return cover_matching_1
# sort list by image match scores
self.match_list.sort(key=attrgetter("distance"))
cover_matching_1.sort(key=attrgetter("distance"))
lst = []
for i in self.match_list:
for i in cover_matching_1:
lst.append(i.distance)
self.log_msg(f"Compared to covers in {len(self.match_list)} issue(s): {lst}")
self.log_msg(f"Compared to covers in {len(cover_matching_1)} issue(s): {lst}")
def print_match(item: IssueResult) -> None:
self.log_msg(
"-----> {} #{} {} ({}/{}) -- score: {}".format(
item.series,
item.issue_number,
item.issue_title,
item.month,
item.year,
item.distance,
)
)
best_score: int = self.match_list[0].distance
if best_score >= self.min_score_thresh:
cover_matching_2 = []
final_cover_matching = cover_matching_1
if cover_matching_1[0].distance >= self.min_score_thresh:
# we have 1 or more low-confidence matches (all bad cover scores)
# look at a few more pages in the archive, and also alternate covers online
self.log_msg("Very weak scores for the cover. Analyzing alternate pages and covers...")
hash_list = [cover_hash]
if narrow_cover_hash is not None:
hash_list.append(narrow_cover_hash)
for page_index in range(1, min(3, ca.get_number_of_pages())):
image_data = ca.get_page(page_index)
page_hash = self.calculate_hash(image_data)
hash_list.append(page_hash)
second_match_list = []
counter = 2 * len(self.match_list)
for m in self.match_list:
if self.progress_callback is not None:
self.progress_callback(counter, len(self.match_list) * 3)
counter += 1
self.log_msg(f"Examining alternate covers for ID: {m.series_id} {m.series}:")
try:
score_item = self.get_issue_cover_match_score(
m.image_url,
m.alt_image_urls,
hash_list,
use_remote_alternates=True,
)
except Exception:
logger.exception("failed examining alt covers")
self.match_list = []
return self.match_list
self.log_msg(f"--->{score_item['score']}")
self.log_msg("")
temp = self._match_covers(terms, images + extra_images, issues, use_alternates=True)
for score in temp:
if score.distance < self.min_alternate_score_thresh:
cover_matching_2.append(score)
if score_item["score"] < self.min_alternate_score_thresh:
second_match_list.append(m)
m.distance = score_item["score"]
if len(cover_matching_2) > 0:
# We did good, found something!
self.log_msg("Success in secondary/alternate cover matching!")
if len(second_match_list) == 0:
if len(self.match_list) == 1:
self.log_msg("No matching pages in the issue.")
self.log_msg("--------------------------------------------------------------------------")
print_match(self.match_list[0])
self.log_msg("--------------------------------------------------------------------------")
self.search_result = self.result_found_match_but_bad_cover_score
else:
self.log_msg("--------------------------------------------------------------------------")
self.log_msg("Multiple bad cover matches! Need to use other info...")
self.log_msg("--------------------------------------------------------------------------")
self.search_result = self.result_multiple_matches_with_bad_image_scores
return self.match_list
# We did good, found something!
self.log_msg("Success in secondary/alternate cover matching!")
self.match_list = second_match_list
# sort new list by image match scores
self.match_list.sort(key=attrgetter("distance"))
best_score = self.match_list[0].distance
self.log_msg("[Second round cover matching: best score = {best_score}]")
# now drop down into the rest of the processing
if self.progress_callback is not None:
self.progress_callback(99, 100)
final_cover_matching = cover_matching_2
# sort new list by image match scores
final_cover_matching.sort(key=attrgetter("distance"))
self.log_msg("[Second round cover matching: best score = {best_score}]")
# now drop down into the rest of the processing
best_score = final_cover_matching[0].distance
# now pare down list, remove any item more than specified distant from the top scores
for match_item in reversed(self.match_list):
if match_item.distance > best_score + self.min_score_distance:
self.match_list.remove(match_item)
for match_item in reversed(final_cover_matching):
if match_item.distance > (best_score + self.min_score_distance):
final_cover_matching.remove(match_item)
return final_cover_matching
def identify(self, ca: ComicArchive, md: GenericMetadata) -> tuple[int, list[IssueResult]]:
if not self._check_requirements(ca):
return self.result_no_matches, []
terms, images, extra_images = self._get_search_terms(ca, md)
# we need, at minimum, a series and issue number
if not (terms["series"] and terms["issue_number"]):
self.log_msg("Not enough info for a search!")
return self.result_no_matches, []
self._print_terms(terms, images)
issues = self._search_for_issues(terms)
self.log_msg(f"Found {len(issues)} series that have an issue #{terms['issue_number']}")
final_cover_matching = self._cover_matching(terms, images, extra_images, issues)
# One more test for the case choosing limited series first issue vs a trade with the same cover:
# if we have a given issue count > 1 and the series from CV has count==1, remove it from match list
if len(self.match_list) >= 2 and keys["issue_count"] is not None and keys["issue_count"] != 1:
new_list = []
for match in self.match_list:
if match.cv_issue_count != 1:
new_list.append(match)
else:
if len(final_cover_matching) > 1 and terms["issue_count"] is not None and terms["issue_count"] != 1:
for match in final_cover_matching.copy():
if match.issue_count == 1:
self.log_msg(
f"Removing series {match.series} [{match.series_id}] from consideration (only 1 issue)"
)
final_cover_matching.remove(match)
if len(new_list) > 0:
self.match_list = new_list
if len(self.match_list) == 1:
self.log_msg("--------------------------------------------------------------------------")
print_match(self.match_list[0])
self.log_msg("--------------------------------------------------------------------------")
self.search_result = self.result_one_good_match
elif len(self.match_list) == 0:
self.log_msg("--------------------------------------------------------------------------")
self.log_msg("No matches found :(")
self.log_msg("--------------------------------------------------------------------------")
self.search_result = self.result_no_matches
best_score = final_cover_matching[0].distance
if best_score >= self.min_score_thresh:
if len(final_cover_matching) == 1:
self.log_msg("No matching pages in the issue.")
self.log_msg("--------------------------------------------------------------------------")
self._print_match(final_cover_matching[0])
self.log_msg("--------------------------------------------------------------------------")
search_result = self.result_found_match_but_bad_cover_score
else:
self.log_msg("--------------------------------------------------------------------------")
self.log_msg("Multiple bad cover matches! Need to use other info...")
self.log_msg("--------------------------------------------------------------------------")
search_result = self.result_multiple_matches_with_bad_image_scores
else:
# we've got multiple good matches:
self.log_msg("More than one likely candidate.")
self.search_result = self.result_multiple_good_matches
self.log_msg("--------------------------------------------------------------------------")
for match_item in self.match_list:
print_match(match_item)
self.log_msg("--------------------------------------------------------------------------")
if len(final_cover_matching) == 1:
self.log_msg("--------------------------------------------------------------------------")
self._print_match(final_cover_matching[0])
self.log_msg("--------------------------------------------------------------------------")
search_result = self.result_one_good_match
return self.match_list
elif len(self.match_list) == 0:
self.log_msg("--------------------------------------------------------------------------")
self.log_msg("No matches found :(")
self.log_msg("--------------------------------------------------------------------------")
search_result = self.result_no_matches
else:
# we've got multiple good matches:
self.log_msg("More than one likely candidate.")
search_result = self.result_multiple_good_matches
self.log_msg("--------------------------------------------------------------------------")
for match_item in final_cover_matching:
self._print_match(match_item)
self.log_msg("--------------------------------------------------------------------------")
return search_result, final_cover_matching

View File

@ -84,7 +84,7 @@ class RenameWindow(QtWidgets.QDialog):
md = ca.read_metadata(self.data_style)
if md.is_empty:
md = ca.metadata_from_filename(
self.config[0].Filename_Parsing__complicated_parser,
self.config[0].Filename_Parsing__filename_parser,
self.config[0].Filename_Parsing__remove_c2c,
self.config[0].Filename_Parsing__remove_fcbd,
self.config[0].Filename_Parsing__remove_publisher,

View File

@ -2,58 +2,18 @@ from __future__ import annotations
import dataclasses
import pathlib
import sys
from enum import Enum, auto
from typing import Any
from enum import auto
from comicapi import utils
from comicapi.genericmetadata import GenericMetadata
if sys.version_info < (3, 11):
class StrEnum(str, Enum):
"""
Enum where members are also (and must be) strings
"""
def __new__(cls, *values: Any) -> Any:
"values must already be of type `str`"
if len(values) > 3:
raise TypeError(f"too many arguments for str(): {values!r}")
if len(values) == 1:
# it must be a string
if not isinstance(values[0], str):
raise TypeError(f"{values[0]!r} is not a string")
if len(values) >= 2:
# check that encoding argument is a string
if not isinstance(values[1], str):
raise TypeError(f"encoding must be a string, not {values[1]!r}")
if len(values) == 3:
# check that errors argument is a string
if not isinstance(values[2], str):
raise TypeError("errors must be a string, not %r" % (values[2]))
value = str(*values)
member = str.__new__(cls, value)
member._value_ = value
return member
@staticmethod
def _generate_next_value_(name: str, start: int, count: int, last_values: Any) -> str:
"""
Return the lower-cased version of the member name.
"""
return name.lower()
else:
from enum import StrEnum
@dataclasses.dataclass
class IssueResult:
series: str
distance: int
issue_number: str
cv_issue_count: int | None
issue_count: int | None
url_image_hash: int
issue_title: str
issue_id: str
@ -69,7 +29,7 @@ class IssueResult:
return f"series: {self.series}; series id: {self.series_id}; issue number: {self.issue_number}; issue id: {self.issue_id}; published: {self.month} {self.year}"
class Action(StrEnum):
class Action(utils.StrEnum):
print = auto()
delete = auto()
copy = auto()
@ -80,14 +40,14 @@ class Action(StrEnum):
list_plugins = auto()
class MatchStatus(StrEnum):
class MatchStatus(utils.StrEnum):
good_match = auto()
no_match = auto()
multiple_match = auto()
low_confidence_match = auto()
class Status(StrEnum):
class Status(utils.StrEnum):
success = auto()
match_failure = auto()
write_failure = auto()

View File

@ -33,6 +33,7 @@ from comictaggerlib.issueidentifier import IssueIdentifier
from comictaggerlib.issueselectionwindow import IssueSelectionWindow
from comictaggerlib.matchselectionwindow import MatchSelectionWindow
from comictaggerlib.progresswindow import IDProgressWindow
from comictaggerlib.resulttypes import IssueResult
from comictaggerlib.ui import qtutils, ui_path
from comictaggerlib.ui.qtutils import new_web_view, reduce_widget_font_size
from comictalker.comictalker import ComicTalker, TalkerError
@ -76,15 +77,17 @@ class SearchThread(QtCore.QThread):
class IdentifyThread(QtCore.QThread):
identifyComplete = pyqtSignal()
identifyComplete = pyqtSignal((int, list))
identifyLogMsg = pyqtSignal(str)
identifyProgress = pyqtSignal(int, int)
def __init__(self, identifier: IssueIdentifier) -> None:
def __init__(self, identifier: IssueIdentifier, ca: ComicArchive, md: GenericMetadata) -> None:
QtCore.QThread.__init__(self)
self.identifier = identifier
self.identifier.set_output_function(self.log_output)
self.identifier.set_progress_callback(self.progress_callback)
self.ca = ca
self.md = md
def log_output(self, text: str) -> None:
self.identifyLogMsg.emit(str(text))
@ -93,8 +96,7 @@ class IdentifyThread(QtCore.QThread):
self.identifyProgress.emit(cur, total)
def run(self) -> None:
self.identifier.search()
self.identifyComplete.emit()
self.identifyComplete.emit(*self.identifier.identify(self.ca, self.md))
class SeriesSelectionWindow(QtWidgets.QDialog):
@ -245,12 +247,7 @@ class SeriesSelectionWindow(QtWidgets.QDialog):
md.year = self.year
md.issue_count = self.issue_count
self.ii.set_additional_metadata(md)
self.ii.only_use_additional_meta_data = True
self.ii.cover_page_index = int(self.cover_index_list[0])
self.id_thread = IdentifyThread(self.ii)
self.id_thread = IdentifyThread(self.ii, self.comic_archive, md)
self.id_thread.identifyComplete.connect(self.identify_complete)
self.id_thread.identifyLogMsg.connect(self.log_id_output)
self.id_thread.identifyProgress.connect(self.identify_progress)
@ -276,35 +273,33 @@ class SeriesSelectionWindow(QtWidgets.QDialog):
if self.ii is not None:
self.ii.cancel = True
def identify_complete(self) -> None:
if self.ii is not None and self.iddialog is not None and self.comic_archive is not None:
matches = self.ii.match_list
result = self.ii.search_result
def identify_complete(self, result: int, issues: list[IssueResult]) -> None:
if self.iddialog is not None and self.comic_archive is not None:
found_match = None
choices = False
if result == self.ii.result_no_matches:
QtWidgets.QMessageBox.information(self, "Auto-Select Result", " No matches found :-(")
elif result == self.ii.result_found_match_but_bad_cover_score:
if result == IssueIdentifier.result_no_matches:
QtWidgets.QMessageBox.information(self, "Auto-Select Result", " No issues found :-(")
elif result == IssueIdentifier.result_found_match_but_bad_cover_score:
QtWidgets.QMessageBox.information(
self,
"Auto-Select Result",
" Found a match, but cover doesn't seem the same. Verify before committing!",
)
found_match = matches[0]
elif result == self.ii.result_found_match_but_not_first_page:
found_match = issues[0]
elif result == IssueIdentifier.result_found_match_but_not_first_page:
QtWidgets.QMessageBox.information(
self, "Auto-Select Result", " Found a match, but not with the first page of the archive."
)
found_match = matches[0]
elif result == self.ii.result_multiple_matches_with_bad_image_scores:
found_match = issues[0]
elif result == IssueIdentifier.result_multiple_matches_with_bad_image_scores:
QtWidgets.QMessageBox.information(
self, "Auto-Select Result", " Found some possibilities, but no confidence. Proceed manually."
)
choices = True
elif result == self.ii.result_one_good_match:
found_match = matches[0]
elif result == self.ii.result_multiple_good_matches:
elif result == IssueIdentifier.result_one_good_match:
found_match = issues[0]
elif result == IssueIdentifier.result_multiple_good_matches:
QtWidgets.QMessageBox.information(
self, "Auto-Select Result", " Found multiple likely matches. Please select."
)
@ -312,7 +307,7 @@ class SeriesSelectionWindow(QtWidgets.QDialog):
if choices:
selector = MatchSelectionWindow(
self, matches, self.comic_archive, talker=self.talker, config=self.config
self, issues, self.comic_archive, talker=self.talker, config=self.config
)
selector.setModal(True)
selector.exec()

View File

@ -29,9 +29,11 @@ from PyQt5 import QtCore, QtGui, QtWidgets, uic
import comictaggerlib.ui.talkeruigenerator
from comicapi import utils
from comicapi.archivers.archiver import Archiver
from comicapi.genericmetadata import md_test
from comictaggerlib import ctsettings
from comictaggerlib.ctsettings import ct_ns
from comictaggerlib.ctsettings.plugin import group_for_plugin
from comictaggerlib.filerenamer import FileRenamer, Replacement, Replacements
from comictaggerlib.ui import ui_path
from comictalker.comictalker import ComicTalker
@ -155,7 +157,6 @@ class SettingsWindow(QtWidgets.QDialog):
self.lblRarHelp.setText(linuxRarHelp)
elif platform.system() == "Darwin":
self.leRarExePath.setReadOnly(False)
self.lblRarHelp.setText(macRarHelp)
self.name = "Preferences"
@ -191,6 +192,8 @@ class SettingsWindow(QtWidgets.QDialog):
self.sources = comictaggerlib.ui.talkeruigenerator.generate_source_option_tabs(
self.tComicTalkers, self.config, self.talkers
)
self.cbFilenameParser.clear()
self.cbFilenameParser.addItems(utils.Parser)
self.connect_signals()
self.settings_to_form()
self.rename_test()
@ -208,7 +211,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.btnTemplateHelp.clicked.connect(self.show_template_help)
self.cbxMoveFiles.clicked.connect(self.dir_test)
self.leDirectory.textEdited.connect(self.dir_test)
self.cbxComplicatedParser.clicked.connect(self.switch_parser)
self.cbFilenameParser.currentIndexChanged.connect(self.switch_parser)
self.btnAddLiteralReplacement.clicked.connect(self.addLiteralReplacement)
self.btnAddValueReplacement.clicked.connect(self.addValueReplacement)
@ -243,7 +246,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.btnResetSettings.clicked.disconnect()
self.btnTemplateHelp.clicked.disconnect()
self.cbxChangeExtension.clicked.disconnect()
self.cbxComplicatedParser.clicked.disconnect()
self.cbFilenameParser.currentIndexChanged.disconnect()
self.cbxMoveFiles.clicked.disconnect()
self.cbxRenameStrict.clicked.disconnect()
self.cbxSmartCleanup.clicked.disconnect()
@ -272,9 +275,10 @@ class SettingsWindow(QtWidgets.QDialog):
self._filename_parser_test(self.leFilenameParserTest.text())
def _filename_parser_test(self, filename: str) -> None:
self.cbFilenameParser: QtWidgets.QComboBox
filename_info = utils.parse_filename(
filename=filename,
complicated_parser=self.cbxComplicatedParser.isChecked(),
parser=utils.Parser(self.cbFilenameParser.currentText()),
remove_c2c=self.cbxRemoveC2C.isChecked(),
remove_fcbd=self.cbxRemoveFCBD.isChecked(),
remove_publisher=self.cbxRemovePublisher.isChecked(),
@ -357,18 +361,22 @@ class SettingsWindow(QtWidgets.QDialog):
self.lblRenameTest.setText(str(e))
def switch_parser(self) -> None:
complicated = self.cbxComplicatedParser.isChecked()
currentParser = utils.Parser(self.cbFilenameParser.currentText())
complicated = currentParser == utils.Parser.COMPLICATED
self.cbxRemoveC2C.setEnabled(complicated)
self.cbxRemoveFCBD.setEnabled(complicated)
self.cbxRemovePublisher.setEnabled(complicated)
self.cbxProtofoliusIssueNumberScheme.setEnabled(complicated)
self.cbxAllowIssueStartWithLetter.setEnabled(complicated)
self.filename_parser_test()
def settings_to_form(self) -> None:
self.disconnect_signals()
# Copy values from settings to form
if "archiver" in self.config[1] and "rar" in self.config[1]["archiver"].v:
self.leRarExePath.setText(getattr(self.config[0], self.config[1]["archiver"].v["rar"].internal_name))
archive_group = group_for_plugin(Archiver)
if archive_group in self.config[1] and "rar" in self.config[1][archive_group].v:
self.leRarExePath.setText(getattr(self.config[0], self.config[1][archive_group].v["rar"].internal_name))
else:
self.leRarExePath.setEnabled(False)
self.sbNameMatchIdentifyThresh.setValue(self.config[0].Issue_Identifier__series_match_identify_thresh)
@ -378,7 +386,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.cbxCheckForNewVersion.setChecked(self.config[0].General__check_for_new_version)
self.cbxShortMetadataNames.setChecked(self.config[0].General__use_short_metadata_names)
self.cbxComplicatedParser.setChecked(self.config[0].Filename_Parsing__complicated_parser)
self.cbFilenameParser.setCurrentText(self.config[0].Filename_Parsing__filename_parser)
self.cbxRemoveC2C.setChecked(self.config[0].Filename_Parsing__remove_c2c)
self.cbxRemoveFCBD.setChecked(self.config[0].Filename_Parsing__remove_fcbd)
self.cbxRemovePublisher.setChecked(self.config[0].Filename_Parsing__remove_publisher)
@ -482,11 +490,12 @@ class SettingsWindow(QtWidgets.QDialog):
)
# Copy values from form to settings and save
if "archiver" in self.config[1] and "rar" in self.config[1]["archiver"].v:
setattr(self.config[0], self.config[1]["archiver"].v["rar"].internal_name, str(self.leRarExePath.text()))
archive_group = group_for_plugin(Archiver)
if archive_group in self.config[1] and "rar" in self.config[1][archive_group].v:
setattr(self.config[0], self.config[1][archive_group].v["rar"].internal_name, str(self.leRarExePath.text()))
# make sure rar program is now in the path for the rar class
if self.config[0].archiver_rar: # type: ignore[attr-defined]
if self.config[0].Archive__rar:
utils.add_to_path(os.path.dirname(str(self.leRarExePath.text())))
if not str(self.leIssueNumPadding.text()).isdigit():
@ -504,7 +513,7 @@ class SettingsWindow(QtWidgets.QDialog):
self.config[0].Issue_Identifier__series_match_search_thresh = self.sbNameMatchSearchThresh.value()
self.config[0].Issue_Identifier__publisher_filter = utils.split(self.tePublisherFilter.toPlainText(), "\n")
self.config[0].Filename_Parsing__complicated_parser = self.cbxComplicatedParser.isChecked()
self.config[0].Filename_Parsing__filename_parser = utils.Parser(self.cbFilenameParser.currentText())
self.config[0].Filename_Parsing__remove_c2c = self.cbxRemoveC2C.isChecked()
self.config[0].Filename_Parsing__remove_fcbd = self.cbxRemoveFCBD.isChecked()
self.config[0].Filename_Parsing__remove_publisher = self.cbxRemovePublisher.isChecked()

View File

@ -27,11 +27,12 @@ import sys
import webbrowser
from datetime import datetime
from typing import Any, Callable
from urllib.parse import urlparse
import natsort
import settngs
import urllib3.util
from PyQt5 import QtCore, QtGui, QtNetwork, QtWidgets, uic
from urllib3.util.url import LocationParseError
import comictaggerlib.ui
from comicapi import utils
@ -112,7 +113,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
"alternate_count": self.leAltIssueCount,
"imprint": self.leImprint,
"notes": self.teNotes,
"web_link": self.leWebLink,
"web_links": (self.leWebLink, self.btnOpenWebLink, self.btnAddWebLink, self.btnRemoveWebLink),
"format": self.cbFormat,
"manga": self.cbManga,
"black_and_white": self.cbBW,
@ -124,7 +125,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
"characters": self.teCharacters,
"teams": self.teTeams,
"locations": self.teLocations,
"credits": [self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit],
"credits": (self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit),
"credits.person": 2,
"credits.role": 1,
"credits.primary": 0,
@ -532,6 +533,31 @@ class TaggerWindow(QtWidgets.QMainWindow):
self.toolBar.addAction(self.actionPageBrowser)
self.toolBar.addAction(self.actionAutoImprint)
self.leWebLink.addAction(self.actionAddWebLink)
self.leWebLink.addAction(self.actionRemoveWebLink)
self.actionAddWebLink.triggered.connect(self.add_weblink_item)
self.actionRemoveWebLink.triggered.connect(self.remove_weblink_item)
def add_weblink_item(self, url: str = "") -> None:
item = ""
if isinstance(url, str):
item = url
self.leWebLink.addItem(item)
self.leWebLink.item(self.leWebLink.count() - 1).setFlags(
QtCore.Qt.ItemFlag.ItemIsEditable
| QtCore.Qt.ItemFlag.ItemIsEnabled
| QtCore.Qt.ItemFlag.ItemIsDragEnabled
| QtCore.Qt.ItemFlag.ItemIsSelectable
)
self.leWebLink.item(self.leWebLink.count() - 1).setSelected(True)
if not url:
self.leWebLink.editItem(self.leWebLink.item(self.leWebLink.count() - 1))
def remove_weblink_item(self) -> None:
item = self.leWebLink.takeItem(self.leWebLink.currentRow())
del item
def repackage_archive(self) -> None:
ca_list = self.fileSelectionList.get_selected_archive_list()
non_zip_count = 0
@ -784,6 +810,8 @@ class TaggerWindow(QtWidgets.QMainWindow):
widget.currentIndexChanged.connect(self.set_dirty_flag)
if isinstance(widget, QtWidgets.QCheckBox):
widget.stateChanged.connect(self.set_dirty_flag)
if isinstance(widget, QtWidgets.QListWidget):
widget.itemChanged.connect(self.set_dirty_flag)
# recursive call on children
for child in widget.children():
@ -844,7 +872,9 @@ class TaggerWindow(QtWidgets.QMainWindow):
assign_text(self.leAltSeries, md.alternate_series)
assign_text(self.leAltIssueNum, md.alternate_number)
assign_text(self.leAltIssueCount, md.alternate_count)
assign_text(self.leWebLink, md.web_link)
self.leWebLink.clear()
for u in md.web_links:
self.add_weblink_item(u.url)
assign_text(self.teCharacters, "\n".join(md.characters))
assign_text(self.teTeams, "\n".join(md.teams))
assign_text(self.teLocations, "\n".join(md.locations))
@ -967,7 +997,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
md.scan_info = utils.xlate(self.leScanInfo.text())
md.series_groups = utils.split(self.leSeriesGroup.text(), ",")
md.alternate_series = self.leAltSeries.text()
md.web_link = utils.xlate(self.leWebLink.text())
md.web_links = [urllib3.util.parse_url(self.leWebLink.item(i).text()) for i in range(self.leWebLink.count())]
md.characters = set(utils.split(self.teCharacters.toPlainText(), "\n"))
md.teams = set(utils.split(self.teTeams.toPlainText(), "\n"))
md.locations = set(utils.split(self.teLocations.toPlainText(), "\n"))
@ -1004,7 +1034,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
# copy the form onto metadata object
self.form_to_metadata()
new_metadata = self.comic_archive.metadata_from_filename(
self.config[0].Filename_Parsing__complicated_parser,
self.config[0].Filename_Parsing__filename_parser,
self.config[0].Filename_Parsing__remove_c2c,
self.config[0].Filename_Parsing__remove_fcbd,
self.config[0].Filename_Parsing__remove_publisher,
@ -1044,6 +1074,9 @@ class TaggerWindow(QtWidgets.QMainWindow):
dialog.setNameFilters(filters)
dialog.setFileMode(QtWidgets.QFileDialog.FileMode.ExistingFiles)
if os.environ.get("XDG_SESSION_DESKTOP", "") == "KDE":
dialog.setOption(QtWidgets.QFileDialog.Option.DontUseNativeDialog)
if self.config[0].internal__last_opened_folder is not None:
dialog.setDirectory(self.config[0].internal__last_opened_folder)
return dialog
@ -1144,40 +1177,44 @@ class TaggerWindow(QtWidgets.QMainWindow):
def commit_metadata(self) -> None:
if self.metadata is not None and self.comic_archive is not None:
reply = QtWidgets.QMessageBox.question(
self,
"Save Tags",
f"Are you sure you wish to save {', '.join([metadata_styles[style].name() for style in self.save_data_styles])} tags to this archive?",
QtWidgets.QMessageBox.StandardButton.Yes,
QtWidgets.QMessageBox.StandardButton.No,
)
if self.config[0].General__prompt_on_save:
reply = QtWidgets.QMessageBox.question(
self,
"Save Tags",
f"Are you sure you wish to save {', '.join([metadata_styles[style].name() for style in self.save_data_styles])} tags to this archive?",
QtWidgets.QMessageBox.StandardButton.Yes,
QtWidgets.QMessageBox.StandardButton.No,
)
else:
reply = QtWidgets.QMessageBox.StandardButton.Yes
if reply == QtWidgets.QMessageBox.StandardButton.Yes:
QtWidgets.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.CursorShape.WaitCursor))
self.form_to_metadata()
if reply != QtWidgets.QMessageBox.StandardButton.Yes:
return
QtWidgets.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.CursorShape.WaitCursor))
self.form_to_metadata()
failed_style: str = ""
# Save each style
for style in self.save_data_styles:
success = self.comic_archive.write_metadata(self.metadata, style)
if not success:
failed_style = metadata_styles[style].name()
break
failed_style: str = ""
# Save each style
for style in self.save_data_styles:
success = self.comic_archive.write_metadata(self.metadata, style)
if not success:
failed_style = metadata_styles[style].name()
break
self.comic_archive.load_cache(list(metadata_styles))
QtWidgets.QApplication.restoreOverrideCursor()
self.comic_archive.load_cache(list(metadata_styles))
QtWidgets.QApplication.restoreOverrideCursor()
if failed_style:
QtWidgets.QMessageBox.warning(
self,
"Save failed",
f"The tag save operation seemed to fail for: {failed_style}",
)
else:
self.clear_dirty_flag()
self.update_info_box()
self.update_menus()
self.fileSelectionList.update_current_row()
if failed_style:
QtWidgets.QMessageBox.warning(
self,
"Save failed",
f"The tag save operation seemed to fail for: {failed_style}",
)
else:
self.clear_dirty_flag()
self.update_info_box()
self.update_menus()
self.fileSelectionList.update_current_row()
self.metadata = self.comic_archive.read_metadata(self.load_data_style)
self.update_ui_for_archive()
@ -1336,14 +1373,17 @@ class TaggerWindow(QtWidgets.QMainWindow):
self.set_dirty_flag()
def open_web_link(self) -> None:
if self.leWebLink is not None:
web_link = self.leWebLink.text().strip()
try:
result = urlparse(web_link)
all([result.scheme in ["http", "https"], result.netloc])
webbrowser.open_new_tab(web_link)
except ValueError:
QtWidgets.QMessageBox.warning(self, self.tr("Web Link"), self.tr("Web Link is invalid."))
row = self.leWebLink.currentRow()
if row < 0:
if self.leWebLink.count() < 1:
return
row = 0
web_link = self.leWebLink.item(row).text()
try:
urllib3.util.parse_url(web_link)
webbrowser.open_new_tab(web_link)
except LocationParseError:
QtWidgets.QMessageBox.warning(self, "Web Link", "Web Link is invalid.")
def show_settings(self) -> None:
settingswin = SettingsWindow(self, self.config, self.talkers)
@ -1725,7 +1765,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
logger.error("Failed to load metadata for %s: %s", ca.path, e)
if md.is_empty:
md = ca.metadata_from_filename(
self.config[0].Filename_Parsing__complicated_parser,
self.config[0].Filename_Parsing__filename_parser,
self.config[0].Filename_Parsing__remove_c2c,
self.config[0].Filename_Parsing__remove_fcbd,
self.config[0].Filename_Parsing__remove_publisher,
@ -1752,17 +1792,13 @@ class TaggerWindow(QtWidgets.QMainWindow):
md.issue = "1"
else:
md.issue = utils.xlate(md.volume)
ii.set_additional_metadata(md)
ii.only_use_additional_meta_data = True
ii.set_output_function(self.auto_tag_log)
ii.cover_page_index = md.get_cover_page_index_list()[0]
if self.atprogdialog is not None:
ii.set_cover_url_callback(self.atprogdialog.set_test_image)
ii.set_name_series_match_threshold(dlg.name_length_match_tolerance)
matches: list[IssueResult] = ii.search()
result = ii.search_result
result, matches = ii.identify(ca, md)
found_match = False
choices = False

View File

@ -6,6 +6,7 @@ import io
import logging
import traceback
import webbrowser
from collections.abc import Sequence
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QWidget
@ -155,7 +156,7 @@ if qt_available:
active_palette = None
def enable_widget(widget: QtWidgets.QWidget | list[QtWidgets.QWidget], enable: bool) -> None:
if isinstance(widget, list):
if isinstance(widget, Sequence):
for w in widget:
_enable_widget(w, enable)
else:
@ -214,6 +215,8 @@ if qt_available:
widget.setReadOnly(True)
widget.setPalette(inactive_palette[0])
elif isinstance(widget, QtWidgets.QListWidget):
inactive_palette = palettes()
widget.setPalette(inactive_palette[0])
widget.setMovement(QtWidgets.QListWidget.Static)
def replaceWidget(

View File

@ -41,64 +41,6 @@
<string/>
</property>
<layout class="QGridLayout" name="gridLayout_4">
<item row="2" column="0">
<widget class="QPushButton" name="btnResetSettings">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Default Settings</string>
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QPushButton" name="btnClearCache">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Clear Cache</string>
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QLabel" name="label_2">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>If you need to free up the disk space, or the responses seems out of date, clear the online cache.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QLabel" name="lblDefaultSettings">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Revert to default settings</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
<item row="0" column="0" colspan="2">
<widget class="QCheckBox" name="cbxCheckForNewVersion">
<property name="text">
@ -116,6 +58,77 @@
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QPushButton" name="btnResetSettings">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Default Settings</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QPushButton" name="btnClearCache">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Clear Cache</string>
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QLabel" name="lblDefaultSettings">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Revert to default settings</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="buddy">
<cstring>btnResetSettings</cstring>
</property>
</widget>
</item>
<item row="4" column="1">
<widget class="QLabel" name="label_2">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>If you need to free up the disk space, or the responses seems out of date, clear the online cache.</string>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
<property name="buddy">
<cstring>btnClearCache</cstring>
</property>
</widget>
</item>
<item row="2" column="0">
<widget class="QCheckBox" name="cbxPromptOnSave">
<property name="text">
<string>Prompts the user to confirm saving tags</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
@ -166,6 +179,9 @@
<property name="text">
<string>Default Name Match Ratio Threshold: Search:</string>
</property>
<property name="buddy">
<cstring>sbNameMatchSearchThresh</cstring>
</property>
</widget>
</item>
<item row="1" column="0">
@ -176,6 +192,9 @@
<property name="alignment">
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
</property>
<property name="buddy">
<cstring>sbNameMatchIdentifyThresh</cstring>
</property>
</widget>
</item>
<item row="2" column="0">
@ -183,6 +202,9 @@
<property name="text">
<string>Always use Publisher Filter on &quot;manual&quot; searches:</string>
</property>
<property name="buddy">
<cstring>cbxUseFilter</cstring>
</property>
</widget>
</item>
<item row="2" column="1">
@ -200,6 +222,9 @@
<property name="text">
<string>Publisher Filter:</string>
</property>
<property name="buddy">
<cstring>tePublisherFilter</cstring>
</property>
</widget>
</item>
<item row="3" column="1">
@ -301,9 +326,19 @@
<widget class="QGroupBox" name="groupBox_2">
<layout class="QVBoxLayout" name="verticalLayout_7">
<item>
<widget class="QCheckBox" name="cbxComplicatedParser">
<widget class="QLabel" name="lblFilenamearser">
<property name="text">
<string>Use &quot;Complicated&quot; Parser</string>
<string>Select the filename parser</string>
</property>
<property name="buddy">
<cstring>cbFilenameParser</cstring>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="cbFilenameParser">
<property name="insertPolicy">
<enum>QComboBox::NoInsert</enum>
</property>
</widget>
</item>
@ -539,6 +574,9 @@
<property name="text">
<string>Template:</string>
</property>
<property name="buddy">
<cstring>leRenameTemplate</cstring>
</property>
</widget>
</item>
<item row="1" column="1">
@ -569,6 +607,9 @@
<property name="text">
<string>Issue # Zero Padding</string>
</property>
<property name="buddy">
<cstring>leIssueNumPadding</cstring>
</property>
</widget>
</item>
<item row="3" column="1">
@ -622,6 +663,9 @@
<property name="text">
<string>Destination Directory:</string>
</property>
<property name="buddy">
<cstring>leDirectory</cstring>
</property>
</widget>
</item>
<item row="9" column="1">
@ -727,6 +771,9 @@
<property name="text">
<string>Value Text Replacements</string>
</property>
<property name="buddy">
<cstring>twValueReplacements</cstring>
</property>
</widget>
</item>
<item row="1" column="0" colspan="2">
@ -734,6 +781,9 @@
<property name="text">
<string>Literal Text Replacements</string>
</property>
<property name="buddy">
<cstring>twLiteralReplacements</cstring>
</property>
</widget>
</item>
</layout>
@ -769,6 +819,9 @@
<property name="text">
<string>RAR program</string>
</property>
<property name="buddy">
<cstring>leRarExePath</cstring>
</property>
</widget>
</item>
<item row="1" column="1">
@ -780,7 +833,7 @@
</sizepolicy>
</property>
<property name="readOnly">
<bool>true</bool>
<bool>false</bool>
</property>
</widget>
</item>

View File

@ -927,6 +927,9 @@
<property name="acceptDrops">
<bool>false</bool>
</property>
<property name="acceptRichText">
<bool>false</bool>
</property>
</widget>
</item>
<item row="1" column="0">
@ -941,6 +944,9 @@
<property name="acceptDrops">
<bool>false</bool>
</property>
<property name="acceptRichText">
<bool>false</bool>
</property>
</widget>
</item>
<item row="2" column="0">
@ -952,10 +958,32 @@
</item>
<item row="2" column="1">
<layout class="QGridLayout" name="gridLayout_7">
<item row="0" column="0">
<widget class="QLineEdit" name="leWebLink">
<property name="acceptDrops">
<bool>false</bool>
<item row="0" column="0" rowspan="3">
<widget class="QListWidget" name="leWebLink">
<property name="contextMenuPolicy">
<enum>Qt::ActionsContextMenu</enum>
</property>
<property name="dragEnabled">
<bool>true</bool>
</property>
<property name="dragDropMode">
<enum>QAbstractItemView::DropOnly</enum>
</property>
<property name="defaultDropAction">
<enum>Qt::MoveAction</enum>
</property>
<property name="alternatingRowColors">
<bool>true</bool>
</property>
<property name="selectionMode">
<enum>QAbstractItemView::SingleSelection</enum>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QPushButton" name="btnRemoveWebLink">
<property name="text">
<string>Delete Item</string>
</property>
</widget>
</item>
@ -975,6 +1003,13 @@
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QPushButton" name="btnAddWebLink">
<property name="text">
<string>Add Item</string>
</property>
</widget>
</item>
</layout>
</item>
<item row="4" column="0">
@ -1053,6 +1088,9 @@
<property name="acceptDrops">
<bool>false</bool>
</property>
<property name="acceptRichText">
<bool>false</bool>
</property>
</widget>
</item>
<item row="2" column="0">
@ -1079,6 +1117,9 @@
<property name="acceptDrops">
<bool>false</bool>
</property>
<property name="acceptRichText">
<bool>false</bool>
</property>
</widget>
</item>
<item row="3" column="0">
@ -1105,6 +1146,9 @@
<property name="acceptDrops">
<bool>false</bool>
</property>
<property name="acceptRichText">
<bool>false</bool>
</property>
</widget>
</item>
<item row="4" column="0">
@ -1137,6 +1181,9 @@
<property name="acceptDrops">
<bool>false</bool>
</property>
<property name="acceptRichText">
<bool>false</bool>
</property>
</widget>
</item>
</layout>
@ -1169,7 +1216,7 @@
<x>0</x>
<y>0</y>
<width>1096</width>
<height>28</height>
<height>30</height>
</rect>
</property>
<widget class="QMenu" name="menuComicTagger">
@ -1459,6 +1506,16 @@
<string>Open Folder as Comic</string>
</property>
</action>
<action name="actionAddWebLink">
<property name="text">
<string>Add Item</string>
</property>
</action>
<action name="actionRemoveWebLink">
<property name="text">
<string>Remove Web Link</string>
</property>
</action>
</widget>
<layoutdefault spacing="6" margin="11"/>
<customwidgets>
@ -1469,5 +1526,38 @@
</customwidget>
</customwidgets>
<resources/>
<connections/>
<connections>
<connection>
<sender>btnAddWebLink</sender>
<signal>clicked()</signal>
<receiver>actionAddWebLink</receiver>
<slot>trigger()</slot>
<hints>
<hint type="sourcelabel">
<x>900</x>
<y>536</y>
</hint>
<hint type="destinationlabel">
<x>-1</x>
<y>-1</y>
</hint>
</hints>
</connection>
<connection>
<sender>btnRemoveWebLink</sender>
<signal>clicked()</signal>
<receiver>actionRemoveWebLink</receiver>
<slot>trigger()</slot>
<hints>
<hint type="sourcelabel">
<x>900</x>
<y>576</y>
</hint>
<hint type="destinationlabel">
<x>-1</x>
<y>-1</y>
</hint>
</hints>
</connection>
</connections>
</ui>

View File

@ -29,6 +29,8 @@ import requests
import settngs
from pyrate_limiter import Limiter, RequestRate
from typing_extensions import Required, TypedDict
from urllib3.exceptions import LocationParseError
from urllib3.util import parse_url
from comicapi import utils
from comicapi.genericmetadata import ComicSeries, GenericMetadata, TagOrigin
@ -643,10 +645,15 @@ class ComicVineTalker(ComicTalker):
format=utils.xlate(series.format),
volume_count=utils.xlate_int(series.count_of_volumes),
title=utils.xlate(issue.get("name")),
web_link=utils.xlate(issue.get("site_detail_url")),
series=utils.xlate(series.name),
series_aliases=series.aliases,
)
url = utils.xlate(issue.get("site_detail_url"))
if url:
try:
md.web_links = [parse_url(url)]
except LocationParseError:
...
if issue.get("image") is None:
md._cover_image = ""
else:

View File

@ -37,6 +37,7 @@ install_requires =
appdirs==1.4.4
beautifulsoup4>=4.1
chardet>=5.1.0,<6
comicfn2dict>=0.2.1
importlib-metadata>=3.3.0
isocodes>=2023.11.26
natsort>=8.1.0
@ -46,7 +47,7 @@ install_requires =
pyrate-limiter>=2.6,<3
rapidfuzz>=2.12.0
requests==2.*
settngs==0.9.2
settngs==0.10.0
text2digits
typing-extensions>=4.3.0
wordninja
@ -86,7 +87,7 @@ QTW =
all =
PyQt5
PyQtWebEngine
comicinfoxml
comicinfoxml>=0.2.0
gcd-talker>=0.1.0
metron-talker>=0.1.5
pillow-avif-plugin>=1.4.1
@ -96,7 +97,7 @@ all =
avif =
pillow-avif-plugin>=1.4.1
cix =
comicinfoxml
comicinfoxml>=0.2.0
gcd =
gcd-talker>=0.1.0
metron =

View File

@ -78,33 +78,17 @@ metadata = [
metadata_keys = [
(
comicapi.genericmetadata.GenericMetadata(),
comicapi.genericmetadata.md_test,
{
"issue_count": 6,
"issue_number": "1",
"month": 10,
"series": "Cory Doctorow's Futuristic Tales of the Here and Now",
"year": 2007,
},
),
(
comicapi.genericmetadata.GenericMetadata(series="test"),
{
"issue_count": 6,
"issue_number": "1",
"month": 10,
"series": "test",
"year": 2007,
},
),
(
comicapi.genericmetadata.GenericMetadata(series="test", issue="3"),
{
"issue_count": 6,
"issue_number": "3",
"month": 10,
"series": "test",
"year": 2007,
"alternate_count": 7,
"alternate_number": "2",
"imprint": "craphound.com",
"publisher": "IDW Publishing",
},
),
]

View File

@ -185,7 +185,7 @@ comic_issue_result = comicapi.genericmetadata.GenericMetadata(
issue=cv_issue_result["results"]["issue_number"],
volume=None,
title=cv_issue_result["results"]["name"],
web_link=cv_issue_result["results"]["site_detail_url"],
web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])],
)
cv_md = comicapi.genericmetadata.GenericMetadata(
@ -213,7 +213,7 @@ cv_md = comicapi.genericmetadata.GenericMetadata(
alternate_count=None,
imprint=None,
notes=None,
web_link=cv_issue_result["results"]["site_detail_url"],
web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])],
format=None,
manga=None,
black_and_white=None,

View File

@ -1152,6 +1152,13 @@ rnames = [
"Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
does_not_raise(),
),
(
"{title} {web_links!j}", # Test that join forces str conversion
False,
"Linux",
"Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
does_not_raise(),
),
(
"{series}:{title} #{issue} ({year})", # on windows the ':' is replaced
False,

View File

@ -5,6 +5,7 @@ import io
import pytest
from PIL import Image
import comictaggerlib.imagehasher
import comictaggerlib.issueidentifier
import testing.comicdata
import testing.comicvine
@ -13,12 +14,16 @@ from comictaggerlib.resulttypes import IssueResult
def test_crop(cbz_double_cover, config, tmp_path, comicvine_api):
config, definitions = config
ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz_double_cover, config, comicvine_api)
cropped = ii.crop_cover(cbz_double_cover.archiver.read_file("double_cover.jpg"))
original_cover = cbz_double_cover.get_page(0)
original_hash = ii.calculate_hash(original_cover)
cropped_hash = ii.calculate_hash(cropped)
ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz_double_cover, config, comicvine_api)
im = Image.open(io.BytesIO(cbz_double_cover.archiver.read_file("double_cover.jpg")))
cropped = ii._crop_double_page(im)
original = cbz_double_cover.get_page(0)
original_hash = comictaggerlib.imagehasher.ImageHasher(data=original).average_hash()
cropped_hash = comictaggerlib.imagehasher.ImageHasher(image=cropped).average_hash()
assert original_hash == cropped_hash
@ -27,23 +32,24 @@ def test_crop(cbz_double_cover, config, tmp_path, comicvine_api):
def test_get_search_keys(cbz, config, additional_md, expected, comicvine_api):
config, definitions = config
ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz, config, comicvine_api)
ii.set_additional_metadata(additional_md)
assert expected == ii.get_search_keys()
assert expected == ii._get_search_keys(additional_md)
def test_get_issue_cover_match_score(cbz, config, comicvine_api):
config, definitions = config
ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz, config, comicvine_api)
score = ii.get_issue_cover_match_score(
score = ii._get_issue_cover_match_score(
"https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg",
["https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/"],
[ii.calculate_hash(cbz.get_page(0))],
[("Cover 1", ii.calculate_hash(cbz.get_page(0)))],
)
expected = {
"hash": 212201432349720,
"remote_hash": 212201432349720,
"score": 0,
"url": "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg",
"local_hash": 212201432349720,
"local_hash_name": "Cover 1",
}
assert expected == score
@ -51,13 +57,13 @@ def test_get_issue_cover_match_score(cbz, config, comicvine_api):
def test_search(cbz, config, comicvine_api):
config, definitions = config
ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz, config, comicvine_api)
results = ii.search()
result, issues = ii.identify(cbz, cbz.read_metadata("cr"))
cv_expected = IssueResult(
series=f"{testing.comicvine.cv_volume_result['results']['name']} ({testing.comicvine.cv_volume_result['results']['start_year']})",
distance=0,
issue_number=testing.comicvine.cv_issue_result["results"]["issue_number"],
alt_image_urls=[],
cv_issue_count=testing.comicvine.cv_volume_result["results"]["count_of_issues"],
issue_count=testing.comicvine.cv_volume_result["results"]["count_of_issues"],
issue_title=testing.comicvine.cv_issue_result["results"]["name"],
issue_id=str(testing.comicvine.cv_issue_result["results"]["id"]),
series_id=str(testing.comicvine.cv_volume_result["results"]["id"]),
@ -68,7 +74,7 @@ def test_search(cbz, config, comicvine_api):
description=testing.comicvine.cv_issue_result["results"]["description"],
url_image_hash=212201432349720,
)
for r, e in zip(results, [cv_expected]):
for r, e in zip(issues, [cv_expected]):
assert r == e
@ -80,14 +86,10 @@ def test_crop_border(cbz, config, comicvine_api):
bg = Image.new("RGBA", (100, 100), (0, 0, 0, 255))
fg = Image.new("RGBA", (50, 50), (255, 255, 255, 255))
bg.paste(fg, (bg.width // 2 - (fg.width // 2), bg.height // 2 - (fg.height // 2)))
output = io.BytesIO()
bg.save(output, format="PNG")
image_data = output.getvalue()
output.close()
cropped = ii.crop_border(image_data, 49)
cropped = ii._crop_border(bg, 49)
im = Image.open(io.BytesIO(cropped))
assert im.width == fg.width
assert im.height == fg.height
assert list(im.getdata()) == list(fg.getdata())
assert cropped
assert cropped.width == fg.width
assert cropped.height == fg.height
assert list(cropped.getdata()) == list(fg.getdata())