comictagger/comictaggerlib/issueidentifier.py

681 lines
26 KiB
Python
Raw Normal View History

"""A class to automatically identify a comic archive"""
2022-06-02 18:32:16 -07:00
#
# Copyright 2012-2014 ComicTagger Authors
2022-06-02 18:32:16 -07:00
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
2022-06-02 18:32:16 -07:00
#
# http://www.apache.org/licenses/LICENSE-2.0
2022-06-02 18:32:16 -07:00
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2022-06-02 18:32:16 -07:00
from __future__ import annotations
import io
2022-04-04 18:59:26 -07:00
import logging
from operator import attrgetter
2022-06-02 18:32:16 -07:00
from typing import Any, Callable
2022-05-17 13:57:04 -07:00
from typing_extensions import NotRequired, TypedDict
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + 
print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
from comicapi import utils
2024-02-23 20:49:54 -08:00
from comicapi.comicarchive import ComicArchive
from comicapi.genericmetadata import ComicSeries, GenericMetadata
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + 
print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
from comicapi.issuestring import IssueString
from comictaggerlib.ctsettings import ct_ns
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + 
print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
from comictaggerlib.imagefetcher import ImageFetcher, ImageFetcherException
from comictaggerlib.imagehasher import ImageHasher
from comictaggerlib.resulttypes import IssueResult
2023-02-09 19:33:10 -08:00
from comictalker.comictalker import ComicTalker, TalkerError
2022-04-04 18:59:26 -07:00
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Pillow is an optional dependency: record in ``pil_available`` whether the
# import succeeded instead of failing at module import time.
try:
    from PIL import Image, ImageChops
except ImportError:
    pil_available = False
else:
    pil_available = True
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + 
print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
class SearchKeys(TypedDict):
    """Typed dictionary of the criteria used when searching for an issue.

    Every key must be present; only ``series`` and ``issue_number`` are
    typed as always holding a value, the rest may be ``None``.
    """

    # Always-populated keys.
    series: str
    issue_number: str

    # Optional values (``None`` when unknown).
    alternate_number: str | None
    month: int | None
    year: int | None
    issue_count: int | None
    alternate_count: int | None
    publisher: str | None
    imprint: str | None
2022-05-17 13:57:04 -07:00
class Score(TypedDict):
    """Typed dictionary pairing a remote image hash with a local one.

    ``score`` is the only key that may be omitted.
    NOTE(review): presumably ``score`` is the hamming distance between the
    two hashes -- confirm at the place where these dicts are built.
    """

    # Optional comparison result.
    score: NotRequired[int]

    # Remote image: where it came from and its hash.
    url: str
    remote_hash: int

    # Local image: a label for the hash and the hash itself.
    local_hash_name: str
    local_hash: int
class IssueIdentifierNetworkError(Exception):
    """Issue-identification error for network failures.

    NOTE(review): the raise sites are outside this section of the file;
    the description is taken from the class name.
    """
class IssueIdentifierCancelled(Exception):
    """Issue-identification error signalling that the run was cancelled.

    NOTE(review): the raise sites are outside this section of the file;
    the description is taken from the class name.
    """
class IssueIdentifier:
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + 
print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
# Result codes for ``search_result``; the values are consecutive integers
# starting at 0, in the order listed.
(
    result_no_matches,  # 0
    result_found_match_but_bad_cover_score,  # 1
    result_found_match_but_not_first_page,  # 2
    result_multiple_matches_with_bad_image_scores,  # 3
    result_one_good_match,  # 4
    result_multiple_good_matches,  # 5
) = range(6)
def __init__(
    self,
    comic_archive: ComicArchive,
    config: ct_ns,
    talker: ComicTalker,
    metadata: GenericMetadata | None = None,
) -> None:
    """Set up an identifier for one comic archive.

    Args:
        comic_archive: The archive to identify.
        config: Settings namespace. ``Issue_Identifier__series_match_identify_thresh``
            and ``Issue_Identifier__publisher_filter`` are read here.
        talker: Metadata source, stored as ``self.talker``.
        metadata: Metadata already known for the archive. When omitted (or
            ``None``) a fresh, empty ``GenericMetadata`` is created for this
            instance.
    """
    self.config = config
    self.talker = talker
    self.comic_archive: ComicArchive = comic_archive

    # A ``GenericMetadata()`` default in the signature is evaluated once, at
    # definition time, and would be shared by every instance created without
    # the argument; build a new object per instance instead.
    self.md = GenericMetadata() if metadata is None else metadata

    # Hash algorithm selector read by ``calculate_hash``: 3 selects p_hash,
    # 2 is disabled (returns -1), any other value selects the average hash.
    self.image_hasher = 1

    self.only_use_additional_meta_data = False

    # a decent hamming score, good enough to call it a match
    self.min_score_thresh: int = 16

    # for alternate covers, be more stringent, since we're a bit more
    # scattershot in comparisons
    self.min_alternate_score_thresh = 12

    # the min distance a hamming score must be to separate itself from
    # closest neighbor
    self.min_score_distance = 4

    # a very strong hamming score, almost certainly the same image
    self.strong_score_thresh = 8

    # used to eliminate series names that are too long based on our search
    # string
    self.series_match_thresh = config.Issue_Identifier__series_match_identify_thresh

    # used to eliminate unlikely publishers; stored stripped and casefolded
    self.publisher_filter = [s.strip().casefold() for s in config.Issue_Identifier__publisher_filter]

    self.additional_metadata = GenericMetadata()

    # Callbacks: text output defaults to ``print``; the other two stay unset
    # until a caller installs them.
    self.output_function: Callable[[str], None] = print
    self.progress_callback: Callable[[int, int], None] | None = None
    self.cover_url_callback: Callable[[bytes], None] | None = None

    # Search state, reset to "nothing found yet".
    self.search_result = self.result_no_matches
    self.cover_page_index = 0
    self.cancel = False

    self.match_list: list[IssueResult] = []
2022-05-17 13:57:04 -07:00
def set_score_min_threshold(self, thresh: int) -> None:
self.min_score_thresh = thresh
2022-05-17 13:57:04 -07:00
def set_score_min_distance(self, distance: int) -> None:
self.min_score_distance = distance
2022-05-17 13:57:04 -07:00
def set_additional_metadata(self, md: GenericMetadata) -> None:
self.additional_metadata = md
2022-07-08 12:33:00 -07:00
def set_name_series_match_threshold(self, delta: int) -> None:
self.series_match_thresh = delta
2022-06-02 18:32:16 -07:00
def set_publisher_filter(self, flt: list[str]) -> None:
2022-04-18 18:32:45 -07:00
self.publisher_filter = flt
2022-05-17 13:57:04 -07:00
def set_hasher_algorithm(self, algo: int) -> None:
self.image_hasher = algo
2022-05-17 13:57:04 -07:00
def set_output_function(self, func: Callable[[str], None]) -> None:
self.output_function = func
def calculate_hash(self, image_data: bytes) -> int:
if self.image_hasher == 3:
return ImageHasher(data=image_data).p_hash()
if self.image_hasher == 2:
return -1 # ImageHasher(data=image_data).average_hash2()
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + 
print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
return ImageHasher(data=image_data).average_hash()
def _crop_double_page(self, im: Image.Image) -> Image.Image | None:
w, h = im.size
try:
cropped_im = im.crop((int(w / 2), 0, w, h))
except Exception:
logger.exception("cropCover() error")
return None
return cropped_im
# Adapted from https://stackoverflow.com/a/10616717/20629671
def _crop_border(self, im: Image.Image, ratio: int) -> Image.Image | None:
    """Crop a near-black border off ``im`` when the border is large enough.

    The image is compared against a solid black canvas. Channel differences
    of 100 or less are discarded (``ImageChops.add(d, d, 2.0, -100)``
    evaluates to ``d - 100``), and the bounding box of what remains is
    taken as the content area.

    Args:
        im: Image to examine.
        ratio: Percentage threshold; the crop happens only when the border
            takes up more than this share of the width or of the height.

    Returns:
        ``im`` cropped to the content area, or ``None`` when neither
        percentage exceeds ``ratio`` (a solid-black image counts as 0 %).
    """
    # Pillow must have been imported for anything below to work.
    assert Image
    assert ImageChops

    # RGBA doesn't work???? -- so always compare in RGB.
    rgb = im.convert("RGB")
    black = Image.new("RGB", rgb.size, "black")
    delta = ImageChops.difference(rgb, black)
    delta = ImageChops.add(delta, delta, 2.0, -100)
    bbox = delta.getbbox()

    if bbox:
        left, upper, right, lower = bbox
        # Share of each dimension taken up by the border, in percent.
        width_percent = int(100 - (((right - left) / im.width) * 100))
        height_percent = int(100 - (((lower - upper) / im.height) * 100))
    else:
        # If bbox is None that should mean it's solid black
        width_percent = height_percent = 0

    ratio_met = width_percent > ratio or height_percent > ratio
    logger.debug(
        "Width: %s Height: %s, ratio: %s %s ratio met: %s",
        im.width,
        im.height,
        width_percent,
        height_percent,
        ratio_met,
    )

    # If there is a difference return the image otherwise return None
    return im.crop(bbox) if ratio_met else None
2022-05-17 13:57:04 -07:00
def set_progress_callback(self, cb_func: Callable[[int, int], None]) -> None:
self.progress_callback = cb_func
2022-05-17 13:57:04 -07:00
def set_cover_url_callback(self, cb_func: Callable[[bytes], None]) -> None:
    """Store ``cb_func`` as the cover callback.

    The callable is kept on ``self.cover_url_callback`` and is later handed
    the raw bytes of each remote cover image that gets fetched.
    """
    self.cover_url_callback = cb_func
def log_msg(self, msg: Any) -> None:
    """Emit ``msg`` through :meth:`output` after flushing the root log handlers.

    ``msg`` is converted with ``str()`` first, so any object may be passed.
    """
    text = str(msg)
    root_logger = logging.getLogger()
    for handler in root_logger.handlers:
        handler.flush()
    self.output(text)
def output(self, *args: Any, file: Any = None, **kwargs: Any) -> None:
    """Record a message in the log and, normally, pass it to the output function.

    Args:
        *args: Message parts. For logging they are joined with single spaces
            after ``str()`` conversion; a leading string has its surrounding
            newlines stripped first.
        file: Accepted and discarded; it is never forwarded.
        **kwargs: Forwarded to ``logger.info`` and to ``self.output_function``.

    NOTE(review): the same ``kwargs`` go to both ``logger.info`` and
    ``self.output_function`` -- confirm callers only pass keywords that both
    accept.
    """
    # Ensure the first item is defined and is a clean string for logger.info
    if args and isinstance(args[0], str):
        log_args: tuple[Any, ...] = (args[0].strip("\n"), *args[1:])
    else:
        log_args = args or ("",)

    # Always send to logger so that we have a record for troubleshooting
    logger.info(" ".join(map(str, log_args)), **kwargs)

    # If we are verbose or quiet we don't need to call the output function
    suppressed = self.config.Runtime_Options__verbose > 0 or self.config.Runtime_Options__quiet
    if not suppressed:
        # default output is stdout
        self.output_function(*args, **kwargs)
def _get_remote_hashes(self, urls: list[str]) -> list[tuple[str, int]]:
    """Fetch every image in ``urls`` and hash it.

    Each fetched image is handed to the cover callback (through
    ``_user_canceled``) before its hash is computed with ``calculate_hash``.

    Returns:
        ``(url, hash)`` pairs in the order of ``urls``.

    Raises:
        IssueIdentifierNetworkError: A fetch raised ``ImageFetcherException``.
        IssueIdentifierCancelled: ``self.cancel`` was set while processing.
    """
    hashed: list[tuple[str, int]] = []
    for cover_url in urls:
        try:
            fetcher = ImageFetcher(self.config.Runtime_Options__config.user_cache_dir)
            image_bytes = fetcher.fetch(cover_url, blocking=True)
        except ImageFetcherException as e:
            self.log_msg(f"Network issue while fetching alt. cover image from {self.talker.name}. Aborting...")
            raise IssueIdentifierNetworkError from e

        self._user_canceled(self.cover_url_callback, image_bytes)
        hashed.append((cover_url, self.calculate_hash(image_bytes)))

        # The callback or hashing may have taken a while; re-check for cancel.
        if self.cancel:
            raise IssueIdentifierCancelled
    return hashed
def _get_issue_cover_match_score(
    self,
    primary_img_url: str,
    alt_urls: list[str],
    local_hashes: list[tuple[str, int]],
    use_alt_urls: bool = False,
) -> Score:
    """Score how closely the local covers match an issue's remote covers.

    Args:
        primary_img_url: URL of the issue's main cover.
        alt_urls: URLs of alternate covers; only used when ``use_alt_urls``.
        local_hashes: Pre-calculated ``(name, hash)`` pairs for local images.
        use_alt_urls: Also compare against the alternate covers.

    Returns:
        The ``Score`` with the smallest Hamming distance among the pairs that
        were compared. When ``primary_img_url`` is empty, a ``Score`` of 100
        with an empty URL is returned without fetching anything.
    """
    # If there is no URL return 100
    if not primary_img_url:
        return Score(score=100, url="", remote_hash=0)

    self._user_canceled()

    urls = [primary_img_url]
    if use_alt_urls:
        urls.extend(alt_urls)
        self.log_msg(f"[{len(alt_urls)} alt. covers]")

    remote_hashes = self._get_remote_hashes(urls)

    # Every local hash against every remote hash, local hashes outermost.
    pairings = ((local, remote) for local in local_hashes for remote in remote_hashes)

    candidates = []
    for (local_name, local_value), (remote_url, remote_value) in pairings:
        distance = ImageHasher.hamming_distance(local_value, remote_value)
        candidates.append(
            Score(
                score=distance,
                url=remote_url,
                remote_hash=remote_value,
                local_hash_name=local_name,
                local_hash=local_value,
            )
        )
        self.log_msg(f" - {distance:03}")

        if distance <= self.strong_score_thresh:
            # such a good score, we can quit now, since for sure we have a winner
            break

    return min(candidates, key=lambda item: item["score"])
def _check_requirements(self, ca: ComicArchive) -> bool:
    """Tell whether identification can be attempted on ``ca``.

    Returns:
        ``True`` when PIL is available and ``ca`` looks like a comic archive;
        otherwise ``False``, after logging the reason.
    """
    if pil_available:
        if ca.seems_to_be_a_comic_archive():
            return True
        self.log_msg(f"Sorry, but {ca.path} is not a comic archive!")
    else:
        self.log_msg("Python Imaging Library (PIL) is not available and is needed for issue identification.")
    return False
def _process_cover(self, name: str, image_data: bytes) -> list[tuple[str, Image.Image]]:
    """Decode a cover image and derive the variants worth hashing.

    Args:
        name: Label given to the unmodified image.
        image_data: Encoded image bytes.

    Returns:
        ``(label, image)`` pairs: always the original under ``name``, then
        ``"double page"`` when the image is wider than tall and could be
        cropped, then ``"black border cropped"`` when ``_crop_border``
        produced an image.
    """
    assert Image

    cover_image = Image.open(io.BytesIO(image_data))
    variants = [(name, cover_image)]

    # check the aspect ratio
    # if it's wider than it is high, it's probably a two page spread (back_cover, front_cover)
    # if so, crop it and calculate a second hash
    if float(cover_image.height) / float(cover_image.width) < 1.0:
        right_half = self._crop_double_page(cover_image)
        if right_half is not None:
            variants.append(("double page", right_half))

    # Check and remove black borders. Helps in identifying comics with an excessive black border like https://comicvine.gamespot.com/marvel-graphic-novel-1-the-death-of-captain-marvel/4000-21782/
    borderless = self._crop_border(cover_image, self.config.Issue_Identifier__border_crop_percent)
    if borderless is not None:
        variants.append(("black border cropped", borderless))

    return variants
def _get_images(self, ca: ComicArchive, md: GenericMetadata) -> list[tuple[str, Image.Image]]:
    """Collect the cover variants for every cover page listed in ``md``.

    Each page index from ``md.get_cover_page_index_list()`` is read from
    ``ca`` and run through ``_process_cover``, labelled with the index as a
    string. Results are concatenated in page order.
    """
    return [
        variant
        for page_index in md.get_cover_page_index_list()
        for variant in self._process_cover(f"{page_index}", ca.get_page(page_index))
    ]
def _get_extra_images(self, ca: ComicArchive, md: GenericMetadata) -> list[tuple[str, Image.Image]]:
    """Collect cover variants for up to two pages after the first one.

    Page indexes 1 and 2 are used when the archive has that many pages; each
    is run through ``_process_cover`` labelled with its index as a string.
    ``md`` is only asserted to be truthy and is otherwise unused.
    """
    assert md
    last_index = min(3, ca.get_number_of_pages())
    return [
        variant
        for page_index in range(1, last_index)
        for variant in self._process_cover(f"{page_index}", ca.get_page(page_index))
    ]
def _get_search_keys(self, md: GenericMetadata) -> Any:
    """Build the ``SearchKeys`` for a search from the metadata in ``md``.

    The issue number and alternate number are normalised through
    ``IssueString(...).as_string()``; every other field is copied as-is.
    """
    return SearchKeys(
        series=md.series,
        issue_number=IssueString(md.issue).as_string(),
        alternate_number=IssueString(md.alternate_number).as_string(),
        month=md.month,
        year=md.year,
        issue_count=md.issue_count,
        alternate_count=md.alternate_count,
        publisher=md.publisher,
        imprint=md.imprint,
    )
def _get_search_terms(
    self, ca: ComicArchive, md: GenericMetadata
) -> tuple[SearchKeys, list[tuple[str, Image.Image]], list[tuple[str, Image.Image]]]:
    """Gather everything a search needs.

    Returns:
        A ``(search keys, cover images, extra images)`` tuple produced by
        ``_get_search_keys``, ``_get_images`` and ``_get_extra_images``, in
        that order.
    """
    keys = self._get_search_keys(md)
    cover_images = self._get_images(ca, md)
    extra_images = self._get_extra_images(ca, md)
    return keys, cover_images, extra_images
def _user_canceled(self, callback: Callable[..., Any] | None = None, *args: Any) -> Any:
    """Abort if the user cancelled, otherwise run an optional callback.

    Args:
        callback: Callable to invoke with ``*args``; may be ``None``.
        *args: Positional arguments for ``callback``.

    Returns:
        Whatever ``callback`` returns, or ``None`` when there is no callback.

    Raises:
        IssueIdentifierCancelled: ``self.cancel`` is set.
    """
    if self.cancel:
        raise IssueIdentifierCancelled
    return None if callback is None else callback(*args)
def _print_terms(self, keys: SearchKeys, images: list[tuple[str, Image.Image]]) -> None:
    """Log the search terms and the names of the cover images in use.

    The series and issue number must be set. Month, year and issue count are
    logged only when they are not ``None``. The alternate number, alternate
    count, publisher and imprint are not logged.
    """
    assert keys["series"]
    assert keys["issue_number"]

    self.log_msg(f"Using {self.talker.name} to search for:")
    self.log_msg("\tSeries: " + keys["series"])
    self.log_msg("\tIssue: " + keys["issue_number"])

    optional_fields = (
        ("\tMonth: ", "month"),
        ("\tYear: ", "year"),
        ("\tCount: ", "issue_count"),
    )
    for label, field in optional_fields:
        value = keys[field]
        if value is not None:
            self.log_msg(label + str(value))

    for cover_name, _ in images:
        self.log_msg("Cover: " + cover_name)

    self.log_msg(f"Searching for {keys['series']} #{keys['issue_number']} ...")
def _filter_series(self, terms: SearchKeys, search_results: list[ComicSeries]) -> list[ComicSeries]:
    """Keep only the series that are plausible matches for ``terms``.

    A series is kept when all of the following hold:

    * its name or one of its aliases matches ``terms["series"]`` according to
      ``utils.titles_match`` with ``self.series_match_thresh``;
    * its publisher, case-folded, is not in ``self.publisher_filter``;
    * it does not start after ``terms["year"]`` (only checked when both years
      are known).

    Rejected series are reported at debug level.
    """
    assert terms["series"]

    accepted = []
    for candidate in search_results:
        # remove any series that starts after the issue year
        starts_too_late = (
            terms["year"] is not None
            and candidate.start_year is not None
            and terms["year"] < candidate.start_year
        )
        date_approved = not starts_too_late

        length_approved = any(
            utils.titles_match(terms["series"], title, self.series_match_thresh)
            for title in [candidate.name, *candidate.aliases]
        )

        # remove any series from publishers on the filter
        publisher_approved = not (
            candidate.publisher is not None and candidate.publisher.casefold() in self.publisher_filter
        )

        if length_approved and publisher_approved and date_approved:
            accepted.append(candidate)
            continue

        logger.debug(
            "Filtered out series: '%s' length approved: '%s', publisher approved: '%s', date approved: '%s'",
            candidate.name,
            length_approved,
            publisher_approved,
            date_approved,
        )
    return accepted
def _calculate_hashes(self, images: list[tuple[str, Image.Image]]) -> list[tuple[str, int]]:
    """Return ``(name, average hash)`` for each ``(name, image)`` pair, in order."""
    return [(label, ImageHasher(image=picture).average_hash()) for label, picture in images]
def _match_covers(
    self,
    terms: SearchKeys,
    images: list[tuple[str, Image.Image]],
    issues: list[tuple[ComicSeries, GenericMetadata]],
    use_alternates: bool,
) -> list[IssueResult]:
    """Score the local images against the cover(s) of every candidate issue.

    Args:
        terms: Search keys; ``issue_number`` must be set and is copied into
            every result.
        images: Local ``(name, image)`` pairs to hash and compare.
        issues: ``(series, issue)`` candidates.
        use_alternates: Also compare against each issue's alternate covers.

    Returns:
        One ``IssueResult`` per candidate, in the order of ``issues``. An
        empty list is returned as soon as scoring any candidate raises.
    """
    assert terms["issue_number"]

    alternate = " Alternate" if use_alternates else ""
    hashes = self._calculate_hashes(images)
    match_results: list[IssueResult] = []

    for position, (series, issue) in enumerate(issues):
        self._user_canceled(self.progress_callback, position, len(issues))

        self.log_msg(
            f"Examining{alternate} covers for Series ID: {series.id} {series.name} ({series.start_year}):",
        )

        # NOTE(review): this broad handler presumably also swallows the
        # cancel/network exceptions raised while fetching covers, if they
        # derive from Exception -- confirm that is intended.
        try:
            image_url = issue._cover_image or ""
            alt_urls = issue._alternate_images
            score_item = self._get_issue_cover_match_score(image_url, alt_urls, hashes, use_alt_urls=use_alternates)
        except Exception:
            logger.exception(f"Scoring series{alternate} covers failed")
            return []

        match = IssueResult(
            series=f"{series.name} ({series.start_year})",
            distance=score_item["score"],
            issue_number=terms["issue_number"],
            issue_count=series.count_of_issues,
            url_image_hash=score_item["remote_hash"],
            issue_title=issue.title or "",
            issue_id=issue.issue_id or "",
            series_id=series.id,
            month=issue.month,
            year=issue.year,
            publisher=series.publisher,
            image_url=image_url,
            alt_image_urls=alt_urls,
            description=issue.description or "",
        )
        match_results.append(match)

        self.log_msg(f"best score {match.distance:03}")
        self.log_msg("")

    return match_results
def _print_match(self, item: IssueResult) -> None:
    """Log a one-line summary of ``item``: series, issue, title, date and score."""
    summary = (
        f"-----> {item.series} #{item.issue_number} {item.issue_title} "
        f"({item.month}/{item.year}) -- score: {item.distance}"
    )
    self.log_msg(summary)
def _search_for_issues(self, terms: SearchKeys) -> list[tuple[ComicSeries, GenericMetadata]]:
    """Find candidate issues for ``terms`` through the talker.

    The talker is searched for the series name, the hits are narrowed with
    ``_filter_series``, and the talker is then asked for the issue with the
    requested number and year in the remaining series.

    Returns:
        ``(series, issue)`` pairs in the order the talker returned the
        issues. An empty list is returned when a talker call raises
        ``TalkerError`` (which is logged) or when any stage yields nothing.
    """

    def report_progress(current: Any, total: Any) -> Any:
        # Raises if the user cancelled, otherwise relays progress.
        return self._user_canceled(self.progress_callback, current, total)

    try:
        search_results = self.talker.search_for_series(
            terms["series"],
            callback=report_progress,
            series_match_thresh=self.config.Issue_Identifier__series_match_search_thresh,
        )
    except TalkerError as e:
        self.log_msg(f"Error searching for series.\n{e}")
        return []
    if not search_results:
        return []

    filtered_series = self._filter_series(terms, search_results)
    if not filtered_series:
        return []

    self.log_msg(f"Searching in {len(filtered_series)} series")
    self._user_canceled(self.progress_callback, 0, len(filtered_series))

    series_by_id = {series.id: series for series in filtered_series}
    try:
        talker_result = self.talker.fetch_issues_by_series_issue_num_and_year(
            list(series_by_id), terms["issue_number"], terms["year"]
        )
    except TalkerError as e:
        self.log_msg(f"Issue with while searching for series details. Aborting...\n{e}")
        return []
    if not talker_result:
        return []

    self._user_canceled(self.progress_callback, 0, 0)

    # now re-associate the issues and series
    issues: list[tuple[ComicSeries, GenericMetadata]] = []
    for issue in talker_result:
        if issue.series_id not in series_by_id:
            logger.warning("Talker '%s' is returning arbitrary series when searching by id", self.talker.id)
            continue
        issues.append((series_by_id[issue.series_id], issue))
    return issues
def _cover_matching(
    self,
    terms: SearchKeys,
    images: list[tuple[str, Image.Image]],
    extra_images: list[tuple[str, Image.Image]],
    issues: list[tuple[ComicSeries, GenericMetadata]],
) -> list[IssueResult]:
    """Match covers in up to two rounds and return the best-scoring results.

    Round one compares ``images`` with each issue's primary cover. When even
    the best distance is at or above ``self.min_score_thresh``, round two
    compares ``images + extra_images`` with primary and alternate covers and
    keeps results whose distance is below ``self.min_alternate_score_thresh``.

    Args:
        terms: Search keys, passed through to ``_match_covers``.
        images: Local cover images used in both rounds.
        extra_images: Additional local pages, used only in round two.
        issues: ``(series, issue)`` candidates.

    Returns:
        Results sorted by distance, limited to those within
        ``self.min_score_distance`` of the best one. When round one finds
        nothing, or round two finds nothing acceptable, the round-one list is
        returned and ``self.search_result`` is set to the matching result
        code.
    """
    separator = "--------------------------------------------------------------------------"

    cover_matching_1 = self._match_covers(terms, images, issues, use_alternates=False)
    if not cover_matching_1:
        self.log_msg(":-( no matches!")
        self.search_result = self.result_no_matches
        return cover_matching_1

    # sort list by image match scores
    cover_matching_1.sort(key=attrgetter("distance"))
    distances = [match.distance for match in cover_matching_1]
    self.log_msg(f"Compared to covers in {len(cover_matching_1)} issue(s): {distances}")

    final_cover_matching = cover_matching_1
    if cover_matching_1[0].distance >= self.min_score_thresh:
        # we have 1 or more low-confidence matches (all bad cover scores)
        # look at a few more pages in the archive, and also alternate covers online
        self.log_msg("Very weak scores for the cover. Analyzing alternate pages and covers...")
        second_round = self._match_covers(terms, images + extra_images, issues, use_alternates=True)
        cover_matching_2 = [match for match in second_round if match.distance < self.min_alternate_score_thresh]

        if not cover_matching_2:
            if len(cover_matching_1) == 1:
                self.log_msg("No matching pages in the issue.")
                self.log_msg(separator)
                self._print_match(cover_matching_1[0])
                self.log_msg(separator)
                self.search_result = self.result_found_match_but_bad_cover_score
            else:
                self.log_msg(separator)
                self.log_msg("Multiple bad cover matches! Need to use other info...")
                self.log_msg(separator)
                self.search_result = self.result_multiple_matches_with_bad_image_scores
            return cover_matching_1

        # We did good, found something!
        self.log_msg("Success in secondary/alternate cover matching!")

        # sort new list by image match scores
        final_cover_matching = sorted(cover_matching_2, key=attrgetter("distance"))
        # The best score has to be read after sorting and interpolated with
        # an f-string; a plain string would log the placeholder text itself.
        self.log_msg(f"[Second round cover matching: best score = {final_cover_matching[0].distance}]")
        # now drop down into the rest of the processing

    best_score = final_cover_matching[0].distance
    # now pare down list, remove any item more than specified distant from the top scores
    # (built as a new list so nothing is removed from a list while iterating it)
    cutoff = best_score + self.min_score_distance
    return [match for match in final_cover_matching if match.distance <= cutoff]
def identify(self, ca: ComicArchive, md: GenericMetadata) -> tuple[int, list[IssueResult]]:
    """Try to identify the issue contained in ``ca`` using the metadata ``md``.

    Args:
        ca: The comic archive to identify.
        md: Metadata supplying the search terms; at minimum a series and an
            issue number are needed.

    Returns:
        ``(result code, matches)``. The code is ``self.result_one_good_match``
        for exactly one match, ``self.result_no_matches`` for none (also used
        when the requirements or the minimum search terms are not met), and
        ``self.result_multiple_good_matches`` otherwise.

    NOTE(review): ``_cover_matching`` records low-confidence outcomes on
    ``self.search_result``, but the code returned here is derived only from
    how many matches remain -- confirm whether those outcomes should be
    reflected in the returned code.
    """
    separator = "--------------------------------------------------------------------------"

    if not self._check_requirements(ca):
        return self.result_no_matches, []

    terms, images, extra_images = self._get_search_terms(ca, md)

    # we need, at minimum, a series and issue number
    if not (terms["series"] and terms["issue_number"]):
        self.log_msg("Not enough info for a search!")
        return self.result_no_matches, []

    self._print_terms(terms, images)

    issues = self._search_for_issues(terms)
    self.log_msg(f"Found {len(issues)} series that have an issue #{terms['issue_number']}")

    final_cover_matching = self._cover_matching(terms, images, extra_images, issues)

    # One more test for the case choosing limited series first issue vs a trade with the same cover:
    # if we have a given issue count > 1 and the series from CV has count==1, remove it from match list
    if len(final_cover_matching) > 1 and terms["issue_count"] is not None and terms["issue_count"] != 1:
        kept = []
        for match in final_cover_matching:
            if match.issue_count == 1:
                self.log_msg(
                    f"Removing series {match.series} [{match.series_id}] from consideration (only 1 issue)"
                )
            else:
                kept.append(match)
        final_cover_matching = kept

    if len(final_cover_matching) == 1:
        self.log_msg(separator)
        self._print_match(final_cover_matching[0])
        self.log_msg(separator)
        search_result = self.result_one_good_match
    elif not final_cover_matching:
        # Decide on the list that is actually returned; nothing in this flow
        # populates self.match_list, so testing it would misreport results.
        self.log_msg(separator)
        self.log_msg("No matches found :(")
        self.log_msg(separator)
        search_result = self.result_no_matches
    else:
        # we've got multiple good matches:
        self.log_msg("More than one likely candidate.")
        search_result = self.result_multiple_good_matches
        self.log_msg(separator)
        for match_item in final_cover_matching:
            self._print_match(match_item)
        self.log_msg(separator)

    return search_result, final_cover_matching