From c3f5badc7d3d071b842965b9b3a73ef50671e892 Mon Sep 17 00:00:00 2001
From: Mizaki
Date: Tue, 11 Feb 2025 01:03:12 +0000
Subject: [PATCH] Use source hashes for cover matching

Allow GenericMetadata._cover_image to carry an ImageHash (an integer hash
plus its kind, "ahash" or "phash") instead of a cover URL.

When an ImageHash is supplied, IssueIdentifier compares the local cover
hashes against it directly instead of passing URLs to _get_remote_hashes.
Before matching, the hasher is switched to the kind reported by the first
issue that carries an ImageHash; all hashes are presumed to be of the same
kind. Local cover hashes are now computed through calculate_hash so they
follow the selected hasher.

---
Review notes (text in this section is ignored by `git am`):

 * cli.py: `str(x) or ""` never falls back, because str(None) is the
   truthy string "None"; the fallback is now applied before conversion.
 * issueidentifier.py: the "ahash" branch compared the ImageHash tuple
   itself to a string and could never match; it now compares `.Kind`.
 * issueidentifier.py: `calculate_hash` annotated `image` with `Image`
   instead of `Image.Image`, the type used elsewhere in this file, and did
   not mark it optional.
 * Indentation and blank context lines in this copy were reconstructed
   from a whitespace-collapsed source; verify them against the target tree
   (or apply with `git apply --ignore-whitespace`) before merging.

 comicapi/genericmetadata.py            | 10 ++++++-
 comictaggerlib/cli.py                  |  2 +-
 comictaggerlib/issueidentifier.py      | 39 +++++++++++++++++---------
 comictaggerlib/issueselectionwindow.py |  5 +++-
 4 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/comicapi/genericmetadata.py b/comicapi/genericmetadata.py
index 6713e8e..7315195 100644
--- a/comicapi/genericmetadata.py
+++ b/comicapi/genericmetadata.py
@@ -136,6 +136,14 @@ class MetadataOrigin(NamedTuple):
         return self.name
 
 
+class ImageHash(NamedTuple):
+    Hash: int
+    Kind: str  # ahash, phash
+
+    def __str__(self) -> str:
+        return str(self.Hash) + ": " + self.Kind
+
+
 @dataclasses.dataclass
 class GenericMetadata:
     writer_synonyms = ("writer", "plotter", "scripter", "script")
@@ -202,7 +210,7 @@ class GenericMetadata:
     last_mark: str | None = None
 
     # urls to cover image, not generally part of the metadata
-    _cover_image: str | None = None
+    _cover_image: str | ImageHash | None = None
     _alternate_images: list[str] = dataclasses.field(default_factory=list)
 
     def __post_init__(self) -> None:
diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py
index 94965a4..0cbf876 100644
--- a/comictaggerlib/cli.py
+++ b/comictaggerlib/cli.py
@@ -639,7 +639,7 @@ class CLI:
                         month=ct_md.month,
                         year=ct_md.year,
                         publisher=None,
-                        image_url=ct_md._cover_image or "",
+                        image_url=str(ct_md._cover_image or ""),
                         alt_image_urls=[],
                         description=ct_md.description or "",
                     )
diff --git a/comictaggerlib/issueidentifier.py b/comictaggerlib/issueidentifier.py
index 8605d20..bf9bc3d 100644
--- a/comictaggerlib/issueidentifier.py
+++ b/comictaggerlib/issueidentifier.py
@@ -25,7 +25,7 @@ from typing_extensions import NotRequired, TypedDict
 
 from comicapi import utils
 from comicapi.comicarchive import ComicArchive
-from comicapi.genericmetadata import ComicSeries, GenericMetadata
+from comicapi.genericmetadata import ComicSeries, GenericMetadata, ImageHash
 from comicapi.issuestring import IssueString
 from comictaggerlib.ctsettings import ct_ns
 from comictaggerlib.imagefetcher import ImageFetcher, ImageFetcherException
@@ -132,13 +132,13 @@ class IssueIdentifier:
     def set_cover_url_callback(self, cb_func: Callable[[bytes], None]) -> None:
         self.cover_url_callback = cb_func
 
-    def calculate_hash(self, image_data: bytes) -> int:
+    def calculate_hash(self, image_data: bytes = b"", image: Image.Image | None = None) -> int:
         if self.image_hasher == 3:
-            return ImageHasher(data=image_data).p_hash()
+            return ImageHasher(data=image_data, image=image).p_hash()
         if self.image_hasher == 2:
-            return -1  # ImageHasher(data=image_data).average_hash2()
+            return -1  # ImageHasher(data=image_data, image=image).average_hash2()
 
-        return ImageHasher(data=image_data).average_hash()
+        return ImageHasher(data=image_data, image=image).average_hash()
 
     def log_msg(self, msg: Any) -> None:
         msg = str(msg)
@@ -306,7 +306,7 @@ class IssueIdentifier:
 
     def _get_issue_cover_match_score(
         self,
-        primary_img_url: str,
+        primary_img_url: str | ImageHash,
         alt_urls: list[str],
         local_hashes: list[tuple[str, int]],
         use_alt_urls: bool = False,
@@ -320,12 +320,15 @@ class IssueIdentifier:
 
         self._user_canceled()
 
-        urls = [primary_img_url]
-        if use_alt_urls:
-            urls.extend(alt_urls)
-            self.log_msg(f"[{len(alt_urls)} alt. covers]")
+        if isinstance(primary_img_url, ImageHash):
+            remote_hashes = [("0", primary_img_url.Hash)]
+        else:
+            urls = [primary_img_url]
+            if use_alt_urls:
+                urls.extend(alt_urls)
+                self.log_msg(f"[{len(alt_urls)} alt. covers]")
 
-        remote_hashes = self._get_remote_hashes(urls)
+            remote_hashes = self._get_remote_hashes(urls)
 
         score_list = []
         done = False
@@ -490,7 +493,7 @@ class IssueIdentifier:
     def _calculate_hashes(self, images: list[tuple[str, Image.Image]]) -> list[tuple[str, int]]:
         hashes = []
         for name, image in images:
-            hashes.append((name, ImageHasher(image=image).average_hash()))
+            hashes.append((name, self.calculate_hash(image=image)))
         return hashes
 
     def _match_covers(
@@ -536,7 +539,7 @@ class IssueIdentifier:
                 month=issue.month,
                 year=issue.year,
                 publisher=None,
-                image_url=image_url,
+                image_url=str(image_url),
                 alt_image_urls=alt_urls,
                 description=issue.description or "",
             )
@@ -620,6 +623,16 @@ class IssueIdentifier:
         extra_images: list[tuple[str, Image.Image]],
         issues: list[tuple[ComicSeries, GenericMetadata]],
     ) -> list[IssueResult]:
+        # Set hashing kind, will presume all hashes are of the same kind
+        for series, issue in issues:
+            if isinstance(issue._cover_image, ImageHash):
+                if issue._cover_image.Kind == "phash":
+                    self.image_hasher = 3
+                    break
+                elif issue._cover_image.Kind == "ahash":
+                    self.image_hasher = 1  # Set to 1 on init but might as well be sure
+                    break
+
         cover_matching_1 = self._match_covers(terms, images, issues, use_alternates=False)
 
         if len(cover_matching_1) == 0:
diff --git a/comictaggerlib/issueselectionwindow.py b/comictaggerlib/issueselectionwindow.py
index c2b14ae..1ef2b2c 100644
--- a/comictaggerlib/issueselectionwindow.py
+++ b/comictaggerlib/issueselectionwindow.py
@@ -221,7 +221,10 @@ class IssueSelectionWindow(QtWidgets.QDialog):
         QtWidgets.QApplication.restoreOverrideCursor()
 
         self.issue_number = issue.issue or ""
-        self.coverWidget.set_issue_details(self.issue_id, [issue._cover_image or "", *issue._alternate_images])
+        cover_image = ""
+        if isinstance(issue._cover_image, str):
+            cover_image = issue._cover_image
+        self.coverWidget.set_issue_details(self.issue_id, [cover_image, *issue._alternate_images])
         if issue.description is None:
             self.set_description(self.teDescription, "")
         else: