Merge branch 'mizaki/image_urls_hashes' into develop

This commit is contained in:
Timmy Welch 2025-02-26 21:28:39 -08:00
commit 8837fea957
6 changed files with 141 additions and 32 deletions

View File

@ -136,6 +136,14 @@ class MetadataOrigin(NamedTuple):
return self.name
class ImageHash(NamedTuple):
    """A precomputed image hash paired with the name of the algorithm that produced it.

    Instances are non-empty tuples, so they are always truthy, even when
    ``Hash`` is ``0``.
    """

    Hash: int
    Kind: str  # ahash, phash

    def __str__(self) -> str:
        """Return the hash and its kind formatted as ``"<Hash>: <Kind>"``."""
        return f"{self.Hash}: {self.Kind}"
@dataclasses.dataclass
class GenericMetadata:
writer_synonyms = ("writer", "plotter", "scripter", "script")
@ -202,8 +210,8 @@ class GenericMetadata:
last_mark: str | None = None
# urls to cover image, not generally part of the metadata
_cover_image: str | None = None
_alternate_images: list[str] = dataclasses.field(default_factory=list)
_cover_image: str | ImageHash | None = None
_alternate_images: list[str | ImageHash] = dataclasses.field(default_factory=list)
def __post_init__(self) -> None:
for key, value in self.__dict__.items():

View File

@ -639,7 +639,7 @@ class CLI:
month=ct_md.month,
year=ct_md.year,
publisher=None,
image_url=ct_md._cover_image or "",
image_url=str(ct_md._cover_image) or "",
alt_image_urls=[],
description=ct_md.description or "",
)

View File

@ -25,7 +25,7 @@ from typing_extensions import NotRequired, TypedDict
from comicapi import utils
from comicapi.comicarchive import ComicArchive
from comicapi.genericmetadata import ComicSeries, GenericMetadata
from comicapi.genericmetadata import ComicSeries, GenericMetadata, ImageHash
from comicapi.issuestring import IssueString
from comictaggerlib.ctsettings import ct_ns
from comictaggerlib.imagefetcher import ImageFetcher, ImageFetcherException
@ -132,13 +132,13 @@ class IssueIdentifier:
def set_cover_url_callback(self, cb_func: Callable[[bytes], None]) -> None:
self.cover_url_callback = cb_func
def calculate_hash(self, image_data: bytes = b"", image: Image.Image | None = None) -> int:
    """Hash a cover using the algorithm selected by ``self.image_hasher``.

    Args:
        image_data: Raw image bytes, forwarded to ``ImageHasher`` as ``data``.
        image: An already-loaded image, forwarded to ``ImageHasher`` as ``image``.

    Returns:
        The perceptual hash when ``self.image_hasher`` is 3, ``-1`` when it is 2,
        and the average hash for any other value.
    """
    if self.image_hasher == 2:
        # The average_hash2 implementation is not wired up here (the call was
        # left commented out), so -1 is returned as a placeholder.
        return -1

    hasher = ImageHasher(data=image_data, image=image)
    if self.image_hasher == 3:
        return hasher.p_hash()
    return hasher.average_hash()
def log_msg(self, msg: Any) -> None:
msg = str(msg)
@ -306,8 +306,8 @@ class IssueIdentifier:
def _get_issue_cover_match_score(
self,
primary_img_url: str,
alt_urls: list[str],
primary_img_url: str | ImageHash,
alt_urls: list[str | ImageHash],
local_hashes: list[tuple[str, int]],
use_alt_urls: bool = False,
) -> Score:
@ -316,16 +316,25 @@ class IssueIdentifier:
# If there is no URL return 100
if not primary_img_url:
return Score(score=100, url="", remote_hash=0)
return Score(score=100, url="", remote_hash=0, local_hash=0, local_hash_name="0")
self._user_canceled()
urls = [primary_img_url]
if use_alt_urls:
urls.extend(alt_urls)
self.log_msg(f"[{len(alt_urls)} alt. covers]")
remote_hashes = []
# If the cover is ImageHash and the alternate covers are URLs, the alts will not be hashed/checked currently
if isinstance(primary_img_url, ImageHash):
# ImageHash doesn't have a url so we just give it an empty string
remote_hashes.append(("", primary_img_url.Hash))
if use_alt_urls and alt_urls:
remote_hashes.extend(("", alt_hash.Hash) for alt_hash in alt_urls if isinstance(alt_hash, ImageHash))
else:
urls = [primary_img_url]
if use_alt_urls:
only_urls = [url for url in alt_urls if isinstance(url, str)]
urls.extend(only_urls)
self.log_msg(f"[{len(only_urls)} alt. covers]")
remote_hashes = self._get_remote_hashes(urls)
remote_hashes = self._get_remote_hashes(urls)
score_list = []
done = False
@ -490,7 +499,7 @@ class IssueIdentifier:
def _calculate_hashes(self, images: list[tuple[str, Image.Image]]) -> list[tuple[str, int]]:
    """Hash every named image with the currently selected hashing algorithm.

    Hashing goes through ``self.calculate_hash`` so the result follows
    ``self.image_hasher`` instead of always being an average hash.

    Args:
        images: ``(name, image)`` pairs to hash.

    Returns:
        ``(name, hash)`` pairs in the same order as ``images``; an empty list
        when ``images`` is empty.
    """
    return [(name, self.calculate_hash(image=image)) for name, image in images]
def _match_covers(
@ -516,10 +525,14 @@ class IssueIdentifier:
)
try:
image_url = issue._cover_image or ""
alt_urls = issue._alternate_images
image_url = issue._cover_image if isinstance(issue._cover_image, str) else ""
# We only include urls in the IssueResult so we don't have to deal with it down the line
# TODO: display the hash to the user so they know a direct hash was used instead of downloading an image
alt_urls: list[str] = [url for url in issue._alternate_images if isinstance(url, str)]
score_item = self._get_issue_cover_match_score(image_url, alt_urls, hashes, use_alt_urls=use_alternates)
score_item = self._get_issue_cover_match_score(
image_url, issue._alternate_images, hashes, use_alt_urls=use_alternates
)
except Exception:
logger.exception(f"Scoring series{alternate} covers failed")
return []
@ -620,6 +633,16 @@ class IssueIdentifier:
extra_images: list[tuple[str, Image.Image]],
issues: list[tuple[ComicSeries, GenericMetadata]],
) -> list[IssueResult]:
# Set hashing kind, will presume all hashes are of the same kind
for series, issue in issues:
if isinstance(issue._cover_image, ImageHash):
if issue._cover_image.Kind == "phash":
self.image_hasher = 3
break
elif issue._cover_image.Kind == "ahash":
self.image_hasher = 1 # Set to 1 on init but might as well be sure
break
cover_matching_1 = self._match_covers(terms, images, issues, use_alternates=False)
if len(cover_matching_1) == 0:

View File

@ -221,7 +221,10 @@ class IssueSelectionWindow(QtWidgets.QDialog):
QtWidgets.QApplication.restoreOverrideCursor()
self.issue_number = issue.issue or ""
self.coverWidget.set_issue_details(self.issue_id, [issue._cover_image or "", *issue._alternate_images])
# We don't currently have a way to display hashes to the user
# TODO: display the hash to the user so they know it will be used for cover matching
alt_images = [url for url in issue._alternate_images if isinstance(url, str)]
self.coverWidget.set_issue_details(self.issue_id, [str(issue._cover_image) or "", *alt_images])
if issue.description is None:
self.set_description(self.teDescription, "")
else:

View File

@ -288,3 +288,76 @@ metadata_prepared = (
),
),
)
# Hash that the expectations below record for the local cover named "Cover 1".
_COVER_1_HASH = 212201432349720
_COVER_1_IMAGE_URL = (
    "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg"
)
_ISSUE_PAGE_URL = "https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/"

# Each case is ((primary cover, alternate covers, use alternates), expected score).
issueidentifier_score = (
    # Primary hash of 0 forces a look at the alternates; the only alternate is a
    # URL, which is ignored when the primary cover is an ImageHash.
    (
        (
            comicapi.genericmetadata.ImageHash(Hash=0, Kind="ahash"),
            [_ISSUE_PAGE_URL],
            True,
        ),
        {
            "remote_hash": 0,
            "score": 31,
            "url": "",
            "local_hash": _COVER_1_HASH,
            "local_hash_name": "Cover 1",
        },
    ),
    # Primary hash of 0 with an alternate ImageHash equal to the local cover hash.
    (
        (
            comicapi.genericmetadata.ImageHash(Hash=0, Kind="ahash"),
            [comicapi.genericmetadata.ImageHash(Hash=_COVER_1_HASH, Kind="ahash")],
            True,
        ),
        {
            "remote_hash": _COVER_1_HASH,
            "score": 0,
            "url": "",
            "local_hash": _COVER_1_HASH,
            "local_hash_name": "Cover 1",
        },
    ),
    # Primary ImageHash equal to the local cover hash; alternates disabled.
    (
        (
            comicapi.genericmetadata.ImageHash(Hash=_COVER_1_HASH, Kind="ahash"),
            [_ISSUE_PAGE_URL],
            False,
        ),
        {
            "remote_hash": _COVER_1_HASH,
            "score": 0,
            "url": "",
            "local_hash": _COVER_1_HASH,
            "local_hash_name": "Cover 1",
        },
    ),
    # Primary cover given as a plain URL; alternates disabled.
    (
        (
            _COVER_1_IMAGE_URL,
            [_ISSUE_PAGE_URL],
            False,
        ),
        {
            "remote_hash": _COVER_1_HASH,
            "score": 0,
            "url": _COVER_1_IMAGE_URL,
            "local_hash": _COVER_1_HASH,
            "local_hash_name": "Cover 1",
        },
    ),
)

View File

@ -9,6 +9,7 @@ import comictaggerlib.imagehasher
import comictaggerlib.issueidentifier
import testing.comicdata
import testing.comicvine
from comicapi.genericmetadata import ImageHash
from comictaggerlib.resulttypes import IssueResult
@ -36,21 +37,22 @@ def test_get_search_keys(cbz, config, additional_md, expected, comicvine_api):
assert expected == ii._get_search_keys(additional_md)
@pytest.mark.parametrize("data, expected", testing.comicdata.issueidentifier_score)
def test_get_issue_cover_match_score(
    cbz,
    config,
    comicvine_api,
    data: tuple[str | ImageHash, list[str | ImageHash], bool],
    expected: comictaggerlib.issueidentifier.Score,
):
    """Check cover-match scoring for each case in ``issueidentifier_score``.

    ``data`` is ``(primary cover, alternate covers, use alternates)``; a cover
    may be a URL string or an ``ImageHash``. The local side is always the hash
    of the first page of ``cbz``, labelled "Cover 1".
    """
    config, definitions = config
    primary_cover, alternate_covers, use_alternates = data
    ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz, config, comicvine_api)

    score = ii._get_issue_cover_match_score(
        primary_img_url=primary_cover,
        alt_urls=alternate_covers,
        local_hashes=[("Cover 1", ii.calculate_hash(cbz.get_page(0)))],
        use_alt_urls=use_alternates,
    )

    assert expected == score