Merge branch 'mizaki/image_urls_hashes' into develop
This commit is contained in:
commit
8837fea957
@ -136,6 +136,14 @@ class MetadataOrigin(NamedTuple):
|
||||
return self.name
|
||||
|
||||
|
||||
class ImageHash(NamedTuple):
|
||||
Hash: int
|
||||
Kind: str # ahash, phash
|
||||
|
||||
def __str__(self) -> str:
|
||||
return str(self.Hash) + ": " + self.Kind
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class GenericMetadata:
|
||||
writer_synonyms = ("writer", "plotter", "scripter", "script")
|
||||
@ -202,8 +210,8 @@ class GenericMetadata:
|
||||
last_mark: str | None = None
|
||||
|
||||
# urls to cover image, not generally part of the metadata
|
||||
_cover_image: str | None = None
|
||||
_alternate_images: list[str] = dataclasses.field(default_factory=list)
|
||||
_cover_image: str | ImageHash | None = None
|
||||
_alternate_images: list[str | ImageHash] = dataclasses.field(default_factory=list)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
for key, value in self.__dict__.items():
|
||||
|
@ -639,7 +639,7 @@ class CLI:
|
||||
month=ct_md.month,
|
||||
year=ct_md.year,
|
||||
publisher=None,
|
||||
image_url=ct_md._cover_image or "",
|
||||
image_url=str(ct_md._cover_image) or "",
|
||||
alt_image_urls=[],
|
||||
description=ct_md.description or "",
|
||||
)
|
||||
|
@ -25,7 +25,7 @@ from typing_extensions import NotRequired, TypedDict
|
||||
|
||||
from comicapi import utils
|
||||
from comicapi.comicarchive import ComicArchive
|
||||
from comicapi.genericmetadata import ComicSeries, GenericMetadata
|
||||
from comicapi.genericmetadata import ComicSeries, GenericMetadata, ImageHash
|
||||
from comicapi.issuestring import IssueString
|
||||
from comictaggerlib.ctsettings import ct_ns
|
||||
from comictaggerlib.imagefetcher import ImageFetcher, ImageFetcherException
|
||||
@ -132,13 +132,13 @@ class IssueIdentifier:
|
||||
def set_cover_url_callback(self, cb_func: Callable[[bytes], None]) -> None:
|
||||
self.cover_url_callback = cb_func
|
||||
|
||||
def calculate_hash(self, image_data: bytes) -> int:
|
||||
def calculate_hash(self, image_data: bytes = b"", image: Image = None) -> int:
|
||||
if self.image_hasher == 3:
|
||||
return ImageHasher(data=image_data).p_hash()
|
||||
return ImageHasher(data=image_data, image=image).p_hash()
|
||||
if self.image_hasher == 2:
|
||||
return -1 # ImageHasher(data=image_data).average_hash2()
|
||||
return -1 # ImageHasher(data=image_data, image=image).average_hash2()
|
||||
|
||||
return ImageHasher(data=image_data).average_hash()
|
||||
return ImageHasher(data=image_data, image=image).average_hash()
|
||||
|
||||
def log_msg(self, msg: Any) -> None:
|
||||
msg = str(msg)
|
||||
@ -306,8 +306,8 @@ class IssueIdentifier:
|
||||
|
||||
def _get_issue_cover_match_score(
|
||||
self,
|
||||
primary_img_url: str,
|
||||
alt_urls: list[str],
|
||||
primary_img_url: str | ImageHash,
|
||||
alt_urls: list[str | ImageHash],
|
||||
local_hashes: list[tuple[str, int]],
|
||||
use_alt_urls: bool = False,
|
||||
) -> Score:
|
||||
@ -316,16 +316,25 @@ class IssueIdentifier:
|
||||
|
||||
# If there is no URL return 100
|
||||
if not primary_img_url:
|
||||
return Score(score=100, url="", remote_hash=0)
|
||||
return Score(score=100, url="", remote_hash=0, local_hash=0, local_hash_name="0")
|
||||
|
||||
self._user_canceled()
|
||||
|
||||
urls = [primary_img_url]
|
||||
if use_alt_urls:
|
||||
urls.extend(alt_urls)
|
||||
self.log_msg(f"[{len(alt_urls)} alt. covers]")
|
||||
remote_hashes = []
|
||||
# If the cover is ImageHash and the alternate covers are URLs, the alts will not be hashed/checked currently
|
||||
if isinstance(primary_img_url, ImageHash):
|
||||
# ImageHash doesn't have a url so we just give it an empty string
|
||||
remote_hashes.append(("", primary_img_url.Hash))
|
||||
if use_alt_urls and alt_urls:
|
||||
remote_hashes.extend(("", alt_hash.Hash) for alt_hash in alt_urls if isinstance(alt_hash, ImageHash))
|
||||
else:
|
||||
urls = [primary_img_url]
|
||||
if use_alt_urls:
|
||||
only_urls = [url for url in alt_urls if isinstance(url, str)]
|
||||
urls.extend(only_urls)
|
||||
self.log_msg(f"[{len(only_urls)} alt. covers]")
|
||||
|
||||
remote_hashes = self._get_remote_hashes(urls)
|
||||
remote_hashes = self._get_remote_hashes(urls)
|
||||
|
||||
score_list = []
|
||||
done = False
|
||||
@ -490,7 +499,7 @@ class IssueIdentifier:
|
||||
def _calculate_hashes(self, images: list[tuple[str, Image.Image]]) -> list[tuple[str, int]]:
|
||||
hashes = []
|
||||
for name, image in images:
|
||||
hashes.append((name, ImageHasher(image=image).average_hash()))
|
||||
hashes.append((name, self.calculate_hash(image=image)))
|
||||
return hashes
|
||||
|
||||
def _match_covers(
|
||||
@ -516,10 +525,14 @@ class IssueIdentifier:
|
||||
)
|
||||
|
||||
try:
|
||||
image_url = issue._cover_image or ""
|
||||
alt_urls = issue._alternate_images
|
||||
image_url = issue._cover_image if isinstance(issue._cover_image, str) else ""
|
||||
# We only include urls in the IssueResult so we don't have to deal with it down the line
|
||||
# TODO: display the hash to the user so they know a direct hash was used instead of downloading an image
|
||||
alt_urls: list[str] = [url for url in issue._alternate_images if isinstance(url, str)]
|
||||
|
||||
score_item = self._get_issue_cover_match_score(image_url, alt_urls, hashes, use_alt_urls=use_alternates)
|
||||
score_item = self._get_issue_cover_match_score(
|
||||
image_url, issue._alternate_images, hashes, use_alt_urls=use_alternates
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(f"Scoring series{alternate} covers failed")
|
||||
return []
|
||||
@ -620,6 +633,16 @@ class IssueIdentifier:
|
||||
extra_images: list[tuple[str, Image.Image]],
|
||||
issues: list[tuple[ComicSeries, GenericMetadata]],
|
||||
) -> list[IssueResult]:
|
||||
# Set hashing kind, will presume all hashes are of the same kind
|
||||
for series, issue in issues:
|
||||
if isinstance(issue._cover_image, ImageHash):
|
||||
if issue._cover_image.Kind == "phash":
|
||||
self.image_hasher = 3
|
||||
break
|
||||
elif issue._cover_image.Kind == "ahash":
|
||||
self.image_hasher = 1 # Set to 1 on init but might as well be sure
|
||||
break
|
||||
|
||||
cover_matching_1 = self._match_covers(terms, images, issues, use_alternates=False)
|
||||
|
||||
if len(cover_matching_1) == 0:
|
||||
|
@ -221,7 +221,10 @@ class IssueSelectionWindow(QtWidgets.QDialog):
|
||||
QtWidgets.QApplication.restoreOverrideCursor()
|
||||
|
||||
self.issue_number = issue.issue or ""
|
||||
self.coverWidget.set_issue_details(self.issue_id, [issue._cover_image or "", *issue._alternate_images])
|
||||
# We don't currently have a way to display hashes to the user
|
||||
# TODO: display the hash to the user so they know it will be used for cover matching
|
||||
alt_images = [url for url in issue._alternate_images if isinstance(url, str)]
|
||||
self.coverWidget.set_issue_details(self.issue_id, [str(issue._cover_image) or "", *alt_images])
|
||||
if issue.description is None:
|
||||
self.set_description(self.teDescription, "")
|
||||
else:
|
||||
|
@ -288,3 +288,76 @@ metadata_prepared = (
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
issueidentifier_score = (
|
||||
(
|
||||
(
|
||||
comicapi.genericmetadata.ImageHash(
|
||||
Hash=0, # Force using the alternate, since the alternate is a url it will be ignored
|
||||
Kind="ahash",
|
||||
),
|
||||
["https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/"],
|
||||
True,
|
||||
),
|
||||
{
|
||||
"remote_hash": 0,
|
||||
"score": 31,
|
||||
"url": "",
|
||||
"local_hash": 212201432349720,
|
||||
"local_hash_name": "Cover 1",
|
||||
},
|
||||
),
|
||||
(
|
||||
(
|
||||
comicapi.genericmetadata.ImageHash(
|
||||
Hash=0,
|
||||
Kind="ahash",
|
||||
),
|
||||
[
|
||||
comicapi.genericmetadata.ImageHash(
|
||||
Hash=212201432349720,
|
||||
Kind="ahash",
|
||||
),
|
||||
],
|
||||
True,
|
||||
),
|
||||
{
|
||||
"remote_hash": 212201432349720,
|
||||
"score": 0,
|
||||
"url": "",
|
||||
"local_hash": 212201432349720,
|
||||
"local_hash_name": "Cover 1",
|
||||
},
|
||||
),
|
||||
(
|
||||
(
|
||||
comicapi.genericmetadata.ImageHash(
|
||||
Hash=212201432349720,
|
||||
Kind="ahash",
|
||||
),
|
||||
["https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/"],
|
||||
False,
|
||||
),
|
||||
{
|
||||
"remote_hash": 212201432349720,
|
||||
"score": 0,
|
||||
"url": "",
|
||||
"local_hash": 212201432349720,
|
||||
"local_hash_name": "Cover 1",
|
||||
},
|
||||
),
|
||||
(
|
||||
(
|
||||
"https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg",
|
||||
["https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/"],
|
||||
False,
|
||||
),
|
||||
{
|
||||
"remote_hash": 212201432349720,
|
||||
"score": 0,
|
||||
"url": "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg",
|
||||
"local_hash": 212201432349720,
|
||||
"local_hash_name": "Cover 1",
|
||||
},
|
||||
),
|
||||
)
|
||||
|
@ -9,6 +9,7 @@ import comictaggerlib.imagehasher
|
||||
import comictaggerlib.issueidentifier
|
||||
import testing.comicdata
|
||||
import testing.comicvine
|
||||
from comicapi.genericmetadata import ImageHash
|
||||
from comictaggerlib.resulttypes import IssueResult
|
||||
|
||||
|
||||
@ -36,21 +37,22 @@ def test_get_search_keys(cbz, config, additional_md, expected, comicvine_api):
|
||||
assert expected == ii._get_search_keys(additional_md)
|
||||
|
||||
|
||||
def test_get_issue_cover_match_score(cbz, config, comicvine_api):
|
||||
@pytest.mark.parametrize("data, expected", testing.comicdata.issueidentifier_score)
|
||||
def test_get_issue_cover_match_score(
|
||||
cbz,
|
||||
config,
|
||||
comicvine_api,
|
||||
data: tuple[str | ImageHash, list[str | ImageHash], bool],
|
||||
expected: comictaggerlib.issueidentifier.Score,
|
||||
):
|
||||
config, definitions = config
|
||||
ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz, config, comicvine_api)
|
||||
score = ii._get_issue_cover_match_score(
|
||||
"https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg",
|
||||
["https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/"],
|
||||
[("Cover 1", ii.calculate_hash(cbz.get_page(0)))],
|
||||
primary_img_url=data[0],
|
||||
alt_urls=data[1],
|
||||
local_hashes=[("Cover 1", ii.calculate_hash(cbz.get_page(0)))],
|
||||
use_alt_urls=data[2],
|
||||
)
|
||||
expected = {
|
||||
"remote_hash": 212201432349720,
|
||||
"score": 0,
|
||||
"url": "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg",
|
||||
"local_hash": 212201432349720,
|
||||
"local_hash_name": "Cover 1",
|
||||
}
|
||||
assert expected == score
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user