Return int instead of hex and revert hamming_distance etc.
This commit is contained in:
parent
14a4055040
commit
3f180612d3
@ -20,6 +20,7 @@ import logging
|
||||
import math
|
||||
from functools import reduce
|
||||
from statistics import median
|
||||
from typing import TypeVar
|
||||
|
||||
try:
|
||||
from PIL import Image
|
||||
@ -72,9 +73,27 @@ class ImageHasher:
|
||||
|
||||
return result
|
||||
|
||||
def p_hash(self) -> str:
|
||||
def average_hash2(self) -> None:
|
||||
"""
|
||||
# Got this one from somewhere on the net. Not a clue how the 'convolve2d' works!
|
||||
|
||||
from numpy import array
|
||||
from scipy.signal import convolve2d
|
||||
|
||||
im = self.image.resize((self.width, self.height), Image.ANTIALIAS).convert('L')
|
||||
|
||||
in_data = array((im.getdata())).reshape(self.width, self.height)
|
||||
filt = array([[0,1,0],[1,-4,1],[0,1,0]])
|
||||
filt_data = convolve2d(in_data,filt,mode='same',boundary='symm').flatten()
|
||||
|
||||
result = reduce(lambda x, (y, z): x | (z << y),
|
||||
enumerate(map(lambda i: 0 if i < 0 else 1, filt_data)),
|
||||
0)
|
||||
return result
|
||||
"""
|
||||
|
||||
def p_hash(self) -> int:
|
||||
"""
|
||||
Output the hash as an int
|
||||
Pure python version of Perceptual Hash computation of https://github.com/JohannesBuchner/imagehash/tree/master
|
||||
Implementation follows http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
|
||||
"""
|
||||
@ -135,7 +154,7 @@ class ImageHasher:
|
||||
image = self.image.convert("L").resize((img_size, img_size), Image.Resampling.LANCZOS)
|
||||
except Exception:
|
||||
logger.exception("p_hash error converting to greyscale and resizing")
|
||||
return ""
|
||||
return 0
|
||||
|
||||
pixels = convert_image_to_ndarray(image)
|
||||
dct = generate_dct2(generate_dct2(pixels, axis=0), axis=1)
|
||||
@ -143,29 +162,27 @@ class ImageHasher:
|
||||
med = median([item for sublist in dctlowfreq for item in sublist])
|
||||
# Convert to a bit string
|
||||
diff = "".join(str(int(item > med)) for row in dctlowfreq for item in row)
|
||||
# Convert to hex
|
||||
width = int(math.ceil(len(diff) / 4))
|
||||
result = "{:0>{width}x}".format(int(diff, 2), width=width)
|
||||
|
||||
result = int(diff, 2)
|
||||
|
||||
return result
|
||||
|
||||
# Accepts two hashes (ints or hex strings) and returns the Hamming distance between them
|
||||
|
||||
T = TypeVar("T", int, str)
|
||||
|
||||
@staticmethod
|
||||
def hamming_distance(h1: int | str, h2: int | str) -> int:
|
||||
if isinstance(h1, int) and isinstance(h2, int):
|
||||
def hamming_distance(h1: T, h2: T) -> int:
|
||||
if isinstance(h1, int) or isinstance(h2, int):
|
||||
n1 = h1
|
||||
n2 = h2
|
||||
elif isinstance(h1, str) and isinstance(h2, str):
|
||||
else:
|
||||
# convert hex strings to ints
|
||||
n1 = int(h1, 16)
|
||||
n2 = int(h2, 16)
|
||||
else:
|
||||
# Mixed hashes or some other problem so return a high number. Should return None instead?
|
||||
return 999
|
||||
|
||||
# xor the two numbers
|
||||
n: int = n1 ^ n2
|
||||
n = n1 ^ n2
|
||||
|
||||
# count up the 1's in the binary string
|
||||
return sum(b == "1" for b in bin(n)[2:])
|
||||
|
@ -53,7 +53,7 @@ class SearchKeys(TypedDict):
|
||||
class Score(TypedDict):
|
||||
score: NotRequired[int]
|
||||
url: str
|
||||
hash: int | str
|
||||
hash: int
|
||||
|
||||
|
||||
class IssueIdentifierNetworkError(Exception):
|
||||
@ -132,9 +132,11 @@ class IssueIdentifier:
|
||||
def set_output_function(self, func: Callable[[str], None]) -> None:
|
||||
self.output_function = func
|
||||
|
||||
def calculate_hash(self, image_data: bytes) -> int | str:
|
||||
if self.image_hasher == 2:
|
||||
def calculate_hash(self, image_data: bytes) -> int:
|
||||
if self.image_hasher == 3:
|
||||
return ImageHasher(data=image_data).p_hash()
|
||||
if self.image_hasher == 2:
|
||||
return -1 # ImageHasher(data=image_data).average_hash2()
|
||||
|
||||
return ImageHasher(data=image_data).average_hash()
|
||||
|
||||
@ -278,7 +280,7 @@ class IssueIdentifier:
|
||||
issue_id: str,
|
||||
primary_img_url: str,
|
||||
alt_urls: list[str],
|
||||
local_cover_hash_list: list[int | str],
|
||||
local_cover_hash_list: list[int],
|
||||
use_remote_alternates: bool = False,
|
||||
use_log: bool = True,
|
||||
) -> Score:
|
||||
|
@ -10,7 +10,7 @@ class IssueResult(TypedDict):
|
||||
distance: int
|
||||
issue_number: str
|
||||
cv_issue_count: int | None
|
||||
url_image_hash: int | str
|
||||
url_image_hash: int
|
||||
issue_title: str
|
||||
issue_id: str
|
||||
series_id: str
|
||||
|
Loading…
Reference in New Issue
Block a user