import math
from statistics import median


def p_hash(self) -> int:
    """Return the 64-bit perceptual hash (pHash) of ``self.image``.

    Pure-Python version of the perceptual hash from
    https://github.com/JohannesBuchner/imagehash/tree/master
    The algorithm follows
    http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html

    Steps:
      1. Convert the image to greyscale ("L") and resize it to 32x32 with
         the Lanczos filter.
      2. Apply an unnormalised DCT-II along every row, then along every
         column of the row result.
      3. Keep the top-left 8x8 block (the lowest frequencies).
      4. Emit one bit per coefficient, row-major: 1 when the coefficient is
         strictly greater than the median of the 64 values.  The first
         coefficient becomes the most significant bit.

    Only the 8 lowest-frequency coefficients are computed in each pass
    because everything else is discarded in step 3; the retained values are
    accumulated in the same order as a full transform would use.

    NOTE(review): relies on the module-level names ``Image`` (Pillow) and
    ``logger`` being defined by the enclosing module.

    Returns:
        The hash as a non-negative int below 2**64, or 0 when the image
        cannot be converted/resized (the exception is logged).
    """
    hash_size = 8
    highfreq_factor = 4
    img_size = hash_size * highfreq_factor

    try:
        image = self.image.convert("L").resize((img_size, img_size), Image.Resampling.LANCZOS)
    except Exception:
        # Deliberate best-effort: a failed conversion yields the hash 0.
        logger.exception("p_hash error converting to greyscale and resizing")
        return 0

    def cosine_table(length):
        """Return cos(pi * k * (2n + 1) / (2 * length)) for the kept k and every n."""
        return [
            [math.cos(math.pi * k * (2 * n + 1) / (2 * length)) for n in range(length)]
            for k in range(min(hash_size, length))
        ]

    def weighted_sum(values, weights):
        """Accumulate values[i] * weights[i] from left to right."""
        # A plain loop (not the built-in sum()) keeps float rounding the same
        # on every Python version; sum() may use compensated summation, which
        # could flip bits of coefficients that lie very close to the median.
        total = 0.0
        for value, weight in zip(values, weights):
            total += value * weight
        return total

    width, height = image.size
    pixels = [[image.getpixel((x, y)) for x in range(width)] for y in range(height)]

    row_cos = cosine_table(width)
    col_cos = cosine_table(height)

    # Pass 1: DCT of every row, keeping only the low horizontal frequencies.
    row_dct = [[weighted_sum(row, cos_row) for cos_row in row_cos] for row in pixels]

    # Pass 2: DCT down every kept column, keeping only the low vertical
    # frequencies.  low_freq[k][j] is the coefficient for vertical frequency k
    # and horizontal frequency j.
    low_freq = [
        [weighted_sum([coeffs[j] for coeffs in row_dct], cos_col) for j in range(len(row_cos))]
        for cos_col in col_cos
    ]

    flat = [coeff for row in low_freq for coeff in row]
    med = median(flat)

    # Pack the comparison results into an integer, first coefficient first.
    result = 0
    for coeff in flat:
        result = (result << 1) | int(coeff > med)
    return result
image_data: bytes) -> int: if self.image_hasher == 3: - return -1 # ImageHasher(data=image_data).dct_average_hash() + return ImageHasher(data=image_data).p_hash() if self.image_hasher == 2: return -1 # ImageHasher(data=image_data).average_hash2()