189 lines
6.6 KiB
Python
189 lines
6.6 KiB
Python
"""A class to manage creating image content hashes, and calculate hamming distances"""
|
|
#
|
|
# Copyright 2013 ComicTagger Authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
from __future__ import annotations
|
|
|
|
import io
|
|
import logging
|
|
from functools import reduce
|
|
from typing import TypeVar
|
|
|
|
try:
|
|
from PIL import Image
|
|
|
|
pil_available = True
|
|
except ImportError:
|
|
pil_available = False
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ImageHasher:
|
|
def __init__(self, path: str | None = None, data: bytes = b"", width: int = 8, height: int = 8) -> None:
|
|
self.width = width
|
|
self.height = height
|
|
|
|
if path is None and not data:
|
|
raise OSError
|
|
|
|
try:
|
|
if path is not None:
|
|
self.image = Image.open(path)
|
|
else:
|
|
self.image = Image.open(io.BytesIO(data))
|
|
except Exception:
|
|
logger.exception("Image data seems corrupted!")
|
|
# just generate a bogus image
|
|
self.image = Image.new("L", (1, 1))
|
|
|
|
def average_hash(self) -> int:
|
|
try:
|
|
image = self.image.resize((self.width, self.height), Image.Resampling.LANCZOS).convert("L")
|
|
except Exception:
|
|
logger.exception("average_hash error")
|
|
return 0
|
|
|
|
pixels = list(image.getdata())
|
|
avg = sum(pixels) / len(pixels)
|
|
|
|
def compare_value_to_avg(i: int) -> int:
|
|
return 1 if i > avg else 0
|
|
|
|
bitlist = list(map(compare_value_to_avg, pixels))
|
|
|
|
# build up an int value from the bit list, one bit at a time
|
|
def set_bit(x: int, idx_val: tuple[int, int]) -> int:
|
|
(idx, val) = idx_val
|
|
return x | (val << idx)
|
|
|
|
result = reduce(set_bit, enumerate(bitlist), 0)
|
|
|
|
return result
|
|
|
|
def average_hash2(self) -> None:
|
|
"""
|
|
# Got this one from somewhere on the net. Not a clue how the 'convolve2d' works!
|
|
|
|
from numpy import array
|
|
from scipy.signal import convolve2d
|
|
|
|
im = self.image.resize((self.width, self.height), Image.ANTIALIAS).convert('L')
|
|
|
|
in_data = array((im.getdata())).reshape(self.width, self.height)
|
|
filt = array([[0,1,0],[1,-4,1],[0,1,0]])
|
|
filt_data = convolve2d(in_data,filt,mode='same',boundary='symm').flatten()
|
|
|
|
result = reduce(lambda x, (y, z): x | (z << y),
|
|
enumerate(map(lambda i: 0 if i < 0 else 1, filt_data)),
|
|
0)
|
|
return result
|
|
"""
|
|
|
|
def dct_average_hash(self) -> None:
|
|
"""
|
|
# Algorithm source: http://syntaxcandy.blogspot.com/2012/08/perceptual-hash.html
|
|
|
|
1. Reduce size. Like Average Hash, pHash starts with a small image.
|
|
However, the image is larger than 8x8; 32x32 is a good size. This
|
|
is really done to simplify the DCT computation and not because it
|
|
is needed to reduce the high frequencies.
|
|
|
|
2. Reduce color. The image is reduced to a grayscale just to further
|
|
simplify the number of computations.
|
|
|
|
3. Compute the DCT. The DCT separates the image into a collection of
|
|
frequencies and scalars. While JPEG uses an 8x8 DCT, this algorithm
|
|
uses a 32x32 DCT.
|
|
|
|
4. Reduce the DCT. This is the magic step. While the DCT is 32x32,
|
|
just keep the top-left 8x8. Those represent the lowest frequencies in
|
|
the picture.
|
|
|
|
5. Compute the average value. Like the Average Hash, compute the mean DCT
|
|
value (using only the 8x8 DCT low-frequency values and excluding the first
|
|
term since the DC coefficient can be significantly different from the other
|
|
values and will throw off the average). Thanks to David Starkweather for the
|
|
added information about pHash. He wrote: "the dct hash is based on the low 2D
|
|
DCT coefficients starting at the second from lowest, leaving out the first DC
|
|
term. This excludes completely flat image information (i.e. solid colors) from
|
|
being included in the hash description."
|
|
|
|
6. Further reduce the DCT. This is the magic step. Set the 64 hash bits to 0 or
|
|
1 depending on whether each of the 64 DCT values is above or below the average
|
|
value. The result doesn't tell us the actual low frequencies; it just tells us
|
|
the very-rough relative scale of the frequencies to the mean. The result will not
|
|
vary as long as the overall structure of the image remains the same; this can
|
|
survive gamma and color histogram adjustments without a problem.
|
|
|
|
7. Construct the hash. Set the 64 bits into a 64-bit integer. The order does not
|
|
matter, just as long as you are consistent.
|
|
|
|
|
|
import numpy
|
|
import scipy.fftpack
|
|
numpy.set_printoptions(threshold=10000, linewidth=200, precision=2, suppress=True)
|
|
|
|
# Step 1,2
|
|
im = self.image.resize((32, 32), Image.ANTIALIAS).convert("L")
|
|
in_data = numpy.asarray(im)
|
|
|
|
# Step 3
|
|
dct = scipy.fftpack.dct(in_data.astype(float))
|
|
|
|
# Step 4
|
|
# Just skip the top and left rows when slicing, as suggested somewhere else...
|
|
lofreq_dct = dct[1:9, 1:9].flatten()
|
|
|
|
# Step 5
|
|
avg = (lofreq_dct.sum()) / (lofreq_dct.size)
|
|
median = numpy.median(lofreq_dct)
|
|
|
|
thresh = avg
|
|
|
|
# Step 6
|
|
def compare_value_to_thresh(i):
|
|
return (1 if i > thresh else 0)
|
|
|
|
bitlist = map(compare_value_to_thresh, lofreq_dct)
|
|
|
|
#Step 7
|
|
def set_bit(x, (idx, val)):
|
|
return (x | (val << idx))
|
|
|
|
result = reduce(set_bit, enumerate(bitlist), long(0))
|
|
|
|
|
|
return result
|
|
"""
|
|
|
|
# accepts 2 hashes (longs or hex strings) and returns the hamming distance
|
|
|
|
T = TypeVar("T", int, str)
|
|
|
|
@staticmethod
|
|
def hamming_distance(h1: T, h2: T) -> int:
|
|
if isinstance(h1, int) or isinstance(h2, int):
|
|
n1 = h1
|
|
n2 = h2
|
|
else:
|
|
# convert hex strings to ints
|
|
n1 = int(h1, 16)
|
|
n2 = int(h2, 16)
|
|
|
|
# xor the two numbers
|
|
n = n1 ^ n2
|
|
|
|
# count up the 1's in the binary string
|
|
return sum(b == "1" for b in bin(n)[2:])
|