From cc9c963c5b4d8dfbc8cda64dc726204799483841 Mon Sep 17 00:00:00 2001 From: "beville@gmail.com" Date: Thu, 8 Nov 2012 06:25:29 +0000 Subject: [PATCH] Took a whack at perceptual image hashing. Ugh. git-svn-id: http://comictagger.googlecode.com/svn/trunk@17 6c5673fe-1810-88d6-992b-cd32ca31540c --- imagehasher.py | 132 ++++++++++++++++++++++++++++++++++++++++++++----- tagger.py | 17 +++++-- todo.txt | 10 ++++ 3 files changed, 143 insertions(+), 16 deletions(-) diff --git a/imagehasher.py b/imagehasher.py index cfa771e..e626e1e 100755 --- a/imagehasher.py +++ b/imagehasher.py @@ -1,15 +1,18 @@ import Image -#import numpy -#import math -#import operator import StringIO +import numpy +import scipy.signal + + #from bitarray import bitarray class ImageHasher(object): - def __init__(self, path=None, data=None, size=8): - self.hash_size = size + def __init__(self, path=None, data=None, width=8, height=8): + #self.hash_size = size + self.width = width + self.height = height if path is None and data is None: raise IOError @@ -19,7 +22,8 @@ class ImageHasher(object): self.image = Image.open(StringIO.StringIO(data)) def average_hash(self): - image = self.image.resize((self.hash_size, self.hash_size), Image.ANTIALIAS).convert("L") + #image = self.image.resize((self.hash_size, self.hash_size), Image.ANTIALIAS).convert("L") + image = self.image.resize((self.width, self.height), Image.ANTIALIAS).convert("L") pixels = list(image.getdata()) avg = sum(pixels) / len(pixels) @@ -35,13 +39,115 @@ class ImageHasher(object): # (Build up a hex string from the binary list of bits) hash = "" binary_string = "".join(diff) - for i in range(0,self.hash_size**2,8): + for i in range(0, self.width*self.height, 8): # 8 bits at time, reverse, for little-endian s = binary_string[i:i+8][::-1] hash = hash + "{0:02x}".format( int(s,2)) return hash + + def average_hash2( self ): + im = self.image.resize((self.width, self.height), Image.ANTIALIAS).convert('L') + + in_data = numpy.array((im.getdata())).reshape(self.width, self.height) + filt = numpy.array([[0,1,0],[1,-4,1],[0,1,0]]) + filt_data = scipy.signal.convolve2d(in_data,filt,mode='same',boundary='symm').flatten() + + result = reduce(lambda x, (y, z): x | (z << y), + enumerate(map(lambda i: 0 if i < 0 else 1, filt_data)), + 0) + return result + + + def perceptual_hash(self): + """ + # Algorithm source: http://syntaxcandy.blogspot.com/2012/08/perceptual-hash.html + + 1. Reduce size. Like Average Hash, pHash starts with a small image. + However, the image is larger than 8x8; 32x32 is a good size. This + is really done to simplify the DCT computation and not because it + is needed to reduce the high frequencies. + + 2. Reduce color. The image is reduced to a grayscale just to further + simplify the number of computations. + + 3. Compute the DCT. The DCT separates the image into a collection of + frequencies and scalars. While JPEG uses an 8x8 DCT, this algorithm + uses a 32x32 DCT. + + 4. Reduce the DCT. This is the magic step. While the DCT is 32x32, + just keep the top-left 8x8. Those represent the lowest frequencies in + the picture. + + 5. Compute the average value. Like the Average Hash, compute the mean DCT + value (using only the 8x8 DCT low-frequency values and excluding the first + term since the DC coefficient can be significantly different from the other + values and will throw off the average). Thanks to David Starkweather for the + added information about pHash. He wrote: "the dct hash is based on the low 2D + DCT coefficients starting at the second from lowest, leaving out the first DC + term. This excludes completely flat image information (i.e. solid colors) from + being included in the hash description." + + 6. Further reduce the DCT. This is the magic step. Set the 64 hash bits to 0 or + 1 depending on whether each of the 64 DCT values is above or below the average + value. The result doesn't tell us the actual low frequencies; it just tells us + the very-rough relative scale of the frequencies to the mean. The result will not + vary as long as the overall structure of the image remains the same; this can + survive gamma and color histogram adjustments without a problem. + + 7. Construct the hash. Set the 64 bits into a 64-bit integer. The order does not + matter, just as long as you are consistent. + """ + + # Step 1,2 + im = self.image.resize((32, 32), Image.ANTIALIAS).convert("L") + in_data = numpy.array(im.getdata(), dtype=numpy.dtype('float')).reshape(self.width, self.height) + #print len(im.getdata()) + #print in_data + + # Step 3 + dct = scipy.fftpack.dct( in_data ) + + # Step 4 + # NO! -- lofreq_dct = dct[:8,:8].flatten() + # NO? -- lofreq_dct = dct[24:32, 24:32].flatten() + lofreq_dct = dct[:8, 24:32].flatten() + #print dct[:8, 24:32] + # NO! -- lofreq_dct = dct[24:32, :8 ].flatten() + + #omit = 0 + #omit = 7 + #omit = 56 + #omit = 63 + + # Step 5 + #avg = ( lofreq_dct.sum() - lofreq_dct[omit] ) / ( lofreq_dct.size - 1 ) + avg = ( lofreq_dct.sum() ) / ( lofreq_dct.size ) + #print lofreq_dct.sum() + #print lofreq_dct[0] + #print avg, lofreq_dct.size + + # Step 6 + def compare_value_to_avg(i): + if i > avg: + return (1) + else: + return (0) + + bitlist = map(compare_value_to_avg, lofreq_dct) + + #Step 7 + def accumulate( accumulator, (idx, val) ): + return (accumulator | (val << idx)) + + result = reduce(accumulate, enumerate(bitlist), long(0)) + + + print "{0:016x}".format(result) + return result + + @staticmethod def count_bits(number): bit = 1 @@ -52,14 +158,18 @@ class ImageHasher(object): bit <<= 1 return count - #accepts 2 hash strings, and returns the hamming distance + #accepts 2 hashes (long or hex strings) and returns the hamming distance @staticmethod def hamming_distance(h1, h2): - # conver hex strings to ints - n1 = long( h1, 16) - n2 = long( h2, 16) + if type(h1) == long: + n1 = h1 + n2 = h2 + else: + # conver hex strings to ints + n1 = long( h1, 16) + n2 = long( h2, 16) # xor the two numbers n = n1 ^ n2 diff --git a/tagger.py b/tagger.py index efb3d8b..628095f 100755 --- a/tagger.py +++ b/tagger.py @@ -52,9 +52,14 @@ def cliProcedure( opts, settings ): return cover_image_data = ca.getCoverPage() - cover_hash = ImageHasher( data=cover_image_data ).average_hash() - print "Cover hash = ",cover_hash - + #cover_hash = ImageHasher( data=cover_image_data ).average_hash() + #print "Cover hash = ",cover_hash + + cover_hash = ImageHasher( data=cover_image_data ).average_hash2() + #print "Cover hash = ",cover_hash + + #cover_hash = ImageHasher( data=cover_image_data , width=32, height=32 ).perceptual_hash() + # see if the archive has any useful meta data for searching with if ca.hasCIX(): internal_metadata = ca.readCIX() @@ -148,8 +153,10 @@ def cliProcedure( opts, settings ): img_url = comicVine.fetchIssueCoverURL( issue['id'] ) #TODO get the URL, and calc hash!! url_image_data = urllib.urlopen(img_url).read() - url_image_hash = ImageHasher( data=url_image_data ).average_hash() - print "-----> ID: {0} #{1} ({2}) Hash: {3} Distance: {4}\n-------> url:{5}".format( + #url_image_hash = ImageHasher( data=url_image_data ).average_hash() + url_image_hash = ImageHasher( data=url_image_data, ).average_hash2() + #url_image_hash = ImageHasher( data=url_image_data, width=32, height=32 ).perceptual_hash() + print u"-----> ID: {0} #{1} ({2}) Hash: {3} Distance: {4}\n-------> url:{5}".format( issue['id'], num_s, issue['name'], url_image_hash, ImageHasher.hamming_distance(cover_hash, url_image_hash), diff --git a/todo.txt b/todo.txt index c6a8b9d..0418315 100644 --- a/todo.txt +++ b/todo.txt @@ -40,6 +40,16 @@ Other settings possibilities: Content Hashes!! +Image Hashes: + Failures of average hash: + Thor 600 Wrap-around w/ different aspect ratio + Bone 3 - Variant Cover, + Old Avengers -- Best match, but high difference + +Filename parsing: + Concatenation of Name and Issue?? + "1602" + App option to covert RAR to ZIP