From 47a61885d9073e10d1d447e60ffec9270ce74445 Mon Sep 17 00:00:00 2001 From: "beville@gmail.com" Date: Sat, 17 Nov 2012 04:36:03 +0000 Subject: [PATCH] Added check for wide cover, and allow for only checking right side git-svn-id: http://comictagger.googlecode.com/svn/trunk@55 6c5673fe-1810-88d6-992b-cd32ca31540c --- issueidentifier.py | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/issueidentifier.py b/issueidentifier.py index 67e89bd..6adf147 100644 --- a/issueidentifier.py +++ b/issueidentifier.py @@ -21,6 +21,8 @@ limitations under the License. import sys import math import urllib2, urllib +import StringIO +import Image from settings import ComicTaggerSettings from comicvinecacher import ComicVineCacher @@ -89,6 +91,26 @@ class IssueIdentifier: else: return ImageHasher( data=image_data ).average_hash() + def getAspectRatio( self, image_data ): + + im = Image.open(StringIO.StringIO(image_data)) + w,h = im.size + return float(h)/float(w) + + def cropCover( self, image_data ): + + im = Image.open(StringIO.StringIO(image_data)) + w,h = im.size + + cropped_im = im.crop( (int(w/2), 0, w, h) ) + output = StringIO.StringIO() + cropped_im.save(output, format="JPEG") + cropped_image_data = output.getvalue() + output.close() + + return cropped_image_data + + def setProgressCallback( self, cb_func ): self.callback = cb_func @@ -178,9 +200,18 @@ class IssueIdentifier: return self.ResultNoMatches, [] cover_image_data = ca.getCoverPage() - cover_hash = self.calculateHash( cover_image_data ) + #check the apect ratio + # if it's wider than it is high, it's probably a two page spread + # if so, crop it and calculate a second hash + narrow_cover_hash = None + aspect_ratio = self.getAspectRatio( cover_image_data ) + if aspect_ratio < 1.0: + right_side_image_data = self.cropCover( cover_image_data ) + narrow_cover_hash = self.calculateHash( right_side_image_data ) + print "narrow_cover_hash", narrow_cover_hash + #self.log_msg( "Cover hash = {0:016x}".format(cover_hash) ) keys = self.getSearchKeys() @@ -190,7 +221,7 @@ class IssueIdentifier: self.log_msg("Not enough info for a search!") return [] - + """ self.log_msg( "Going to search for:" ) self.log_msg( "Series: " + keys['series'] ) self.log_msg( "Issue : " + keys['issue_number'] ) @@ -198,6 +229,7 @@ class IssueIdentifier: self.log_msg( "Year : " + keys['year'] ) if keys['month'] is not None: self.log_msg( "Month : " + keys['month'] ) + """ comicVine = ComicVineTalker( self.cv_api_key ) @@ -280,10 +312,16 @@ class IssueIdentifier: return self.match_list url_image_hash = self.calculateHash( url_image_data ) + score = ImageHasher.hamming_distance(cover_hash, url_image_hash) + # if we have a cropped version of the cover, check that one also, and use the best score + if narrow_cover_hash is not None: + score2 = ImageHasher.hamming_distance(narrow_cover_hash, url_image_hash) + score = min( score, score2 ) + match = dict() match['series'] = "{0} ({1})".format(series['name'], series['start_year']) - match['distance'] = ImageHasher.hamming_distance(cover_hash, url_image_hash) + match['distance'] = score match['issue_number'] = num_s match['url_image_hash'] = url_image_hash match['issue_title'] = issue['name']