diff --git a/comicvinecacher.py b/comicvinecacher.py index a7afe66..1284e2d 100644 --- a/comicvinecacher.py +++ b/comicvinecacher.py @@ -76,6 +76,8 @@ class ComicVineCacher: "image_hash TEXT," + "thumb_image_url TEXT," + "thumb_image_hash TEXT," + + "publish_month TEXT," + + "publish_year TEXT," + "timestamp TEXT," + "PRIMARY KEY (id ) )" ) @@ -229,7 +231,7 @@ class ComicVineCacher: return result - def add_issue_image_url( self, issue_id, image_url, thumb_image_url ): + def add_issue_select_details( self, issue_id, image_url, thumb_image_url, publish_month, publish_year ): con = lite.connect( self.db_file ) @@ -240,25 +242,27 @@ class ComicVineCacher: data = { "image_url": image_url, "thumb_image_url": thumb_image_url, + "publish_month": publish_month, + "publish_year": publish_year, "timestamp": timestamp } self.upsert( cur, "issues" , "id", issue_id, data) - def get_issue_image_url( self, issue_id ): + def get_issue_select_details( self, issue_id ): con = lite.connect( self.db_file ) with con: cur = con.cursor() - cur.execute("SELECT image_url,thumb_image_url FROM Issues WHERE id=?", [ issue_id ]) + cur.execute("SELECT image_url,thumb_image_url,publish_month,publish_year FROM Issues WHERE id=?", [ issue_id ]) row = cur.fetchone() if row[0] is None : - return None, None + return None, None, None, None else: - return row[0],row[1] + return row[0],row[1],row[2],row[3] def upsert( self, cur, tablename, pkname, pkval, data): diff --git a/comicvinetalker.py b/comicvinetalker.py index b47712c..75efc84 100644 --- a/comicvinetalker.py +++ b/comicvinetalker.py @@ -250,38 +250,47 @@ class ComicVineTalker(QObject): return newstring - + def fetchIssueDate( self, issue_id ): + image_url, thumb_url, month,year = self.fetchIssueSelectDetails( issue_id ) + return month, year + def fetchIssueCoverURLs( self, issue_id ): + image_url, thumb_url, month,year = self.fetchIssueSelectDetails( issue_id ) + return image_url, thumb_url + + def fetchIssueSelectDetails( self, issue_id ): - cached_image_url,cached_thumb_url = self.fetchCachedIssueCoverURLs( issue_id ) + cached_image_url,cached_thumb_url,cached_month,cached_year = self.fetchCachedIssueSelectDetails( issue_id ) if cached_image_url is not None: - return cached_image_url,cached_thumb_url + return cached_image_url,cached_thumb_url, cached_month, cached_year - issue_url = "http://api.comicvine.com/issue/" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image" + issue_url = "http://api.comicvine.com/issue/" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image,publish_month,publish_year" resp = urllib2.urlopen(issue_url) content = resp.read() cv_response = json.loads(content) if cv_response[ 'status_code' ] != 1: print ( "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] )) - return None, None + return None, None,None,None image_url = cv_response['results']['image']['super_url'] thumb_url = cv_response['results']['image']['thumb_url'] + year = cv_response['results']['publish_year'] + month = cv_response['results']['publish_month'] if image_url is not None: - self.cacheIssueCoverURLs( issue_id, image_url,thumb_url ) - return image_url,thumb_url + self.cacheIssueSelectDetails( issue_id, image_url,thumb_url, month, year ) + return image_url,thumb_url,month,year - def fetchCachedIssueCoverURLs( self, issue_id ): + def fetchCachedIssueSelectDetails( self, issue_id ): # before we search online, look in our cache, since we might already # have this info cvc = ComicVineCacher( ComicTaggerSettings.getSettingsFolder() ) - return cvc.get_issue_image_url( issue_id ) + return cvc.get_issue_select_details( issue_id ) - def cacheIssueCoverURLs( self, issue_id, image_url,thumb_url ): + def cacheIssueSelectDetails( self, issue_id, image_url, thumb_url, month, year ): cvc = ComicVineCacher( ComicTaggerSettings.getSettingsFolder() ) - cvc.add_issue_image_url( issue_id, image_url, thumb_url ) + cvc.add_issue_select_details( issue_id, image_url, thumb_url, month, year ) #--------------------------------------------------------------------------- @@ -290,12 +299,12 @@ class ComicVineTalker(QObject): def asyncFetchIssueCoverURLs( self, issue_id ): self.issue_id = issue_id - cached_image_url,cached_thumb_url = self.fetchCachedIssueCoverURLs( issue_id ) + cached_image_url,cached_thumb_url,month,year = self.fetchCachedIssueSelectDetails( issue_id ) if cached_image_url is not None: self.urlFetchComplete.emit( cached_image_url,cached_thumb_url, self.issue_id ) return - issue_url = "http://api.comicvine.com/issue/" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image" + issue_url = "http://api.comicvine.com/issue/" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image,publish_month,publish_year" self.nam = QNetworkAccessManager() self.nam.finished.connect( self.asyncFetchIssueCoverURLComplete ) self.nam.get(QNetworkRequest(QUrl(issue_url))) @@ -311,8 +320,10 @@ class ComicVineTalker(QObject): image_url = cv_response['results']['image']['super_url'] thumb_url = cv_response['results']['image']['thumb_url'] + year = cv_response['results']['publish_year'] + month = cv_response['results']['publish_month'] - self.cacheIssueCoverURLs( self.issue_id, image_url, thumb_url ) + self.cacheIssueSelectDetails( self.issue_id, image_url, thumb_url, month, year ) self.urlFetchComplete.emit( image_url, thumb_url, self.issue_id ) diff --git a/issueidentifier.py b/issueidentifier.py index a5b3818..67e89bd 100644 --- a/issueidentifier.py +++ b/issueidentifier.py @@ -43,14 +43,26 @@ class IssueIdentifier: def __init__(self, comic_archive, cv_api_key ): self.comic_archive = comic_archive self.image_hasher = 1 - self.additional_metadata = None - self.min_score_thresh = 22 + + self.onlyUseAdditionalMetaData = False + + # a decent hamming score, good enough to call it a match + self.min_score_thresh = 20 + + # the min distance a hamming score must be to separate itself from closest neighbor self.min_score_distance = 2 + + # a very strong hamming score, almost certainly the same image self.strong_score_thresh = 8 + + # used to eliminate series names that are too long based on our search string + self.length_delta_thresh = 3 + self.additional_metadata = GenericMetadata() self.cv_api_key = cv_api_key self.output_function = IssueIdentifier.defaultWriteOutput self.callback = None + self.search_result = self.ResultNoMatches def setScoreMinThreshold( self, thresh ): self.min_score_thresh = thresh @@ -91,7 +103,14 @@ class IssueIdentifier: if ca is None: return - + + if self.onlyUseAdditionalMetaData: + search_keys['series'] = self.additional_metadata.series + search_keys['issue_number'] = self.additional_metadata.issueNumber + search_keys['year'] = self.additional_metadata.publicationYear + search_keys['month'] = self.additional_metadata.publicationMonth + return search_keys + # see if the archive has any useful meta data for searching with if ca.hasCIX(): internal_metadata = ca.readCIX() @@ -128,7 +147,7 @@ class IssueIdentifier: search_keys['year'] = internal_metadata.publicationYear else: search_keys['year'] = md_from_filename.publicationYear - + if self.additional_metadata.publicationMonth is not None: search_keys['month'] = self.additional_metadata.publicationMonth elif internal_metadata.publicationMonth is not None: @@ -156,7 +175,7 @@ class IssueIdentifier: if not ca.seemsToBeAComicArchive(): self.log_msg( "Sorry, but "+ opts.filename + " is not a comic archive!") - return [] + return self.ResultNoMatches, [] cover_image_data = ca.getCoverPage() @@ -171,7 +190,7 @@ class IssueIdentifier: self.log_msg("Not enough info for a search!") return [] - """ + self.log_msg( "Going to search for:" ) self.log_msg( "Series: " + keys['series'] ) self.log_msg( "Issue : " + keys['issue_number'] ) @@ -179,7 +198,7 @@ class IssueIdentifier: self.log_msg( "Year : " + keys['year'] ) if keys['month'] is not None: self.log_msg( "Month : " + keys['month'] ) - """ + comicVine = ComicVineTalker( self.cv_api_key ) #self.log_msg( ( "Searching for " + keys['series'] + "...") @@ -195,8 +214,10 @@ class IssueIdentifier: #self.log_msg( "Removing results with too long names" ) for item in cv_search_results: - #assume that our search name is close to the actual name, say within 5 characters - if len( utils.removearticles(item['name'])) < len( keys['series'] ) + 5: + #assume that our search name is close to the actual name, say within ,e.g. 5 chars + shortened_key = utils.removearticles(keys['series']) + shortened_item_name = utils.removearticles(item['name']) + if len( shortened_item_name ) < ( len( shortened_key ) + self.length_delta_thresh) : series_shortlist.append(item) # if we don't think it's an issue number 1, remove any series' that are one-shots @@ -241,6 +262,17 @@ class IssueIdentifier: if num_s == keys['issue_number']: # found a matching issue number! now get the issue data img_url, thumb_url = comicVine.fetchIssueCoverURLs( issue['id'] ) + month, year = comicVine.fetchIssueDate( issue['id'] ) + + if self.cancel == True: + self.match_list = [] + return self.match_list + + # now, if we have an issue year key given, reject this one if not a match + if keys['year'] is not None: + if keys['year'] != year: + break + url_image_data = ImageFetcher().fetch(thumb_url, blocking=True) if self.cancel == True: @@ -258,6 +290,8 @@ class IssueIdentifier: match['img_url'] = thumb_url match['issue_id'] = issue['id'] match['volume_id'] = series['id'] + match['month'] = month + match['year'] = year self.match_list.append(match) self.log_msg( " --> {0}".format(match['distance']), newline=False ) @@ -268,8 +302,10 @@ class IssueIdentifier: if len(self.match_list) == 0: self.log_msg( ":-( no matches!" ) + self.search_result = self.ResultNoMatches return self.match_list + # sort list by image match scores self.match_list.sort(key=lambda k: k['distance']) @@ -281,20 +317,22 @@ class IssueIdentifier: self.log_msg( str(l)) def print_match(item): - self.log_msg( u"-----> {0} #{1} {2} -- score: {3}".format( + self.log_msg( u"-----> {0} #{1} {2} ({3}/{4}) -- score: {5}".format( item['series'], item['issue_number'], item['issue_title'], + item['month'], + item['year'], item['distance']) ) best_score = self.match_list[0]['distance'] if len(self.match_list) == 1: + self.search_result = self.ResultOneGoodMatch if best_score > self.min_score_thresh: self.log_msg( "!!!! Very weak score for the cover. Maybe it's not the cover?" ) - - self.log_msg( "Comparing other pages now..." ) + self.log_msg( "Comparing other archive pages now..." ) found = False for i in range(ca.getNumberOfPages()): image_data = ca.getPage(i) @@ -311,12 +349,15 @@ class IssueIdentifier: self.log_msg( "" ) if not found: self.log_msg( "No matching pages in the issue. Bummer" ) + self.search_result = self.ResultFoundMatchButBadCoverScore print_match(self.match_list[0]) return self.match_list elif best_score > self.min_score_thresh and len(self.match_list) > 1: self.log_msg( "No good image matches! Need to use other info..." ) + self.search_result = self.ResultMultipleMatchesWithBadImageScores + return self.match_list #now pare down list, remove any item more than specified distant from the top scores @@ -326,11 +367,15 @@ class IssueIdentifier: if len(self.match_list) == 1: print_match(self.match_list[0]) + self.search_result = self.ResultOneGoodMatch + elif len(self.match_list) == 0: self.log_msg( "No matches found :(" ) + self.search_result = self.ResultNoMatches else: print - self.log_msg( "More than one likley candiate. Maybe a lexical comparison??" ) + self.log_msg( "More than one likley candiate." ) + self.search_result = self.ResultMultipleGoodMatches for item in self.match_list: print_match(item) diff --git a/taggerwindow.py b/taggerwindow.py index c9f1f76..02aa270 100644 --- a/taggerwindow.py +++ b/taggerwindow.py @@ -848,8 +848,12 @@ class TaggerWindow( QtGui.QMainWindow): return issue_number = str(self.leIssueNum.text()).strip() - - selector = VolumeSelectionWindow( self, self.settings.cv_api_key, series_name, issue_number, self.comic_archive, self.settings, autoselect ) + + year = str(self.lePubYear.text()).strip() + if year == "": + year = None + + selector = VolumeSelectionWindow( self, self.settings.cv_api_key, series_name, issue_number, year, self.comic_archive, self.settings, autoselect ) title = "Search: '" + series_name + "' - " selector.setWindowTitle( title + "Select Series") diff --git a/todo.txt b/todo.txt index 7cd3afc..b258648 100644 --- a/todo.txt +++ b/todo.txt @@ -3,12 +3,8 @@ Features ---------------- - Auto-select: - msgbox on autoselect failure, or warning Multi-match dialog - More auto-select logic using metadata - Maybe, if only one match, but bad score, compare each page in the archive to online cover Check aspect ratio, and maybe break cover into two parts for hashing? Stand-alone CLI @@ -19,7 +15,7 @@ Stand-alone CLI TaggerWindow entry fields - Special tabbed Dialogs needed for: + Special tabbed Dialog needed for: Pages Info - maybe a custom painted widget At minimum, preserve the page data @@ -44,7 +40,7 @@ Disable CBL for RAR SERIOUS BUG: rebuilding zips! http://stackoverflow.com/questions/11578443/trigger-io-errno-18-cross-device-link -MAC: +OSX: toolbar weird unrar complaints Page browser sizing @@ -75,8 +71,7 @@ Image Hashes: Filename parsing: Concatenation of Name and Issue?? "1602" - -Issue identifier - compare names with aricles removed + App option to covert RAR to ZIP diff --git a/utils.py b/utils.py index 773c047..1ef17ba 100644 --- a/utils.py +++ b/utils.py @@ -54,6 +54,7 @@ def addtopath( dir ): os.environ['PATH'] = dir + os.pathsep + os.environ['PATH'] def removearticles( text ): + text = text.lower() articles = ['and', 'the', 'a', '&' ] newText = '' for word in text.split(' '): diff --git a/volumeselectionwindow.py b/volumeselectionwindow.py index 874068a..bf9e8dd 100644 --- a/volumeselectionwindow.py +++ b/volumeselectionwindow.py @@ -77,12 +77,11 @@ class IdentifyThread( QtCore.QThread): def run(self): matches =self.identifier.search() self.identifyComplete.emit( ) - class VolumeSelectionWindow(QtGui.QDialog): - def __init__(self, parent, cv_api_key, series_name, issue_number, comic_archive, settings, autoselect=False): + def __init__(self, parent, cv_api_key, series_name, issue_number, year, comic_archive, settings, autoselect=False): super(VolumeSelectionWindow, self).__init__(parent) uic.loadUi(os.path.join(ComicTaggerSettings.baseDir(), 'volumeselectionwindow.ui' ), self) @@ -90,6 +89,7 @@ class VolumeSelectionWindow(QtGui.QDialog): self.settings = settings self.series_name = series_name self.issue_number = issue_number + self.year = year self.cv_api_key = cv_api_key self.volume_id = 0 self.comic_archive = comic_archive @@ -121,7 +121,10 @@ class VolumeSelectionWindow(QtGui.QDialog): md = GenericMetadata() md.series = self.series_name md.issueNumber = self.issue_number + md.publicationYear = self.year + self.ii.setAdditionalMetadata( md ) + self.ii.onlyUseAdditionalMetaData = True self.id_thread = IdentifyThread( self.ii ) self.id_thread.identifyComplete.connect( self.identifyComplete ) @@ -147,7 +150,25 @@ class VolumeSelectionWindow(QtGui.QDialog): def identifyComplete( self ): matches = self.ii.match_list - if len(matches) == 1: + result = self.ii.search_result + + found_match = False + if result == self.ii.ResultNoMatches: + QtGui.QMessageBox.information(self,"Auto-Select Result", " No matches found :-(") + elif result == self.ii.ResultFoundMatchButBadCoverScore: + QtGui.QMessageBox.information(self,"Auto-Select Result", " Found a match, but cover doesn't seem to match. Verify before commiting!") + found_match = True + elif result == self.ii.ResultFoundMatchButNotFirstPage : + QtGui.QMessageBox.information(self,"Auto-Select Result", " Found a match, but not with the first page of the archive.") + found_match = True + elif result == self.ii.ResultMultipleMatchesWithBadImageScores: + QtGui.QMessageBox.information(self,"Auto-Select Result", " Found some possibilities, but no confidence. Proceed manually.") + elif result == self.ii.ResultOneGoodMatch: + found_match = True + elif result == self.ii.ResultMultipleGoodMatches: + QtGui.QMessageBox.information(self,"Auto-Select Result", " Found multiple likely matches! Selection DIALOG TBD.") + + if found_match: self.iddialog.accept() print "VolumeSelectionWindow found a match!!", matches[0]['volume_id'], matches[0]['issue_number']