autoselect now uses the issue year to help filter
git-svn-id: http://comictagger.googlecode.com/svn/trunk@54 6c5673fe-1810-88d6-992b-cd32ca31540c
This commit is contained in:
parent
cdb22347ab
commit
9f9d9a2635
@ -76,6 +76,8 @@ class ComicVineCacher:
|
||||
"image_hash TEXT," +
|
||||
"thumb_image_url TEXT," +
|
||||
"thumb_image_hash TEXT," +
|
||||
"publish_month TEXT," +
|
||||
"publish_year TEXT," +
|
||||
"timestamp TEXT," +
|
||||
"PRIMARY KEY (id ) )"
|
||||
)
|
||||
@ -229,7 +231,7 @@ class ComicVineCacher:
|
||||
return result
|
||||
|
||||
|
||||
def add_issue_image_url( self, issue_id, image_url, thumb_image_url ):
|
||||
def add_issue_select_details( self, issue_id, image_url, thumb_image_url, publish_month, publish_year ):
|
||||
|
||||
con = lite.connect( self.db_file )
|
||||
|
||||
@ -240,25 +242,27 @@ class ComicVineCacher:
|
||||
data = {
|
||||
"image_url": image_url,
|
||||
"thumb_image_url": thumb_image_url,
|
||||
"publish_month": publish_month,
|
||||
"publish_year": publish_year,
|
||||
"timestamp": timestamp
|
||||
}
|
||||
self.upsert( cur, "issues" , "id", issue_id, data)
|
||||
|
||||
|
||||
|
||||
def get_issue_image_url( self, issue_id ):
|
||||
def get_issue_select_details( self, issue_id ):
|
||||
|
||||
con = lite.connect( self.db_file )
|
||||
with con:
|
||||
cur = con.cursor()
|
||||
|
||||
cur.execute("SELECT image_url,thumb_image_url FROM Issues WHERE id=?", [ issue_id ])
|
||||
cur.execute("SELECT image_url,thumb_image_url,publish_month,publish_year FROM Issues WHERE id=?", [ issue_id ])
|
||||
row = cur.fetchone()
|
||||
|
||||
if row[0] is None :
|
||||
return None, None
|
||||
return None, None, None, None
|
||||
else:
|
||||
return row[0],row[1]
|
||||
return row[0],row[1],row[2],row[3]
|
||||
|
||||
|
||||
def upsert( self, cur, tablename, pkname, pkval, data):
|
||||
|
@ -250,38 +250,47 @@ class ComicVineTalker(QObject):
|
||||
|
||||
return newstring
|
||||
|
||||
|
||||
def fetchIssueDate( self, issue_id ):
|
||||
image_url, thumb_url, month,year = self.fetchIssueSelectDetails( issue_id )
|
||||
return month, year
|
||||
|
||||
def fetchIssueCoverURLs( self, issue_id ):
|
||||
image_url, thumb_url, month,year = self.fetchIssueSelectDetails( issue_id )
|
||||
return image_url, thumb_url
|
||||
|
||||
def fetchIssueSelectDetails( self, issue_id ):
|
||||
|
||||
cached_image_url,cached_thumb_url = self.fetchCachedIssueCoverURLs( issue_id )
|
||||
cached_image_url,cached_thumb_url,cached_month,cached_year = self.fetchCachedIssueSelectDetails( issue_id )
|
||||
if cached_image_url is not None:
|
||||
return cached_image_url,cached_thumb_url
|
||||
return cached_image_url,cached_thumb_url, cached_month, cached_year
|
||||
|
||||
issue_url = "http://api.comicvine.com/issue/" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image"
|
||||
issue_url = "http://api.comicvine.com/issue/" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image,publish_month,publish_year"
|
||||
resp = urllib2.urlopen(issue_url)
|
||||
content = resp.read()
|
||||
cv_response = json.loads(content)
|
||||
if cv_response[ 'status_code' ] != 1:
|
||||
print ( "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] ))
|
||||
return None, None
|
||||
return None, None,None,None
|
||||
|
||||
image_url = cv_response['results']['image']['super_url']
|
||||
thumb_url = cv_response['results']['image']['thumb_url']
|
||||
year = cv_response['results']['publish_year']
|
||||
month = cv_response['results']['publish_month']
|
||||
|
||||
if image_url is not None:
|
||||
self.cacheIssueCoverURLs( issue_id, image_url,thumb_url )
|
||||
return image_url,thumb_url
|
||||
self.cacheIssueSelectDetails( issue_id, image_url,thumb_url, month, year )
|
||||
return image_url,thumb_url,month,year
|
||||
|
||||
def fetchCachedIssueCoverURLs( self, issue_id ):
|
||||
def fetchCachedIssueSelectDetails( self, issue_id ):
|
||||
|
||||
# before we search online, look in our cache, since we might already
|
||||
# have this info
|
||||
cvc = ComicVineCacher( ComicTaggerSettings.getSettingsFolder() )
|
||||
return cvc.get_issue_image_url( issue_id )
|
||||
return cvc.get_issue_select_details( issue_id )
|
||||
|
||||
def cacheIssueCoverURLs( self, issue_id, image_url,thumb_url ):
|
||||
def cacheIssueSelectDetails( self, issue_id, image_url, thumb_url, month, year ):
|
||||
cvc = ComicVineCacher( ComicTaggerSettings.getSettingsFolder() )
|
||||
cvc.add_issue_image_url( issue_id, image_url, thumb_url )
|
||||
cvc.add_issue_select_details( issue_id, image_url, thumb_url, month, year )
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
@ -290,12 +299,12 @@ class ComicVineTalker(QObject):
|
||||
def asyncFetchIssueCoverURLs( self, issue_id ):
|
||||
|
||||
self.issue_id = issue_id
|
||||
cached_image_url,cached_thumb_url = self.fetchCachedIssueCoverURLs( issue_id )
|
||||
cached_image_url,cached_thumb_url,month,year = self.fetchCachedIssueSelectDetails( issue_id )
|
||||
if cached_image_url is not None:
|
||||
self.urlFetchComplete.emit( cached_image_url,cached_thumb_url, self.issue_id )
|
||||
return
|
||||
|
||||
issue_url = "http://api.comicvine.com/issue/" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image"
|
||||
issue_url = "http://api.comicvine.com/issue/" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image,publish_month,publish_year"
|
||||
self.nam = QNetworkAccessManager()
|
||||
self.nam.finished.connect( self.asyncFetchIssueCoverURLComplete )
|
||||
self.nam.get(QNetworkRequest(QUrl(issue_url)))
|
||||
@ -311,8 +320,10 @@ class ComicVineTalker(QObject):
|
||||
|
||||
image_url = cv_response['results']['image']['super_url']
|
||||
thumb_url = cv_response['results']['image']['thumb_url']
|
||||
year = cv_response['results']['publish_year']
|
||||
month = cv_response['results']['publish_month']
|
||||
|
||||
self.cacheIssueCoverURLs( self.issue_id, image_url, thumb_url )
|
||||
self.cacheIssueSelectDetails( self.issue_id, image_url, thumb_url, month, year )
|
||||
|
||||
self.urlFetchComplete.emit( image_url, thumb_url, self.issue_id )
|
||||
|
||||
|
@ -43,14 +43,26 @@ class IssueIdentifier:
|
||||
def __init__(self, comic_archive, cv_api_key ):
|
||||
self.comic_archive = comic_archive
|
||||
self.image_hasher = 1
|
||||
self.additional_metadata = None
|
||||
self.min_score_thresh = 22
|
||||
|
||||
self.onlyUseAdditionalMetaData = False
|
||||
|
||||
# a decent hamming score, good enough to call it a match
|
||||
self.min_score_thresh = 20
|
||||
|
||||
# the min distance a hamming score must be to separate itself from closest neighbor
|
||||
self.min_score_distance = 2
|
||||
|
||||
# a very strong hamming score, almost certainly the same image
|
||||
self.strong_score_thresh = 8
|
||||
|
||||
# used to eliminate series names that are too long based on our search string
|
||||
self.length_delta_thresh = 3
|
||||
|
||||
self.additional_metadata = GenericMetadata()
|
||||
self.cv_api_key = cv_api_key
|
||||
self.output_function = IssueIdentifier.defaultWriteOutput
|
||||
self.callback = None
|
||||
self.search_result = self.ResultNoMatches
|
||||
|
||||
def setScoreMinThreshold( self, thresh ):
|
||||
self.min_score_thresh = thresh
|
||||
@ -91,7 +103,14 @@ class IssueIdentifier:
|
||||
|
||||
if ca is None:
|
||||
return
|
||||
|
||||
|
||||
if self.onlyUseAdditionalMetaData:
|
||||
search_keys['series'] = self.additional_metadata.series
|
||||
search_keys['issue_number'] = self.additional_metadata.issueNumber
|
||||
search_keys['year'] = self.additional_metadata.publicationYear
|
||||
search_keys['month'] = self.additional_metadata.publicationMonth
|
||||
return search_keys
|
||||
|
||||
# see if the archive has any useful meta data for searching with
|
||||
if ca.hasCIX():
|
||||
internal_metadata = ca.readCIX()
|
||||
@ -128,7 +147,7 @@ class IssueIdentifier:
|
||||
search_keys['year'] = internal_metadata.publicationYear
|
||||
else:
|
||||
search_keys['year'] = md_from_filename.publicationYear
|
||||
|
||||
|
||||
if self.additional_metadata.publicationMonth is not None:
|
||||
search_keys['month'] = self.additional_metadata.publicationMonth
|
||||
elif internal_metadata.publicationMonth is not None:
|
||||
@ -156,7 +175,7 @@ class IssueIdentifier:
|
||||
|
||||
if not ca.seemsToBeAComicArchive():
|
||||
self.log_msg( "Sorry, but "+ opts.filename + " is not a comic archive!")
|
||||
return []
|
||||
return self.ResultNoMatches, []
|
||||
|
||||
cover_image_data = ca.getCoverPage()
|
||||
|
||||
@ -171,7 +190,7 @@ class IssueIdentifier:
|
||||
self.log_msg("Not enough info for a search!")
|
||||
return []
|
||||
|
||||
"""
|
||||
|
||||
self.log_msg( "Going to search for:" )
|
||||
self.log_msg( "Series: " + keys['series'] )
|
||||
self.log_msg( "Issue : " + keys['issue_number'] )
|
||||
@ -179,7 +198,7 @@ class IssueIdentifier:
|
||||
self.log_msg( "Year : " + keys['year'] )
|
||||
if keys['month'] is not None:
|
||||
self.log_msg( "Month : " + keys['month'] )
|
||||
"""
|
||||
|
||||
comicVine = ComicVineTalker( self.cv_api_key )
|
||||
|
||||
#self.log_msg( ( "Searching for " + keys['series'] + "...")
|
||||
@ -195,8 +214,10 @@ class IssueIdentifier:
|
||||
|
||||
#self.log_msg( "Removing results with too long names" )
|
||||
for item in cv_search_results:
|
||||
#assume that our search name is close to the actual name, say within 5 characters
|
||||
if len( utils.removearticles(item['name'])) < len( keys['series'] ) + 5:
|
||||
#assume that our search name is close to the actual name, say within ,e.g. 5 chars
|
||||
shortened_key = utils.removearticles(keys['series'])
|
||||
shortened_item_name = utils.removearticles(item['name'])
|
||||
if len( shortened_item_name ) < ( len( shortened_key ) + self.length_delta_thresh) :
|
||||
series_shortlist.append(item)
|
||||
|
||||
# if we don't think it's an issue number 1, remove any series' that are one-shots
|
||||
@ -241,6 +262,17 @@ class IssueIdentifier:
|
||||
if num_s == keys['issue_number']:
|
||||
# found a matching issue number! now get the issue data
|
||||
img_url, thumb_url = comicVine.fetchIssueCoverURLs( issue['id'] )
|
||||
month, year = comicVine.fetchIssueDate( issue['id'] )
|
||||
|
||||
if self.cancel == True:
|
||||
self.match_list = []
|
||||
return self.match_list
|
||||
|
||||
# now, if we have an issue year key given, reject this one if not a match
|
||||
if keys['year'] is not None:
|
||||
if keys['year'] != year:
|
||||
break
|
||||
|
||||
url_image_data = ImageFetcher().fetch(thumb_url, blocking=True)
|
||||
|
||||
if self.cancel == True:
|
||||
@ -258,6 +290,8 @@ class IssueIdentifier:
|
||||
match['img_url'] = thumb_url
|
||||
match['issue_id'] = issue['id']
|
||||
match['volume_id'] = series['id']
|
||||
match['month'] = month
|
||||
match['year'] = year
|
||||
self.match_list.append(match)
|
||||
|
||||
self.log_msg( " --> {0}".format(match['distance']), newline=False )
|
||||
@ -268,8 +302,10 @@ class IssueIdentifier:
|
||||
|
||||
if len(self.match_list) == 0:
|
||||
self.log_msg( ":-( no matches!" )
|
||||
self.search_result = self.ResultNoMatches
|
||||
return self.match_list
|
||||
|
||||
|
||||
# sort list by image match scores
|
||||
self.match_list.sort(key=lambda k: k['distance'])
|
||||
|
||||
@ -281,20 +317,22 @@ class IssueIdentifier:
|
||||
self.log_msg( str(l))
|
||||
|
||||
def print_match(item):
|
||||
self.log_msg( u"-----> {0} #{1} {2} -- score: {3}".format(
|
||||
self.log_msg( u"-----> {0} #{1} {2} ({3}/{4}) -- score: {5}".format(
|
||||
item['series'],
|
||||
item['issue_number'],
|
||||
item['issue_title'],
|
||||
item['month'],
|
||||
item['year'],
|
||||
item['distance']) )
|
||||
|
||||
best_score = self.match_list[0]['distance']
|
||||
|
||||
if len(self.match_list) == 1:
|
||||
self.search_result = self.ResultOneGoodMatch
|
||||
if best_score > self.min_score_thresh:
|
||||
self.log_msg( "!!!! Very weak score for the cover. Maybe it's not the cover?" )
|
||||
|
||||
|
||||
self.log_msg( "Comparing other pages now..." )
|
||||
self.log_msg( "Comparing other archive pages now..." )
|
||||
found = False
|
||||
for i in range(ca.getNumberOfPages()):
|
||||
image_data = ca.getPage(i)
|
||||
@ -311,12 +349,15 @@ class IssueIdentifier:
|
||||
self.log_msg( "" )
|
||||
if not found:
|
||||
self.log_msg( "No matching pages in the issue. Bummer" )
|
||||
self.search_result = self.ResultFoundMatchButBadCoverScore
|
||||
|
||||
print_match(self.match_list[0])
|
||||
return self.match_list
|
||||
|
||||
elif best_score > self.min_score_thresh and len(self.match_list) > 1:
|
||||
self.log_msg( "No good image matches! Need to use other info..." )
|
||||
self.search_result = self.ResultMultipleMatchesWithBadImageScores
|
||||
|
||||
return self.match_list
|
||||
|
||||
#now pare down list, remove any item more than specified distant from the top scores
|
||||
@ -326,11 +367,15 @@ class IssueIdentifier:
|
||||
|
||||
if len(self.match_list) == 1:
|
||||
print_match(self.match_list[0])
|
||||
self.search_result = self.ResultOneGoodMatch
|
||||
|
||||
elif len(self.match_list) == 0:
|
||||
self.log_msg( "No matches found :(" )
|
||||
self.search_result = self.ResultNoMatches
|
||||
else:
|
||||
print
|
||||
self.log_msg( "More than one likley candiate. Maybe a lexical comparison??" )
|
||||
self.log_msg( "More than one likley candiate." )
|
||||
self.search_result = self.ResultMultipleGoodMatches
|
||||
for item in self.match_list:
|
||||
print_match(item)
|
||||
|
||||
|
@ -848,8 +848,12 @@ class TaggerWindow( QtGui.QMainWindow):
|
||||
return
|
||||
|
||||
issue_number = str(self.leIssueNum.text()).strip()
|
||||
|
||||
selector = VolumeSelectionWindow( self, self.settings.cv_api_key, series_name, issue_number, self.comic_archive, self.settings, autoselect )
|
||||
|
||||
year = str(self.lePubYear.text()).strip()
|
||||
if year == "":
|
||||
year = None
|
||||
|
||||
selector = VolumeSelectionWindow( self, self.settings.cv_api_key, series_name, issue_number, year, self.comic_archive, self.settings, autoselect )
|
||||
|
||||
title = "Search: '" + series_name + "' - "
|
||||
selector.setWindowTitle( title + "Select Series")
|
||||
|
11
todo.txt
11
todo.txt
@ -3,12 +3,8 @@
|
||||
Features
|
||||
----------------
|
||||
|
||||
|
||||
Auto-select:
|
||||
msgbox on autoselect failure, or warning
|
||||
Multi-match dialog
|
||||
More auto-select logic using metadata
|
||||
Maybe, if only one match, but bad score, compare each page in the archive to online cover
|
||||
Check aspect ratio, and maybe break cover into two parts for hashing?
|
||||
|
||||
Stand-alone CLI
|
||||
@ -19,7 +15,7 @@ Stand-alone CLI
|
||||
|
||||
|
||||
TaggerWindow entry fields
|
||||
Special tabbed Dialogs needed for:
|
||||
Special tabbed Dialog needed for:
|
||||
Pages Info - maybe a custom painted widget
|
||||
At minimum, preserve the page data
|
||||
|
||||
@ -44,7 +40,7 @@ Disable CBL for RAR
|
||||
SERIOUS BUG: rebuilding zips!
|
||||
http://stackoverflow.com/questions/11578443/trigger-io-errno-18-cross-device-link
|
||||
|
||||
MAC:
|
||||
OSX:
|
||||
toolbar
|
||||
weird unrar complaints
|
||||
Page browser sizing
|
||||
@ -75,8 +71,7 @@ Image Hashes:
|
||||
Filename parsing:
|
||||
Concatenation of Name and Issue??
|
||||
"1602"
|
||||
|
||||
Issue identifier - compare names with articles removed
|
||||
|
||||
|
||||
App option to convert RAR to ZIP
|
||||
|
||||
|
1
utils.py
1
utils.py
@ -54,6 +54,7 @@ def addtopath( dir ):
|
||||
os.environ['PATH'] = dir + os.pathsep + os.environ['PATH']
|
||||
|
||||
def removearticles( text ):
|
||||
text = text.lower()
|
||||
articles = ['and', 'the', 'a', '&' ]
|
||||
newText = ''
|
||||
for word in text.split(' '):
|
||||
|
@ -77,12 +77,11 @@ class IdentifyThread( QtCore.QThread):
|
||||
def run(self):
|
||||
matches =self.identifier.search()
|
||||
self.identifyComplete.emit( )
|
||||
|
||||
|
||||
|
||||
class VolumeSelectionWindow(QtGui.QDialog):
|
||||
|
||||
def __init__(self, parent, cv_api_key, series_name, issue_number, comic_archive, settings, autoselect=False):
|
||||
def __init__(self, parent, cv_api_key, series_name, issue_number, year, comic_archive, settings, autoselect=False):
|
||||
super(VolumeSelectionWindow, self).__init__(parent)
|
||||
|
||||
uic.loadUi(os.path.join(ComicTaggerSettings.baseDir(), 'volumeselectionwindow.ui' ), self)
|
||||
@ -90,6 +89,7 @@ class VolumeSelectionWindow(QtGui.QDialog):
|
||||
self.settings = settings
|
||||
self.series_name = series_name
|
||||
self.issue_number = issue_number
|
||||
self.year = year
|
||||
self.cv_api_key = cv_api_key
|
||||
self.volume_id = 0
|
||||
self.comic_archive = comic_archive
|
||||
@ -121,7 +121,10 @@ class VolumeSelectionWindow(QtGui.QDialog):
|
||||
md = GenericMetadata()
|
||||
md.series = self.series_name
|
||||
md.issueNumber = self.issue_number
|
||||
md.publicationYear = self.year
|
||||
|
||||
self.ii.setAdditionalMetadata( md )
|
||||
self.ii.onlyUseAdditionalMetaData = True
|
||||
|
||||
self.id_thread = IdentifyThread( self.ii )
|
||||
self.id_thread.identifyComplete.connect( self.identifyComplete )
|
||||
@ -147,7 +150,25 @@ class VolumeSelectionWindow(QtGui.QDialog):
|
||||
def identifyComplete( self ):
|
||||
|
||||
matches = self.ii.match_list
|
||||
if len(matches) == 1:
|
||||
result = self.ii.search_result
|
||||
|
||||
found_match = False
|
||||
if result == self.ii.ResultNoMatches:
|
||||
QtGui.QMessageBox.information(self,"Auto-Select Result", " No matches found :-(")
|
||||
elif result == self.ii.ResultFoundMatchButBadCoverScore:
|
||||
QtGui.QMessageBox.information(self,"Auto-Select Result", " Found a match, but cover doesn't seem to match. Verify before commiting!")
|
||||
found_match = True
|
||||
elif result == self.ii.ResultFoundMatchButNotFirstPage :
|
||||
QtGui.QMessageBox.information(self,"Auto-Select Result", " Found a match, but not with the first page of the archive.")
|
||||
found_match = True
|
||||
elif result == self.ii.ResultMultipleMatchesWithBadImageScores:
|
||||
QtGui.QMessageBox.information(self,"Auto-Select Result", " Found some possibilities, but no confidence. Proceed manually.")
|
||||
elif result == self.ii.ResultOneGoodMatch:
|
||||
found_match = True
|
||||
elif result == self.ii.ResultMultipleGoodMatches:
|
||||
QtGui.QMessageBox.information(self,"Auto-Select Result", " Found multiple likely matches! Selection DIALOG TBD.")
|
||||
|
||||
if found_match:
|
||||
self.iddialog.accept()
|
||||
|
||||
print "VolumeSelectionWindow found a match!!", matches[0]['volume_id'], matches[0]['issue_number']
|
||||
|
Loading…
x
Reference in New Issue
Block a user