More robust handling of read errors in RAR archives.
More logging in the auto-tag process.
git-svn-id: http://comictagger.googlecode.com/svn/trunk@349 6c5673fe-1810-88d6-992b-cd32ca31540c
This commit is contained in:
parent
ccde71f9d0
commit
93f316b820
@ -46,7 +46,7 @@ from comicbookinfo import ComicBookInfo
|
||||
from comet import CoMet
|
||||
from genericmetadata import GenericMetadata, PageType
|
||||
from filenameparser import FileNameParser
|
||||
|
||||
from settings import ComicTaggerSettings
|
||||
|
||||
class ZipArchiver:
|
||||
|
||||
@ -69,8 +69,10 @@ class ZipArchiver:
|
||||
data = zf.read( archive_file )
|
||||
except zipfile.BadZipfile:
|
||||
print "bad zipfile: {0} :: {1}".format(self.path, archive_file)
|
||||
raise IOError
|
||||
except Exception:
|
||||
print "bad zipfile: {0} :: {1}".format(self.path, archive_file)
|
||||
raise IOError
|
||||
finally:
|
||||
zf.close()
|
||||
return data
|
||||
@ -200,7 +202,7 @@ class ZipArchiver:
|
||||
try:
|
||||
zout = zipfile.ZipFile (self.path, 'w')
|
||||
for fname in otherArchive.getArchiveFilenameList():
|
||||
data = otherArchive.readArchiveFile( fname )
|
||||
data = otherArchive.readArchiveFile( fname )
|
||||
if data is not None:
|
||||
zout.writestr( fname, data )
|
||||
zout.close()
|
||||
@ -288,23 +290,30 @@ class RarArchiver:
|
||||
try:
|
||||
tries = tries+1
|
||||
entries = rarc.read_files( archive_file )
|
||||
|
||||
|
||||
if entries[0][0].size != len(entries[0][1]):
|
||||
print "readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format(
|
||||
entries[0][0].size,len(entries[0][1]), self.path, archive_file, tries)
|
||||
continue
|
||||
|
||||
except (OSError, IOError) as e:
|
||||
print e, "in readArchiveFile! try %s" % tries
|
||||
print "readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries)
|
||||
time.sleep(1)
|
||||
except Exception as e:
|
||||
print "Unexpected exception in readArchiveFile! {0}".format( e )
|
||||
print "Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries)
|
||||
break
|
||||
|
||||
else:
|
||||
#Success"
|
||||
#entries is a list of of tuples: ( rarinfo, filedata)
|
||||
if tries > 1:
|
||||
print "Attempted read_files() {0} times".format(tries)
|
||||
if (len(entries) == 1):
|
||||
return entries[0][1]
|
||||
else:
|
||||
return None
|
||||
raise IOError
|
||||
|
||||
return None
|
||||
raise IOError
|
||||
|
||||
|
||||
|
||||
@ -370,7 +379,7 @@ class RarArchiver:
|
||||
namelist = [ item.filename for item in rarc.infolist() ]
|
||||
|
||||
except (OSError, IOError) as e:
|
||||
print e, "in getArchiveFilenameList! try %s" % tries
|
||||
print "getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries)
|
||||
time.sleep(1)
|
||||
|
||||
else:
|
||||
@ -388,7 +397,7 @@ class RarArchiver:
|
||||
rarc = UnRAR2.RarFile( self.path )
|
||||
|
||||
except (OSError, IOError) as e:
|
||||
print e, "in getRARObj! try %s" % tries
|
||||
print "getRARObj(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries)
|
||||
time.sleep(1)
|
||||
|
||||
else:
|
||||
@ -471,7 +480,7 @@ class UnknownArchiver:
|
||||
return ""
|
||||
def setArchiveComment( self, comment ):
|
||||
return False
|
||||
def readArchiveFilen( self ):
|
||||
def readArchiveFile( self ):
|
||||
return ""
|
||||
def writeArchiveFile( self, archive_file, data ):
|
||||
return False
|
||||
@ -637,7 +646,13 @@ class ComicArchive:
|
||||
filename = self.getPageName( index )
|
||||
|
||||
if filename is not None:
|
||||
image_data = self.archiver.readArchiveFile( filename )
|
||||
try:
|
||||
image_data = self.archiver.readArchiveFile( filename )
|
||||
except IOError:
|
||||
print "Error reading in page. Substituting logo page."
|
||||
fname = os.path.join(ComicTaggerSettings.baseDir(), 'graphics/nocover.png' )
|
||||
with open(fname) as x:
|
||||
image_data = x.read()
|
||||
|
||||
return image_data
|
||||
|
||||
@ -733,7 +748,7 @@ class ComicArchive:
|
||||
def readCIX( self ):
|
||||
if self.cix_md is None:
|
||||
raw_cix = self.readRawCIX()
|
||||
if raw_cix is None:
|
||||
if raw_cix is None or raw_cix == "":
|
||||
self.cix_md = GenericMetadata()
|
||||
else:
|
||||
self.cix_md = ComicInfoXml().metadataFromString( raw_cix )
|
||||
@ -753,8 +768,12 @@ class ComicArchive:
|
||||
def readRawCIX( self ):
|
||||
if not self.hasCIX():
|
||||
return None
|
||||
|
||||
return self.archiver.readArchiveFile( self.ci_xml_filename )
|
||||
try:
|
||||
raw_cix = self.archiver.readArchiveFile( self.ci_xml_filename )
|
||||
except IOError:
|
||||
print "Error reading in raw CIX!"
|
||||
raw_cix = ""
|
||||
return raw_cix
|
||||
|
||||
def writeCIX(self, metadata):
|
||||
|
||||
@ -798,7 +817,7 @@ class ComicArchive:
|
||||
def readCoMet( self ):
|
||||
if self.comet_md is None:
|
||||
raw_comet = self.readRawCoMet()
|
||||
if raw_comet is None:
|
||||
if raw_comet is None or raw_comet == "":
|
||||
self.comet_md = GenericMetadata()
|
||||
else:
|
||||
self.comet_md = CoMet().metadataFromString( raw_comet )
|
||||
@ -824,7 +843,12 @@ class ComicArchive:
|
||||
print self.path, "doesn't have CoMet data!"
|
||||
return None
|
||||
|
||||
return self.archiver.readArchiveFile( self.comet_filename )
|
||||
try:
|
||||
raw_comet = self.archiver.readArchiveFile( self.comet_filename )
|
||||
except IOError:
|
||||
print "Error reading in raw CoMet!"
|
||||
raw_comet = ""
|
||||
return raw_comet
|
||||
|
||||
def writeCoMet(self, metadata):
|
||||
|
||||
@ -871,7 +895,11 @@ class ComicArchive:
|
||||
if ( os.path.dirname(n) == "" and
|
||||
os.path.splitext(n)[1].lower() == '.xml'):
|
||||
# read in XML file, and validate it
|
||||
data = self.archiver.readArchiveFile( n )
|
||||
try:
|
||||
data = self.archiver.readArchiveFile( n )
|
||||
except:
|
||||
data = ""
|
||||
print "Error reading in Comet XML for validation!"
|
||||
if CoMet().validateString( data ):
|
||||
# since we found it, save it!
|
||||
self.comet_filename = n
|
||||
|
@ -26,6 +26,7 @@ import math
|
||||
import re
|
||||
import datetime
|
||||
import ctversion
|
||||
import sys
|
||||
|
||||
try:
|
||||
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
|
||||
@ -59,6 +60,17 @@ class ComicVineTalker(QObject):
|
||||
# key that is registered to comictagger
|
||||
self.api_key = '27431e6787042105bd3e47e169a624521f89f3a4'
|
||||
|
||||
self.log_func = None
|
||||
|
||||
def setLogFunc( self , log_func ):
|
||||
self.log_func = log_func
|
||||
|
||||
def writeLog( self , text ):
|
||||
if self.log_func is None:
|
||||
sys.stdout.write(text.encode( errors='replace') )
|
||||
sys.stdout.flush()
|
||||
else:
|
||||
self.log_func( text )
|
||||
|
||||
def testKey( self ):
|
||||
|
||||
@ -76,7 +88,7 @@ class ComicVineTalker(QObject):
|
||||
resp = urllib2.urlopen( url )
|
||||
return resp.read()
|
||||
except Exception as e:
|
||||
print e
|
||||
self.writeLog( str(e) )
|
||||
raise ComicVineTalkerException("Network Error!")
|
||||
|
||||
def searchForSeries( self, series_name , callback=None, refresh_cache=False ):
|
||||
@ -104,7 +116,7 @@ class ComicVineTalker(QObject):
|
||||
cv_response = json.loads(content)
|
||||
|
||||
if cv_response[ 'status_code' ] != 1:
|
||||
print ( "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] ))
|
||||
self.writeLog( "Comic Vine query failed with error: [{0}]. \n".format( cv_response[ 'error' ] ))
|
||||
return None
|
||||
|
||||
search_results = list()
|
||||
@ -116,7 +128,7 @@ class ComicVineTalker(QObject):
|
||||
total_result_count = cv_response['number_of_total_results']
|
||||
|
||||
if callback is None:
|
||||
print ("Found {0} of {1} results".format( cv_response['number_of_page_results'], cv_response['number_of_total_results']))
|
||||
self.writeLog( "Found {0} of {1} results\n".format( cv_response['number_of_page_results'], cv_response['number_of_total_results']))
|
||||
search_results.extend( cv_response['results'])
|
||||
offset = 0
|
||||
|
||||
@ -126,14 +138,14 @@ class ComicVineTalker(QObject):
|
||||
# see if we need to keep asking for more pages...
|
||||
while ( current_result_count < total_result_count ):
|
||||
if callback is None:
|
||||
print ("getting another page of results {0} of {1}...".format( current_result_count, total_result_count))
|
||||
self.writeLog("getting another page of results {0} of {1}...\n".format( current_result_count, total_result_count))
|
||||
offset += limit
|
||||
content = self.getUrlContent(search_url + "&offset="+str(offset))
|
||||
|
||||
cv_response = json.loads(content)
|
||||
|
||||
if cv_response[ 'status_code' ] != 1:
|
||||
print ( "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] ))
|
||||
self.writeLog( "Comic Vine query failed with error: [{0}]. \n".format( cv_response[ 'error' ] ))
|
||||
return None
|
||||
search_results.extend( cv_response['results'])
|
||||
current_result_count += cv_response['number_of_page_results']
|
||||
|
@ -119,8 +119,14 @@ class IssueIdentifier:
|
||||
|
||||
im = Image.open(StringIO.StringIO(image_data))
|
||||
w,h = im.size
|
||||
|
||||
cropped_im = im.crop( (int(w/2), 0, w, h) )
|
||||
|
||||
try:
|
||||
cropped_im = im.crop( (int(w/2), 0, w, h) )
|
||||
except Exception as e:
|
||||
sys.exc_clear()
|
||||
print "cropCover() error:", e
|
||||
return None
|
||||
|
||||
output = StringIO.StringIO()
|
||||
cropped_im.save(output, format="JPEG")
|
||||
cropped_image_data = output.getvalue()
|
||||
@ -202,7 +208,7 @@ class IssueIdentifier:
|
||||
|
||||
@staticmethod
|
||||
def defaultWriteOutput( text ):
|
||||
sys.stdout.write(text.encode( errors='replace') )
|
||||
sys.stdout.write( text )
|
||||
sys.stdout.flush()
|
||||
|
||||
def log_msg( self, msg , newline=True ):
|
||||
@ -235,9 +241,10 @@ class IssueIdentifier:
|
||||
aspect_ratio = self.getAspectRatio( cover_image_data )
|
||||
if aspect_ratio < 1.0:
|
||||
right_side_image_data = self.cropCover( cover_image_data )
|
||||
narrow_cover_hash = self.calculateHash( right_side_image_data )
|
||||
print "narrow_cover_hash", narrow_cover_hash
|
||||
|
||||
if right_side_image_data is not None:
|
||||
narrow_cover_hash = self.calculateHash( right_side_image_data )
|
||||
self.log_msg(unicode(str(narrow_cover_hash)))
|
||||
|
||||
#self.log_msg( "Cover hash = {0:016x}".format(cover_hash) )
|
||||
|
||||
keys = self.getSearchKeys()
|
||||
@ -259,6 +266,7 @@ class IssueIdentifier:
|
||||
#self.log_msg("Publisher Blacklist: " + str(self.publisher_blacklist))
|
||||
|
||||
comicVine = ComicVineTalker( )
|
||||
comicVine.setLogFunc( self.output_function )
|
||||
|
||||
#self.log_msg( ( "Searching for " + keys['series'] + "...")
|
||||
self.log_msg( u"Searching for {0} #{1} ...".format( keys['series'], keys['issue_number']) )
|
||||
@ -433,16 +441,16 @@ class IssueIdentifier:
|
||||
page_hash = self.calculateHash( image_data )
|
||||
distance = ImageHasher.hamming_distance(page_hash, self.match_list[0]['url_image_hash'])
|
||||
if distance <= self.strong_score_thresh:
|
||||
self.log_msg( "Found a great match (distance = {0}) on page {1}!".format(distance, i+1) )
|
||||
self.log_msg( "Found a great match (score = {0}) on page {1}!".format(distance, i+1) )
|
||||
found = True
|
||||
break
|
||||
elif distance < self.min_score_thresh:
|
||||
self.log_msg( "Found a good match (distance = {0}) on page {1}".format(distance, i) )
|
||||
self.log_msg( "Found a good match (score = {0}) on page {1}".format(distance, i) )
|
||||
found = True
|
||||
self.log_msg( ".", newline=False )
|
||||
self.log_msg( "" )
|
||||
if not found:
|
||||
self.log_msg( "No matching pages in the issue. Bummer" )
|
||||
self.log_msg( "No matching pages in the issue." )
|
||||
self.search_result = self.ResultFoundMatchButBadCoverScore
|
||||
|
||||
self.log_msg( u"--------------------------------------------------")
|
||||
@ -451,7 +459,9 @@ class IssueIdentifier:
|
||||
return self.match_list
|
||||
|
||||
elif best_score > self.min_score_thresh and len(self.match_list) > 1:
|
||||
self.log_msg( "No good image matches! Need to use other info..." )
|
||||
self.log_msg( u"--------------------------------------------------")
|
||||
self.log_msg( u"Multiple bad cover matches! Need to use other info..." )
|
||||
self.log_msg( u"--------------------------------------------------")
|
||||
self.search_result = self.ResultMultipleMatchesWithBadImageScores
|
||||
|
||||
return self.match_list
|
||||
@ -468,7 +478,9 @@ class IssueIdentifier:
|
||||
self.search_result = self.ResultOneGoodMatch
|
||||
|
||||
elif len(self.match_list) == 0:
|
||||
self.log_msg( u"--------------------------------------------------")
|
||||
self.log_msg( "No matches found :(" )
|
||||
self.log_msg( u"--------------------------------------------------")
|
||||
self.search_result = self.ResultNoMatches
|
||||
else:
|
||||
print
|
||||
|
@ -63,7 +63,7 @@ class ComicTaggerSettings:
|
||||
|
||||
# identifier settings
|
||||
self.id_length_delta_thresh = 5
|
||||
self.id_publisher_blacklist = "Panini Comics, Abril, Scholastic Book Services"
|
||||
self.id_publisher_blacklist = "Panini Comics, Abril, Scholastic Book Services, Editorial Televisa"
|
||||
|
||||
# Show/ask dialog flags
|
||||
self.ask_about_cbi_in_rar = True
|
||||
|
@ -1499,7 +1499,15 @@ class TaggerWindow( QtGui.QMainWindow):
|
||||
|
||||
return cv_md
|
||||
|
||||
|
||||
def autoTagLog( self, text ):
|
||||
IssueIdentifier.defaultWriteOutput( text )
|
||||
if self.atprogdialog is not None:
|
||||
self.atprogdialog.textEdit.insertPlainText(text)
|
||||
self.atprogdialog.textEdit.ensureCursorVisible()
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
|
||||
def identifyAndTagSingleArchive( self, ca, match_results, dlg):
|
||||
success = False
|
||||
ii = IssueIdentifier( ca, self.settings )
|
||||
@ -1513,13 +1521,7 @@ class TaggerWindow( QtGui.QMainWindow):
|
||||
print "!!!!No metadata given to search online with!"
|
||||
return False, match_results
|
||||
|
||||
def myoutput( text ):
|
||||
IssueIdentifier.defaultWriteOutput( text )
|
||||
self.atprogdialog.textEdit.insertPlainText(text)
|
||||
self.atprogdialog.textEdit.ensureCursorVisible()
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
|
||||
|
||||
if dlg.dontUseYear:
|
||||
md.year = None
|
||||
@ -1527,7 +1529,7 @@ class TaggerWindow( QtGui.QMainWindow):
|
||||
md.issue = "1"
|
||||
ii.setAdditionalMetadata( md )
|
||||
ii.onlyUseAdditionalMetaData = True
|
||||
ii.setOutputFunction( myoutput )
|
||||
ii.setOutputFunction( self.autoTagLog )
|
||||
ii.cover_page_index = md.getCoverPageIndexList()[0]
|
||||
ii.setCoverURLCallback( self.atprogdialog.setTestImage )
|
||||
|
||||
@ -1555,13 +1557,16 @@ class TaggerWindow( QtGui.QMainWindow):
|
||||
choices = True
|
||||
|
||||
if choices:
|
||||
print "Online search: Multiple matches. Save aborted"
|
||||
self.autoTagLog( "Online search: Multiple matches. Save aborted\n" )
|
||||
match_results.multipleMatches.append(MultipleMatch(ca,matches))
|
||||
elif low_confidence and not dlg.autoSaveOnLow:
|
||||
print "Online search: Low confidence match. Save aborted"
|
||||
match_results.noMatches.append(ca.path)
|
||||
elif low_confidence:
|
||||
if dlg.autoSaveOnLow:
|
||||
self.autoTagLog( "Online search: Low confidence match, but saving anyways...\n" )
|
||||
else:
|
||||
self.autoTagLog( "Online search: Low confidence match. Save aborted\n" )
|
||||
match_results.noMatches.append(ca.path)
|
||||
elif not found_match:
|
||||
print "Online search: No match found. Save aborted"
|
||||
self.autoTagLog( "Online search: No match found. Save aborted\n" )
|
||||
match_results.noMatches.append(ca.path)
|
||||
else:
|
||||
|
||||
@ -1609,11 +1614,17 @@ class TaggerWindow( QtGui.QMainWindow):
|
||||
self.atprogdialog.show()
|
||||
self.atprogdialog.progressBar.setMaximum( len(ca_list) )
|
||||
self.atprogdialog.setWindowTitle( "Auto-Tagging" )
|
||||
|
||||
|
||||
self.autoTagLog( u"========================================================================\n" )
|
||||
self.autoTagLog( u"Auto-Tagging Started for {0} items\n".format(len(ca_list)))
|
||||
|
||||
prog_idx = 0
|
||||
|
||||
match_results = OnlineMatchResults()
|
||||
for ca in ca_list:
|
||||
self.autoTagLog( u"============================================================\n" )
|
||||
self.autoTagLog( u"Auto-Tagging {0} of {1}\n".format(prog_idx+1, len(ca_list)))
|
||||
self.autoTagLog( u"{0}\n".format(ca.path) )
|
||||
cover_idx = ca.readMetadata(style).getCoverPageIndexList()[0]
|
||||
image_data = ca.getPage( cover_idx )
|
||||
self.atprogdialog.setArchiveImage( image_data )
|
||||
@ -1635,23 +1646,25 @@ class TaggerWindow( QtGui.QMainWindow):
|
||||
self.loadArchive( self.fileSelectionList.getCurrentArchive() )
|
||||
self.atprogdialog = None
|
||||
|
||||
summary = ""
|
||||
summary += "Successfully tagged archives: {0}\n".format( len(match_results.goodMatches))
|
||||
|
||||
if len ( match_results.multipleMatches ) > 0:
|
||||
summary += "Archives with multiple matches: {0}\n".format( len(match_results.multipleMatches))
|
||||
if len ( match_results.noMatches ) > 0:
|
||||
summary += "Archives with no matches: {0}\n".format( len(match_results.noMatches))
|
||||
if len ( match_results.fetchDataFailures ) > 0:
|
||||
summary += "Archives that failed due to data fetch errors: {0}\n".format( len(match_results.fetchDataFailures))
|
||||
if len ( match_results.writeFailures ) > 0:
|
||||
summary += "Archives that failed due to file writing errors: {0}\n".format( len(match_results.writeFailures))
|
||||
summary = u""
|
||||
summary += u"Successfully tagged archives: {0}\n".format( len(match_results.goodMatches))
|
||||
|
||||
if len ( match_results.multipleMatches ) > 0:
|
||||
summary += "\n\nDo you want to manually select the ones with multiple matches now?"
|
||||
summary += u"Archives with multiple matches: {0}\n".format( len(match_results.multipleMatches))
|
||||
if len ( match_results.noMatches ) > 0:
|
||||
summary += u"Archives with no matches: {0}\n".format( len(match_results.noMatches))
|
||||
if len ( match_results.fetchDataFailures ) > 0:
|
||||
summary += u"Archives that failed due to data fetch errors: {0}\n".format( len(match_results.fetchDataFailures))
|
||||
if len ( match_results.writeFailures ) > 0:
|
||||
summary += u"Archives that failed due to file writing errors: {0}\n".format( len(match_results.writeFailures))
|
||||
|
||||
self.autoTagLog( summary )
|
||||
|
||||
if len ( match_results.multipleMatches ) > 0:
|
||||
summary += u"\n\nDo you want to manually select the ones with multiple matches now?"
|
||||
|
||||
reply = QtGui.QMessageBox.question(self,
|
||||
self.tr("Auto-Tag Summary"),
|
||||
self.tr(u"Auto-Tag Summary"),
|
||||
self.tr(summary),
|
||||
QtGui.QMessageBox.Yes, QtGui.QMessageBox.No )
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user