More robust handling of read errors in RAR archives

More logging in the auto-tag process

git-svn-id: http://comictagger.googlecode.com/svn/trunk@349 6c5673fe-1810-88d6-992b-cd32ca31540c
This commit is contained in:
beville 2013-01-25 06:17:45 +00:00
parent ccde71f9d0
commit 93f316b820
5 changed files with 126 additions and 61 deletions

View File

@ -46,7 +46,7 @@ from comicbookinfo import ComicBookInfo
from comet import CoMet
from genericmetadata import GenericMetadata, PageType
from filenameparser import FileNameParser
from settings import ComicTaggerSettings
class ZipArchiver:
@ -69,8 +69,10 @@ class ZipArchiver:
data = zf.read( archive_file )
except zipfile.BadZipfile:
print "bad zipfile: {0} :: {1}".format(self.path, archive_file)
raise IOError
except Exception:
print "bad zipfile: {0} :: {1}".format(self.path, archive_file)
raise IOError
finally:
zf.close()
return data
@ -200,7 +202,7 @@ class ZipArchiver:
try:
zout = zipfile.ZipFile (self.path, 'w')
for fname in otherArchive.getArchiveFilenameList():
data = otherArchive.readArchiveFile( fname )
data = otherArchive.readArchiveFile( fname )
if data is not None:
zout.writestr( fname, data )
zout.close()
@ -288,23 +290,30 @@ class RarArchiver:
try:
tries = tries+1
entries = rarc.read_files( archive_file )
if entries[0][0].size != len(entries[0][1]):
print "readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format(
entries[0][0].size,len(entries[0][1]), self.path, archive_file, tries)
continue
except (OSError, IOError) as e:
print e, "in readArchiveFile! try %s" % tries
print "readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries)
time.sleep(1)
except Exception as e:
print "Unexpected exception in readArchiveFile! {0}".format( e )
print "Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries)
break
else:
# Success
# entries is a list of tuples: ( rarinfo, filedata )
if tries > 1:
print "Attempted read_files() {0} times".format(tries)
if (len(entries) == 1):
return entries[0][1]
else:
return None
raise IOError
return None
raise IOError
@ -370,7 +379,7 @@ class RarArchiver:
namelist = [ item.filename for item in rarc.infolist() ]
except (OSError, IOError) as e:
print e, "in getArchiveFilenameList! try %s" % tries
print "getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries)
time.sleep(1)
else:
@ -388,7 +397,7 @@ class RarArchiver:
rarc = UnRAR2.RarFile( self.path )
except (OSError, IOError) as e:
print e, "in getRARObj! try %s" % tries
print "getRARObj(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries)
time.sleep(1)
else:
@ -471,7 +480,7 @@ class UnknownArchiver:
return ""
def setArchiveComment( self, comment ):
return False
def readArchiveFilen( self ):
def readArchiveFile( self ):
return ""
def writeArchiveFile( self, archive_file, data ):
return False
@ -637,7 +646,13 @@ class ComicArchive:
filename = self.getPageName( index )
if filename is not None:
image_data = self.archiver.readArchiveFile( filename )
try:
image_data = self.archiver.readArchiveFile( filename )
except IOError:
print "Error reading in page. Substituting logo page."
fname = os.path.join(ComicTaggerSettings.baseDir(), 'graphics/nocover.png' )
with open(fname) as x:
image_data = x.read()
return image_data
@ -733,7 +748,7 @@ class ComicArchive:
def readCIX( self ):
if self.cix_md is None:
raw_cix = self.readRawCIX()
if raw_cix is None:
if raw_cix is None or raw_cix == "":
self.cix_md = GenericMetadata()
else:
self.cix_md = ComicInfoXml().metadataFromString( raw_cix )
@ -753,8 +768,12 @@ class ComicArchive:
def readRawCIX( self ):
if not self.hasCIX():
return None
return self.archiver.readArchiveFile( self.ci_xml_filename )
try:
raw_cix = self.archiver.readArchiveFile( self.ci_xml_filename )
except IOError:
print "Error reading in raw CIX!"
raw_cix = ""
return raw_cix
def writeCIX(self, metadata):
@ -798,7 +817,7 @@ class ComicArchive:
def readCoMet( self ):
if self.comet_md is None:
raw_comet = self.readRawCoMet()
if raw_comet is None:
if raw_comet is None or raw_comet == "":
self.comet_md = GenericMetadata()
else:
self.comet_md = CoMet().metadataFromString( raw_comet )
@ -824,7 +843,12 @@ class ComicArchive:
print self.path, "doesn't have CoMet data!"
return None
return self.archiver.readArchiveFile( self.comet_filename )
try:
raw_comet = self.archiver.readArchiveFile( self.comet_filename )
except IOError:
print "Error reading in raw CoMet!"
raw_comet = ""
return raw_comet
def writeCoMet(self, metadata):
@ -871,7 +895,11 @@ class ComicArchive:
if ( os.path.dirname(n) == "" and
os.path.splitext(n)[1].lower() == '.xml'):
# read in XML file, and validate it
data = self.archiver.readArchiveFile( n )
try:
data = self.archiver.readArchiveFile( n )
except:
data = ""
print "Error reading in Comet XML for validation!"
if CoMet().validateString( data ):
# since we found it, save it!
self.comet_filename = n

View File

@ -26,6 +26,7 @@ import math
import re
import datetime
import ctversion
import sys
try:
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
@ -59,6 +60,17 @@ class ComicVineTalker(QObject):
# key that is registered to comictagger
self.api_key = '27431e6787042105bd3e47e169a624521f89f3a4'
self.log_func = None
def setLogFunc( self , log_func ):
    """Store the callable used for log output.

    log_func -- callable that accepts a single text argument; it is kept
                on the instance as ``self.log_func``.
    """
    self.log_func = log_func
def writeLog( self , text ):
    """Emit a log message.

    When a log function has been installed on ``self.log_func`` the text
    is handed to it unchanged; otherwise the text is written to stdout
    and stdout is flushed.

    text -- the message to log (unicode text or an already-encoded byte
            string).
    """
    if self.log_func is not None:
        self.log_func( text )
        return

    # Only real text needs encoding.  Calling encode() on a byte string
    # makes Python 2 decode it as ASCII first, which raises
    # UnicodeDecodeError as soon as the message contains a non-ASCII byte
    # (e.g. a localized error string produced by str(e)).
    if not isinstance( text, bytes ):
        text = text.encode( errors='replace' )
    sys.stdout.write( text )
    sys.stdout.flush()
def testKey( self ):
@ -76,7 +88,7 @@ class ComicVineTalker(QObject):
resp = urllib2.urlopen( url )
return resp.read()
except Exception as e:
print e
self.writeLog( str(e) )
raise ComicVineTalkerException("Network Error!")
def searchForSeries( self, series_name , callback=None, refresh_cache=False ):
@ -104,7 +116,7 @@ class ComicVineTalker(QObject):
cv_response = json.loads(content)
if cv_response[ 'status_code' ] != 1:
print ( "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] ))
self.writeLog( "Comic Vine query failed with error: [{0}]. \n".format( cv_response[ 'error' ] ))
return None
search_results = list()
@ -116,7 +128,7 @@ class ComicVineTalker(QObject):
total_result_count = cv_response['number_of_total_results']
if callback is None:
print ("Found {0} of {1} results".format( cv_response['number_of_page_results'], cv_response['number_of_total_results']))
self.writeLog( "Found {0} of {1} results\n".format( cv_response['number_of_page_results'], cv_response['number_of_total_results']))
search_results.extend( cv_response['results'])
offset = 0
@ -126,14 +138,14 @@ class ComicVineTalker(QObject):
# see if we need to keep asking for more pages...
while ( current_result_count < total_result_count ):
if callback is None:
print ("getting another page of results {0} of {1}...".format( current_result_count, total_result_count))
self.writeLog("getting another page of results {0} of {1}...\n".format( current_result_count, total_result_count))
offset += limit
content = self.getUrlContent(search_url + "&offset="+str(offset))
cv_response = json.loads(content)
if cv_response[ 'status_code' ] != 1:
print ( "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] ))
self.writeLog( "Comic Vine query failed with error: [{0}]. \n".format( cv_response[ 'error' ] ))
return None
search_results.extend( cv_response['results'])
current_result_count += cv_response['number_of_page_results']

View File

@ -119,8 +119,14 @@ class IssueIdentifier:
im = Image.open(StringIO.StringIO(image_data))
w,h = im.size
cropped_im = im.crop( (int(w/2), 0, w, h) )
try:
cropped_im = im.crop( (int(w/2), 0, w, h) )
except Exception as e:
sys.exc_clear()
print "cropCover() error:", e
return None
output = StringIO.StringIO()
cropped_im.save(output, format="JPEG")
cropped_image_data = output.getvalue()
@ -202,7 +208,7 @@ class IssueIdentifier:
@staticmethod
def defaultWriteOutput( text ):
sys.stdout.write(text.encode( errors='replace') )
sys.stdout.write( text )
sys.stdout.flush()
def log_msg( self, msg , newline=True ):
@ -235,9 +241,10 @@ class IssueIdentifier:
aspect_ratio = self.getAspectRatio( cover_image_data )
if aspect_ratio < 1.0:
right_side_image_data = self.cropCover( cover_image_data )
narrow_cover_hash = self.calculateHash( right_side_image_data )
print "narrow_cover_hash", narrow_cover_hash
if right_side_image_data is not None:
narrow_cover_hash = self.calculateHash( right_side_image_data )
self.log_msg(unicode(str(narrow_cover_hash)))
#self.log_msg( "Cover hash = {0:016x}".format(cover_hash) )
keys = self.getSearchKeys()
@ -259,6 +266,7 @@ class IssueIdentifier:
#self.log_msg("Publisher Blacklist: " + str(self.publisher_blacklist))
comicVine = ComicVineTalker( )
comicVine.setLogFunc( self.output_function )
#self.log_msg( ( "Searching for " + keys['series'] + "...")
self.log_msg( u"Searching for {0} #{1} ...".format( keys['series'], keys['issue_number']) )
@ -433,16 +441,16 @@ class IssueIdentifier:
page_hash = self.calculateHash( image_data )
distance = ImageHasher.hamming_distance(page_hash, self.match_list[0]['url_image_hash'])
if distance <= self.strong_score_thresh:
self.log_msg( "Found a great match (distance = {0}) on page {1}!".format(distance, i+1) )
self.log_msg( "Found a great match (score = {0}) on page {1}!".format(distance, i+1) )
found = True
break
elif distance < self.min_score_thresh:
self.log_msg( "Found a good match (distance = {0}) on page {1}".format(distance, i) )
self.log_msg( "Found a good match (score = {0}) on page {1}".format(distance, i) )
found = True
self.log_msg( ".", newline=False )
self.log_msg( "" )
if not found:
self.log_msg( "No matching pages in the issue. Bummer" )
self.log_msg( "No matching pages in the issue." )
self.search_result = self.ResultFoundMatchButBadCoverScore
self.log_msg( u"--------------------------------------------------")
@ -451,7 +459,9 @@ class IssueIdentifier:
return self.match_list
elif best_score > self.min_score_thresh and len(self.match_list) > 1:
self.log_msg( "No good image matches! Need to use other info..." )
self.log_msg( u"--------------------------------------------------")
self.log_msg( u"Multiple bad cover matches! Need to use other info..." )
self.log_msg( u"--------------------------------------------------")
self.search_result = self.ResultMultipleMatchesWithBadImageScores
return self.match_list
@ -468,7 +478,9 @@ class IssueIdentifier:
self.search_result = self.ResultOneGoodMatch
elif len(self.match_list) == 0:
self.log_msg( u"--------------------------------------------------")
self.log_msg( "No matches found :(" )
self.log_msg( u"--------------------------------------------------")
self.search_result = self.ResultNoMatches
else:
print

View File

@ -63,7 +63,7 @@ class ComicTaggerSettings:
# identifier settings
self.id_length_delta_thresh = 5
self.id_publisher_blacklist = "Panini Comics, Abril, Scholastic Book Services"
self.id_publisher_blacklist = "Panini Comics, Abril, Scholastic Book Services, Editorial Televisa"
# Show/ask dialog flags
self.ask_about_cbi_in_rar = True

View File

@ -1499,7 +1499,15 @@ class TaggerWindow( QtGui.QMainWindow):
return cv_md
def autoTagLog( self, text ):
    """Write auto-tag log text to the default output and the progress dialog.

    The text always goes through IssueIdentifier.defaultWriteOutput().
    If ``self.atprogdialog`` is set, the text is also appended to the
    dialog's text box, which is then scrolled to keep the cursor visible.

    text -- the text to log.
    """
    IssueIdentifier.defaultWriteOutput( text )

    progress_dialog = self.atprogdialog
    if progress_dialog is None:
        return

    log_view = progress_dialog.textEdit
    log_view.insertPlainText(text)
    log_view.ensureCursorVisible()
    # Pump the Qt event loop three times in a row, exactly as before --
    # presumably so the dialog repaints while tagging runs; confirm
    # before reducing the count.
    for _ in range(3):
        QtCore.QCoreApplication.processEvents()
def identifyAndTagSingleArchive( self, ca, match_results, dlg):
success = False
ii = IssueIdentifier( ca, self.settings )
@ -1513,13 +1521,7 @@ class TaggerWindow( QtGui.QMainWindow):
print "!!!!No metadata given to search online with!"
return False, match_results
def myoutput( text ):
IssueIdentifier.defaultWriteOutput( text )
self.atprogdialog.textEdit.insertPlainText(text)
self.atprogdialog.textEdit.ensureCursorVisible()
QtCore.QCoreApplication.processEvents()
QtCore.QCoreApplication.processEvents()
QtCore.QCoreApplication.processEvents()
if dlg.dontUseYear:
md.year = None
@ -1527,7 +1529,7 @@ class TaggerWindow( QtGui.QMainWindow):
md.issue = "1"
ii.setAdditionalMetadata( md )
ii.onlyUseAdditionalMetaData = True
ii.setOutputFunction( myoutput )
ii.setOutputFunction( self.autoTagLog )
ii.cover_page_index = md.getCoverPageIndexList()[0]
ii.setCoverURLCallback( self.atprogdialog.setTestImage )
@ -1555,13 +1557,16 @@ class TaggerWindow( QtGui.QMainWindow):
choices = True
if choices:
print "Online search: Multiple matches. Save aborted"
self.autoTagLog( "Online search: Multiple matches. Save aborted\n" )
match_results.multipleMatches.append(MultipleMatch(ca,matches))
elif low_confidence and not dlg.autoSaveOnLow:
print "Online search: Low confidence match. Save aborted"
match_results.noMatches.append(ca.path)
elif low_confidence:
if dlg.autoSaveOnLow:
self.autoTagLog( "Online search: Low confidence match, but saving anyways...\n" )
else:
self.autoTagLog( "Online search: Low confidence match. Save aborted\n" )
match_results.noMatches.append(ca.path)
elif not found_match:
print "Online search: No match found. Save aborted"
self.autoTagLog( "Online search: No match found. Save aborted\n" )
match_results.noMatches.append(ca.path)
else:
@ -1609,11 +1614,17 @@ class TaggerWindow( QtGui.QMainWindow):
self.atprogdialog.show()
self.atprogdialog.progressBar.setMaximum( len(ca_list) )
self.atprogdialog.setWindowTitle( "Auto-Tagging" )
self.autoTagLog( u"========================================================================\n" )
self.autoTagLog( u"Auto-Tagging Started for {0} items\n".format(len(ca_list)))
prog_idx = 0
match_results = OnlineMatchResults()
for ca in ca_list:
self.autoTagLog( u"============================================================\n" )
self.autoTagLog( u"Auto-Tagging {0} of {1}\n".format(prog_idx+1, len(ca_list)))
self.autoTagLog( u"{0}\n".format(ca.path) )
cover_idx = ca.readMetadata(style).getCoverPageIndexList()[0]
image_data = ca.getPage( cover_idx )
self.atprogdialog.setArchiveImage( image_data )
@ -1635,23 +1646,25 @@ class TaggerWindow( QtGui.QMainWindow):
self.loadArchive( self.fileSelectionList.getCurrentArchive() )
self.atprogdialog = None
summary = ""
summary += "Successfully tagged archives: {0}\n".format( len(match_results.goodMatches))
if len ( match_results.multipleMatches ) > 0:
summary += "Archives with multiple matches: {0}\n".format( len(match_results.multipleMatches))
if len ( match_results.noMatches ) > 0:
summary += "Archives with no matches: {0}\n".format( len(match_results.noMatches))
if len ( match_results.fetchDataFailures ) > 0:
summary += "Archives that failed due to data fetch errors: {0}\n".format( len(match_results.fetchDataFailures))
if len ( match_results.writeFailures ) > 0:
summary += "Archives that failed due to file writing errors: {0}\n".format( len(match_results.writeFailures))
summary = u""
summary += u"Successfully tagged archives: {0}\n".format( len(match_results.goodMatches))
if len ( match_results.multipleMatches ) > 0:
summary += "\n\nDo you want to manually select the ones with multiple matches now?"
summary += u"Archives with multiple matches: {0}\n".format( len(match_results.multipleMatches))
if len ( match_results.noMatches ) > 0:
summary += u"Archives with no matches: {0}\n".format( len(match_results.noMatches))
if len ( match_results.fetchDataFailures ) > 0:
summary += u"Archives that failed due to data fetch errors: {0}\n".format( len(match_results.fetchDataFailures))
if len ( match_results.writeFailures ) > 0:
summary += u"Archives that failed due to file writing errors: {0}\n".format( len(match_results.writeFailures))
self.autoTagLog( summary )
if len ( match_results.multipleMatches ) > 0:
summary += u"\n\nDo you want to manually select the ones with multiple matches now?"
reply = QtGui.QMessageBox.question(self,
self.tr("Auto-Tag Summary"),
self.tr(u"Auto-Tag Summary"),
self.tr(summary),
QtGui.QMessageBox.Yes, QtGui.QMessageBox.No )