From 93f316b8205ebfb59ef922787722dea2865c2ec9 Mon Sep 17 00:00:00 2001 From: beville Date: Fri, 25 Jan 2013 06:17:45 +0000 Subject: [PATCH] more robust dealing with read errors in rar archives more logging in auto-tag process git-svn-id: http://comictagger.googlecode.com/svn/trunk@349 6c5673fe-1810-88d6-992b-cd32ca31540c --- comicarchive.py | 62 +++++++++++++++++++++++++++++------------ comicvinetalker.py | 22 +++++++++++---- issueidentifier.py | 32 ++++++++++++++------- settings.py | 2 +- taggerwindow.py | 69 +++++++++++++++++++++++++++------------------- 5 files changed, 126 insertions(+), 61 deletions(-) diff --git a/comicarchive.py b/comicarchive.py index 74724a2..b3ebc63 100644 --- a/comicarchive.py +++ b/comicarchive.py @@ -46,7 +46,7 @@ from comicbookinfo import ComicBookInfo from comet import CoMet from genericmetadata import GenericMetadata, PageType from filenameparser import FileNameParser - +from settings import ComicTaggerSettings class ZipArchiver: @@ -69,8 +69,10 @@ class ZipArchiver: data = zf.read( archive_file ) except zipfile.BadZipfile: print "bad zipfile: {0} :: {1}".format(self.path, archive_file) + raise IOError except Exception: print "bad zipfile: {0} :: {1}".format(self.path, archive_file) + raise IOError finally: zf.close() return data @@ -200,7 +202,7 @@ class ZipArchiver: try: zout = zipfile.ZipFile (self.path, 'w') for fname in otherArchive.getArchiveFilenameList(): - data = otherArchive.readArchiveFile( fname ) + data = otherArchive.readArchiveFile( fname ) if data is not None: zout.writestr( fname, data ) zout.close() @@ -288,23 +290,30 @@ class RarArchiver: try: tries = tries+1 entries = rarc.read_files( archive_file ) - + + if entries[0][0].size != len(entries[0][1]): + print "readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format( + entries[0][0].size,len(entries[0][1]), self.path, archive_file, tries) + continue + except (OSError, IOError) as e: - print e, "in readArchiveFile! try %s" % tries + print "readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries) time.sleep(1) except Exception as e: - print "Unexpected exception in readArchiveFile! {0}".format( e ) + print "Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries) break else: #Success" #entries is a list of of tuples: ( rarinfo, filedata) + if tries > 1: + print "Attempted read_files() {0} times".format(tries) if (len(entries) == 1): return entries[0][1] else: - return None + raise IOError - return None + raise IOError @@ -370,7 +379,7 @@ class RarArchiver: namelist = [ item.filename for item in rarc.infolist() ] except (OSError, IOError) as e: - print e, "in getArchiveFilenameList! try %s" % tries + print "getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries) time.sleep(1) else: @@ -388,7 +397,7 @@ class RarArchiver: rarc = UnRAR2.RarFile( self.path ) except (OSError, IOError) as e: - print e, "in getRARObj! try %s" % tries + print "getRARObj(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries) time.sleep(1) else: @@ -471,7 +480,7 @@ class UnknownArchiver: return "" def setArchiveComment( self, comment ): return False - def readArchiveFilen( self ): + def readArchiveFile( self ): return "" def writeArchiveFile( self, archive_file, data ): return False @@ -637,7 +646,13 @@ class ComicArchive: filename = self.getPageName( index ) if filename is not None: - image_data = self.archiver.readArchiveFile( filename ) + try: + image_data = self.archiver.readArchiveFile( filename ) + except IOError: + print "Error reading in page. Substituting logo page." + fname = os.path.join(ComicTaggerSettings.baseDir(), 'graphics/nocover.png' ) + with open(fname) as x: + image_data = x.read() return image_data @@ -733,7 +748,7 @@ class ComicArchive: def readCIX( self ): if self.cix_md is None: raw_cix = self.readRawCIX() - if raw_cix is None: + if raw_cix is None or raw_cix == "": self.cix_md = GenericMetadata() else: self.cix_md = ComicInfoXml().metadataFromString( raw_cix ) @@ -753,8 +768,12 @@ class ComicArchive: def readRawCIX( self ): if not self.hasCIX(): return None - - return self.archiver.readArchiveFile( self.ci_xml_filename ) + try: + raw_cix = self.archiver.readArchiveFile( self.ci_xml_filename ) + except IOError: + print "Error reading in raw CIX!" + raw_cix = "" + return raw_cix def writeCIX(self, metadata): @@ -798,7 +817,7 @@ class ComicArchive: def readCoMet( self ): if self.comet_md is None: raw_comet = self.readRawCoMet() - if raw_comet is None: + if raw_comet is None or raw_comet == "": self.comet_md = GenericMetadata() else: self.comet_md = CoMet().metadataFromString( raw_comet ) @@ -824,7 +843,12 @@ class ComicArchive: print self.path, "doesn't have CoMet data!" return None - return self.archiver.readArchiveFile( self.comet_filename ) + try: + raw_comet = self.archiver.readArchiveFile( self.comet_filename ) + except IOError: + print "Error reading in raw CoMet!" + raw_comet = "" + return raw_comet def writeCoMet(self, metadata): @@ -871,7 +895,11 @@ class ComicArchive: if ( os.path.dirname(n) == "" and os.path.splitext(n)[1].lower() == '.xml'): # read in XML file, and validate it - data = self.archiver.readArchiveFile( n ) + try: + data = self.archiver.readArchiveFile( n ) + except: + data = "" + print "Error reading in Comet XML for validation!" if CoMet().validateString( data ): # since we found it, save it! self.comet_filename = n diff --git a/comicvinetalker.py b/comicvinetalker.py index 07890c7..76ea35c 100644 --- a/comicvinetalker.py +++ b/comicvinetalker.py @@ -26,6 +26,7 @@ import math import re import datetime import ctversion +import sys try: from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest @@ -59,6 +60,17 @@ class ComicVineTalker(QObject): # key that is registered to comictagger self.api_key = '27431e6787042105bd3e47e169a624521f89f3a4' + self.log_func = None + + def setLogFunc( self , log_func ): + self.log_func = log_func + + def writeLog( self , text ): + if self.log_func is None: + sys.stdout.write(text.encode( errors='replace') ) + sys.stdout.flush() + else: + self.log_func( text ) def testKey( self ): @@ -76,7 +88,7 @@ class ComicVineTalker(QObject): resp = urllib2.urlopen( url ) return resp.read() except Exception as e: - print e + self.writeLog( str(e) ) raise ComicVineTalkerException("Network Error!") def searchForSeries( self, series_name , callback=None, refresh_cache=False ): @@ -104,7 +116,7 @@ class ComicVineTalker(QObject): cv_response = json.loads(content) if cv_response[ 'status_code' ] != 1: - print ( "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] )) + self.writeLog( "Comic Vine query failed with error: [{0}]. \n".format( cv_response[ 'error' ] )) return None search_results = list() @@ -116,7 +128,7 @@ class ComicVineTalker(QObject): total_result_count = cv_response['number_of_total_results'] if callback is None: - print ("Found {0} of {1} results".format( cv_response['number_of_page_results'], cv_response['number_of_total_results'])) + self.writeLog( "Found {0} of {1} results\n".format( cv_response['number_of_page_results'], cv_response['number_of_total_results'])) search_results.extend( cv_response['results']) offset = 0 @@ -126,14 +138,14 @@ class ComicVineTalker(QObject): # see if we need to keep asking for more pages... while ( current_result_count < total_result_count ): if callback is None: - print ("getting another page of results {0} of {1}...".format( current_result_count, total_result_count)) + self.writeLog("getting another page of results {0} of {1}...\n".format( current_result_count, total_result_count)) offset += limit content = self.getUrlContent(search_url + "&offset="+str(offset)) cv_response = json.loads(content) if cv_response[ 'status_code' ] != 1: - print ( "Comic Vine query failed with error: [{0}]. ".format( cv_response[ 'error' ] )) + self.writeLog( "Comic Vine query failed with error: [{0}]. \n".format( cv_response[ 'error' ] )) return None search_results.extend( cv_response['results']) current_result_count += cv_response['number_of_page_results'] diff --git a/issueidentifier.py b/issueidentifier.py index 8d52375..2e3f329 100644 --- a/issueidentifier.py +++ b/issueidentifier.py @@ -119,8 +119,14 @@ class IssueIdentifier: im = Image.open(StringIO.StringIO(image_data)) w,h = im.size - - cropped_im = im.crop( (int(w/2), 0, w, h) ) + + try: + cropped_im = im.crop( (int(w/2), 0, w, h) ) + except Exception as e: + sys.exc_clear() + print "cropCover() error:", e + return None + output = StringIO.StringIO() cropped_im.save(output, format="JPEG") cropped_image_data = output.getvalue() @@ -202,7 +208,7 @@ class IssueIdentifier: @staticmethod def defaultWriteOutput( text ): - sys.stdout.write(text.encode( errors='replace') ) + sys.stdout.write( text ) sys.stdout.flush() def log_msg( self, msg , newline=True ): @@ -235,9 +241,10 @@ class IssueIdentifier: aspect_ratio = self.getAspectRatio( cover_image_data ) if aspect_ratio < 1.0: right_side_image_data = self.cropCover( cover_image_data ) - narrow_cover_hash = self.calculateHash( right_side_image_data ) - print "narrow_cover_hash", narrow_cover_hash - + if right_side_image_data is not None: + narrow_cover_hash = self.calculateHash( right_side_image_data ) + self.log_msg(unicode(str(narrow_cover_hash))) + #self.log_msg( "Cover hash = {0:016x}".format(cover_hash) ) keys = self.getSearchKeys() @@ -259,6 +266,7 @@ class IssueIdentifier: #self.log_msg("Publisher Blacklist: " + str(self.publisher_blacklist)) comicVine = ComicVineTalker( ) + comicVine.setLogFunc( self.output_function ) #self.log_msg( ( "Searching for " + keys['series'] + "...") self.log_msg( u"Searching for {0} #{1} ...".format( keys['series'], keys['issue_number']) ) @@ -433,16 +441,16 @@ class IssueIdentifier: page_hash = self.calculateHash( image_data ) distance = ImageHasher.hamming_distance(page_hash, self.match_list[0]['url_image_hash']) if distance <= self.strong_score_thresh: - self.log_msg( "Found a great match (distance = {0}) on page {1}!".format(distance, i+1) ) + self.log_msg( "Found a great match (score = {0}) on page {1}!".format(distance, i+1) ) found = True break elif distance < self.min_score_thresh: - self.log_msg( "Found a good match (distance = {0}) on page {1}".format(distance, i) ) + self.log_msg( "Found a good match (score = {0}) on page {1}".format(distance, i) ) found = True self.log_msg( ".", newline=False ) self.log_msg( "" ) if not found: - self.log_msg( "No matching pages in the issue. Bummer" ) + self.log_msg( "No matching pages in the issue." ) self.search_result = self.ResultFoundMatchButBadCoverScore self.log_msg( u"--------------------------------------------------") @@ -451,7 +459,9 @@ class IssueIdentifier: return self.match_list elif best_score > self.min_score_thresh and len(self.match_list) > 1: - self.log_msg( "No good image matches! Need to use other info..." ) + self.log_msg( u"--------------------------------------------------") + self.log_msg( u"Multiple bad cover matches! Need to use other info..." ) + self.log_msg( u"--------------------------------------------------") self.search_result = self.ResultMultipleMatchesWithBadImageScores return self.match_list @@ -468,7 +478,9 @@ class IssueIdentifier: self.search_result = self.ResultOneGoodMatch elif len(self.match_list) == 0: + self.log_msg( u"--------------------------------------------------") self.log_msg( "No matches found :(" ) + self.log_msg( u"--------------------------------------------------") self.search_result = self.ResultNoMatches else: print diff --git a/settings.py b/settings.py index 87b2fe9..3461a02 100644 --- a/settings.py +++ b/settings.py @@ -63,7 +63,7 @@ class ComicTaggerSettings: # identifier settings self.id_length_delta_thresh = 5 - self.id_publisher_blacklist = "Panini Comics, Abril, Scholastic Book Services" + self.id_publisher_blacklist = "Panini Comics, Abril, Scholastic Book Services, Editorial Televisa" # Show/ask dialog flags self.ask_about_cbi_in_rar = True diff --git a/taggerwindow.py b/taggerwindow.py index 124b9e8..465fd5d 100644 --- a/taggerwindow.py +++ b/taggerwindow.py @@ -1499,7 +1499,15 @@ class TaggerWindow( QtGui.QMainWindow): return cv_md - + def autoTagLog( self, text ): + IssueIdentifier.defaultWriteOutput( text ) + if self.atprogdialog is not None: + self.atprogdialog.textEdit.insertPlainText(text) + self.atprogdialog.textEdit.ensureCursorVisible() + QtCore.QCoreApplication.processEvents() + QtCore.QCoreApplication.processEvents() + QtCore.QCoreApplication.processEvents() + def identifyAndTagSingleArchive( self, ca, match_results, dlg): success = False ii = IssueIdentifier( ca, self.settings ) @@ -1513,13 +1521,7 @@ class TaggerWindow( QtGui.QMainWindow): print "!!!!No metadata given to search online with!" return False, match_results - def myoutput( text ): - IssueIdentifier.defaultWriteOutput( text ) - self.atprogdialog.textEdit.insertPlainText(text) - self.atprogdialog.textEdit.ensureCursorVisible() - QtCore.QCoreApplication.processEvents() - QtCore.QCoreApplication.processEvents() - QtCore.QCoreApplication.processEvents() + if dlg.dontUseYear: md.year = None @@ -1527,7 +1529,7 @@ class TaggerWindow( QtGui.QMainWindow): md.issue = "1" ii.setAdditionalMetadata( md ) ii.onlyUseAdditionalMetaData = True - ii.setOutputFunction( myoutput ) + ii.setOutputFunction( self.autoTagLog ) ii.cover_page_index = md.getCoverPageIndexList()[0] ii.setCoverURLCallback( self.atprogdialog.setTestImage ) @@ -1555,13 +1557,16 @@ class TaggerWindow( QtGui.QMainWindow): choices = True if choices: - print "Online search: Multiple matches. Save aborted" + self.autoTagLog( "Online search: Multiple matches. Save aborted\n" ) match_results.multipleMatches.append(MultipleMatch(ca,matches)) - elif low_confidence and not dlg.autoSaveOnLow: - print "Online search: Low confidence match. Save aborted" - match_results.noMatches.append(ca.path) + elif low_confidence: + if dlg.autoSaveOnLow: + self.autoTagLog( "Online search: Low confidence match, but saving anyways...\n" ) + else: + self.autoTagLog( "Online search: Low confidence match. Save aborted\n" ) + match_results.noMatches.append(ca.path) elif not found_match: - print "Online search: No match found. Save aborted" + self.autoTagLog( "Online search: No match found. Save aborted\n" ) match_results.noMatches.append(ca.path) else: @@ -1609,11 +1614,17 @@ class TaggerWindow( QtGui.QMainWindow): self.atprogdialog.show() self.atprogdialog.progressBar.setMaximum( len(ca_list) ) self.atprogdialog.setWindowTitle( "Auto-Tagging" ) - + + self.autoTagLog( u"========================================================================\n" ) + self.autoTagLog( u"Auto-Tagging Started for {0} items\n".format(len(ca_list))) + prog_idx = 0 match_results = OnlineMatchResults() for ca in ca_list: + self.autoTagLog( u"============================================================\n" ) + self.autoTagLog( u"Auto-Tagging {0} of {1}\n".format(prog_idx+1, len(ca_list))) + self.autoTagLog( u"{0}\n".format(ca.path) ) cover_idx = ca.readMetadata(style).getCoverPageIndexList()[0] image_data = ca.getPage( cover_idx ) self.atprogdialog.setArchiveImage( image_data ) @@ -1635,23 +1646,25 @@ class TaggerWindow( QtGui.QMainWindow): self.loadArchive( self.fileSelectionList.getCurrentArchive() ) self.atprogdialog = None - summary = "" - summary += "Successfully tagged archives: {0}\n".format( len(match_results.goodMatches)) - - if len ( match_results.multipleMatches ) > 0: - summary += "Archives with multiple matches: {0}\n".format( len(match_results.multipleMatches)) - if len ( match_results.noMatches ) > 0: - summary += "Archives with no matches: {0}\n".format( len(match_results.noMatches)) - if len ( match_results.fetchDataFailures ) > 0: - summary += "Archives that failed due to data fetch errors: {0}\n".format( len(match_results.fetchDataFailures)) - if len ( match_results.writeFailures ) > 0: - summary += "Archives that failed due to file writing errors: {0}\n".format( len(match_results.writeFailures)) + summary = u"" + summary += u"Successfully tagged archives: {0}\n".format( len(match_results.goodMatches)) if len ( match_results.multipleMatches ) > 0: - summary += "\n\nDo you want to manually select the ones with multiple matches now?" + summary += u"Archives with multiple matches: {0}\n".format( len(match_results.multipleMatches)) + if len ( match_results.noMatches ) > 0: + summary += u"Archives with no matches: {0}\n".format( len(match_results.noMatches)) + if len ( match_results.fetchDataFailures ) > 0: + summary += u"Archives that failed due to data fetch errors: {0}\n".format( len(match_results.fetchDataFailures)) + if len ( match_results.writeFailures ) > 0: + summary += u"Archives that failed due to file writing errors: {0}\n".format( len(match_results.writeFailures)) + + self.autoTagLog( summary ) + + if len ( match_results.multipleMatches ) > 0: + summary += u"\n\nDo you want to manually select the ones with multiple matches now?" reply = QtGui.QMessageBox.question(self, - self.tr("Auto-Tag Summary"), + self.tr(u"Auto-Tag Summary"), self.tr(summary), QtGui.QMessageBox.Yes, QtGui.QMessageBox.No )