From f97a76a01894ea43f1f2c3bf079cb99ff613538f Mon Sep 17 00:00:00 2001 From: "beville@gmail.com" Date: Mon, 19 Nov 2012 05:15:16 +0000 Subject: [PATCH] Filename parsing enhancements Multiple filenames on commandline git-svn-id: http://comictagger.googlecode.com/svn/trunk@60 6c5673fe-1810-88d6-992b-cd32ca31540c --- comicarchive.py | 2 ++ filenameparser.py | 39 ++++++++++++++++++++++++++++++++------- options.py | 2 ++ tagger.py | 16 +++++++++++----- todo.txt | 12 +++++------- 5 files changed, 52 insertions(+), 19 deletions(-) diff --git a/comicarchive.py b/comicarchive.py index 4f96545..b3e35fe 100644 --- a/comicarchive.py +++ b/comicarchive.py @@ -559,6 +559,8 @@ class ComicArchive: metadata.volumeNumber = fnp.volume if fnp.year != "": metadata.publicationYear = fnp.year + if fnp.issue_count != "": + metadata.issueCount = fnp.issue_count metadata.isEmpty = False diff --git a/filenameparser.py b/filenameparser.py index 3f4d261..26760e4 100644 --- a/filenameparser.py +++ b/filenameparser.py @@ -47,11 +47,42 @@ class FileNameParser: except ValueError: pass return ret + + + def getIssueCount( self,filename ): + + count = "" + # replace any name seperators with spaces + tmpstr = self.fixSpaces(filename) + found = False + + match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE) + if match: + count = match.group() + found = True + + if not found: + match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE) + if match: + count = match.group() + found = True + + + count = count.lstrip("0") + + return count + def getIssueNumber( self,filename ): found = False issue = '' + + # first, look for multiple "--", this mean's it's formatted differently from most: + if "--" in filename: + # the pattern seems to be that anything to left of the first "--" is the series name follow + filename = filename.split("--")[0] + # guess based on position # replace any name seperators with spaces @@ -129,15 +160,8 @@ class FileNameParser: year = re.sub("[^0-9]", "", year) return year - self.issue = "" - self.series = "" - self.volume = "" - self.year = "" - - def parseFilename( self, filename ): - # remove the path filename = os.path.basename(filename) @@ -159,6 +183,7 @@ class FileNameParser: self.issue = self.getIssueNumber(filename) self.series, self.volume = self.getSeriesName(filename, self.issue) self.year = self.getYear(filename) + self.issue_count = self.getIssueCount(filename) if self.issue != "": # strip off leading zeros diff --git a/options.py b/options.py index 8695d5a..e9e2598 100644 --- a/options.py +++ b/options.py @@ -80,6 +80,7 @@ If no options are given, {0} will run in windowed mode self.save_tags = False self.parse_filename = False self.rename_file = False + self.file_list = [] def display_help_and_quit( self, msg, code ): appname = os.path.basename(sys.argv[0]) @@ -152,6 +153,7 @@ If no options are given, {0} will run in windowed mode if len(args) > 0: self.filename = args[0] + self.file_list = args if self.no_gui and self.filename is None: self.display_help_and_quit( "Command requires a filename!", 1 ) diff --git a/tagger.py b/tagger.py index 6a6621b..06e7721 100755 --- a/tagger.py +++ b/tagger.py @@ -41,15 +41,21 @@ import utils #----------------------------- def cli_mode( opts, settings ): + for f in opts.file_list: + print "Processing: ", f + process_file_cli( f, opts, settings ) - if opts.filename is None: +def process_file_cli( filename, opts, settings ): + + if filename is None: return - ca = ComicArchive(opts.filename) + + ca = ComicArchive(filename) if settings.rar_exe_path != "": ca.setExternalRarProgram( settings.rar_exe_path ) if not ca.seemsToBeAComicArchive(): - print "Sorry, but "+ opts.filename + " is not a comic archive!" + print "Sorry, but "+ filename + " is not a comic archive!" return if not ca.isWritable() and ( opts.delete_tags or opts.save_tags or opts.rename_file ): @@ -164,12 +170,12 @@ def cli_mode( opts, settings ): if result == ii.ResultNoMatches: pass elif result == ii.ResultFoundMatchButBadCoverScore: - low_confidence = False + #low_confidence = True found_match = True elif result == ii.ResultFoundMatchButNotFirstPage : found_match = True elif result == ii.ResultMultipleMatchesWithBadImageScores: - low_confidence = False + low_confidence = True choices = True elif result == ii.ResultOneGoodMatch: found_match = True diff --git a/todo.txt b/todo.txt index c74fab1..7210a20 100644 --- a/todo.txt +++ b/todo.txt @@ -2,11 +2,11 @@ Features ---------------- CLI - save log + multiple files? + save log abort flag explicit metadata settings option format just series, issue, year?? - multiple files? verbose settings for identifier interactive for choices option? --- or defer choices to end, by keeping special log of those files @@ -19,6 +19,9 @@ Settings/Preferences Dialog Add publisher blacklist other Identifier tunings +save Last tag style +save Last "Open" folder (include dragged) + Add class for warning/info messages with "Don't show again" checkbox. Add list of these flags to settings @@ -45,11 +48,6 @@ OSX: weird unrar complaints Page browser sizing Override cursor is not beachball - -Other settings possibilities: - Last tag style - Last "Open" folder (include dragged) - Clear caches Filename parsing: Rework how series name is separated from issue