comictagger/comictaggerlib/filenameparser.py

"""
Functions for parsing comic info from filename 

This should probably be re-written, but, well, it mostly works!

"""

"""
Copyright 2012  Anthony Beville

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""


# Some portions of this code were modified from pyComicMetaThis project
# http://code.google.com/p/pycomicmetathis/

import re
import os
from urllib import unquote

class FileNameParser:
	"""Guess comic metadata from an archive filename.

	Call parseFilename(); the guesses are stored on the instance as the
	string attributes issue, series, volume, year, issue_count and
	remainder.  Everything here is heuristic: most values are simply ""
	when nothing suitable could be found in the name.
	"""

	def fixSpaces( self, string, remove_dashes=True ):
		"""Turn name separators into single spaces and trim the result.

		Underscores are always replaced by spaces; dashes are replaced too
		unless remove_dashes is False.  Runs of two or more spaces are then
		collapsed into one.
		"""
		if remove_dashes:
			separators = r'[-_]'
		else:
			separators = r'[_]'
		string = re.sub(separators, ' ', string)
		string = re.sub(r'  +', ' ', string)
		return string.strip()

	def isPointIssue( self, word ):
		"""Return True if word is a short non-integer number such as ".1" or "0.5".

		The word must be shorter than 5 characters.  Plain digit strings
		return False here (the caller tests for those separately).
		"""
		if len(word) >= 5 or word.isdigit():
			return False
		# Match the numeric shape explicitly rather than relying on float():
		# float() also accepts "nan", "inf" and exponent forms like "1e5",
		# none of which are issue numbers.
		return re.match(r'[-+]?(\d+\.?\d*|\.\d+)$', word) is not None

	def getIssueCount( self, filename ):
		"""Return the N of an "of N" phrase in filename, or "" if there is none.

		Both "1 of 4" and "1 (of 4)" styles are recognised.  Leading zeros
		are stripped from the returned count.
		"""
		# replace any name separators with spaces
		tmpstr = self.fixSpaces(filename)

		# "... of 4 ..." -- the count may also be the very last word, since
		# fixSpaces() strips any trailing whitespace
		match = re.search(r'(?<=\sof\s)\d+(?=\s|$)', tmpstr, re.IGNORECASE)
		if not match:
			# "... (of 4) ..."
			match = re.search(r'(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)

		if not match:
			return ""
		return match.group().lstrip("0")

	def getIssueNumber( self, filename ):
		"""Return the most likely issue number in filename, or "" if none.

		Strategies, tried in order:
		  1. the last "#<number>" token in the name;
		  2. the last word that is a number of fewer than 4 digits, or a
		     decimal such as ".5" (see isPointIssue);
		  3. the first number preceded by "_", "#", "-" or whitespace.

		The returned string is not normalised (leading zeros are kept).
		"""
		# A "--" or "__" means the name is formatted differently from most:
		# everything left of the first one is the series name followed by
		# the issue, so only that part is searched.
		if "--" in filename:
			filename = filename.split("--")[0]
		elif "__" in filename:
			filename = filename.split("__")[0]

		filename = filename.replace("+", " ")

		# remove parenthetical and bracketed phrases (greedy: from the first
		# opening character to the last closing one)
		filename = re.sub(r"\(.*\)", "", filename)
		filename = re.sub(r"\[.*\]", "", filename)

		# 1. the last "#" followed by a number is almost certainly the issue
		hash_matches = re.findall(r"#[-+]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", filename)
		if hash_matches:
			return hash_matches[-1][0].strip()

		# 2. guess based on position
		words = self.fixSpaces(filename).split(' ')

		# before searching, mask any likely "X of Y" phrase so that the
		# count Y is not mistaken for the issue number
		for i in range(len(words) - 2):
			if ( words[i].isdigit() and
				words[i+1] == "of" and
				words[i+2].isdigit() ):
				words[i+1] = "XXX"
				words[i+2] = "XXX"

		# assume the last number under 4 digits is the issue number
		for word in reversed(words):
			if word.startswith("#"):
				word = word[1:]
			if (word.isdigit() and len(word) < 4) or self.isPointIssue(word):
				return word.strip()

		# 3. last resort: a number following a separator character
		issnum = re.search(r'(?<=[_#\s-])(\d+[a-zA-Z]+|\d+\.\d|\d+)', filename)
		if issnum:
			return issnum.group().strip()

		return ""

	def getSeriesName( self, filename, issue ):
		"""Return a (series, volume) tuple guessed from filename.

		The name is cut at the first occurrence of the issue string; what
		precedes it is taken as the series.  A trailing "v2" / "vol. 2"
		style suffix is split off into volume ("" when absent).

		issue is the string returned by getIssueNumber(), or "".
		"""
		# !!! this could fail in the case of small numerics in the series name !!!
		# TODO: we really should pass in the *INDEX* of the issue, that makes
		# finding it easier

		filename = filename.replace("+", " ")
		tmpstr = self.fixSpaces(filename, remove_dashes=False)

		# remove pound signs.  this might mess up the series name if there
		# is a "#" in it.
		tmpstr = tmpstr.replace("#", " ")

		if issue != "":
			# assume that the issue substring has at least one space before it
			series = tmpstr.split(" " + str(issue))[0]
		else:
			# no issue to work off of, so just drop parenthetical phrases
			# TODO: we should look for the year, and split from that
			series = re.sub(r"\(.*\)", "", tmpstr)

		# search for a trailing volume number
		volume = ""
		match = re.search(r'(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
		if match:
			series = match.group(1)
			volume = match.group(3)

		return series.strip(), volume.strip()

	def getYear( self, filename ):
		"""Return a four-digit year written as "(YYYY)" or "--YYYY--", else ""."""
		match = re.search(r'(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
		if not match:
			return ""
		# keep only the digits
		return re.sub(r"[^0-9]", "", match.group())

	def getRemainder( self, filename, year, count ):
		"""Return the leftover part of filename (scan info and the like).

		The remainder is taken to start after the first "--" or "__", or
		failing that at the first "(".  The first occurrence of year and of
		"of <count>" is removed from it, as is any empty "()".
		"""
		# make a guess at where the non-interesting stuff begins
		remainder = ""
		if "--" in filename:
			remainder = filename.split("--", 1)[1]
		elif "__" in filename:
			remainder = filename.split("__", 1)[1]
		elif "(" in filename:
			remainder = "(" + filename.split("(", 1)[1]

		remainder = self.fixSpaces(remainder, remove_dashes=False)
		if year != "":
			remainder = remainder.replace(year, "", 1)
		if count != "":
			remainder = remainder.replace("of " + count, "", 1)

		remainder = remainder.replace("()", "")

		return remainder.strip()

	def parseFilename( self, filename ):
		"""Parse filename (a path or bare name) and store the results on self.

		Sets self.issue, self.series, self.volume, self.year,
		self.issue_count and self.remainder.  The issue has leading zeros
		stripped ("007" -> "7", "000" -> "0") and a leading zero added
		to bare decimals (".5" -> "0.5").
		"""
		# remove the path
		filename = os.path.basename(filename)

		# remove the extension
		filename = os.path.splitext(filename)[0]

		# url decode, just in case
		filename = unquote(filename)

		# sometimes archives get messed up names from too many decodings
		# often url encodings will break and leave "_28" and "_29" in place
		# of "(" and ")"  see if there are a number of these, and replace them
		if filename.count("_28") > 1 and filename.count("_29") > 1:
			filename = filename.replace("_28", "(")
			filename = filename.replace("_29", ")")

		self.issue = self.getIssueNumber(filename)
		self.series, self.volume = self.getSeriesName(filename, self.issue)
		self.year = self.getYear(filename)
		self.issue_count = self.getIssueCount(filename)
		self.remainder = self.getRemainder( filename, self.year, self.issue_count )

		if self.issue != "":
			# strip off leading zeros
			self.issue = self.issue.lstrip("0")
			if self.issue == "":
				self.issue = "0"
			if self.issue[0] == ".":
				self.issue = "0" + self.issue
Added apache license headers git-svn-id: http://comictagger.googlecode.com/svn/trunk@13 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-06 12:56:30 -08:00			`"""`
			`Functions for parsing comic info from filename`

			`This should probably be re-written, but, well, it mostly works!`

			`"""`

			`"""`
			`Copyright 2012 Anthony Beville`

			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License.`
			`"""`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00
Reworked the credit writing in CIX Added credit editing UI git-svn-id: http://comictagger.googlecode.com/svn/trunk@4 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 20:56:01 -07:00
			`# Some portions of this code were modified from pyComicMetaThis project`
			`# http://code.google.com/p/pycomicmetathis/`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00
			`import re`
			`import os`
			`from urllib import unquote`

			`class FileNameParser:`
Filename parsing preserves dashes in series name git-svn-id: http://comictagger.googlecode.com/svn/trunk@494 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-02-13 21:37:00 -08:00			`def fixSpaces( self, string, remove_dashes=True ):`
			`if remove_dashes:`
			`placeholders = ['[-_]',' +']`
			`else:`
			`placeholders = ['[_]',' +']`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`for ph in placeholders:`
			`string = re.sub(ph, ' ', string )`
			`return string.strip()`

			`# check for silly .1 or .5 style issue strings`
			`# allow up to 5 chars total`
			`def isPointIssue( self, word ):`
			`ret = False`
			`try:`
			`float(word)`
			`if (len(word) < 5 and not word.isdigit()):`
			`ret = True`
			`except ValueError:`
			`pass`
			`return ret`
Filename parsing enhancements Multiple filenames on commandline git-svn-id: http://comictagger.googlecode.com/svn/trunk@60 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-18 21:15:16 -08:00

			`def getIssueCount( self,filename ):`

			`count = ""`
			`# replace any name seperators with spaces`
			`tmpstr = self.fixSpaces(filename)`
			`found = False`

			`match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE)`
			`if match:`
			`count = match.group()`
			`found = True`

			`if not found:`
			`match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)`
			`if match:`
			`count = match.group()`
			`found = True`


			`count = count.lstrip("0")`

			`return count`

Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00
Setting metadata via CLI basics working git-svn-id: http://comictagger.googlecode.com/svn/trunk@61 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-19 11:57:16 -08:00			`def getIssueNumber( self, filename ):`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00
			`found = False`
			`issue = ''`
Filename parsing enhancements Multiple filenames on commandline git-svn-id: http://comictagger.googlecode.com/svn/trunk@60 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-18 21:15:16 -08:00
Added option to parse scan info from filename git-svn-id: http://comictagger.googlecode.com/svn/trunk@592 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-04-11 21:49:08 -07:00			`# first, look for multiple "--", this means it's formatted differently from most:`
Filename parsing enhancements Multiple filenames on commandline git-svn-id: http://comictagger.googlecode.com/svn/trunk@60 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-18 21:15:16 -08:00			`if "--" in filename:`
Some tweaks to issue number finder git-svn-id: http://comictagger.googlecode.com/svn/trunk@287 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-12-21 17:18:18 -08:00			`# the pattern seems to be that anything to left of the first "--" is the series name followed by issue`
Filename parsing enhancements Multiple filenames on commandline git-svn-id: http://comictagger.googlecode.com/svn/trunk@60 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-18 21:15:16 -08:00			`filename = filename.split("--")[0]`
Some tweaks to issue number finder git-svn-id: http://comictagger.googlecode.com/svn/trunk@287 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-12-21 17:18:18 -08:00			`elif "___" in filename:`
			`# the pattern seems to be that anything to left of the first "__" is the series name followed by issue`
			`filename = filename.split("__")[0]`
parser tweaks git-svn-id: http://comictagger.googlecode.com/svn/trunk@372 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-30 10:39:13 -08:00
			`filename = filename.replace("+", " ")`
Filename parsing enhancements Multiple filenames on commandline git-svn-id: http://comictagger.googlecode.com/svn/trunk@60 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-18 21:15:16 -08:00
parser tweaks git-svn-id: http://comictagger.googlecode.com/svn/trunk@372 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-30 10:39:13 -08:00			`# remove parenthetical phrases`
			`filename = re.sub( "\(.*\)", "", filename)`
			`filename = re.sub( "\[.*\]", "", filename)`

Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`# guess based on position`

			`# replace any name seperators with spaces`
			`tmpstr = self.fixSpaces(filename)`
			`word_list = tmpstr.split(' ')`

handle the case of "of XX" without parentheses git-svn-id: http://comictagger.googlecode.com/svn/trunk@202 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-12-03 21:28:06 -08:00			`#before we search, remove any kind of likely "of X" phrase`
			`for i in range(0, len(word_list)-2):`
			`if ( word_list[i].isdigit() and`
			`word_list[i+1] == "of" and`
			`word_list[i+2].isdigit() ):`
			`word_list[i+1] ="XXX"`
			`word_list[i+2] ="XXX"`


Use an RE to look for #issue before anything else git-svn-id: http://comictagger.googlecode.com/svn/trunk@379 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-30 17:05:16 -08:00			`# first look for the last "#" followed by a digit in the filename. this is almost certainly the issue number`
			`#issnum = re.search('#\d+', filename)`
removed typo git-svn-id: http://comictagger.googlecode.com/svn/trunk@553 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-03-27 12:25:42 -07:00			`matchlist = re.findall("#[-+]?(([0-9]\.[0-9]+\|[0-9]+)(\w))", filename)`
Use an RE to look for #issue before anything else git-svn-id: http://comictagger.googlecode.com/svn/trunk@379 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-30 17:05:16 -08:00			`if len(matchlist) > 0:`
			`#get the last item`
Fixed filename parsing to find "AU" issues git-svn-id: http://comictagger.googlecode.com/svn/trunk@551 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-03-27 12:20:10 -07:00			`issue = matchlist[ len(matchlist) - 1][0]`
Use an RE to look for #issue before anything else git-svn-id: http://comictagger.googlecode.com/svn/trunk@379 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-30 17:05:16 -08:00			`found = True`

Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`# assume the last number in the filename that is under 4 digits is the issue number`
Use an RE to look for #issue before anything else git-svn-id: http://comictagger.googlecode.com/svn/trunk@379 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-30 17:05:16 -08:00			`if not found:`
			`for word in reversed(word_list):`
			`if len(word) > 0 and word[0] == "#":`
			`word = word[1:]`
			`if (`
			`(word.isdigit() and len(word) < 4) or`
			`(self.isPointIssue(word))`
			`):`
			`issue = word`
			`found = True`
			`#print 'Assuming issue number is ' + str(issue) + ' based on the position.'`
			`break`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00
			`if not found:`
			`# try a regex`
Fixed filename parsing to find "AU" issues git-svn-id: http://comictagger.googlecode.com/svn/trunk@551 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-03-27 12:20:10 -07:00			`issnum = re.search('(?<=[_#\s-])(\d+[a-zA-Z]+\|\d+\.\d\|\d+)', filename)`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`if issnum:`
			`issue = issnum.group()`
			`found = True`
			`#print 'Got the issue using regex. Issue is ' + issue`

			`return issue.strip()`

			`def getSeriesName(self, filename, issue ):`

			`# use the issue number string to split the filename string`
			`# assume first element of list is the series name, plus cruft`
			`#!!! this could fail in the case of small numerics in the series name!!!`
A slew of enhancements git-svn-id: http://comictagger.googlecode.com/svn/trunk@56 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-17 16:32:01 -08:00
			`# TODO: we really should pass in the INDEX of the issue, that makes`
			`# finding it easier`

parser tweaks git-svn-id: http://comictagger.googlecode.com/svn/trunk@372 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-30 10:39:13 -08:00			`filename = filename.replace("+", " ")`
Filename parsing preserves dashes in series name git-svn-id: http://comictagger.googlecode.com/svn/trunk@494 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-02-13 21:37:00 -08:00			`tmpstr = self.fixSpaces(filename, remove_dashes=False)`
A slew of enhancements git-svn-id: http://comictagger.googlecode.com/svn/trunk@56 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-17 16:32:01 -08:00
			`#remove pound signs. this might mess up the series name if there is a# in it.`
			`tmpstr = tmpstr.replace("#", " ")`

			`if issue != "":`
A lot of unicode related fixes git-svn-id: http://comictagger.googlecode.com/svn/trunk@289 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-12-29 21:06:12 -08:00			`# assume that issue substr has at least one space before it`
A slew of enhancements git-svn-id: http://comictagger.googlecode.com/svn/trunk@56 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-17 16:32:01 -08:00			`issue_str = " " + str(issue)`
			`series = tmpstr.split(issue_str)[0]`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`else:`
			`# no issue to work off of`
			`#!!! TODO we should look for the year, and split from that`
parse out parthetical phrases when no issue number git-svn-id: http://comictagger.googlecode.com/svn/trunk@200 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-12-03 20:02:53 -08:00			`# and if that doesn't exist, remove parenthetical phrases`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`series = tmpstr`
parse out parthetical phrases when no issue number git-svn-id: http://comictagger.googlecode.com/svn/trunk@200 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-12-03 20:02:53 -08:00			`series = re.sub( "\(.*\)", "", tmpstr)`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00
			`volume = ""`

			`series = series.rstrip("#")`

better volume number parsing fixed case of more or less no filename git-svn-id: http://comictagger.googlecode.com/svn/trunk@345 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-24 22:13:09 -08:00			`# search for volume number`
			`match = re.search('(.+)([vV]\|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`if match:`
better volume number parsing fixed case of more or less no filename git-svn-id: http://comictagger.googlecode.com/svn/trunk@345 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-24 22:13:09 -08:00			`series = match.group(1)`
			`volume = match.group(3)`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00
			`return series.strip(), volume.strip()`

			`def getYear( self,filename):`

			`year = ""`
			`# look for four digit number with "(" ")" or "--" around it`
			`match = re.search('(\(\d\d\d\d\))\|(--\d\d\d\d--)', filename)`
			`if match:`
			`year = match.group()`
			`# remove non-numerics`
			`year = re.sub("[^0-9]", "", year)`
			`return year`

Added option to parse scan info from filename git-svn-id: http://comictagger.googlecode.com/svn/trunk@592 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-04-11 21:49:08 -07:00			`def getRemainder( self, filename, year, count ):`
			`#make a guess at where the the non-interesting stuff begins`

			`remainder = ""`

			`if "--" in filename:`
			`remainder = filename.split("--",1)[1]`
			`elif "__" in filename:`
			`remainder = filename.split("__",1)[1]`
			`elif "(" in filename:`
			`remainder = "(" + filename.split("(",1)[1]`

			`remainder = self.fixSpaces(remainder, remove_dashes=False)`
			`if year != "":`
			`remainder = remainder.replace(year,"",1)`
			`if count != "":`
			`remainder = remainder.replace("of "+count,"",1)`

			`remainder = remainder.replace("()","")`

			`return remainder.strip()`

Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`def parseFilename( self, filename ):`
A lot of unicode related fixes git-svn-id: http://comictagger.googlecode.com/svn/trunk@289 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-12-29 21:06:12 -08:00
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`# remove the path`
			`filename = os.path.basename(filename)`

			`# remove the extension`
			`filename = os.path.splitext(filename)[0]`
Added a hack that will probably be removed git-svn-id: http://comictagger.googlecode.com/svn/trunk@19 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-09 13:02:59 -08:00
			`#url decode, just in case`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`filename = unquote(filename)`
Added a hack that will probably be removed git-svn-id: http://comictagger.googlecode.com/svn/trunk@19 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-09 13:02:59 -08:00
Added special case of mangled URL encodings in filename git-svn-id: http://comictagger.googlecode.com/svn/trunk@196 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-12-03 18:50:25 -08:00			`# sometimes archives get messed up names from too many decodings`
			`# often url encodings will break and leave "_28" and "_29" in place`
			`# of "(" and ")" see if there are a number of these, and replace them`
			`if filename.count("_28") > 1 and filename.count("_29") > 1:`
			`filename = filename.replace("_28", "(")`
			`filename = filename.replace("_29", ")")`
Added a hack that will probably be removed git-svn-id: http://comictagger.googlecode.com/svn/trunk@19 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-09 13:02:59 -08:00
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00			`self.issue = self.getIssueNumber(filename)`
			`self.series, self.volume = self.getSeriesName(filename, self.issue)`
			`self.year = self.getYear(filename)`
Filename parsing enhancements Multiple filenames on commandline git-svn-id: http://comictagger.googlecode.com/svn/trunk@60 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-18 21:15:16 -08:00			`self.issue_count = self.getIssueCount(filename)`
Added option to parse scan info from filename git-svn-id: http://comictagger.googlecode.com/svn/trunk@592 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-04-11 21:49:08 -07:00			`self.remainder = self.getRemainder( filename, self.year, self.issue_count )`
Initial checking git-svn-id: http://comictagger.googlecode.com/svn/trunk@2 6c5673fe-1810-88d6-992b-cd32ca31540c 2012-11-02 13:54:17 -07:00
			`if self.issue != "":`
			`# strip off leading zeros`
			`self.issue = self.issue.lstrip("0")`
			`if self.issue == "":`
			`self.issue = "0"`
Assorted fixes and enhancements git-svn-id: http://comictagger.googlecode.com/svn/trunk@325 6c5673fe-1810-88d6-992b-cd32ca31540c 2013-01-22 17:25:17 -08:00			`if self.issue[0] == ".":`
			`self.issue = "0" + self.issue`