Merge branch 'fcanc-master'

This commit is contained in:
davide-romanini 2015-03-01 15:44:11 +01:00
commit d84110ccb2
8 changed files with 2037 additions and 1960 deletions

View File

@ -1,260 +1,276 @@
""" """A class to encapsulate CoMet data"""
A python class to encapsulate CoMet data
"""
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
limitations under the License. # limitations under the License.
"""
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
#from datetime import datetime
#from pprint import pprint
#import zipfile
from genericmetadata import GenericMetadata from genericmetadata import GenericMetadata
import utils import utils
class CoMet: class CoMet:
writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ]
inker_synonyms = [ 'inker', 'artist', 'finishes' ]
colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ]
letterer_synonyms = [ 'letterer']
cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ]
editor_synonyms = [ 'editor']
def metadataFromString( self, string ):
tree = ET.ElementTree(ET.fromstring( string )) writer_synonyms = ['writer', 'plotter', 'scripter']
return self.convertXMLToMetadata( tree ) penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns']
inker_synonyms = ['inker', 'artist', 'finishes']
colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer']
letterer_synonyms = ['letterer']
cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist']
editor_synonyms = ['editor']
def stringFromMetadata( self, metadata ): def metadataFromString(self, string):
header = '<?xml version="1.0" encoding="UTF-8"?>\n' tree = ET.ElementTree(ET.fromstring(string))
return self.convertXMLToMetadata(tree)
tree = self.convertMetadataToXML( self, metadata )
return header + ET.tostring(tree.getroot())
def indent( self, elem, level=0 ): def stringFromMetadata(self, metadata):
# for making the XML output readable
i = "\n" + level*" "
if len(elem):
if not elem.text or not elem.text.strip():
elem.text = i + " "
if not elem.tail or not elem.tail.strip():
elem.tail = i
for elem in elem:
self.indent( elem, level+1 )
if not elem.tail or not elem.tail.strip():
elem.tail = i
else:
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
def convertMetadataToXML( self, filename, metadata ):
#shorthand for the metadata header = '<?xml version="1.0" encoding="UTF-8"?>\n'
md = metadata
# build a tree structure tree = self.convertMetadataToXML(self, metadata)
root = ET.Element("comet") return header + ET.tostring(tree.getroot())
root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/"
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd"
#helper func
def assign( comet_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry)
# title is manditory
if md.title is None:
md.title = ""
assign( 'title', md.title )
assign( 'series', md.series )
assign( 'issue', md.issue ) #must be int??
assign( 'volume', md.volume )
assign( 'description', md.comments )
assign( 'publisher', md.publisher )
assign( 'pages', md.pageCount )
assign( 'format', md.format )
assign( 'language', md.language )
assign( 'rating', md.maturityRating )
assign( 'price', md.price )
assign( 'isVersionOf', md.isVersionOf )
assign( 'rights', md.rights )
assign( 'identifier', md.identifier )
assign( 'lastMark', md.lastMark )
assign( 'genre', md.genre ) # TODO repeatable
if md.characters is not None: def indent(self, elem, level=0):
char_list = [ c.strip() for c in md.characters.split(',') ] # for making the XML output readable
for c in char_list: i = "\n" + level * " "
assign( 'character', c ) if len(elem):
if not elem.text or not elem.text.strip():
if md.manga is not None and md.manga == "YesAndRightToLeft": elem.text = i + " "
assign( 'readingDirection', "rtl") if not elem.tail or not elem.tail.strip():
elem.tail = i
date_str = "" for elem in elem:
if md.year is not None: self.indent(elem, level + 1)
date_str = str(md.year).zfill(4) if not elem.tail or not elem.tail.strip():
if md.month is not None: elem.tail = i
date_str += "-" + str(md.month).zfill(2) else:
assign( 'date', date_str ) if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
assign( 'coverImage', md.coverImage ) def convertMetadataToXML(self, filename, metadata):
# need to specially process the credits, since they are structured differently than CIX # shorthand for the metadata
credit_writer_list = list() md = metadata
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
# loop thru credits, and build a list for each role that CoMet supports
for credit in metadata.credits:
if credit['role'].lower() in set( self.writer_synonyms ): # build a tree structure
ET.SubElement(root, 'writer').text = u"{0}".format(credit['person']) root = ET.Element("comet")
root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/"
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib[
'xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd"
if credit['role'].lower() in set( self.penciller_synonyms ): # helper func
ET.SubElement(root, 'penciller').text = u"{0}".format(credit['person']) def assign(comet_entry, md_entry):
if md_entry is not None:
if credit['role'].lower() in set( self.inker_synonyms ): ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry)
ET.SubElement(root, 'inker').text = u"{0}".format(credit['person'])
if credit['role'].lower() in set( self.colorist_synonyms ):
ET.SubElement(root, 'colorist').text = u"{0}".format(credit['person'])
if credit['role'].lower() in set( self.letterer_synonyms ): # title is manditory
ET.SubElement(root, 'letterer').text = u"{0}".format(credit['person']) if md.title is None:
md.title = ""
assign('title', md.title)
assign('series', md.series)
assign('issue', md.issue) # must be int??
assign('volume', md.volume)
assign('description', md.comments)
assign('publisher', md.publisher)
assign('pages', md.pageCount)
assign('format', md.format)
assign('language', md.language)
assign('rating', md.maturityRating)
assign('price', md.price)
assign('isVersionOf', md.isVersionOf)
assign('rights', md.rights)
assign('identifier', md.identifier)
assign('lastMark', md.lastMark)
assign('genre', md.genre) # TODO repeatable
if credit['role'].lower() in set( self.cover_synonyms ): if md.characters is not None:
ET.SubElement(root, 'coverDesigner').text = u"{0}".format(credit['person']) char_list = [c.strip() for c in md.characters.split(',')]
for c in char_list:
assign('character', c)
if credit['role'].lower() in set( self.editor_synonyms ): if md.manga is not None and md.manga == "YesAndRightToLeft":
ET.SubElement(root, 'editor').text = u"{0}".format(credit['person']) assign('readingDirection', "rtl")
# self pretty-print date_str = ""
self.indent(root) if md.year is not None:
date_str = str(md.year).zfill(4)
if md.month is not None:
date_str += "-" + str(md.month).zfill(2)
assign('date', date_str)
# wrap it in an ElementTree instance, and save as XML assign('coverImage', md.coverImage)
tree = ET.ElementTree(root)
return tree
def convertXMLToMetadata( self, tree ): # need to specially process the credits, since they are structured
# differently than CIX
root = tree.getroot() credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
if root.tag != 'comet': # loop thru credits, and build a list for each role that CoMet supports
raise 1 for credit in metadata.credits:
return None
metadata = GenericMetadata() if credit['role'].lower() in set(self.writer_synonyms):
md = metadata ET.SubElement(
root,
# Helper function 'writer').text = u"{0}".format(
def xlate( tag ): credit['person'])
node = root.find( tag )
if node is not None:
return node.text
else:
return None
md.series = xlate( 'series' )
md.title = xlate( 'title' )
md.issue = xlate( 'issue' )
md.volume = xlate( 'volume' )
md.comments = xlate( 'description' )
md.publisher = xlate( 'publisher' )
md.language = xlate( 'language' )
md.format = xlate( 'format' )
md.pageCount = xlate( 'pages' )
md.maturityRating = xlate( 'rating' )
md.price = xlate( 'price' )
md.isVersionOf = xlate( 'isVersionOf' )
md.rights = xlate( 'rights' )
md.identifier = xlate( 'identifier' )
md.lastMark = xlate( 'lastMark' )
md.genre = xlate( 'genre' ) # TODO - repeatable field
date = xlate( 'date' ) if credit['role'].lower() in set(self.penciller_synonyms):
if date is not None: ET.SubElement(
parts = date.split('-') root,
if len( parts) > 0: 'penciller').text = u"{0}".format(
md.year = parts[0] credit['person'])
if len( parts) > 1:
md.month = parts[1]
md.coverImage = xlate( 'coverImage' )
readingDirection = xlate( 'readingDirection' )
if readingDirection is not None and readingDirection == "rtl":
md.manga = "YesAndRightToLeft"
# loop for character tags
char_list = []
for n in root:
if n.tag == 'character':
char_list.append(n.text.strip())
md.characters = utils.listToString( char_list )
# Now extract the credit info if credit['role'].lower() in set(self.inker_synonyms):
for n in root: ET.SubElement(
if ( n.tag == 'writer' or root,
n.tag == 'penciller' or 'inker').text = u"{0}".format(
n.tag == 'inker' or credit['person'])
n.tag == 'colorist' or
n.tag == 'letterer' or
n.tag == 'editor'
):
metadata.addCredit( n.text.strip(), n.tag.title() )
if n.tag == 'coverDesigner': if credit['role'].lower() in set(self.colorist_synonyms):
metadata.addCredit( n.text.strip(), "Cover" ) ET.SubElement(
root,
'colorist').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set(self.letterer_synonyms):
ET.SubElement(
root,
'letterer').text = u"{0}".format(
credit['person'])
metadata.isEmpty = False if credit['role'].lower() in set(self.cover_synonyms):
ET.SubElement(
return metadata root,
'coverDesigner').text = u"{0}".format(
credit['person'])
#verify that the string actually contains CoMet data in XML format if credit['role'].lower() in set(self.editor_synonyms):
def validateString( self, string ): ET.SubElement(
try: root,
tree = ET.ElementTree(ET.fromstring( string )) 'editor').text = u"{0}".format(
root = tree.getroot() credit['person'])
if root.tag != 'comet':
raise Exception
except:
return False
return True
# self pretty-print
self.indent(root)
def writeToExternalFile( self, filename, metadata ): # wrap it in an ElementTree instance, and save as XML
tree = ET.ElementTree(root)
tree = self.convertMetadataToXML( self, metadata ) return tree
#ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile( self, filename ):
tree = ET.parse( filename ) def convertXMLToMetadata(self, tree):
return self.convertXMLToMetadata( tree )
root = tree.getroot()
if root.tag != 'comet':
raise 1
return None
metadata = GenericMetadata()
md = metadata
# Helper function
def xlate(tag):
node = root.find(tag)
if node is not None:
return node.text
else:
return None
md.series = xlate('series')
md.title = xlate('title')
md.issue = xlate('issue')
md.volume = xlate('volume')
md.comments = xlate('description')
md.publisher = xlate('publisher')
md.language = xlate('language')
md.format = xlate('format')
md.pageCount = xlate('pages')
md.maturityRating = xlate('rating')
md.price = xlate('price')
md.isVersionOf = xlate('isVersionOf')
md.rights = xlate('rights')
md.identifier = xlate('identifier')
md.lastMark = xlate('lastMark')
md.genre = xlate('genre') # TODO - repeatable field
date = xlate('date')
if date is not None:
parts = date.split('-')
if len(parts) > 0:
md.year = parts[0]
if len(parts) > 1:
md.month = parts[1]
md.coverImage = xlate('coverImage')
readingDirection = xlate('readingDirection')
if readingDirection is not None and readingDirection == "rtl":
md.manga = "YesAndRightToLeft"
# loop for character tags
char_list = []
for n in root:
if n.tag == 'character':
char_list.append(n.text.strip())
md.characters = utils.listToString(char_list)
# Now extract the credit info
for n in root:
if (n.tag == 'writer' or
n.tag == 'penciller' or
n.tag == 'inker' or
n.tag == 'colorist' or
n.tag == 'letterer' or
n.tag == 'editor'
):
metadata.addCredit(n.text.strip(), n.tag.title())
if n.tag == 'coverDesigner':
metadata.addCredit(n.text.strip(), "Cover")
metadata.isEmpty = False
return metadata
# verify that the string actually contains CoMet data in XML format
def validateString(self, string):
    """Return True if *string* is XML whose root element is ``comet``.

    Any failure to parse the input (malformed XML, empty input, a value
    that is not a string) is reported as False instead of being raised.
    """
    try:
        root = ET.fromstring(string)
    except Exception:
        # Best-effort probe: a parse failure of any kind simply means
        # "this is not CoMet data".  Unlike a bare ``except:`` this does
        # not swallow KeyboardInterrupt or SystemExit.
        return False
    return root.tag == 'comet'
def writeToExternalFile(self, filename, metadata):
    """Serialize *metadata* as CoMet XML and write it to *filename*.

    The XML tree is built by ``self.convertMetadataToXML`` and written
    with UTF-8 encoding.
    """
    # convertMetadataToXML takes (filename, metadata); pass the real
    # filename instead of ``self`` in that slot.
    tree = self.convertMetadataToXML(filename, metadata)
    tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):
    """Parse the XML file at *filename* and convert it to metadata.

    The parsed tree is handed to ``self.convertXMLToMetadata`` and that
    method's result is returned unchanged.
    """
    return self.convertXMLToMetadata(ET.parse(filename))

File diff suppressed because it is too large Load Diff

View File

@ -1,152 +1,144 @@
""" """A class to encapsulate the ComicBookInfo data"""
A python class to encapsulate the ComicBookInfo data
"""
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json import json
from datetime import datetime from datetime import datetime
import zipfile #import zipfile
from genericmetadata import GenericMetadata from genericmetadata import GenericMetadata
import utils import utils
#import ctversion #import ctversion
class ComicBookInfo: class ComicBookInfo:
def metadataFromString( self, string ): def metadataFromString(self, string):
cbi_container = json.loads( unicode(string, 'utf-8') )
metadata = GenericMetadata() cbi_container = json.loads(unicode(string, 'utf-8'))
cbi = cbi_container[ 'ComicBookInfo/1.0' ] metadata = GenericMetadata()
#helper func cbi = cbi_container['ComicBookInfo/1.0']
# If item is not in CBI, return None
def xlate( cbi_entry):
if cbi_entry in cbi:
return cbi[cbi_entry]
else:
return None
metadata.series = xlate( 'series' )
metadata.title = xlate( 'title' )
metadata.issue = xlate( 'issue' )
metadata.publisher = xlate( 'publisher' )
metadata.month = xlate( 'publicationMonth' )
metadata.year = xlate( 'publicationYear' )
metadata.issueCount = xlate( 'numberOfIssues' )
metadata.comments = xlate( 'comments' )
metadata.credits = xlate( 'credits' )
metadata.genre = xlate( 'genre' )
metadata.volume = xlate( 'volume' )
metadata.volumeCount = xlate( 'numberOfVolumes' )
metadata.language = xlate( 'language' )
metadata.country = xlate( 'country' )
metadata.criticalRating = xlate( 'rating' )
metadata.tags = xlate( 'tags' )
# make sure credits and tags are at least empty lists and not None
if metadata.credits is None:
metadata.credits = []
if metadata.tags is None:
metadata.tags = []
#need to massage the language string to be ISO
if metadata.language is not None:
# reverse look-up
pattern = metadata.language
metadata.language = None
for key in utils.getLanguageDict():
if utils.getLanguageDict()[ key ] == pattern.encode('utf-8'):
metadata.language = key
break
metadata.isEmpty = False
return metadata
def stringFromMetadata( self, metadata ): # helper func
# If item is not in CBI, return None
def xlate(cbi_entry):
if cbi_entry in cbi:
return cbi[cbi_entry]
else:
return None
cbi_container = self.createJSONDictionary( metadata ) metadata.series = xlate('series')
return json.dumps( cbi_container ) metadata.title = xlate('title')
metadata.issue = xlate('issue')
#verify that the string actually contains CBI data in JSON format metadata.publisher = xlate('publisher')
def validateString( self, string ): metadata.month = xlate('publicationMonth')
metadata.year = xlate('publicationYear')
try: metadata.issueCount = xlate('numberOfIssues')
cbi_container = json.loads( string ) metadata.comments = xlate('comments')
except: metadata.credits = xlate('credits')
return False metadata.genre = xlate('genre')
metadata.volume = xlate('volume')
return ( 'ComicBookInfo/1.0' in cbi_container ) metadata.volumeCount = xlate('numberOfVolumes')
metadata.language = xlate('language')
metadata.country = xlate('country')
metadata.criticalRating = xlate('rating')
metadata.tags = xlate('tags')
# make sure credits and tags are at least empty lists and not None
if metadata.credits is None:
metadata.credits = []
if metadata.tags is None:
metadata.tags = []
def createJSONDictionary( self, metadata ): # need to massage the language string to be ISO
if metadata.language is not None:
# Create the dictionary that we will convert to JSON text # reverse look-up
cbi = dict() pattern = metadata.language
cbi_container = {'appID' : 'ComicTagger/' + '1.0.0', #ctversion.version, metadata.language = None
'lastModified' : str(datetime.now()), for key in utils.getLanguageDict():
'ComicBookInfo/1.0' : cbi } if utils.getLanguageDict()[key] == pattern.encode('utf-8'):
metadata.language = key
#helper func break
def assign( cbi_entry, md_entry):
if md_entry is not None:
cbi[cbi_entry] = md_entry
#helper func
def toInt(s):
i = None
if type(s) in [ str, unicode, int ]:
try:
i = int(s)
except ValueError:
pass
return i
assign( 'series', metadata.series )
assign( 'title', metadata.title )
assign( 'issue', metadata.issue )
assign( 'publisher', metadata.publisher )
assign( 'publicationMonth', toInt(metadata.month) )
assign( 'publicationYear', toInt(metadata.year) )
assign( 'numberOfIssues', toInt(metadata.issueCount) )
assign( 'comments', metadata.comments )
assign( 'genre', metadata.genre )
assign( 'volume', toInt(metadata.volume) )
assign( 'numberOfVolumes', toInt(metadata.volumeCount) )
assign( 'language', utils.getLanguageFromISO(metadata.language) )
assign( 'country', metadata.country )
assign( 'rating', metadata.criticalRating )
assign( 'credits', metadata.credits )
assign( 'tags', metadata.tags )
return cbi_container
def writeToExternalFile( self, filename, metadata ): metadata.isEmpty = False
cbi_container = self.createJSONDictionary(metadata) return metadata
f = open(filename, 'w') def stringFromMetadata(self, metadata):
f.write(json.dumps(cbi_container, indent=4))
f.close
cbi_container = self.createJSONDictionary(metadata)
return json.dumps(cbi_container)
def validateString(self, string):
    """Verify that the string actually contains CBI data in JSON format.

    Returns True only when *string* decodes to a JSON object that has a
    top-level 'ComicBookInfo/1.0' key.  Undecodable input and JSON values
    of any other type (number, string, list, null) yield False.
    """
    try:
        cbi_container = json.loads(string)
    except Exception:
        # Best-effort probe: anything that fails to decode is "not CBI".
        # Unlike a bare ``except:`` this does not swallow
        # KeyboardInterrupt or SystemExit.
        return False

    # Valid JSON need not be an object: ``in`` on a number raises
    # TypeError, and on a string or list it would test the wrong thing.
    return (isinstance(cbi_container, dict)
            and 'ComicBookInfo/1.0' in cbi_container)
def createJSONDictionary(self, metadata):
    """Build the CBI container dictionary that is later dumped as JSON.

    The container holds 'appID', 'lastModified' (the current time as a
    string) and 'ComicBookInfo/1.0'; the latter maps each CBI field name
    to the corresponding metadata value, omitting fields that are None.
    """
    payload = dict()
    container = {
        'appID': 'ComicTagger/' + '1.0.0',  # ctversion.version,
        'lastModified': str(datetime.now()),
        'ComicBookInfo/1.0': payload,
    }

    def as_int(value):
        # Only exact str/unicode/int values are converted; any other
        # type, or text that is not an integer literal, gives None.
        if type(value) not in [str, unicode, int]:
            return None
        try:
            return int(value)
        except ValueError:
            return None

    # (CBI field name, value) pairs, evaluated in the order they are
    # written out.
    fields = [
        ('series', metadata.series),
        ('title', metadata.title),
        ('issue', metadata.issue),
        ('publisher', metadata.publisher),
        ('publicationMonth', as_int(metadata.month)),
        ('publicationYear', as_int(metadata.year)),
        ('numberOfIssues', as_int(metadata.issueCount)),
        ('comments', metadata.comments),
        ('genre', metadata.genre),
        ('volume', as_int(metadata.volume)),
        ('numberOfVolumes', as_int(metadata.volumeCount)),
        ('language', utils.getLanguageFromISO(metadata.language)),
        ('country', metadata.country),
        ('rating', metadata.criticalRating),
        ('credits', metadata.credits),
        ('tags', metadata.tags),
    ]
    for field_name, field_value in fields:
        if field_value is not None:
            payload[field_name] = field_value

    return container
def writeToExternalFile(self, filename, metadata):
    """Write *metadata* to *filename* as indented ComicBookInfo JSON.

    The dictionary comes from ``self.createJSONDictionary`` and is dumped
    with a four-space indent.
    """
    cbi_container = self.createJSONDictionary(metadata)

    # The original ended with ``f.close`` (no call parentheses), so the
    # file was never explicitly closed; the context manager closes it
    # even if the write fails.
    with open(filename, 'w') as f:
        f.write(json.dumps(cbi_container, indent=4))

View File

@ -1,293 +1,290 @@
""" """A class to encapsulate ComicRack's ComicInfo.xml data"""
A python class to encapsulate ComicRack's ComicInfo.xml data
"""
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
limitations under the License. # limitations under the License.
"""
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
#from datetime import datetime
#from pprint import pprint
#import zipfile
from genericmetadata import GenericMetadata from genericmetadata import GenericMetadata
import utils import utils
class ComicInfoXml: class ComicInfoXml:
writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ]
inker_synonyms = [ 'inker', 'artist', 'finishes' ]
colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ]
letterer_synonyms = [ 'letterer']
cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ]
editor_synonyms = [ 'editor']
writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns']
inker_synonyms = ['inker', 'artist', 'finishes']
colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer']
letterer_synonyms = ['letterer']
cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist']
editor_synonyms = ['editor']
def getParseableCredits( self ): def getParseableCredits(self):
parsable_credits = [] parsable_credits = []
parsable_credits.extend( self.writer_synonyms ) parsable_credits.extend(self.writer_synonyms)
parsable_credits.extend( self.penciller_synonyms ) parsable_credits.extend(self.penciller_synonyms)
parsable_credits.extend( self.inker_synonyms ) parsable_credits.extend(self.inker_synonyms)
parsable_credits.extend( self.colorist_synonyms ) parsable_credits.extend(self.colorist_synonyms)
parsable_credits.extend( self.letterer_synonyms ) parsable_credits.extend(self.letterer_synonyms)
parsable_credits.extend( self.cover_synonyms ) parsable_credits.extend(self.cover_synonyms)
parsable_credits.extend( self.editor_synonyms ) parsable_credits.extend(self.editor_synonyms)
return parsable_credits return parsable_credits
def metadataFromString( self, string ):
tree = ET.ElementTree(ET.fromstring( string )) def metadataFromString(self, string):
return self.convertXMLToMetadata( tree )
def stringFromMetadata( self, metadata ): tree = ET.ElementTree(ET.fromstring(string))
return self.convertXMLToMetadata(tree)
header = '<?xml version="1.0"?>\n' def stringFromMetadata(self, metadata):
tree = self.convertMetadataToXML( self, metadata )
return header + ET.tostring(tree.getroot())
def indent( self, elem, level=0 ): header = '<?xml version="1.0"?>\n'
# for making the XML output readable
i = "\n" + level*" "
if len(elem):
if not elem.text or not elem.text.strip():
elem.text = i + " "
if not elem.tail or not elem.tail.strip():
elem.tail = i
for elem in elem:
self.indent( elem, level+1 )
if not elem.tail or not elem.tail.strip():
elem.tail = i
else:
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
def convertMetadataToXML( self, filename, metadata ):
#shorthand for the metadata tree = self.convertMetadataToXML(self, metadata)
md = metadata return header + ET.tostring(tree.getroot())
# build a tree structure def indent(self, elem, level=0):
root = ET.Element("ComicInfo") # for making the XML output readable
root.attrib['xmlns:xsi']="http://www.w3.org/2001/XMLSchema-instance" i = "\n" + level * " "
root.attrib['xmlns:xsd']="http://www.w3.org/2001/XMLSchema" if len(elem):
#helper func if not elem.text or not elem.text.strip():
def assign( cix_entry, md_entry): elem.text = i + " "
if md_entry is not None: if not elem.tail or not elem.tail.strip():
ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry) elem.tail = i
for elem in elem:
self.indent(elem, level + 1)
if not elem.tail or not elem.tail.strip():
elem.tail = i
else:
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
assign( 'Title', md.title ) def convertMetadataToXML(self, filename, metadata):
assign( 'Series', md.series )
assign( 'Number', md.issue )
assign( 'Count', md.issueCount )
assign( 'Volume', md.volume )
assign( 'AlternateSeries', md.alternateSeries )
assign( 'AlternateNumber', md.alternateNumber )
assign( 'StoryArc', md.storyArc )
assign( 'SeriesGroup', md.seriesGroup )
assign( 'AlternateCount', md.alternateCount )
assign( 'Summary', md.comments )
assign( 'Notes', md.notes )
assign( 'Year', md.year )
assign( 'Month', md.month )
assign( 'Day', md.day )
# need to specially process the credits, since they are structured differently than CIX # shorthand for the metadata
credit_writer_list = list() md = metadata
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
# first, loop thru credits, and build a list for each role that CIX supports
for credit in metadata.credits:
if credit['role'].lower() in set( self.writer_synonyms ): # build a tree structure
credit_writer_list.append(credit['person'].replace(",","")) root = ET.Element("ComicInfo")
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema"
# helper func
if credit['role'].lower() in set( self.penciller_synonyms ): def assign(cix_entry, md_entry):
credit_penciller_list.append(credit['person'].replace(",","")) if md_entry is not None:
ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry)
if credit['role'].lower() in set( self.inker_synonyms ):
credit_inker_list.append(credit['person'].replace(",",""))
if credit['role'].lower() in set( self.colorist_synonyms ):
credit_colorist_list.append(credit['person'].replace(",",""))
if credit['role'].lower() in set( self.letterer_synonyms ): assign('Title', md.title)
credit_letterer_list.append(credit['person'].replace(",","")) assign('Series', md.series)
assign('Number', md.issue)
assign('Count', md.issueCount)
assign('Volume', md.volume)
assign('AlternateSeries', md.alternateSeries)
assign('AlternateNumber', md.alternateNumber)
assign('StoryArc', md.storyArc)
assign('SeriesGroup', md.seriesGroup)
assign('AlternateCount', md.alternateCount)
assign('Summary', md.comments)
assign('Notes', md.notes)
assign('Year', md.year)
assign('Month', md.month)
assign('Day', md.day)
if credit['role'].lower() in set( self.cover_synonyms ): # need to specially process the credits, since they are structured
credit_cover_list.append(credit['person'].replace(",","")) # differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
if credit['role'].lower() in set( self.editor_synonyms ): # first, loop thru credits, and build a list for each role that CIX
credit_editor_list.append(credit['person'].replace(",","")) # supports
for credit in metadata.credits:
# second, convert each list to string, and add to XML struct
if len( credit_writer_list ) > 0:
node = ET.SubElement(root, 'Writer')
node.text = utils.listToString( credit_writer_list )
if len( credit_penciller_list ) > 0: if credit['role'].lower() in set(self.writer_synonyms):
node = ET.SubElement(root, 'Penciller') credit_writer_list.append(credit['person'].replace(",", ""))
node.text = utils.listToString( credit_penciller_list )
if len( credit_inker_list ) > 0: if credit['role'].lower() in set(self.penciller_synonyms):
node = ET.SubElement(root, 'Inker') credit_penciller_list.append(credit['person'].replace(",", ""))
node.text = utils.listToString( credit_inker_list )
if len( credit_colorist_list ) > 0: if credit['role'].lower() in set(self.inker_synonyms):
node = ET.SubElement(root, 'Colorist') credit_inker_list.append(credit['person'].replace(",", ""))
node.text = utils.listToString( credit_colorist_list )
if len( credit_letterer_list ) > 0: if credit['role'].lower() in set(self.colorist_synonyms):
node = ET.SubElement(root, 'Letterer') credit_colorist_list.append(credit['person'].replace(",", ""))
node.text = utils.listToString( credit_letterer_list )
if len( credit_cover_list ) > 0: if credit['role'].lower() in set(self.letterer_synonyms):
node = ET.SubElement(root, 'CoverArtist') credit_letterer_list.append(credit['person'].replace(",", ""))
node.text = utils.listToString( credit_cover_list )
if len( credit_editor_list ) > 0:
node = ET.SubElement(root, 'Editor')
node.text = utils.listToString( credit_editor_list )
assign( 'Publisher', md.publisher ) if credit['role'].lower() in set(self.cover_synonyms):
assign( 'Imprint', md.imprint ) credit_cover_list.append(credit['person'].replace(",", ""))
assign( 'Genre', md.genre )
assign( 'Web', md.webLink )
assign( 'PageCount', md.pageCount )
assign( 'LanguageISO', md.language )
assign( 'Format', md.format )
assign( 'AgeRating', md.maturityRating )
if md.blackAndWhite is not None and md.blackAndWhite:
ET.SubElement(root, 'BlackAndWhite').text = "Yes"
assign( 'Manga', md.manga )
assign( 'Characters', md.characters )
assign( 'Teams', md.teams )
assign( 'Locations', md.locations )
assign( 'ScanInformation', md.scanInfo )
# loop and add the page entries under pages node if credit['role'].lower() in set(self.editor_synonyms):
if len( md.pages ) > 0: credit_editor_list.append(credit['person'].replace(",", ""))
pages_node = ET.SubElement(root, 'Pages')
for page_dict in md.pages:
page_node = ET.SubElement(pages_node, 'Page')
page_node.attrib = page_dict
# self pretty-print # second, convert each list to string, and add to XML struct
self.indent(root) if len(credit_writer_list) > 0:
node = ET.SubElement(root, 'Writer')
node.text = utils.listToString(credit_writer_list)
# wrap it in an ElementTree instance, and save as XML if len(credit_penciller_list) > 0:
tree = ET.ElementTree(root) node = ET.SubElement(root, 'Penciller')
return tree node.text = utils.listToString(credit_penciller_list)
def convertXMLToMetadata( self, tree ): if len(credit_inker_list) > 0:
node = ET.SubElement(root, 'Inker')
root = tree.getroot() node.text = utils.listToString(credit_inker_list)
if root.tag != 'ComicInfo': if len(credit_colorist_list) > 0:
raise 1 node = ET.SubElement(root, 'Colorist')
return None node.text = utils.listToString(credit_colorist_list)
metadata = GenericMetadata() if len(credit_letterer_list) > 0:
md = metadata node = ET.SubElement(root, 'Letterer')
node.text = utils.listToString(credit_letterer_list)
# Helper function
def xlate( tag ):
node = root.find( tag )
if node is not None:
return node.text
else:
return None
md.series = xlate( 'Series' )
md.title = xlate( 'Title' )
md.issue = xlate( 'Number' )
md.issueCount = xlate( 'Count' )
md.volume = xlate( 'Volume' )
md.alternateSeries = xlate( 'AlternateSeries' )
md.alternateNumber = xlate( 'AlternateNumber' )
md.alternateCount = xlate( 'AlternateCount' )
md.comments = xlate( 'Summary' )
md.notes = xlate( 'Notes' )
md.year = xlate( 'Year' )
md.month = xlate( 'Month' )
md.day = xlate( 'Day' )
md.publisher = xlate( 'Publisher' )
md.imprint = xlate( 'Imprint' )
md.genre = xlate( 'Genre' )
md.webLink = xlate( 'Web' )
md.language = xlate( 'LanguageISO' )
md.format = xlate( 'Format' )
md.manga = xlate( 'Manga' )
md.characters = xlate( 'Characters' )
md.teams = xlate( 'Teams' )
md.locations = xlate( 'Locations' )
md.pageCount = xlate( 'PageCount' )
md.scanInfo = xlate( 'ScanInformation' )
md.storyArc = xlate( 'StoryArc' )
md.seriesGroup = xlate( 'SeriesGroup' )
md.maturityRating = xlate( 'AgeRating' )
tmp = xlate( 'BlackAndWhite' ) if len(credit_cover_list) > 0:
md.blackAndWhite = False node = ET.SubElement(root, 'CoverArtist')
if tmp is not None and tmp.lower() in [ "yes", "true", "1" ]: node.text = utils.listToString(credit_cover_list)
md.blackAndWhite = True
# Now extract the credit info
for n in root:
if ( n.tag == 'Writer' or
n.tag == 'Penciller' or
n.tag == 'Inker' or
n.tag == 'Colorist' or
n.tag == 'Letterer' or
n.tag == 'Editor'
):
if n.text is not None:
for name in n.text.split(','):
metadata.addCredit( name.strip(), n.tag )
if n.tag == 'CoverArtist': if len(credit_editor_list) > 0:
if n.text is not None: node = ET.SubElement(root, 'Editor')
for name in n.text.split(','): node.text = utils.listToString(credit_editor_list)
metadata.addCredit( name.strip(), "Cover" )
# parse page data now assign('Publisher', md.publisher)
pages_node = root.find( "Pages" ) assign('Imprint', md.imprint)
if pages_node is not None: assign('Genre', md.genre)
for page in pages_node: assign('Web', md.webLink)
metadata.pages.append( page.attrib ) assign('PageCount', md.pageCount)
#print page.attrib assign('LanguageISO', md.language)
assign('Format', md.format)
assign('AgeRating', md.maturityRating)
if md.blackAndWhite is not None and md.blackAndWhite:
ET.SubElement(root, 'BlackAndWhite').text = "Yes"
assign('Manga', md.manga)
assign('Characters', md.characters)
assign('Teams', md.teams)
assign('Locations', md.locations)
assign('ScanInformation', md.scanInfo)
metadata.isEmpty = False # loop and add the page entries under pages node
if len(md.pages) > 0:
return metadata pages_node = ET.SubElement(root, 'Pages')
for page_dict in md.pages:
page_node = ET.SubElement(pages_node, 'Page')
page_node.attrib = page_dict
def writeToExternalFile( self, filename, metadata ): # self pretty-print
self.indent(root)
tree = self.convertMetadataToXML( self, metadata )
#ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile( self, filename ):
tree = ET.parse( filename ) # wrap it in an ElementTree instance, and save as XML
return self.convertXMLToMetadata( tree ) tree = ET.ElementTree(root)
return tree
def convertXMLToMetadata(self, tree):
    """Build a GenericMetadata object from a parsed ComicInfo XML tree.

    tree must expose getroot(), and the root element must be tagged
    'ComicInfo'.

    Simple text elements are copied onto the matching metadata
    attributes (None when the element is absent), comma-separated
    creator elements are split into individual credits, and the
    attribute dict of every child of <Pages> is appended to
    metadata.pages.

    Returns the populated GenericMetadata with isEmpty set to False.
    Raises TypeError when the root element is not 'ComicInfo'.
    """
    root = tree.getroot()

    if root.tag != 'ComicInfo':
        # This used to be `raise 1`, which is not a valid raise statement
        # and surfaced as an opaque TypeError about exception classes.
        # Keep TypeError so existing handlers still match, but report
        # what is actually wrong with the document.
        raise TypeError(
            "not a ComicInfo document: root element is {0!r}".format(
                root.tag))

    metadata = GenericMetadata()
    md = metadata

    def xlate(tag):
        """Return the text of the first child named tag, or None."""
        node = root.find(tag)
        return node.text if node is not None else None

    md.series = xlate('Series')
    md.title = xlate('Title')
    md.issue = xlate('Number')
    md.issueCount = xlate('Count')
    md.volume = xlate('Volume')
    md.alternateSeries = xlate('AlternateSeries')
    md.alternateNumber = xlate('AlternateNumber')
    md.alternateCount = xlate('AlternateCount')
    md.comments = xlate('Summary')
    md.notes = xlate('Notes')
    md.year = xlate('Year')
    md.month = xlate('Month')
    md.day = xlate('Day')
    md.publisher = xlate('Publisher')
    md.imprint = xlate('Imprint')
    md.genre = xlate('Genre')
    md.webLink = xlate('Web')
    md.language = xlate('LanguageISO')
    md.format = xlate('Format')
    md.manga = xlate('Manga')
    md.characters = xlate('Characters')
    md.teams = xlate('Teams')
    md.locations = xlate('Locations')
    md.pageCount = xlate('PageCount')
    md.scanInfo = xlate('ScanInformation')
    md.storyArc = xlate('StoryArc')
    md.seriesGroup = xlate('SeriesGroup')
    md.maturityRating = xlate('AgeRating')

    # Only an explicit affirmative value turns the flag on; a missing or
    # unrecognised element leaves it False.
    tmp = xlate('BlackAndWhite')
    md.blackAndWhite = (
        tmp is not None and tmp.lower() in ["yes", "true", "1"])

    # Now extract the credit info. These element names double as the
    # credit role; CoverArtist is the one tag stored under a different
    # role name ("Cover").
    credit_tags = (
        'Writer', 'Penciller', 'Inker', 'Colorist', 'Letterer', 'Editor')
    for n in root:
        if n.tag in credit_tags:
            role = n.tag
        elif n.tag == 'CoverArtist':
            role = "Cover"
        else:
            continue
        if n.text is not None:
            for name in n.text.split(','):
                metadata.addCredit(name.strip(), role)

    # parse page data now
    pages_node = root.find("Pages")
    if pages_node is not None:
        for page in pages_node:
            metadata.pages.append(page.attrib)

    metadata.isEmpty = False

    return metadata
def writeToExternalFile(self, filename, metadata):
    """Convert metadata to an XML tree and write it to filename as UTF-8."""
    # NOTE(review): self is deliberately passed a second time, exactly as
    # the existing call does; presumably convertMetadataToXML takes an
    # extra leading positional parameter -- confirm against its signature.
    xml_tree = self.convertMetadataToXML(self, metadata)
    xml_tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):
    """Parse the XML file at filename and return it converted to metadata."""
    return self.convertXMLToMetadata(ET.parse(filename))

View File

@ -1,26 +1,21 @@
""" """Functions for parsing comic info from filename
Functions for parsing comic info from filename
This should probably be re-written, but, well, it mostly works! This should probably be re-written, but, well, it mostly works!
""" """
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Some portions of this code were modified from pyComicMetaThis project # Some portions of this code were modified from pyComicMetaThis project
# http://code.google.com/p/pycomicmetathis/ # http://code.google.com/p/pycomicmetathis/
@ -29,27 +24,27 @@ import re
import os import os
from urllib import unquote from urllib import unquote
class FileNameParser: class FileNameParser:
def repl(self, m): def repl(self, m):
return ' ' * len(m.group()) return ' ' * len(m.group())
def fixSpaces( self, string, remove_dashes=True ): def fixSpaces(self, string, remove_dashes=True):
if remove_dashes: if remove_dashes:
placeholders = ['[-_]',' +'] placeholders = ['[-_]', ' +']
else: else:
placeholders = ['[_]',' +'] placeholders = ['[_]', ' +']
for ph in placeholders: for ph in placeholders:
string = re.sub(ph, self.repl, string ) string = re.sub(ph, self.repl, string)
return string #.strip() return string # .strip()
def getIssueCount(self, filename, issue_end):
def getIssueCount( self,filename, issue_end ):
count = "" count = ""
filename = filename[issue_end:] filename = filename[issue_end:]
# replace any name seperators with spaces # replace any name separators with spaces
tmpstr = self.fixSpaces(filename) tmpstr = self.fixSpaces(filename)
found = False found = False
@ -59,48 +54,51 @@ class FileNameParser:
found = True found = True
if not found: if not found:
match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE) match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
if match: if match:
count = match.group() count = match.group()
found = True found = True
count = count.lstrip("0") count = count.lstrip("0")
return count return count
def getIssueNumber( self, filename ): def getIssueNumber(self, filename):
"""Returns a tuple of issue number string, and start and end indexes in the filename
# Returns a tuple of issue number string, and start and end indexs in the filename (The indexes will be used to split the string up for further parsing)
# (The indexes will be used to split the string up for further parsing) """
found = False found = False
issue = '' issue = ''
start = 0 start = 0
end = 0 end = 0
# first, look for multiple "--", this means it's formatted differently from most: # first, look for multiple "--", this means it's formatted differently
# from most:
if "--" in filename: if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue # the pattern seems to be that anything to left of the first "--"
# is the series name followed by issue
filename = re.sub("--.*", self.repl, filename) filename = re.sub("--.*", self.repl, filename)
elif "__" in filename: elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue # the pattern seems to be that anything to left of the first "__"
# is the series name followed by issue
filename = re.sub("__.*", self.repl, filename) filename = re.sub("__.*", self.repl, filename)
filename = filename.replace("+", " ") filename = filename.replace("+", " ")
# replace parenthetical phrases with spaces # replace parenthetical phrases with spaces
filename = re.sub( "\(.*?\)", self.repl, filename) filename = re.sub("\(.*?\)", self.repl, filename)
filename = re.sub( "\[.*?\]", self.repl, filename) filename = re.sub("\[.*?\]", self.repl, filename)
# replace any name seperators with spaces # replace any name separators with spaces
filename = self.fixSpaces(filename) filename = self.fixSpaces(filename)
# remove any "of NN" phrase with spaces (problem: this could break on some titles) # remove any "of NN" phrase with spaces (problem: this could break on
filename = re.sub( "of [\d]+", self.repl, filename) # some titles)
filename = re.sub("of [\d]+", self.repl, filename)
#print u"[{0}]".format(filename) # print u"[{0}]".format(filename)
# we should now have a cleaned up filename version with all the words in # we should now have a cleaned up filename version with all the words in
# the same positions as original filename # the same positions as original filename
@ -108,27 +106,28 @@ class FileNameParser:
# make a list of each word and its position # make a list of each word and its position
word_list = list() word_list = list()
for m in re.finditer("\S+", filename): for m in re.finditer("\S+", filename):
word_list.append( (m.group(0), m.start(), m.end()) ) word_list.append((m.group(0), m.start(), m.end()))
# remove the first word, since it can't be the issue number # remove the first word, since it can't be the issue number
if len(word_list) > 1: if len(word_list) > 1:
word_list = word_list[1:] word_list = word_list[1:]
else: else:
#only one word?? just bail. # only one word?? just bail.
return issue, start, end return issue, start, end
# Now try to search for the likely issue number word in the list # Now try to search for the likely issue number word in the list
# first look for a word with "#" followed by digits with optional sufix # first look for a word with "#" followed by digits with optional suffix
# this is almost certainly the issue number # this is almost certainly the issue number
for w in reversed(word_list): for w in reversed(word_list):
if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
found = True found = True
break break
# same as above but w/o a '#', and only look at the last word in the list # same as above but w/o a '#', and only look at the last word in the
# list
if not found: if not found:
w = word_list[-1] w = word_list[-1]
if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
found = True found = True
@ -148,20 +147,21 @@ class FileNameParser:
return issue, start, end return issue, start, end
def getSeriesName(self, filename, issue_start ): def getSeriesName(self, filename, issue_start):
"""Use the issue number string index to split the filename string"""
# use the issue number string index to split the filename string
if issue_start != 0: if issue_start != 0:
filename = filename[:issue_start] filename = filename[:issue_start]
# in case there is no issue number, remove some obvious stuff # in case there is no issue number, remove some obvious stuff
if "--" in filename: if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue # the pattern seems to be that anything to left of the first "--"
# is the series name followed by issue
filename = re.sub("--.*", self.repl, filename) filename = re.sub("--.*", self.repl, filename)
elif "__" in filename: elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue # the pattern seems to be that anything to left of the first "__"
# is the series name followed by issue
filename = re.sub("__.*", self.repl, filename) filename = re.sub("__.*", self.repl, filename)
filename = filename.replace("+", " ") filename = filename.replace("+", " ")
@ -170,14 +170,14 @@ class FileNameParser:
series = tmpstr series = tmpstr
volume = "" volume = ""
#save the last word # save the last word
try: try:
last_word = series.split()[-1] last_word = series.split()[-1]
except: except:
last_word = "" last_word = ""
# remove any parenthetical phrases # remove any parenthetical phrases
series = re.sub( "\(.*?\)", "", series) series = re.sub("\(.*?\)", "", series)
# search for volume number # search for volume number
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series) match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
@ -188,7 +188,7 @@ class FileNameParser:
# if a volume wasn't found, see if the last word is a year in parentheses # if a volume wasn't found, see if the last word is a year in parentheses
# since that's a common way to designate the volume # since that's a common way to designate the volume
if volume == "": if volume == "":
#match either (YEAR), (YEAR-), or (YEAR-YEAR2) # match either (YEAR), (YEAR-), or (YEAR-YEAR2)
match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word) match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
if match: if match:
volume = match.group(2) volume = match.group(2)
@ -199,7 +199,7 @@ class FileNameParser:
# for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might # for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might
# be removed to help search online # be removed to help search online
if issue_start == 0: if issue_start == 0:
one_shot_words = [ "tpb", "os", "one-shot", "ogn", "gn" ] one_shot_words = ["tpb", "os", "one-shot", "ogn", "gn"]
try: try:
last_word = series.split()[-1] last_word = series.split()[-1]
if last_word.lower() in one_shot_words: if last_word.lower() in one_shot_words:
@ -209,7 +209,7 @@ class FileNameParser:
return series, volume.strip() return series, volume.strip()
def getYear( self,filename, issue_end): def getYear(self, filename, issue_end):
filename = filename[issue_end:] filename = filename[issue_end:]
@ -218,36 +218,38 @@ class FileNameParser:
match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename) match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
if match: if match:
year = match.group() year = match.group()
# remove non-numerics # remove non-digits
year = re.sub("[^0-9]", "", year) year = re.sub("[^0-9]", "", year)
return year return year
def getRemainder( self, filename, year, count, volume, issue_end ): def getRemainder(self, filename, year, count, volume, issue_end):
"""Make a guess at where the the non-interesting stuff begins"""
#make a guess at where the the non-interesting stuff begins
remainder = "" remainder = ""
if "--" in filename: if "--" in filename:
remainder = filename.split("--",1)[1] remainder = filename.split("--", 1)[1]
elif "__" in filename: elif "__" in filename:
remainder = filename.split("__",1)[1] remainder = filename.split("__", 1)[1]
elif issue_end != 0: elif issue_end != 0:
remainder = filename[issue_end:] remainder = filename[issue_end:]
remainder = self.fixSpaces(remainder, remove_dashes=False) remainder = self.fixSpaces(remainder, remove_dashes=False)
if volume != "": if volume != "":
remainder = remainder.replace("Vol."+volume,"",1) remainder = remainder.replace("Vol." + volume, "", 1)
if year != "": if year != "":
remainder = remainder.replace(year,"",1) remainder = remainder.replace(year, "", 1)
if count != "": if count != "":
remainder = remainder.replace("of "+count,"",1) remainder = remainder.replace("of " + count, "", 1)
remainder = remainder.replace("()","") remainder = remainder.replace("()", "")
remainder = remainder.replace(" "," ") # cleans some whitespace mess remainder = remainder.replace(
" ",
" ") # cleans some whitespace mess
return remainder.strip() return remainder.strip()
def parseFilename( self, filename ): def parseFilename(self, filename):
# remove the path # remove the path
filename = os.path.basename(filename) filename = os.path.basename(filename)
@ -255,10 +257,10 @@ class FileNameParser:
# remove the extension # remove the extension
filename = os.path.splitext(filename)[0] filename = os.path.splitext(filename)[0]
#url decode, just in case # url decode, just in case
filename = unquote(filename) filename = unquote(filename)
# sometimes archives get messed up names from too many decodings # sometimes archives get messed up names from too many decodes
# often url encodings will break and leave "_28" and "_29" in place # often url encodings will break and leave "_28" and "_29" in place
# of "(" and ")" see if there are a number of these, and replace them # of "(" and ")" see if there are a number of these, and replace them
if filename.count("_28") > 1 and filename.count("_29") > 1: if filename.count("_28") > 1 and filename.count("_29") > 1:
@ -268,14 +270,18 @@ class FileNameParser:
self.issue, issue_start, issue_end = self.getIssueNumber(filename) self.issue, issue_start, issue_end = self.getIssueNumber(filename)
self.series, self.volume = self.getSeriesName(filename, issue_start) self.series, self.volume = self.getSeriesName(filename, issue_start)
# provides proper value when the filename doesn't have a issue number # provides proper value when the filename doesn't have a issue number
if issue_end == 0: if issue_end == 0:
issue_end=len(self.series) issue_end = len(self.series)
self.year = self.getYear(filename, issue_end) self.year = self.getYear(filename, issue_end)
self.issue_count = self.getIssueCount(filename, issue_end) self.issue_count = self.getIssueCount(filename, issue_end)
self.remainder = self.getRemainder( filename, self.year, self.issue_count, self.volume, issue_end ) self.remainder = self.getRemainder(
filename,
self.year,
self.issue_count,
self.volume,
issue_end)
if self.issue != "": if self.issue != "":
# strip off leading zeros # strip off leading zeros

View File

@ -1,43 +1,46 @@
""" """A class for internal metadata storage
A python class for internal metadata storage
The goal of this class is to handle ALL the data that might come from various
The goal of this class is to handle ALL the data that might come from various tagging schemes and databases, such as ComicVine or GCD. This makes conversion
tagging schemes and databases, such as ComicVine or GCD. This makes conversion possible, however lossy it might be
possible, however lossy it might be
"""
""" """
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Copyright 2012-2014 Anthony Beville
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software # http://www.apache.org/licenses/LICENSE-2.0
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # Unless required by applicable law or agreed to in writing, software
See the License for the specific language governing permissions and # distributed under the License is distributed on an "AS IS" BASIS,
limitations under the License. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
""" # See the License for the specific language governing permissions and
# limitations under the License.
import utils import utils
# These page info classes are exactly the same as the CIX scheme, since it's unique
class PageType: class PageType:
FrontCover = "FrontCover"
InnerCover = "InnerCover" """
Roundup = "Roundup" These page info classes are exactly the same as the CIX scheme, since
Story = "Story" it's unique
Advertisement = "Advertisement" """
Editorial = "Editorial"
Letters = "Letters" FrontCover = "FrontCover"
Preview = "Preview" InnerCover = "InnerCover"
BackCover = "BackCover" Roundup = "Roundup"
Other = "Other" Story = "Story"
Deleted = "Deleted" Advertisement = "Advertisement"
Editorial = "Editorial"
Letters = "Letters"
Preview = "Preview"
BackCover = "BackCover"
Other = "Other"
Deleted = "Deleted"
""" """
class PageInfo: class PageInfo:
@ -48,269 +51,271 @@ class PageInfo:
Key = "" Key = ""
ImageWidth = 0 ImageWidth = 0
ImageHeight = 0 ImageHeight = 0
""" """
class GenericMetadata: class GenericMetadata:
def __init__(self): def __init__(self):
self.isEmpty = True
self.tagOrigin = None
self.series = None
self.issue = None
self.title = None
self.publisher = None
self.month = None
self.year = None
self.day = None
self.issueCount = None
self.volume = None
self.genre = None
self.language = None # 2 letter iso code
self.comments = None # use same way as Summary in CIX
self.volumeCount = None self.isEmpty = True
self.criticalRating = None self.tagOrigin = None
self.country = None
self.alternateSeries = None
self.alternateNumber = None
self.alternateCount = None
self.imprint = None
self.notes = None
self.webLink = None
self.format = None
self.manga = None
self.blackAndWhite = None
self.pageCount = None
self.maturityRating = None
self.storyArc = None
self.seriesGroup = None
self.scanInfo = None
self.characters = None
self.teams = None
self.locations = None
self.credits = list() self.series = None
self.tags = list() self.issue = None
self.pages = list() self.title = None
self.publisher = None
self.month = None
self.year = None
self.day = None
self.issueCount = None
self.volume = None
self.genre = None
self.language = None # 2 letter iso code
self.comments = None # use same way as Summary in CIX
# Some CoMet-only items self.volumeCount = None
self.price = None self.criticalRating = None
self.isVersionOf = None self.country = None
self.rights = None
self.identifier = None
self.lastMark = None
self.coverImage = None
def overlay( self, new_md ): self.alternateSeries = None
# Overlay a metadata object on this one self.alternateNumber = None
# that is, when the new object has non-None self.alternateCount = None
# values, over-write them to this one self.imprint = None
self.notes = None
def assign( cur, new ): self.webLink = None
if new is not None: self.format = None
if type(new) == str and len(new) == 0: self.manga = None
setattr(self, cur, None) self.blackAndWhite = None
else: self.pageCount = None
setattr(self, cur, new) self.maturityRating = None
if not new_md.isEmpty:
self.isEmpty = False
assign( 'series', new_md.series )
assign( "issue", new_md.issue )
assign( "issueCount", new_md.issueCount )
assign( "title", new_md.title )
assign( "publisher", new_md.publisher )
assign( "day", new_md.day )
assign( "month", new_md.month )
assign( "year", new_md.year )
assign( "volume", new_md.volume )
assign( "volumeCount", new_md.volumeCount )
assign( "genre", new_md.genre )
assign( "language", new_md.language )
assign( "country", new_md.country )
assign( "criticalRating", new_md.criticalRating )
assign( "alternateSeries", new_md.alternateSeries )
assign( "alternateNumber", new_md.alternateNumber )
assign( "alternateCount", new_md.alternateCount )
assign( "imprint", new_md.imprint )
assign( "webLink", new_md.webLink )
assign( "format", new_md.format )
assign( "manga", new_md.manga )
assign( "blackAndWhite", new_md.blackAndWhite )
assign( "maturityRating", new_md.maturityRating )
assign( "storyArc", new_md.storyArc )
assign( "seriesGroup", new_md.seriesGroup )
assign( "scanInfo", new_md.scanInfo )
assign( "characters", new_md.characters )
assign( "teams", new_md.teams )
assign( "locations", new_md.locations )
assign( "comments", new_md.comments )
assign( "notes", new_md.notes )
assign( "price", new_md.price ) self.storyArc = None
assign( "isVersionOf", new_md.isVersionOf ) self.seriesGroup = None
assign( "rights", new_md.rights ) self.scanInfo = None
assign( "identifier", new_md.identifier )
assign( "lastMark", new_md.lastMark )
self.overlayCredits( new_md.credits )
# TODO
# not sure if the tags and pages should broken down, or treated
# as whole lists....
# For now, go the easy route, where any overlay self.characters = None
# value wipes out the whole list self.teams = None
if len(new_md.tags) > 0: self.locations = None
assign( "tags", new_md.tags )
if len(new_md.pages) > 0:
assign( "pages", new_md.pages )
self.credits = list()
def overlayCredits( self, new_credits ): self.tags = list()
for c in new_credits: self.pages = list()
if c.has_key('primary') and c['primary']:
primary = True
else:
primary = False
# Remove credit role if person is blank # Some CoMet-only items
if c['person'] == "": self.price = None
for r in reversed(self.credits): self.isVersionOf = None
if r['role'].lower() == c['role'].lower(): self.rights = None
self.credits.remove(r) self.identifier = None
# otherwise, add it! self.lastMark = None
else: self.coverImage = None
self.addCredit( c['person'], c['role'], primary )
def setDefaultPageList( self, count ):
# generate a default page list, with the first page marked as the cover
for i in range(count):
page_dict = dict()
page_dict['Image'] = str(i)
if i == 0:
page_dict['Type'] = PageType.FrontCover
self.pages.append( page_dict )
def getArchivePageIndex( self, pagenum ): def overlay(self, new_md):
# convert the displayed page number to the page index of the file in the archive """Overlay a metadata object on this one
if pagenum < len( self.pages ):
return int( self.pages[pagenum]['Image'] )
else:
return 0
def getCoverPageIndexList( self ):
# return a list of archive page indices of cover pages
coverlist = []
for p in self.pages:
if 'Type' in p and p['Type'] == PageType.FrontCover:
coverlist.append( int(p['Image']))
if len(coverlist) == 0:
coverlist.append( 0 )
return coverlist
def addCredit( self, person, role, primary = False ):
credit = dict()
credit['person'] = person
credit['role'] = role
if primary:
credit['primary'] = primary
# look to see if it's not already there... That is, when the new object has non-None values, over-write them
found = False to this one.
for c in self.credits: """
if ( c['person'].lower() == person.lower() and
c['role'].lower() == role.lower() ):
# no need to add it. just adjust the "primary" flag as needed
c['primary'] = primary
found = True
break
if not found:
self.credits.append(credit)
def assign(cur, new):
def __str__( self ): if new is not None:
vals = [] if isinstance(new, str) and len(new) == 0:
if self.isEmpty: setattr(self, cur, None)
return "No metadata" else:
setattr(self, cur, new)
def add_string( tag, val ): if not new_md.isEmpty:
if val is not None and u"{0}".format(val) != "": self.isEmpty = False
vals.append( (tag, val) )
def add_attr_string( tag ): assign('series', new_md.series)
val = getattr(self,tag) assign("issue", new_md.issue)
add_string( tag, getattr(self,tag) ) assign("issueCount", new_md.issueCount)
assign("title", new_md.title)
assign("publisher", new_md.publisher)
assign("day", new_md.day)
assign("month", new_md.month)
assign("year", new_md.year)
assign("volume", new_md.volume)
assign("volumeCount", new_md.volumeCount)
assign("genre", new_md.genre)
assign("language", new_md.language)
assign("country", new_md.country)
assign("criticalRating", new_md.criticalRating)
assign("alternateSeries", new_md.alternateSeries)
assign("alternateNumber", new_md.alternateNumber)
assign("alternateCount", new_md.alternateCount)
assign("imprint", new_md.imprint)
assign("webLink", new_md.webLink)
assign("format", new_md.format)
assign("manga", new_md.manga)
assign("blackAndWhite", new_md.blackAndWhite)
assign("maturityRating", new_md.maturityRating)
assign("storyArc", new_md.storyArc)
assign("seriesGroup", new_md.seriesGroup)
assign("scanInfo", new_md.scanInfo)
assign("characters", new_md.characters)
assign("teams", new_md.teams)
assign("locations", new_md.locations)
assign("comments", new_md.comments)
assign("notes", new_md.notes)
add_attr_string( "series" ) assign("price", new_md.price)
add_attr_string( "issue" ) assign("isVersionOf", new_md.isVersionOf)
add_attr_string( "issueCount" ) assign("rights", new_md.rights)
add_attr_string( "title" ) assign("identifier", new_md.identifier)
add_attr_string( "publisher" ) assign("lastMark", new_md.lastMark)
add_attr_string( "year" )
add_attr_string( "month" )
add_attr_string( "day" )
add_attr_string( "volume" )
add_attr_string( "volumeCount" )
add_attr_string( "genre" )
add_attr_string( "language" )
add_attr_string( "country" )
add_attr_string( "criticalRating" )
add_attr_string( "alternateSeries" )
add_attr_string( "alternateNumber" )
add_attr_string( "alternateCount" )
add_attr_string( "imprint" )
add_attr_string( "webLink" )
add_attr_string( "format" )
add_attr_string( "manga" )
add_attr_string( "price" ) self.overlayCredits(new_md.credits)
add_attr_string( "isVersionOf" ) # TODO
add_attr_string( "rights" )
add_attr_string( "identifier" ) # not sure if the tags and pages should broken down, or treated
add_attr_string( "lastMark" ) # as whole lists....
if self.blackAndWhite: # For now, go the easy route, where any overlay
add_attr_string( "blackAndWhite" ) # value wipes out the whole list
add_attr_string( "maturityRating" ) if len(new_md.tags) > 0:
add_attr_string( "storyArc" ) assign("tags", new_md.tags)
add_attr_string( "seriesGroup" )
add_attr_string( "scanInfo" ) if len(new_md.pages) > 0:
add_attr_string( "characters" ) assign("pages", new_md.pages)
add_attr_string( "teams" )
add_attr_string( "locations" ) def overlayCredits(self, new_credits):
add_attr_string( "comments" ) for c in new_credits:
add_attr_string( "notes" ) if 'primary' in c and c['primary']:
primary = True
add_string( "tags", utils.listToString( self.tags ) ) else:
primary = False
for c in self.credits:
primary = "" # Remove credit role if person is blank
if c.has_key('primary') and c['primary']: if c['person'] == "":
primary = " [P]" for r in reversed(self.credits):
add_string( "credit", c['role']+": "+c['person'] + primary) if r['role'].lower() == c['role'].lower():
self.credits.remove(r)
# find the longest field name # otherwise, add it!
flen = 0 else:
for i in vals: self.addCredit(c['person'], c['role'], primary)
flen = max( flen, len(i[0]) )
flen += 1 def setDefaultPageList(self, count):
# generate a default page list, with the first page marked as the cover
#format the data nicely for i in range(count):
outstr = "" page_dict = dict()
fmt_str = u"{0: <" + str(flen) + "} {1}\n" page_dict['Image'] = str(i)
for i in vals: if i == 0:
outstr += fmt_str.format( i[0]+":", i[1] ) page_dict['Type'] = PageType.FrontCover
self.pages.append(page_dict)
return outstr
def getArchivePageIndex(self, pagenum):
    """Map a displayed page number to the archive index of its image.

    Looks up entry `pagenum` of `self.pages` and returns its 'Image'
    value converted to int.  When `pagenum` is not below the number of
    known pages, 0 is returned instead.
    """
    known = pagenum < len(self.pages)
    return int(self.pages[pagenum]['Image']) if known else 0
def getCoverPageIndexList(self):
    """Return the archive page indices of all front-cover pages.

    A page counts as a cover when it has a 'Type' entry equal to
    PageType.FrontCover; its 'Image' value is converted to int.  If no
    page qualifies, the result is [0].
    """
    covers = [
        int(page['Image'])
        for page in self.pages
        # membership test first: pages without a 'Type' key are skipped
        if 'Type' in page and page['Type'] == PageType.FrontCover
    ]
    if not covers:
        # fall back to the first archive page
        covers.append(0)
    return covers
def addCredit(self, person, role, primary=False):
    """Record a person/role credit, avoiding duplicates.

    If `self.credits` already holds an entry whose person and role match
    case-insensitively, only that entry's 'primary' flag is overwritten
    with `primary`.  Otherwise a new dict is appended; it carries a
    'primary' key only when `primary` is truthy.
    """
    for existing in self.credits:
        same_person = existing['person'].lower() == person.lower()
        if same_person and existing['role'].lower() == role.lower():
            # already listed: just bring the "primary" flag up to date
            existing['primary'] = primary
            return

    entry = {'person': person, 'role': role}
    if primary:
        entry['primary'] = primary
    self.credits.append(entry)
def __str__(self):
    """Return a human-readable, column-aligned dump of the metadata.

    Returns "No metadata" when `self.isEmpty` is set.  Otherwise emits
    one "name: value" line per populated field, followed by the tags and
    one "credit" line per entry in `self.credits` (primary credits are
    suffixed with " [P]").  Field names are left-justified to the width
    of the longest name so the values line up.
    """
    if self.isEmpty:
        return "No metadata"

    vals = []

    def add_string(tag, val):
        # skip fields that are unset or render as an empty string
        if val is not None and u"{0}".format(val) != "":
            vals.append((tag, val))

    def add_attr_string(tag):
        add_string(tag, getattr(self, tag))

    # Attribute names, in the order they are printed.
    leading_attrs = (
        "series", "issue", "issueCount", "title", "publisher",
        "year", "month", "day", "volume", "volumeCount",
        "genre", "language", "country", "criticalRating",
        "alternateSeries", "alternateNumber", "alternateCount",
        "imprint", "webLink", "format", "manga", "price",
        "isVersionOf", "rights", "identifier", "lastMark",
    )
    trailing_attrs = (
        "maturityRating", "storyArc", "seriesGroup", "scanInfo",
        "characters", "teams", "locations", "comments", "notes",
    )

    for tag in leading_attrs:
        add_attr_string(tag)
    # blackAndWhite is only listed when it is truthy
    if self.blackAndWhite:
        add_attr_string("blackAndWhite")
    for tag in trailing_attrs:
        add_attr_string(tag)

    add_string("tags", utils.listToString(self.tags))

    for c in self.credits:
        primary = ""
        if 'primary' in c and c['primary']:
            primary = " [P]"
        add_string("credit", c['role'] + ": " + c['person'] + primary)

    # pad to the longest field name plus one, leaving room for the
    # trailing ":" that is appended to every name
    flen = max([len(tag) for tag, _ in vals] or [0]) + 1

    # format the data nicely
    fmt_str = u"{0: <" + str(flen) + "} {1}\n"
    return "".join([fmt_str.format(tag + ":", val) for tag, val in vals])

View File

@ -1,140 +1,133 @@
# coding=utf-8 # coding=utf-8
""" """Support for mixed digit/string type Issue field
Class for handling the odd permutations of an 'issue number' that the comics industry throws at us
e.g.:
"12"
"12.1"
"0"
"-1"
"5AU"
"100-2"
Class for handling the odd permutations of an 'issue number' that the
comics industry throws at us.
e.g.: "12", "12.1", "0", "-1", "5AU", "100-2"
""" """
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
limitations under the License. # limitations under the License.
"""
#import utils
#import math
#import re
import utils
import math
import re
class IssueString: class IssueString:
def __init__(self, text):
# break up the issue number string into 2 parts: the numeric and suffix string.
# ( assumes that the numeric portion is always first )
self.num = None
self.suffix = ""
if text is None: def __init__(self, text):
return
if type(text) == int: # break up the issue number string into 2 parts: the numeric and suffix string.
text = str(text) # (assumes that the numeric portion is always first)
if len(text) == 0: self.num = None
return self.suffix = ""
text = unicode(text)
#skip the minus sign if it's first
if text[0] == '-':
start = 1
else:
start = 0
# if it's still not numeric at start skip it if text is None:
if text[start].isdigit() or text[start] == ".": return
# walk through the string, look for split point (the first non-numeric)
decimal_count = 0
for idx in range( start, len(text) ):
if text[idx] not in "0123456789.":
break
# special case: also split on second "."
if text[idx] == ".":
decimal_count += 1
if decimal_count > 1:
break
else:
idx = len(text)
# move trailing numeric decimal to suffix
# (only if there is other junk after )
if text[idx-1] == "." and len(text) != idx:
idx = idx -1
# if there is no numeric after the minus, make the minus part of the suffix
if idx == 1 and start == 1:
idx = 0
part1 = text[0:idx]
part2 = text[idx:len(text)]
if part1 != "":
self.num = float( part1 )
self.suffix = part2
else:
self.suffix = text
#print "num: {0} suf: {1}".format(self.num, self.suffix)
def asString( self, pad = 0 ): if isinstance(text, int):
#return the float, left side zero-padded, with suffix attached text = str(text)
if self.num is None:
return self.suffix
negative = self.num < 0
num_f = abs(self.num) if len(text) == 0:
return
num_int = int( num_f )
num_s = str( num_int )
if float( num_int ) != num_f:
num_s = str( num_f )
num_s += self.suffix
# create padding
padding = ""
l = len( str(num_int))
if l < pad :
padding = "0" * (pad - l)
num_s = padding + num_s
if negative:
num_s = "-" + num_s
return num_s text = unicode(text)
def asFloat( self ): # skip the minus sign if it's first
#return the float, with no suffix if text[0] == '-':
if self.suffix == u"½": start = 1
if self.num is not None: else:
return self.num + .5 start = 0
else:
return .5 # if it's still not numeric at start skip it
return self.num if text[start].isdigit() or text[start] == ".":
# walk through the string, look for split point (the first
def asInt( self ): # non-numeric)
#return the int version of the float decimal_count = 0
if self.num is None: for idx in range(start, len(text)):
return None if text[idx] not in "0123456789.":
return int( self.num ) break
# special case: also split on second "."
if text[idx] == ".":
decimal_count += 1
if decimal_count > 1:
break
else:
idx = len(text)
# move trailing numeric decimal to suffix
# (only if there is other junk after )
if text[idx - 1] == "." and len(text) != idx:
idx = idx - 1
# if there is no numeric after the minus, make the minus part of
# the suffix
if idx == 1 and start == 1:
idx = 0
part1 = text[0:idx]
part2 = text[idx:len(text)]
if part1 != "":
self.num = float(part1)
self.suffix = part2
else:
self.suffix = text
# print "num: {0} suf: {1}".format(self.num, self.suffix)
def asString(self, pad=0):
    """Render the issue as text: number, zero-padded, plus suffix.

    With no numeric part, the suffix alone is returned.  Otherwise the
    absolute value is shown as an integer when it has no fractional
    part and as a float when it does, the suffix is appended, zeros are
    prepended until the integer part is `pad` digits wide, and a
    leading "-" is added for negative numbers.
    """
    if self.num is None:
        return self.suffix

    sign = "-" if self.num < 0 else ""
    magnitude = abs(self.num)
    whole = int(magnitude)
    whole_text = str(whole)

    # keep the fractional form only when there is a fractional part
    digits = whole_text if float(whole) == magnitude else str(magnitude)

    # padding is measured against the integer part only
    missing = pad - len(whole_text)
    zeros = "0" * missing if missing > 0 else ""

    return sign + zeros + digits + self.suffix
def asFloat(self):
    """Return the numeric part as a float, ignoring the suffix.

    Special case: a suffix that is exactly the one-half character adds
    0.5 to the number, or yields 0.5 by itself when there is no number.
    In every other case `self.num` is returned unchanged (it may be
    None).
    """
    if self.suffix != u"½":
        return self.num
    if self.num is None:
        return .5
    return self.num + .5
def asInt(self):
    """Return the numeric part truncated to an int, or None if unset."""
    return None if self.num is None else int(self.num)

File diff suppressed because it is too large Load Diff