Merge branch 'fcanc-master'

This commit is contained in:
davide-romanini 2015-03-01 15:44:11 +01:00
commit d84110ccb2
8 changed files with 2037 additions and 1960 deletions

View File

@ -1,260 +1,276 @@
"""
A python class to encapsulate CoMet data
"""
"""A class to encapsulate CoMet data"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET
#from datetime import datetime
#from pprint import pprint
#import zipfile
from genericmetadata import GenericMetadata
import utils
class CoMet:
writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ]
inker_synonyms = [ 'inker', 'artist', 'finishes' ]
colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ]
letterer_synonyms = [ 'letterer']
cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ]
editor_synonyms = [ 'editor']
def metadataFromString( self, string ):
tree = ET.ElementTree(ET.fromstring( string ))
return self.convertXMLToMetadata( tree )
writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns']
inker_synonyms = ['inker', 'artist', 'finishes']
colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer']
letterer_synonyms = ['letterer']
cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist']
editor_synonyms = ['editor']
def stringFromMetadata( self, metadata ):
def metadataFromString(self, string):
header = '<?xml version="1.0" encoding="UTF-8"?>\n'
tree = self.convertMetadataToXML( self, metadata )
return header + ET.tostring(tree.getroot())
tree = ET.ElementTree(ET.fromstring(string))
return self.convertXMLToMetadata(tree)
def indent( self, elem, level=0 ):
# for making the XML output readable
i = "\n" + level*" "
if len(elem):
if not elem.text or not elem.text.strip():
elem.text = i + " "
if not elem.tail or not elem.tail.strip():
elem.tail = i
for elem in elem:
self.indent( elem, level+1 )
if not elem.tail or not elem.tail.strip():
elem.tail = i
else:
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
def convertMetadataToXML( self, filename, metadata ):
def stringFromMetadata(self, metadata):
#shorthand for the metadata
md = metadata
header = '<?xml version="1.0" encoding="UTF-8"?>\n'
# build a tree structure
root = ET.Element("comet")
root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/"
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd"
#helper func
def assign( comet_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry)
# title is manditory
if md.title is None:
md.title = ""
assign( 'title', md.title )
assign( 'series', md.series )
assign( 'issue', md.issue ) #must be int??
assign( 'volume', md.volume )
assign( 'description', md.comments )
assign( 'publisher', md.publisher )
assign( 'pages', md.pageCount )
assign( 'format', md.format )
assign( 'language', md.language )
assign( 'rating', md.maturityRating )
assign( 'price', md.price )
assign( 'isVersionOf', md.isVersionOf )
assign( 'rights', md.rights )
assign( 'identifier', md.identifier )
assign( 'lastMark', md.lastMark )
assign( 'genre', md.genre ) # TODO repeatable
tree = self.convertMetadataToXML(self, metadata)
return header + ET.tostring(tree.getroot())
if md.characters is not None:
char_list = [ c.strip() for c in md.characters.split(',') ]
for c in char_list:
assign( 'character', c )
if md.manga is not None and md.manga == "YesAndRightToLeft":
assign( 'readingDirection', "rtl")
date_str = ""
if md.year is not None:
date_str = str(md.year).zfill(4)
if md.month is not None:
date_str += "-" + str(md.month).zfill(2)
assign( 'date', date_str )
def indent(self, elem, level=0):
# for making the XML output readable
i = "\n" + level * " "
if len(elem):
if not elem.text or not elem.text.strip():
elem.text = i + " "
if not elem.tail or not elem.tail.strip():
elem.tail = i
for elem in elem:
self.indent(elem, level + 1)
if not elem.tail or not elem.tail.strip():
elem.tail = i
else:
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
assign( 'coverImage', md.coverImage )
def convertMetadataToXML(self, filename, metadata):
# need to specially process the credits, since they are structured differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
# loop thru credits, and build a list for each role that CoMet supports
for credit in metadata.credits:
# shorthand for the metadata
md = metadata
if credit['role'].lower() in set( self.writer_synonyms ):
ET.SubElement(root, 'writer').text = u"{0}".format(credit['person'])
# build a tree structure
root = ET.Element("comet")
root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/"
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib[
'xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd"
if credit['role'].lower() in set( self.penciller_synonyms ):
ET.SubElement(root, 'penciller').text = u"{0}".format(credit['person'])
if credit['role'].lower() in set( self.inker_synonyms ):
ET.SubElement(root, 'inker').text = u"{0}".format(credit['person'])
if credit['role'].lower() in set( self.colorist_synonyms ):
ET.SubElement(root, 'colorist').text = u"{0}".format(credit['person'])
# helper func
def assign(comet_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry)
if credit['role'].lower() in set( self.letterer_synonyms ):
ET.SubElement(root, 'letterer').text = u"{0}".format(credit['person'])
# title is manditory
if md.title is None:
md.title = ""
assign('title', md.title)
assign('series', md.series)
assign('issue', md.issue) # must be int??
assign('volume', md.volume)
assign('description', md.comments)
assign('publisher', md.publisher)
assign('pages', md.pageCount)
assign('format', md.format)
assign('language', md.language)
assign('rating', md.maturityRating)
assign('price', md.price)
assign('isVersionOf', md.isVersionOf)
assign('rights', md.rights)
assign('identifier', md.identifier)
assign('lastMark', md.lastMark)
assign('genre', md.genre) # TODO repeatable
if credit['role'].lower() in set( self.cover_synonyms ):
ET.SubElement(root, 'coverDesigner').text = u"{0}".format(credit['person'])
if md.characters is not None:
char_list = [c.strip() for c in md.characters.split(',')]
for c in char_list:
assign('character', c)
if credit['role'].lower() in set( self.editor_synonyms ):
ET.SubElement(root, 'editor').text = u"{0}".format(credit['person'])
if md.manga is not None and md.manga == "YesAndRightToLeft":
assign('readingDirection', "rtl")
# self pretty-print
self.indent(root)
date_str = ""
if md.year is not None:
date_str = str(md.year).zfill(4)
if md.month is not None:
date_str += "-" + str(md.month).zfill(2)
assign('date', date_str)
# wrap it in an ElementTree instance, and save as XML
tree = ET.ElementTree(root)
return tree
assign('coverImage', md.coverImage)
def convertXMLToMetadata( self, tree ):
root = tree.getroot()
# need to specially process the credits, since they are structured
# differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
if root.tag != 'comet':
raise 1
return None
# loop thru credits, and build a list for each role that CoMet supports
for credit in metadata.credits:
metadata = GenericMetadata()
md = metadata
# Helper function
def xlate( tag ):
node = root.find( tag )
if node is not None:
return node.text
else:
return None
md.series = xlate( 'series' )
md.title = xlate( 'title' )
md.issue = xlate( 'issue' )
md.volume = xlate( 'volume' )
md.comments = xlate( 'description' )
md.publisher = xlate( 'publisher' )
md.language = xlate( 'language' )
md.format = xlate( 'format' )
md.pageCount = xlate( 'pages' )
md.maturityRating = xlate( 'rating' )
md.price = xlate( 'price' )
md.isVersionOf = xlate( 'isVersionOf' )
md.rights = xlate( 'rights' )
md.identifier = xlate( 'identifier' )
md.lastMark = xlate( 'lastMark' )
md.genre = xlate( 'genre' ) # TODO - repeatable field
if credit['role'].lower() in set(self.writer_synonyms):
ET.SubElement(
root,
'writer').text = u"{0}".format(
credit['person'])
date = xlate( 'date' )
if date is not None:
parts = date.split('-')
if len( parts) > 0:
md.year = parts[0]
if len( parts) > 1:
md.month = parts[1]
md.coverImage = xlate( 'coverImage' )
readingDirection = xlate( 'readingDirection' )
if readingDirection is not None and readingDirection == "rtl":
md.manga = "YesAndRightToLeft"
# loop for character tags
char_list = []
for n in root:
if n.tag == 'character':
char_list.append(n.text.strip())
md.characters = utils.listToString( char_list )
if credit['role'].lower() in set(self.penciller_synonyms):
ET.SubElement(
root,
'penciller').text = u"{0}".format(
credit['person'])
# Now extract the credit info
for n in root:
if ( n.tag == 'writer' or
n.tag == 'penciller' or
n.tag == 'inker' or
n.tag == 'colorist' or
n.tag == 'letterer' or
n.tag == 'editor'
):
metadata.addCredit( n.text.strip(), n.tag.title() )
if credit['role'].lower() in set(self.inker_synonyms):
ET.SubElement(
root,
'inker').text = u"{0}".format(
credit['person'])
if n.tag == 'coverDesigner':
metadata.addCredit( n.text.strip(), "Cover" )
if credit['role'].lower() in set(self.colorist_synonyms):
ET.SubElement(
root,
'colorist').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set(self.letterer_synonyms):
ET.SubElement(
root,
'letterer').text = u"{0}".format(
credit['person'])
metadata.isEmpty = False
return metadata
if credit['role'].lower() in set(self.cover_synonyms):
ET.SubElement(
root,
'coverDesigner').text = u"{0}".format(
credit['person'])
#verify that the string actually contains CoMet data in XML format
def validateString( self, string ):
try:
tree = ET.ElementTree(ET.fromstring( string ))
root = tree.getroot()
if root.tag != 'comet':
raise Exception
except:
return False
return True
if credit['role'].lower() in set(self.editor_synonyms):
ET.SubElement(
root,
'editor').text = u"{0}".format(
credit['person'])
# self pretty-print
self.indent(root)
def writeToExternalFile( self, filename, metadata ):
tree = self.convertMetadataToXML( self, metadata )
#ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile( self, filename ):
# wrap it in an ElementTree instance, and save as XML
tree = ET.ElementTree(root)
return tree
tree = ET.parse( filename )
return self.convertXMLToMetadata( tree )
def convertXMLToMetadata(self, tree):
root = tree.getroot()
if root.tag != 'comet':
raise 1
return None
metadata = GenericMetadata()
md = metadata
# Helper function
def xlate(tag):
node = root.find(tag)
if node is not None:
return node.text
else:
return None
md.series = xlate('series')
md.title = xlate('title')
md.issue = xlate('issue')
md.volume = xlate('volume')
md.comments = xlate('description')
md.publisher = xlate('publisher')
md.language = xlate('language')
md.format = xlate('format')
md.pageCount = xlate('pages')
md.maturityRating = xlate('rating')
md.price = xlate('price')
md.isVersionOf = xlate('isVersionOf')
md.rights = xlate('rights')
md.identifier = xlate('identifier')
md.lastMark = xlate('lastMark')
md.genre = xlate('genre') # TODO - repeatable field
date = xlate('date')
if date is not None:
parts = date.split('-')
if len(parts) > 0:
md.year = parts[0]
if len(parts) > 1:
md.month = parts[1]
md.coverImage = xlate('coverImage')
readingDirection = xlate('readingDirection')
if readingDirection is not None and readingDirection == "rtl":
md.manga = "YesAndRightToLeft"
# loop for character tags
char_list = []
for n in root:
if n.tag == 'character':
char_list.append(n.text.strip())
md.characters = utils.listToString(char_list)
# Now extract the credit info
for n in root:
if (n.tag == 'writer' or
n.tag == 'penciller' or
n.tag == 'inker' or
n.tag == 'colorist' or
n.tag == 'letterer' or
n.tag == 'editor'
):
metadata.addCredit(n.text.strip(), n.tag.title())
if n.tag == 'coverDesigner':
metadata.addCredit(n.text.strip(), "Cover")
metadata.isEmpty = False
return metadata
# verify that the string actually contains CoMet data in XML format
def validateString(self, string):
try:
tree = ET.ElementTree(ET.fromstring(string))
root = tree.getroot()
if root.tag != 'comet':
raise Exception
except:
return False
return True
def writeToExternalFile(self, filename, metadata):
tree = self.convertMetadataToXML(self, metadata)
# ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):
tree = ET.parse(filename)
return self.convertXMLToMetadata(tree)

File diff suppressed because it is too large Load Diff

View File

@ -1,152 +1,144 @@
"""
A python class to encapsulate the ComicBookInfo data
"""
"""A class to encapsulate the ComicBookInfo data"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from datetime import datetime
import zipfile
#import zipfile
from genericmetadata import GenericMetadata
import utils
#import ctversion
class ComicBookInfo:
def metadataFromString( self, string ):
cbi_container = json.loads( unicode(string, 'utf-8') )
def metadataFromString(self, string):
metadata = GenericMetadata()
cbi_container = json.loads(unicode(string, 'utf-8'))
cbi = cbi_container[ 'ComicBookInfo/1.0' ]
metadata = GenericMetadata()
#helper func
# If item is not in CBI, return None
def xlate( cbi_entry):
if cbi_entry in cbi:
return cbi[cbi_entry]
else:
return None
metadata.series = xlate( 'series' )
metadata.title = xlate( 'title' )
metadata.issue = xlate( 'issue' )
metadata.publisher = xlate( 'publisher' )
metadata.month = xlate( 'publicationMonth' )
metadata.year = xlate( 'publicationYear' )
metadata.issueCount = xlate( 'numberOfIssues' )
metadata.comments = xlate( 'comments' )
metadata.credits = xlate( 'credits' )
metadata.genre = xlate( 'genre' )
metadata.volume = xlate( 'volume' )
metadata.volumeCount = xlate( 'numberOfVolumes' )
metadata.language = xlate( 'language' )
metadata.country = xlate( 'country' )
metadata.criticalRating = xlate( 'rating' )
metadata.tags = xlate( 'tags' )
# make sure credits and tags are at least empty lists and not None
if metadata.credits is None:
metadata.credits = []
if metadata.tags is None:
metadata.tags = []
#need to massage the language string to be ISO
if metadata.language is not None:
# reverse look-up
pattern = metadata.language
metadata.language = None
for key in utils.getLanguageDict():
if utils.getLanguageDict()[ key ] == pattern.encode('utf-8'):
metadata.language = key
break
metadata.isEmpty = False
return metadata
cbi = cbi_container['ComicBookInfo/1.0']
def stringFromMetadata( self, metadata ):
# helper func
# If item is not in CBI, return None
def xlate(cbi_entry):
if cbi_entry in cbi:
return cbi[cbi_entry]
else:
return None
cbi_container = self.createJSONDictionary( metadata )
return json.dumps( cbi_container )
#verify that the string actually contains CBI data in JSON format
def validateString( self, string ):
try:
cbi_container = json.loads( string )
except:
return False
return ( 'ComicBookInfo/1.0' in cbi_container )
metadata.series = xlate('series')
metadata.title = xlate('title')
metadata.issue = xlate('issue')
metadata.publisher = xlate('publisher')
metadata.month = xlate('publicationMonth')
metadata.year = xlate('publicationYear')
metadata.issueCount = xlate('numberOfIssues')
metadata.comments = xlate('comments')
metadata.credits = xlate('credits')
metadata.genre = xlate('genre')
metadata.volume = xlate('volume')
metadata.volumeCount = xlate('numberOfVolumes')
metadata.language = xlate('language')
metadata.country = xlate('country')
metadata.criticalRating = xlate('rating')
metadata.tags = xlate('tags')
# make sure credits and tags are at least empty lists and not None
if metadata.credits is None:
metadata.credits = []
if metadata.tags is None:
metadata.tags = []
def createJSONDictionary( self, metadata ):
# Create the dictionary that we will convert to JSON text
cbi = dict()
cbi_container = {'appID' : 'ComicTagger/' + '1.0.0', #ctversion.version,
'lastModified' : str(datetime.now()),
'ComicBookInfo/1.0' : cbi }
#helper func
def assign( cbi_entry, md_entry):
if md_entry is not None:
cbi[cbi_entry] = md_entry
#helper func
def toInt(s):
i = None
if type(s) in [ str, unicode, int ]:
try:
i = int(s)
except ValueError:
pass
return i
assign( 'series', metadata.series )
assign( 'title', metadata.title )
assign( 'issue', metadata.issue )
assign( 'publisher', metadata.publisher )
assign( 'publicationMonth', toInt(metadata.month) )
assign( 'publicationYear', toInt(metadata.year) )
assign( 'numberOfIssues', toInt(metadata.issueCount) )
assign( 'comments', metadata.comments )
assign( 'genre', metadata.genre )
assign( 'volume', toInt(metadata.volume) )
assign( 'numberOfVolumes', toInt(metadata.volumeCount) )
assign( 'language', utils.getLanguageFromISO(metadata.language) )
assign( 'country', metadata.country )
assign( 'rating', metadata.criticalRating )
assign( 'credits', metadata.credits )
assign( 'tags', metadata.tags )
return cbi_container
# need to massage the language string to be ISO
if metadata.language is not None:
# reverse look-up
pattern = metadata.language
metadata.language = None
for key in utils.getLanguageDict():
if utils.getLanguageDict()[key] == pattern.encode('utf-8'):
metadata.language = key
break
def writeToExternalFile( self, filename, metadata ):
metadata.isEmpty = False
cbi_container = self.createJSONDictionary(metadata)
return metadata
f = open(filename, 'w')
f.write(json.dumps(cbi_container, indent=4))
f.close
def stringFromMetadata(self, metadata):
cbi_container = self.createJSONDictionary(metadata)
return json.dumps(cbi_container)
def validateString(self, string):
"""Verify that the string actually contains CBI data in JSON format"""
try:
cbi_container = json.loads(string)
except:
return False
return ('ComicBookInfo/1.0' in cbi_container)
def createJSONDictionary(self, metadata):
"""Create the dictionary that we will convert to JSON text"""
cbi = dict()
cbi_container = {'appID': 'ComicTagger/' + '1.0.0', # ctversion.version,
'lastModified': str(datetime.now()),
'ComicBookInfo/1.0': cbi}
# helper func
def assign(cbi_entry, md_entry):
if md_entry is not None:
cbi[cbi_entry] = md_entry
# helper func
def toInt(s):
i = None
if type(s) in [str, unicode, int]:
try:
i = int(s)
except ValueError:
pass
return i
assign('series', metadata.series)
assign('title', metadata.title)
assign('issue', metadata.issue)
assign('publisher', metadata.publisher)
assign('publicationMonth', toInt(metadata.month))
assign('publicationYear', toInt(metadata.year))
assign('numberOfIssues', toInt(metadata.issueCount))
assign('comments', metadata.comments)
assign('genre', metadata.genre)
assign('volume', toInt(metadata.volume))
assign('numberOfVolumes', toInt(metadata.volumeCount))
assign('language', utils.getLanguageFromISO(metadata.language))
assign('country', metadata.country)
assign('rating', metadata.criticalRating)
assign('credits', metadata.credits)
assign('tags', metadata.tags)
return cbi_container
def writeToExternalFile(self, filename, metadata):
cbi_container = self.createJSONDictionary(metadata)
f = open(filename, 'w')
f.write(json.dumps(cbi_container, indent=4))
f.close

View File

@ -1,293 +1,290 @@
"""
A python class to encapsulate ComicRack's ComicInfo.xml data
"""
"""A class to encapsulate ComicRack's ComicInfo.xml data"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET
#from datetime import datetime
#from pprint import pprint
#import zipfile
from genericmetadata import GenericMetadata
import utils
class ComicInfoXml:
writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ]
inker_synonyms = [ 'inker', 'artist', 'finishes' ]
colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ]
letterer_synonyms = [ 'letterer']
cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ]
editor_synonyms = [ 'editor']
writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns']
inker_synonyms = ['inker', 'artist', 'finishes']
colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer']
letterer_synonyms = ['letterer']
cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist']
editor_synonyms = ['editor']
def getParseableCredits( self ):
parsable_credits = []
parsable_credits.extend( self.writer_synonyms )
parsable_credits.extend( self.penciller_synonyms )
parsable_credits.extend( self.inker_synonyms )
parsable_credits.extend( self.colorist_synonyms )
parsable_credits.extend( self.letterer_synonyms )
parsable_credits.extend( self.cover_synonyms )
parsable_credits.extend( self.editor_synonyms )
return parsable_credits
def metadataFromString( self, string ):
def getParseableCredits(self):
parsable_credits = []
parsable_credits.extend(self.writer_synonyms)
parsable_credits.extend(self.penciller_synonyms)
parsable_credits.extend(self.inker_synonyms)
parsable_credits.extend(self.colorist_synonyms)
parsable_credits.extend(self.letterer_synonyms)
parsable_credits.extend(self.cover_synonyms)
parsable_credits.extend(self.editor_synonyms)
return parsable_credits
tree = ET.ElementTree(ET.fromstring( string ))
return self.convertXMLToMetadata( tree )
def metadataFromString(self, string):
def stringFromMetadata( self, metadata ):
tree = ET.ElementTree(ET.fromstring(string))
return self.convertXMLToMetadata(tree)
header = '<?xml version="1.0"?>\n'
tree = self.convertMetadataToXML( self, metadata )
return header + ET.tostring(tree.getroot())
def stringFromMetadata(self, metadata):
def indent( self, elem, level=0 ):
# for making the XML output readable
i = "\n" + level*" "
if len(elem):
if not elem.text or not elem.text.strip():
elem.text = i + " "
if not elem.tail or not elem.tail.strip():
elem.tail = i
for elem in elem:
self.indent( elem, level+1 )
if not elem.tail or not elem.tail.strip():
elem.tail = i
else:
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
def convertMetadataToXML( self, filename, metadata ):
header = '<?xml version="1.0"?>\n'
#shorthand for the metadata
md = metadata
tree = self.convertMetadataToXML(self, metadata)
return header + ET.tostring(tree.getroot())
# build a tree structure
root = ET.Element("ComicInfo")
root.attrib['xmlns:xsi']="http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xmlns:xsd']="http://www.w3.org/2001/XMLSchema"
#helper func
def assign( cix_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry)
def indent(self, elem, level=0):
# for making the XML output readable
i = "\n" + level * " "
if len(elem):
if not elem.text or not elem.text.strip():
elem.text = i + " "
if not elem.tail or not elem.tail.strip():
elem.tail = i
for elem in elem:
self.indent(elem, level + 1)
if not elem.tail or not elem.tail.strip():
elem.tail = i
else:
if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i
assign( 'Title', md.title )
assign( 'Series', md.series )
assign( 'Number', md.issue )
assign( 'Count', md.issueCount )
assign( 'Volume', md.volume )
assign( 'AlternateSeries', md.alternateSeries )
assign( 'AlternateNumber', md.alternateNumber )
assign( 'StoryArc', md.storyArc )
assign( 'SeriesGroup', md.seriesGroup )
assign( 'AlternateCount', md.alternateCount )
assign( 'Summary', md.comments )
assign( 'Notes', md.notes )
assign( 'Year', md.year )
assign( 'Month', md.month )
assign( 'Day', md.day )
def convertMetadataToXML(self, filename, metadata):
# need to specially process the credits, since they are structured differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
# first, loop thru credits, and build a list for each role that CIX supports
for credit in metadata.credits:
# shorthand for the metadata
md = metadata
if credit['role'].lower() in set( self.writer_synonyms ):
credit_writer_list.append(credit['person'].replace(",",""))
# build a tree structure
root = ET.Element("ComicInfo")
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema"
# helper func
if credit['role'].lower() in set( self.penciller_synonyms ):
credit_penciller_list.append(credit['person'].replace(",",""))
if credit['role'].lower() in set( self.inker_synonyms ):
credit_inker_list.append(credit['person'].replace(",",""))
if credit['role'].lower() in set( self.colorist_synonyms ):
credit_colorist_list.append(credit['person'].replace(",",""))
def assign(cix_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry)
if credit['role'].lower() in set( self.letterer_synonyms ):
credit_letterer_list.append(credit['person'].replace(",",""))
assign('Title', md.title)
assign('Series', md.series)
assign('Number', md.issue)
assign('Count', md.issueCount)
assign('Volume', md.volume)
assign('AlternateSeries', md.alternateSeries)
assign('AlternateNumber', md.alternateNumber)
assign('StoryArc', md.storyArc)
assign('SeriesGroup', md.seriesGroup)
assign('AlternateCount', md.alternateCount)
assign('Summary', md.comments)
assign('Notes', md.notes)
assign('Year', md.year)
assign('Month', md.month)
assign('Day', md.day)
if credit['role'].lower() in set( self.cover_synonyms ):
credit_cover_list.append(credit['person'].replace(",",""))
# need to specially process the credits, since they are structured
# differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
if credit['role'].lower() in set( self.editor_synonyms ):
credit_editor_list.append(credit['person'].replace(",",""))
# second, convert each list to string, and add to XML struct
if len( credit_writer_list ) > 0:
node = ET.SubElement(root, 'Writer')
node.text = utils.listToString( credit_writer_list )
# first, loop thru credits, and build a list for each role that CIX
# supports
for credit in metadata.credits:
if len( credit_penciller_list ) > 0:
node = ET.SubElement(root, 'Penciller')
node.text = utils.listToString( credit_penciller_list )
if credit['role'].lower() in set(self.writer_synonyms):
credit_writer_list.append(credit['person'].replace(",", ""))
if len( credit_inker_list ) > 0:
node = ET.SubElement(root, 'Inker')
node.text = utils.listToString( credit_inker_list )
if credit['role'].lower() in set(self.penciller_synonyms):
credit_penciller_list.append(credit['person'].replace(",", ""))
if len( credit_colorist_list ) > 0:
node = ET.SubElement(root, 'Colorist')
node.text = utils.listToString( credit_colorist_list )
if credit['role'].lower() in set(self.inker_synonyms):
credit_inker_list.append(credit['person'].replace(",", ""))
if len( credit_letterer_list ) > 0:
node = ET.SubElement(root, 'Letterer')
node.text = utils.listToString( credit_letterer_list )
if credit['role'].lower() in set(self.colorist_synonyms):
credit_colorist_list.append(credit['person'].replace(",", ""))
if len( credit_cover_list ) > 0:
node = ET.SubElement(root, 'CoverArtist')
node.text = utils.listToString( credit_cover_list )
if len( credit_editor_list ) > 0:
node = ET.SubElement(root, 'Editor')
node.text = utils.listToString( credit_editor_list )
if credit['role'].lower() in set(self.letterer_synonyms):
credit_letterer_list.append(credit['person'].replace(",", ""))
assign( 'Publisher', md.publisher )
assign( 'Imprint', md.imprint )
assign( 'Genre', md.genre )
assign( 'Web', md.webLink )
assign( 'PageCount', md.pageCount )
assign( 'LanguageISO', md.language )
assign( 'Format', md.format )
assign( 'AgeRating', md.maturityRating )
if md.blackAndWhite is not None and md.blackAndWhite:
ET.SubElement(root, 'BlackAndWhite').text = "Yes"
assign( 'Manga', md.manga )
assign( 'Characters', md.characters )
assign( 'Teams', md.teams )
assign( 'Locations', md.locations )
assign( 'ScanInformation', md.scanInfo )
if credit['role'].lower() in set(self.cover_synonyms):
credit_cover_list.append(credit['person'].replace(",", ""))
# loop and add the page entries under pages node
if len( md.pages ) > 0:
pages_node = ET.SubElement(root, 'Pages')
for page_dict in md.pages:
page_node = ET.SubElement(pages_node, 'Page')
page_node.attrib = page_dict
if credit['role'].lower() in set(self.editor_synonyms):
credit_editor_list.append(credit['person'].replace(",", ""))
# self pretty-print
self.indent(root)
# second, convert each list to string, and add to XML struct
if len(credit_writer_list) > 0:
node = ET.SubElement(root, 'Writer')
node.text = utils.listToString(credit_writer_list)
# wrap it in an ElementTree instance, and save as XML
tree = ET.ElementTree(root)
return tree
if len(credit_penciller_list) > 0:
node = ET.SubElement(root, 'Penciller')
node.text = utils.listToString(credit_penciller_list)
def convertXMLToMetadata( self, tree ):
root = tree.getroot()
if len(credit_inker_list) > 0:
node = ET.SubElement(root, 'Inker')
node.text = utils.listToString(credit_inker_list)
if root.tag != 'ComicInfo':
raise 1
return None
if len(credit_colorist_list) > 0:
node = ET.SubElement(root, 'Colorist')
node.text = utils.listToString(credit_colorist_list)
metadata = GenericMetadata()
md = metadata
# Helper function
def xlate( tag ):
node = root.find( tag )
if node is not None:
return node.text
else:
return None
md.series = xlate( 'Series' )
md.title = xlate( 'Title' )
md.issue = xlate( 'Number' )
md.issueCount = xlate( 'Count' )
md.volume = xlate( 'Volume' )
md.alternateSeries = xlate( 'AlternateSeries' )
md.alternateNumber = xlate( 'AlternateNumber' )
md.alternateCount = xlate( 'AlternateCount' )
md.comments = xlate( 'Summary' )
md.notes = xlate( 'Notes' )
md.year = xlate( 'Year' )
md.month = xlate( 'Month' )
md.day = xlate( 'Day' )
md.publisher = xlate( 'Publisher' )
md.imprint = xlate( 'Imprint' )
md.genre = xlate( 'Genre' )
md.webLink = xlate( 'Web' )
md.language = xlate( 'LanguageISO' )
md.format = xlate( 'Format' )
md.manga = xlate( 'Manga' )
md.characters = xlate( 'Characters' )
md.teams = xlate( 'Teams' )
md.locations = xlate( 'Locations' )
md.pageCount = xlate( 'PageCount' )
md.scanInfo = xlate( 'ScanInformation' )
md.storyArc = xlate( 'StoryArc' )
md.seriesGroup = xlate( 'SeriesGroup' )
md.maturityRating = xlate( 'AgeRating' )
if len(credit_letterer_list) > 0:
node = ET.SubElement(root, 'Letterer')
node.text = utils.listToString(credit_letterer_list)
tmp = xlate( 'BlackAndWhite' )
md.blackAndWhite = False
if tmp is not None and tmp.lower() in [ "yes", "true", "1" ]:
md.blackAndWhite = True
# Now extract the credit info
for n in root:
if ( n.tag == 'Writer' or
n.tag == 'Penciller' or
n.tag == 'Inker' or
n.tag == 'Colorist' or
n.tag == 'Letterer' or
n.tag == 'Editor'
):
if n.text is not None:
for name in n.text.split(','):
metadata.addCredit( name.strip(), n.tag )
if len(credit_cover_list) > 0:
node = ET.SubElement(root, 'CoverArtist')
node.text = utils.listToString(credit_cover_list)
if n.tag == 'CoverArtist':
if n.text is not None:
for name in n.text.split(','):
metadata.addCredit( name.strip(), "Cover" )
if len(credit_editor_list) > 0:
node = ET.SubElement(root, 'Editor')
node.text = utils.listToString(credit_editor_list)
# parse page data now
pages_node = root.find( "Pages" )
if pages_node is not None:
for page in pages_node:
metadata.pages.append( page.attrib )
#print page.attrib
assign('Publisher', md.publisher)
assign('Imprint', md.imprint)
assign('Genre', md.genre)
assign('Web', md.webLink)
assign('PageCount', md.pageCount)
assign('LanguageISO', md.language)
assign('Format', md.format)
assign('AgeRating', md.maturityRating)
if md.blackAndWhite is not None and md.blackAndWhite:
ET.SubElement(root, 'BlackAndWhite').text = "Yes"
assign('Manga', md.manga)
assign('Characters', md.characters)
assign('Teams', md.teams)
assign('Locations', md.locations)
assign('ScanInformation', md.scanInfo)
metadata.isEmpty = False
return metadata
# loop and add the page entries under pages node
if len(md.pages) > 0:
pages_node = ET.SubElement(root, 'Pages')
for page_dict in md.pages:
page_node = ET.SubElement(pages_node, 'Page')
page_node.attrib = page_dict
def writeToExternalFile( self, filename, metadata ):
tree = self.convertMetadataToXML( self, metadata )
#ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile( self, filename ):
# self pretty-print
self.indent(root)
tree = ET.parse( filename )
return self.convertXMLToMetadata( tree )
# wrap it in an ElementTree instance, and save as XML
tree = ET.ElementTree(root)
return tree
def convertXMLToMetadata(self, tree):
root = tree.getroot()
if root.tag != 'ComicInfo':
raise 1
return None
metadata = GenericMetadata()
md = metadata
# Helper function
def xlate(tag):
node = root.find(tag)
if node is not None:
return node.text
else:
return None
md.series = xlate('Series')
md.title = xlate('Title')
md.issue = xlate('Number')
md.issueCount = xlate('Count')
md.volume = xlate('Volume')
md.alternateSeries = xlate('AlternateSeries')
md.alternateNumber = xlate('AlternateNumber')
md.alternateCount = xlate('AlternateCount')
md.comments = xlate('Summary')
md.notes = xlate('Notes')
md.year = xlate('Year')
md.month = xlate('Month')
md.day = xlate('Day')
md.publisher = xlate('Publisher')
md.imprint = xlate('Imprint')
md.genre = xlate('Genre')
md.webLink = xlate('Web')
md.language = xlate('LanguageISO')
md.format = xlate('Format')
md.manga = xlate('Manga')
md.characters = xlate('Characters')
md.teams = xlate('Teams')
md.locations = xlate('Locations')
md.pageCount = xlate('PageCount')
md.scanInfo = xlate('ScanInformation')
md.storyArc = xlate('StoryArc')
md.seriesGroup = xlate('SeriesGroup')
md.maturityRating = xlate('AgeRating')
tmp = xlate('BlackAndWhite')
md.blackAndWhite = False
if tmp is not None and tmp.lower() in ["yes", "true", "1"]:
md.blackAndWhite = True
# Now extract the credit info
for n in root:
if (n.tag == 'Writer' or
n.tag == 'Penciller' or
n.tag == 'Inker' or
n.tag == 'Colorist' or
n.tag == 'Letterer' or
n.tag == 'Editor'
):
if n.text is not None:
for name in n.text.split(','):
metadata.addCredit(name.strip(), n.tag)
if n.tag == 'CoverArtist':
if n.text is not None:
for name in n.text.split(','):
metadata.addCredit(name.strip(), "Cover")
# parse page data now
pages_node = root.find("Pages")
if pages_node is not None:
for page in pages_node:
metadata.pages.append(page.attrib)
# print page.attrib
metadata.isEmpty = False
return metadata
def writeToExternalFile(self, filename, metadata):
tree = self.convertMetadataToXML(self, metadata)
# ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):
tree = ET.parse(filename)
return self.convertXMLToMetadata(tree)

View File

@ -1,26 +1,21 @@
"""
Functions for parsing comic info from filename
"""Functions for parsing comic info from filename
This should probably be re-written, but, well, it mostly works!
"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Some portions of this code were modified from pyComicMetaThis project
# http://code.google.com/p/pycomicmetathis/
@ -29,27 +24,27 @@ import re
import os
from urllib import unquote
class FileNameParser:
def repl(self, m):
return ' ' * len(m.group())
return ' ' * len(m.group())
def fixSpaces( self, string, remove_dashes=True ):
def fixSpaces(self, string, remove_dashes=True):
if remove_dashes:
placeholders = ['[-_]',' +']
placeholders = ['[-_]', ' +']
else:
placeholders = ['[_]',' +']
placeholders = ['[_]', ' +']
for ph in placeholders:
string = re.sub(ph, self.repl, string )
return string #.strip()
string = re.sub(ph, self.repl, string)
return string # .strip()
def getIssueCount( self,filename, issue_end ):
def getIssueCount(self, filename, issue_end):
count = ""
filename = filename[issue_end:]
# replace any name seperators with spaces
# replace any name separators with spaces
tmpstr = self.fixSpaces(filename)
found = False
@ -59,48 +54,51 @@ class FileNameParser:
found = True
if not found:
match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
if match:
count = match.group()
found = True
count = count.lstrip("0")
return count
def getIssueNumber( self, filename ):
# Returns a tuple of issue number string, and start and end indexs in the filename
# (The indexes will be used to split the string up for further parsing)
def getIssueNumber(self, filename):
"""Returns a tuple of issue number string, and start and end indexes in the filename
(The indexes will be used to split the string up for further parsing)
"""
found = False
issue = ''
start = 0
end = 0
# first, look for multiple "--", this means it's formatted differently from most:
# first, look for multiple "--", this means it's formatted differently
# from most:
if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
# the pattern seems to be that anything to left of the first "--"
# is the series name followed by issue
filename = re.sub("--.*", self.repl, filename)
elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
# the pattern seems to be that anything to left of the first "__"
# is the series name followed by issue
filename = re.sub("__.*", self.repl, filename)
filename = filename.replace("+", " ")
# replace parenthetical phrases with spaces
filename = re.sub( "\(.*?\)", self.repl, filename)
filename = re.sub( "\[.*?\]", self.repl, filename)
filename = re.sub("\(.*?\)", self.repl, filename)
filename = re.sub("\[.*?\]", self.repl, filename)
# replace any name seperators with spaces
# replace any name separators with spaces
filename = self.fixSpaces(filename)
# remove any "of NN" phrase with spaces (problem: this could break on some titles)
filename = re.sub( "of [\d]+", self.repl, filename)
# remove any "of NN" phrase with spaces (problem: this could break on
# some titles)
filename = re.sub("of [\d]+", self.repl, filename)
#print u"[{0}]".format(filename)
# print u"[{0}]".format(filename)
# we should now have a cleaned up filename version with all the words in
# the same positions as original filename
@ -108,27 +106,28 @@ class FileNameParser:
# make a list of each word and its position
word_list = list()
for m in re.finditer("\S+", filename):
word_list.append( (m.group(0), m.start(), m.end()) )
word_list.append((m.group(0), m.start(), m.end()))
# remove the first word, since it can't be the issue number
if len(word_list) > 1:
word_list = word_list[1:]
else:
#only one word?? just bail.
# only one word?? just bail.
return issue, start, end
# Now try to search for the likely issue number word in the list
# first look for a word with "#" followed by digits with optional sufix
# first look for a word with "#" followed by digits with optional suffix
# this is almost certainly the issue number
for w in reversed(word_list):
if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
found = True
break
# same as above but w/o a '#', and only look at the last word in the list
# same as above but w/o a '#', and only look at the last word in the
# list
if not found:
w = word_list[-1]
w = word_list[-1]
if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
found = True
@ -148,20 +147,21 @@ class FileNameParser:
return issue, start, end
def getSeriesName(self, filename, issue_start ):
# use the issue number string index to split the filename string
def getSeriesName(self, filename, issue_start):
"""Use the issue number string index to split the filename string"""
if issue_start != 0:
filename = filename[:issue_start]
# in case there is no issue number, remove some obvious stuff
if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
# the pattern seems to be that anything to left of the first "--"
# is the series name followed by issue
filename = re.sub("--.*", self.repl, filename)
elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
# the pattern seems to be that anything to left of the first "__"
# is the series name followed by issue
filename = re.sub("__.*", self.repl, filename)
filename = filename.replace("+", " ")
@ -170,14 +170,14 @@ class FileNameParser:
series = tmpstr
volume = ""
#save the last word
# save the last word
try:
last_word = series.split()[-1]
except:
last_word = ""
# remove any parenthetical phrases
series = re.sub( "\(.*?\)", "", series)
series = re.sub("\(.*?\)", "", series)
# search for volume number
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
@ -188,7 +188,7 @@ class FileNameParser:
# if a volume wasn't found, see if the last word is a year in parentheses
# since that's a common way to designate the volume
if volume == "":
#match either (YEAR), (YEAR-), or (YEAR-YEAR2)
# match either (YEAR), (YEAR-), or (YEAR-YEAR2)
match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
if match:
volume = match.group(2)
@ -199,7 +199,7 @@ class FileNameParser:
# for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might
# be removed to help search online
if issue_start == 0:
one_shot_words = [ "tpb", "os", "one-shot", "ogn", "gn" ]
one_shot_words = ["tpb", "os", "one-shot", "ogn", "gn"]
try:
last_word = series.split()[-1]
if last_word.lower() in one_shot_words:
@ -209,7 +209,7 @@ class FileNameParser:
return series, volume.strip()
def getYear( self,filename, issue_end):
def getYear(self, filename, issue_end):
filename = filename[issue_end:]
@ -218,36 +218,38 @@ class FileNameParser:
match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
if match:
year = match.group()
# remove non-numerics
# remove non-digits
year = re.sub("[^0-9]", "", year)
return year
def getRemainder( self, filename, year, count, volume, issue_end ):
def getRemainder(self, filename, year, count, volume, issue_end):
"""Make a guess at where the the non-interesting stuff begins"""
#make a guess at where the the non-interesting stuff begins
remainder = ""
if "--" in filename:
remainder = filename.split("--",1)[1]
remainder = filename.split("--", 1)[1]
elif "__" in filename:
remainder = filename.split("__",1)[1]
remainder = filename.split("__", 1)[1]
elif issue_end != 0:
remainder = filename[issue_end:]
remainder = self.fixSpaces(remainder, remove_dashes=False)
if volume != "":
remainder = remainder.replace("Vol."+volume,"",1)
remainder = remainder.replace("Vol." + volume, "", 1)
if year != "":
remainder = remainder.replace(year,"",1)
remainder = remainder.replace(year, "", 1)
if count != "":
remainder = remainder.replace("of "+count,"",1)
remainder = remainder.replace("of " + count, "", 1)
remainder = remainder.replace("()","")
remainder = remainder.replace(" "," ") # cleans some whitespace mess
remainder = remainder.replace("()", "")
remainder = remainder.replace(
" ",
" ") # cleans some whitespace mess
return remainder.strip()
def parseFilename( self, filename ):
def parseFilename(self, filename):
# remove the path
filename = os.path.basename(filename)
@ -255,10 +257,10 @@ class FileNameParser:
# remove the extension
filename = os.path.splitext(filename)[0]
#url decode, just in case
# url decode, just in case
filename = unquote(filename)
# sometimes archives get messed up names from too many decodings
# sometimes archives get messed up names from too many decodes
# often url encodings will break and leave "_28" and "_29" in place
# of "(" and ")" see if there are a number of these, and replace them
if filename.count("_28") > 1 and filename.count("_29") > 1:
@ -268,14 +270,18 @@ class FileNameParser:
self.issue, issue_start, issue_end = self.getIssueNumber(filename)
self.series, self.volume = self.getSeriesName(filename, issue_start)
# provides proper value when the filename doesn't have a issue number
if issue_end == 0:
issue_end=len(self.series)
issue_end = len(self.series)
self.year = self.getYear(filename, issue_end)
self.issue_count = self.getIssueCount(filename, issue_end)
self.remainder = self.getRemainder( filename, self.year, self.issue_count, self.volume, issue_end )
self.remainder = self.getRemainder(
filename,
self.year,
self.issue_count,
self.volume,
issue_end)
if self.issue != "":
# strip off leading zeros

View File

@ -1,43 +1,46 @@
"""
A python class for internal metadata storage
The goal of this class is to handle ALL the data that might come from various
tagging schemes and databases, such as ComicVine or GCD. This makes conversion
possible, however lossy it might be
"""
"""A class for internal metadata storage
The goal of this class is to handle ALL the data that might come from various
tagging schemes and databases, such as ComicVine or GCD. This makes conversion
possible, however lossy it might be
"""
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Copyright 2012-2014 Anthony Beville
http://www.apache.org/licenses/LICENSE-2.0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import utils
# These page info classes are exactly the same as the CIX scheme, since it's unique
class PageType:
FrontCover = "FrontCover"
InnerCover = "InnerCover"
Roundup = "Roundup"
Story = "Story"
Advertisement = "Advertisement"
Editorial = "Editorial"
Letters = "Letters"
Preview = "Preview"
BackCover = "BackCover"
Other = "Other"
Deleted = "Deleted"
"""
These page info classes are exactly the same as the CIX scheme, since
it's unique
"""
FrontCover = "FrontCover"
InnerCover = "InnerCover"
Roundup = "Roundup"
Story = "Story"
Advertisement = "Advertisement"
Editorial = "Editorial"
Letters = "Letters"
Preview = "Preview"
BackCover = "BackCover"
Other = "Other"
Deleted = "Deleted"
"""
class PageInfo:
@ -48,269 +51,271 @@ class PageInfo:
Key = ""
ImageWidth = 0
ImageHeight = 0
"""
"""
class GenericMetadata:
def __init__(self):
self.isEmpty = True
self.tagOrigin = None
self.series = None
self.issue = None
self.title = None
self.publisher = None
self.month = None
self.year = None
self.day = None
self.issueCount = None
self.volume = None
self.genre = None
self.language = None # 2 letter iso code
self.comments = None # use same way as Summary in CIX
def __init__(self):
self.volumeCount = None
self.criticalRating = None
self.country = None
self.alternateSeries = None
self.alternateNumber = None
self.alternateCount = None
self.imprint = None
self.notes = None
self.webLink = None
self.format = None
self.manga = None
self.blackAndWhite = None
self.pageCount = None
self.maturityRating = None
self.storyArc = None
self.seriesGroup = None
self.scanInfo = None
self.characters = None
self.teams = None
self.locations = None
self.isEmpty = True
self.tagOrigin = None
self.credits = list()
self.tags = list()
self.pages = list()
self.series = None
self.issue = None
self.title = None
self.publisher = None
self.month = None
self.year = None
self.day = None
self.issueCount = None
self.volume = None
self.genre = None
self.language = None # 2 letter iso code
self.comments = None # use same way as Summary in CIX
# Some CoMet-only items
self.price = None
self.isVersionOf = None
self.rights = None
self.identifier = None
self.lastMark = None
self.coverImage = None
self.volumeCount = None
self.criticalRating = None
self.country = None
def overlay( self, new_md ):
# Overlay a metadata object on this one
# that is, when the new object has non-None
# values, over-write them to this one
def assign( cur, new ):
if new is not None:
if type(new) == str and len(new) == 0:
setattr(self, cur, None)
else:
setattr(self, cur, new)
if not new_md.isEmpty:
self.isEmpty = False
assign( 'series', new_md.series )
assign( "issue", new_md.issue )
assign( "issueCount", new_md.issueCount )
assign( "title", new_md.title )
assign( "publisher", new_md.publisher )
assign( "day", new_md.day )
assign( "month", new_md.month )
assign( "year", new_md.year )
assign( "volume", new_md.volume )
assign( "volumeCount", new_md.volumeCount )
assign( "genre", new_md.genre )
assign( "language", new_md.language )
assign( "country", new_md.country )
assign( "criticalRating", new_md.criticalRating )
assign( "alternateSeries", new_md.alternateSeries )
assign( "alternateNumber", new_md.alternateNumber )
assign( "alternateCount", new_md.alternateCount )
assign( "imprint", new_md.imprint )
assign( "webLink", new_md.webLink )
assign( "format", new_md.format )
assign( "manga", new_md.manga )
assign( "blackAndWhite", new_md.blackAndWhite )
assign( "maturityRating", new_md.maturityRating )
assign( "storyArc", new_md.storyArc )
assign( "seriesGroup", new_md.seriesGroup )
assign( "scanInfo", new_md.scanInfo )
assign( "characters", new_md.characters )
assign( "teams", new_md.teams )
assign( "locations", new_md.locations )
assign( "comments", new_md.comments )
assign( "notes", new_md.notes )
self.alternateSeries = None
self.alternateNumber = None
self.alternateCount = None
self.imprint = None
self.notes = None
self.webLink = None
self.format = None
self.manga = None
self.blackAndWhite = None
self.pageCount = None
self.maturityRating = None
assign( "price", new_md.price )
assign( "isVersionOf", new_md.isVersionOf )
assign( "rights", new_md.rights )
assign( "identifier", new_md.identifier )
assign( "lastMark", new_md.lastMark )
self.overlayCredits( new_md.credits )
# TODO
# not sure if the tags and pages should broken down, or treated
# as whole lists....
self.storyArc = None
self.seriesGroup = None
self.scanInfo = None
# For now, go the easy route, where any overlay
# value wipes out the whole list
if len(new_md.tags) > 0:
assign( "tags", new_md.tags )
if len(new_md.pages) > 0:
assign( "pages", new_md.pages )
self.characters = None
self.teams = None
self.locations = None
def overlayCredits( self, new_credits ):
for c in new_credits:
if c.has_key('primary') and c['primary']:
primary = True
else:
primary = False
self.credits = list()
self.tags = list()
self.pages = list()
# Remove credit role if person is blank
if c['person'] == "":
for r in reversed(self.credits):
if r['role'].lower() == c['role'].lower():
self.credits.remove(r)
# otherwise, add it!
else:
self.addCredit( c['person'], c['role'], primary )
def setDefaultPageList( self, count ):
# generate a default page list, with the first page marked as the cover
for i in range(count):
page_dict = dict()
page_dict['Image'] = str(i)
if i == 0:
page_dict['Type'] = PageType.FrontCover
self.pages.append( page_dict )
# Some CoMet-only items
self.price = None
self.isVersionOf = None
self.rights = None
self.identifier = None
self.lastMark = None
self.coverImage = None
def getArchivePageIndex( self, pagenum ):
# convert the displayed page number to the page index of the file in the archive
if pagenum < len( self.pages ):
return int( self.pages[pagenum]['Image'] )
else:
return 0
def getCoverPageIndexList( self ):
# return a list of archive page indices of cover pages
coverlist = []
for p in self.pages:
if 'Type' in p and p['Type'] == PageType.FrontCover:
coverlist.append( int(p['Image']))
if len(coverlist) == 0:
coverlist.append( 0 )
return coverlist
def addCredit( self, person, role, primary = False ):
credit = dict()
credit['person'] = person
credit['role'] = role
if primary:
credit['primary'] = primary
def overlay(self, new_md):
"""Overlay a metadata object on this one
# look to see if it's not already there...
found = False
for c in self.credits:
if ( c['person'].lower() == person.lower() and
c['role'].lower() == role.lower() ):
# no need to add it. just adjust the "primary" flag as needed
c['primary'] = primary
found = True
break
if not found:
self.credits.append(credit)
That is, when the new object has non-None values, over-write them
to this one.
"""
def __str__( self ):
vals = []
if self.isEmpty:
return "No metadata"
def assign(cur, new):
if new is not None:
if isinstance(new, str) and len(new) == 0:
setattr(self, cur, None)
else:
setattr(self, cur, new)
def add_string( tag, val ):
if val is not None and u"{0}".format(val) != "":
vals.append( (tag, val) )
if not new_md.isEmpty:
self.isEmpty = False
def add_attr_string( tag ):
val = getattr(self,tag)
add_string( tag, getattr(self,tag) )
assign('series', new_md.series)
assign("issue", new_md.issue)
assign("issueCount", new_md.issueCount)
assign("title", new_md.title)
assign("publisher", new_md.publisher)
assign("day", new_md.day)
assign("month", new_md.month)
assign("year", new_md.year)
assign("volume", new_md.volume)
assign("volumeCount", new_md.volumeCount)
assign("genre", new_md.genre)
assign("language", new_md.language)
assign("country", new_md.country)
assign("criticalRating", new_md.criticalRating)
assign("alternateSeries", new_md.alternateSeries)
assign("alternateNumber", new_md.alternateNumber)
assign("alternateCount", new_md.alternateCount)
assign("imprint", new_md.imprint)
assign("webLink", new_md.webLink)
assign("format", new_md.format)
assign("manga", new_md.manga)
assign("blackAndWhite", new_md.blackAndWhite)
assign("maturityRating", new_md.maturityRating)
assign("storyArc", new_md.storyArc)
assign("seriesGroup", new_md.seriesGroup)
assign("scanInfo", new_md.scanInfo)
assign("characters", new_md.characters)
assign("teams", new_md.teams)
assign("locations", new_md.locations)
assign("comments", new_md.comments)
assign("notes", new_md.notes)
add_attr_string( "series" )
add_attr_string( "issue" )
add_attr_string( "issueCount" )
add_attr_string( "title" )
add_attr_string( "publisher" )
add_attr_string( "year" )
add_attr_string( "month" )
add_attr_string( "day" )
add_attr_string( "volume" )
add_attr_string( "volumeCount" )
add_attr_string( "genre" )
add_attr_string( "language" )
add_attr_string( "country" )
add_attr_string( "criticalRating" )
add_attr_string( "alternateSeries" )
add_attr_string( "alternateNumber" )
add_attr_string( "alternateCount" )
add_attr_string( "imprint" )
add_attr_string( "webLink" )
add_attr_string( "format" )
add_attr_string( "manga" )
assign("price", new_md.price)
assign("isVersionOf", new_md.isVersionOf)
assign("rights", new_md.rights)
assign("identifier", new_md.identifier)
assign("lastMark", new_md.lastMark)
add_attr_string( "price" )
add_attr_string( "isVersionOf" )
add_attr_string( "rights" )
add_attr_string( "identifier" )
add_attr_string( "lastMark" )
if self.blackAndWhite:
add_attr_string( "blackAndWhite" )
add_attr_string( "maturityRating" )
add_attr_string( "storyArc" )
add_attr_string( "seriesGroup" )
add_attr_string( "scanInfo" )
add_attr_string( "characters" )
add_attr_string( "teams" )
add_attr_string( "locations" )
add_attr_string( "comments" )
add_attr_string( "notes" )
add_string( "tags", utils.listToString( self.tags ) )
for c in self.credits:
primary = ""
if c.has_key('primary') and c['primary']:
primary = " [P]"
add_string( "credit", c['role']+": "+c['person'] + primary)
# find the longest field name
flen = 0
for i in vals:
flen = max( flen, len(i[0]) )
flen += 1
#format the data nicely
outstr = ""
fmt_str = u"{0: <" + str(flen) + "} {1}\n"
for i in vals:
outstr += fmt_str.format( i[0]+":", i[1] )
return outstr
self.overlayCredits(new_md.credits)
# TODO
# not sure if the tags and pages should broken down, or treated
# as whole lists....
# For now, go the easy route, where any overlay
# value wipes out the whole list
if len(new_md.tags) > 0:
assign("tags", new_md.tags)
if len(new_md.pages) > 0:
assign("pages", new_md.pages)
def overlayCredits(self, new_credits):
for c in new_credits:
if 'primary' in c and c['primary']:
primary = True
else:
primary = False
# Remove credit role if person is blank
if c['person'] == "":
for r in reversed(self.credits):
if r['role'].lower() == c['role'].lower():
self.credits.remove(r)
# otherwise, add it!
else:
self.addCredit(c['person'], c['role'], primary)
def setDefaultPageList(self, count):
# generate a default page list, with the first page marked as the cover
for i in range(count):
page_dict = dict()
page_dict['Image'] = str(i)
if i == 0:
page_dict['Type'] = PageType.FrontCover
self.pages.append(page_dict)
def getArchivePageIndex(self, pagenum):
# convert the displayed page number to the page index of the file in
# the archive
if pagenum < len(self.pages):
return int(self.pages[pagenum]['Image'])
else:
return 0
def getCoverPageIndexList(self):
# return a list of archive page indices of cover pages
coverlist = []
for p in self.pages:
if 'Type' in p and p['Type'] == PageType.FrontCover:
coverlist.append(int(p['Image']))
if len(coverlist) == 0:
coverlist.append(0)
return coverlist
def addCredit(self, person, role, primary=False):
credit = dict()
credit['person'] = person
credit['role'] = role
if primary:
credit['primary'] = primary
# look to see if it's not already there...
found = False
for c in self.credits:
if (c['person'].lower() == person.lower() and
c['role'].lower() == role.lower()):
# no need to add it. just adjust the "primary" flag as needed
c['primary'] = primary
found = True
break
if not found:
self.credits.append(credit)
def __str__(self):
vals = []
if self.isEmpty:
return "No metadata"
def add_string(tag, val):
if val is not None and u"{0}".format(val) != "":
vals.append((tag, val))
def add_attr_string(tag):
val = getattr(self, tag)
add_string(tag, getattr(self, tag))
add_attr_string("series")
add_attr_string("issue")
add_attr_string("issueCount")
add_attr_string("title")
add_attr_string("publisher")
add_attr_string("year")
add_attr_string("month")
add_attr_string("day")
add_attr_string("volume")
add_attr_string("volumeCount")
add_attr_string("genre")
add_attr_string("language")
add_attr_string("country")
add_attr_string("criticalRating")
add_attr_string("alternateSeries")
add_attr_string("alternateNumber")
add_attr_string("alternateCount")
add_attr_string("imprint")
add_attr_string("webLink")
add_attr_string("format")
add_attr_string("manga")
add_attr_string("price")
add_attr_string("isVersionOf")
add_attr_string("rights")
add_attr_string("identifier")
add_attr_string("lastMark")
if self.blackAndWhite:
add_attr_string("blackAndWhite")
add_attr_string("maturityRating")
add_attr_string("storyArc")
add_attr_string("seriesGroup")
add_attr_string("scanInfo")
add_attr_string("characters")
add_attr_string("teams")
add_attr_string("locations")
add_attr_string("comments")
add_attr_string("notes")
add_string("tags", utils.listToString(self.tags))
for c in self.credits:
primary = ""
if 'primary' in c and c['primary']:
primary = " [P]"
add_string("credit", c['role'] + ": " + c['person'] + primary)
# find the longest field name
flen = 0
for i in vals:
flen = max(flen, len(i[0]))
flen += 1
# format the data nicely
outstr = ""
fmt_str = u"{0: <" + str(flen) + "} {1}\n"
for i in vals:
outstr += fmt_str.format(i[0] + ":", i[1])
return outstr

View File

@ -1,140 +1,133 @@
# coding=utf-8
"""
Class for handling the odd permutations of an 'issue number' that the comics industry throws at us
e.g.:
"12"
"12.1"
"0"
"-1"
"5AU"
"100-2"
"""Support for mixed digit/string type Issue field
Class for handling the odd permutations of an 'issue number' that the
comics industry throws at us.
e.g.: "12", "12.1", "0", "-1", "5AU", "100-2"
"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#import utils
#import math
#import re
import utils
import math
import re
class IssueString:
def __init__(self, text):
# break up the issue number string into 2 parts: the numeric and suffix string.
# ( assumes that the numeric portion is always first )
self.num = None
self.suffix = ""
if text is None:
return
def __init__(self, text):
if type(text) == int:
text = str(text)
# break up the issue number string into 2 parts: the numeric and suffix string.
# (assumes that the numeric portion is always first)
if len(text) == 0:
return
text = unicode(text)
#skip the minus sign if it's first
if text[0] == '-':
start = 1
else:
start = 0
self.num = None
self.suffix = ""
# if it's still not numeric at start skip it
if text[start].isdigit() or text[start] == ".":
# walk through the string, look for split point (the first non-numeric)
decimal_count = 0
for idx in range( start, len(text) ):
if text[idx] not in "0123456789.":
break
# special case: also split on second "."
if text[idx] == ".":
decimal_count += 1
if decimal_count > 1:
break
else:
idx = len(text)
# move trailing numeric decimal to suffix
# (only if there is other junk after )
if text[idx-1] == "." and len(text) != idx:
idx = idx -1
# if there is no numeric after the minus, make the minus part of the suffix
if idx == 1 and start == 1:
idx = 0
part1 = text[0:idx]
part2 = text[idx:len(text)]
if part1 != "":
self.num = float( part1 )
self.suffix = part2
else:
self.suffix = text
#print "num: {0} suf: {1}".format(self.num, self.suffix)
if text is None:
return
def asString( self, pad = 0 ):
#return the float, left side zero-padded, with suffix attached
if self.num is None:
return self.suffix
negative = self.num < 0
if isinstance(text, int):
text = str(text)
num_f = abs(self.num)
num_int = int( num_f )
num_s = str( num_int )
if float( num_int ) != num_f:
num_s = str( num_f )
num_s += self.suffix
# create padding
padding = ""
l = len( str(num_int))
if l < pad :
padding = "0" * (pad - l)
num_s = padding + num_s
if negative:
num_s = "-" + num_s
if len(text) == 0:
return
return num_s
def asFloat( self ):
#return the float, with no suffix
if self.suffix == u"½":
if self.num is not None:
return self.num + .5
else:
return .5
return self.num
def asInt( self ):
#return the int version of the float
if self.num is None:
return None
return int( self.num )
text = unicode(text)
# skip the minus sign if it's first
if text[0] == '-':
start = 1
else:
start = 0
# if it's still not numeric at start skip it
if text[start].isdigit() or text[start] == ".":
# walk through the string, look for split point (the first
# non-numeric)
decimal_count = 0
for idx in range(start, len(text)):
if text[idx] not in "0123456789.":
break
# special case: also split on second "."
if text[idx] == ".":
decimal_count += 1
if decimal_count > 1:
break
else:
idx = len(text)
# move trailing numeric decimal to suffix
# (only if there is other junk after )
if text[idx - 1] == "." and len(text) != idx:
idx = idx - 1
# if there is no numeric after the minus, make the minus part of
# the suffix
if idx == 1 and start == 1:
idx = 0
part1 = text[0:idx]
part2 = text[idx:len(text)]
if part1 != "":
self.num = float(part1)
self.suffix = part2
else:
self.suffix = text
# print "num: {0} suf: {1}".format(self.num, self.suffix)
def asString(self, pad=0):
# return the float, left side zero-padded, with suffix attached
if self.num is None:
return self.suffix
negative = self.num < 0
num_f = abs(self.num)
num_int = int(num_f)
num_s = str(num_int)
if float(num_int) != num_f:
num_s = str(num_f)
num_s += self.suffix
# create padding
padding = ""
l = len(str(num_int))
if l < pad:
padding = "0" * (pad - l)
num_s = padding + num_s
if negative:
num_s = "-" + num_s
return num_s
def asFloat(self):
# return the float, with no suffix
if self.suffix == u"½":
if self.num is not None:
return self.num + .5
else:
return .5
return self.num
def asInt(self):
# return the int version of the float
if self.num is None:
return None
return int(self.num)

File diff suppressed because it is too large Load Diff