Merge branch 'fcanc-master'

This commit is contained in:
davide-romanini 2015-03-01 15:44:11 +01:00
commit d84110ccb2
8 changed files with 2037 additions and 1960 deletions

View File

@ -1,122 +1,122 @@
""" """A class to encapsulate CoMet data"""
A python class to encapsulate CoMet data
"""
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
limitations under the License. # limitations under the License.
"""
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
#from datetime import datetime
#from pprint import pprint
#import zipfile
from genericmetadata import GenericMetadata from genericmetadata import GenericMetadata
import utils import utils
class CoMet: class CoMet:
writer_synonyms = ['writer', 'plotter', 'scripter'] writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ] penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns']
inker_synonyms = [ 'inker', 'artist', 'finishes' ] inker_synonyms = ['inker', 'artist', 'finishes']
colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ] colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer']
letterer_synonyms = [ 'letterer'] letterer_synonyms = ['letterer']
cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ] cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist']
editor_synonyms = [ 'editor'] editor_synonyms = ['editor']
def metadataFromString(self, string):
    """Parse CoMet XML text and convert it to metadata.

    The text is parsed with ET.fromstring(), wrapped in an ElementTree and
    handed to convertXMLToMetadata(); whatever that method returns is
    returned unchanged. Parse errors from ET.fromstring() propagate to
    the caller.
    """
    root_element = ET.fromstring(string)
    return self.convertXMLToMetadata(ET.ElementTree(root_element))
def stringFromMetadata(self, metadata):
    """Serialize metadata to CoMet XML text.

    Returns an XML declaration line followed by ET.tostring() of the root
    element of the tree built by convertMetadataToXML().
    """
    # convertMetadataToXML() takes a filename argument ahead of the
    # metadata; self is passed in that slot, exactly as before.
    xml_tree = self.convertMetadataToXML(self, metadata)
    declaration = '<?xml version="1.0" encoding="UTF-8"?>\n'
    return declaration + ET.tostring(xml_tree.getroot())
def indent( self, elem, level=0 ): def indent(self, elem, level=0):
# for making the XML output readable # for making the XML output readable
i = "\n" + level*" " i = "\n" + level * " "
if len(elem): if len(elem):
if not elem.text or not elem.text.strip(): if not elem.text or not elem.text.strip():
elem.text = i + " " elem.text = i + " "
if not elem.tail or not elem.tail.strip(): if not elem.tail or not elem.tail.strip():
elem.tail = i elem.tail = i
for elem in elem: for elem in elem:
self.indent( elem, level+1 ) self.indent(elem, level + 1)
if not elem.tail or not elem.tail.strip(): if not elem.tail or not elem.tail.strip():
elem.tail = i elem.tail = i
else: else:
if level and (not elem.tail or not elem.tail.strip()): if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i elem.tail = i
def convertMetadataToXML( self, filename, metadata ): def convertMetadataToXML(self, filename, metadata):
#shorthand for the metadata # shorthand for the metadata
md = metadata md = metadata
# build a tree structure # build a tree structure
root = ET.Element("comet") root = ET.Element("comet")
root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/" root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/"
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd" root.attrib[
'xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd"
#helper func # helper func
def assign( comet_entry, md_entry): def assign(comet_entry, md_entry):
if md_entry is not None: if md_entry is not None:
ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry) ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry)
# title is manditory # title is manditory
if md.title is None: if md.title is None:
md.title = "" md.title = ""
assign( 'title', md.title ) assign('title', md.title)
assign( 'series', md.series ) assign('series', md.series)
assign( 'issue', md.issue ) #must be int?? assign('issue', md.issue) # must be int??
assign( 'volume', md.volume ) assign('volume', md.volume)
assign( 'description', md.comments ) assign('description', md.comments)
assign( 'publisher', md.publisher ) assign('publisher', md.publisher)
assign( 'pages', md.pageCount ) assign('pages', md.pageCount)
assign( 'format', md.format ) assign('format', md.format)
assign( 'language', md.language ) assign('language', md.language)
assign( 'rating', md.maturityRating ) assign('rating', md.maturityRating)
assign( 'price', md.price ) assign('price', md.price)
assign( 'isVersionOf', md.isVersionOf ) assign('isVersionOf', md.isVersionOf)
assign( 'rights', md.rights ) assign('rights', md.rights)
assign( 'identifier', md.identifier ) assign('identifier', md.identifier)
assign( 'lastMark', md.lastMark ) assign('lastMark', md.lastMark)
assign( 'genre', md.genre ) # TODO repeatable assign('genre', md.genre) # TODO repeatable
if md.characters is not None: if md.characters is not None:
char_list = [ c.strip() for c in md.characters.split(',') ] char_list = [c.strip() for c in md.characters.split(',')]
for c in char_list: for c in char_list:
assign( 'character', c ) assign('character', c)
if md.manga is not None and md.manga == "YesAndRightToLeft": if md.manga is not None and md.manga == "YesAndRightToLeft":
assign( 'readingDirection', "rtl") assign('readingDirection', "rtl")
date_str = "" date_str = ""
if md.year is not None: if md.year is not None:
date_str = str(md.year).zfill(4) date_str = str(md.year).zfill(4)
if md.month is not None: if md.month is not None:
date_str += "-" + str(md.month).zfill(2) date_str += "-" + str(md.month).zfill(2)
assign( 'date', date_str ) assign('date', date_str)
assign( 'coverImage', md.coverImage ) assign('coverImage', md.coverImage)
# need to specially process the credits, since they are structured differently than CIX # need to specially process the credits, since they are structured
# differently than CIX
credit_writer_list = list() credit_writer_list = list()
credit_penciller_list = list() credit_penciller_list = list()
credit_inker_list = list() credit_inker_list = list()
@ -128,27 +128,47 @@ class CoMet:
# loop thru credits, and build a list for each role that CoMet supports # loop thru credits, and build a list for each role that CoMet supports
for credit in metadata.credits: for credit in metadata.credits:
if credit['role'].lower() in set( self.writer_synonyms ): if credit['role'].lower() in set(self.writer_synonyms):
ET.SubElement(root, 'writer').text = u"{0}".format(credit['person']) ET.SubElement(
root,
'writer').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set( self.penciller_synonyms ): if credit['role'].lower() in set(self.penciller_synonyms):
ET.SubElement(root, 'penciller').text = u"{0}".format(credit['person']) ET.SubElement(
root,
'penciller').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set( self.inker_synonyms ): if credit['role'].lower() in set(self.inker_synonyms):
ET.SubElement(root, 'inker').text = u"{0}".format(credit['person']) ET.SubElement(
root,
'inker').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set( self.colorist_synonyms ): if credit['role'].lower() in set(self.colorist_synonyms):
ET.SubElement(root, 'colorist').text = u"{0}".format(credit['person']) ET.SubElement(
root,
'colorist').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set( self.letterer_synonyms ): if credit['role'].lower() in set(self.letterer_synonyms):
ET.SubElement(root, 'letterer').text = u"{0}".format(credit['person']) ET.SubElement(
root,
'letterer').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set( self.cover_synonyms ): if credit['role'].lower() in set(self.cover_synonyms):
ET.SubElement(root, 'coverDesigner').text = u"{0}".format(credit['person']) ET.SubElement(
root,
if credit['role'].lower() in set( self.editor_synonyms ): 'coverDesigner').text = u"{0}".format(
ET.SubElement(root, 'editor').text = u"{0}".format(credit['person']) credit['person'])
if credit['role'].lower() in set(self.editor_synonyms):
ET.SubElement(
root,
'editor').text = u"{0}".format(
credit['person'])
# self pretty-print # self pretty-print
self.indent(root) self.indent(root)
@ -157,8 +177,7 @@ class CoMet:
tree = ET.ElementTree(root) tree = ET.ElementTree(root)
return tree return tree
def convertXMLToMetadata(self, tree):
def convertXMLToMetadata( self, tree ):
root = tree.getroot() root = tree.getroot()
@ -170,41 +189,41 @@ class CoMet:
md = metadata md = metadata
# Helper function # Helper function
def xlate( tag ): def xlate(tag):
node = root.find( tag ) node = root.find(tag)
if node is not None: if node is not None:
return node.text return node.text
else: else:
return None return None
md.series = xlate( 'series' ) md.series = xlate('series')
md.title = xlate( 'title' ) md.title = xlate('title')
md.issue = xlate( 'issue' ) md.issue = xlate('issue')
md.volume = xlate( 'volume' ) md.volume = xlate('volume')
md.comments = xlate( 'description' ) md.comments = xlate('description')
md.publisher = xlate( 'publisher' ) md.publisher = xlate('publisher')
md.language = xlate( 'language' ) md.language = xlate('language')
md.format = xlate( 'format' ) md.format = xlate('format')
md.pageCount = xlate( 'pages' ) md.pageCount = xlate('pages')
md.maturityRating = xlate( 'rating' ) md.maturityRating = xlate('rating')
md.price = xlate( 'price' ) md.price = xlate('price')
md.isVersionOf = xlate( 'isVersionOf' ) md.isVersionOf = xlate('isVersionOf')
md.rights = xlate( 'rights' ) md.rights = xlate('rights')
md.identifier = xlate( 'identifier' ) md.identifier = xlate('identifier')
md.lastMark = xlate( 'lastMark' ) md.lastMark = xlate('lastMark')
md.genre = xlate( 'genre' ) # TODO - repeatable field md.genre = xlate('genre') # TODO - repeatable field
date = xlate( 'date' ) date = xlate('date')
if date is not None: if date is not None:
parts = date.split('-') parts = date.split('-')
if len( parts) > 0: if len(parts) > 0:
md.year = parts[0] md.year = parts[0]
if len( parts) > 1: if len(parts) > 1:
md.month = parts[1] md.month = parts[1]
md.coverImage = xlate( 'coverImage' ) md.coverImage = xlate('coverImage')
readingDirection = xlate( 'readingDirection' ) readingDirection = xlate('readingDirection')
if readingDirection is not None and readingDirection == "rtl": if readingDirection is not None and readingDirection == "rtl":
md.manga = "YesAndRightToLeft" md.manga = "YesAndRightToLeft"
@ -213,31 +232,30 @@ class CoMet:
for n in root: for n in root:
if n.tag == 'character': if n.tag == 'character':
char_list.append(n.text.strip()) char_list.append(n.text.strip())
md.characters = utils.listToString( char_list ) md.characters = utils.listToString(char_list)
# Now extract the credit info # Now extract the credit info
for n in root: for n in root:
if ( n.tag == 'writer' or if (n.tag == 'writer' or
n.tag == 'penciller' or n.tag == 'penciller' or
n.tag == 'inker' or n.tag == 'inker' or
n.tag == 'colorist' or n.tag == 'colorist' or
n.tag == 'letterer' or n.tag == 'letterer' or
n.tag == 'editor' n.tag == 'editor'
): ):
metadata.addCredit( n.text.strip(), n.tag.title() ) metadata.addCredit(n.text.strip(), n.tag.title())
if n.tag == 'coverDesigner': if n.tag == 'coverDesigner':
metadata.addCredit( n.text.strip(), "Cover" ) metadata.addCredit(n.text.strip(), "Cover")
metadata.isEmpty = False metadata.isEmpty = False
return metadata return metadata
#verify that the string actually contains CoMet data in XML format # verify that the string actually contains CoMet data in XML format
def validateString( self, string ): def validateString(self, string):
try: try:
tree = ET.ElementTree(ET.fromstring( string )) tree = ET.ElementTree(ET.fromstring(string))
root = tree.getroot() root = tree.getroot()
if root.tag != 'comet': if root.tag != 'comet':
raise Exception raise Exception
@ -246,15 +264,13 @@ class CoMet:
return True return True
def writeToExternalFile(self, filename, metadata):
    """Write metadata to filename as a UTF-8 encoded CoMet XML document.

    The tree comes from convertMetadataToXML() and is written with
    ElementTree.write(). Returns None.
    """
    # self fills convertMetadataToXML()'s filename slot, exactly as before.
    xml_tree = self.convertMetadataToXML(self, metadata)
    xml_tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):
    """Parse the CoMet XML file at filename and convert it to metadata.

    Returns whatever convertXMLToMetadata() returns for the parsed tree.
    Errors raised by ET.parse() propagate to the caller.
    """
    return self.convertXMLToMetadata(ET.parse(filename))

File diff suppressed because it is too large Load Diff

View File

@ -1,67 +1,62 @@
""" """A class to encapsulate the ComicBookInfo data"""
A python class to encapsulate the ComicBookInfo data
"""
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json import json
from datetime import datetime from datetime import datetime
import zipfile #import zipfile
from genericmetadata import GenericMetadata from genericmetadata import GenericMetadata
import utils import utils
#import ctversion #import ctversion
class ComicBookInfo: class ComicBookInfo:
def metadataFromString(self, string):
def metadataFromString( self, string ): cbi_container = json.loads(unicode(string, 'utf-8'))
cbi_container = json.loads( unicode(string, 'utf-8') )
metadata = GenericMetadata() metadata = GenericMetadata()
cbi = cbi_container[ 'ComicBookInfo/1.0' ] cbi = cbi_container['ComicBookInfo/1.0']
#helper func # helper func
# If item is not in CBI, return None # If item is not in CBI, return None
def xlate( cbi_entry): def xlate(cbi_entry):
if cbi_entry in cbi: if cbi_entry in cbi:
return cbi[cbi_entry] return cbi[cbi_entry]
else: else:
return None return None
metadata.series = xlate( 'series' ) metadata.series = xlate('series')
metadata.title = xlate( 'title' ) metadata.title = xlate('title')
metadata.issue = xlate( 'issue' ) metadata.issue = xlate('issue')
metadata.publisher = xlate( 'publisher' ) metadata.publisher = xlate('publisher')
metadata.month = xlate( 'publicationMonth' ) metadata.month = xlate('publicationMonth')
metadata.year = xlate( 'publicationYear' ) metadata.year = xlate('publicationYear')
metadata.issueCount = xlate( 'numberOfIssues' ) metadata.issueCount = xlate('numberOfIssues')
metadata.comments = xlate( 'comments' ) metadata.comments = xlate('comments')
metadata.credits = xlate( 'credits' ) metadata.credits = xlate('credits')
metadata.genre = xlate( 'genre' ) metadata.genre = xlate('genre')
metadata.volume = xlate( 'volume' ) metadata.volume = xlate('volume')
metadata.volumeCount = xlate( 'numberOfVolumes' ) metadata.volumeCount = xlate('numberOfVolumes')
metadata.language = xlate( 'language' ) metadata.language = xlate('language')
metadata.country = xlate( 'country' ) metadata.country = xlate('country')
metadata.criticalRating = xlate( 'rating' ) metadata.criticalRating = xlate('rating')
metadata.tags = xlate( 'tags' ) metadata.tags = xlate('tags')
# make sure credits and tags are at least empty lists and not None # make sure credits and tags are at least empty lists and not None
if metadata.credits is None: if metadata.credits is None:
@ -69,13 +64,13 @@ class ComicBookInfo:
if metadata.tags is None: if metadata.tags is None:
metadata.tags = [] metadata.tags = []
#need to massage the language string to be ISO # need to massage the language string to be ISO
if metadata.language is not None: if metadata.language is not None:
# reverse look-up # reverse look-up
pattern = metadata.language pattern = metadata.language
metadata.language = None metadata.language = None
for key in utils.getLanguageDict(): for key in utils.getLanguageDict():
if utils.getLanguageDict()[ key ] == pattern.encode('utf-8'): if utils.getLanguageDict()[key] == pattern.encode('utf-8'):
metadata.language = key metadata.language = key
break break
@ -83,70 +78,67 @@ class ComicBookInfo:
return metadata return metadata
def stringFromMetadata(self, metadata):
    """Return metadata serialized as ComicBookInfo JSON text.

    The dictionary built by createJSONDictionary() is passed straight to
    json.dumps() with default formatting.
    """
    return json.dumps(self.createJSONDictionary(metadata))
def validateString(self, string):
    """Verify that the string actually contains CBI data in JSON format.

    Returns True only when string parses as JSON and the top-level value
    is an object holding a 'ComicBookInfo/1.0' key; returns False for
    anything else. Bad input never raises.
    """
    try:
        cbi_container = json.loads(string)
    except Exception:
        # Any parse failure means "not CBI data". Unlike a bare except,
        # this lets KeyboardInterrupt and SystemExit through.
        return False

    # Valid JSON is not necessarily an object: a bare number would make
    # the membership test raise, and a string or list could match the key
    # by accident.
    if not isinstance(cbi_container, dict):
        return False

    return 'ComicBookInfo/1.0' in cbi_container
def createJSONDictionary(self, metadata):
    """Create the dictionary that we will convert to JSON text.

    Returns a container with three keys: 'appID', 'lastModified' (the
    current time as a string) and 'ComicBookInfo/1.0', which maps to the
    dictionary of fields copied from metadata. Fields whose value is None
    are left out of that inner dictionary.
    """
    def as_int(value):
        # Only exact str/unicode/int values are converted. Any other
        # type, or text int() rejects, yields None so the field is
        # omitted.
        if type(value) not in [str, unicode, int]:
            return None
        try:
            return int(value)
        except ValueError:
            return None

    cbi = dict()
    cbi_container = {
        'appID': 'ComicTagger/' + '1.0.0',  # ctversion.version,
        'lastModified': str(datetime.now()),
        'ComicBookInfo/1.0': cbi,
    }

    # (CBI key, value) pairs, in the order the fields are emitted.
    entries = [
        ('series', metadata.series),
        ('title', metadata.title),
        ('issue', metadata.issue),
        ('publisher', metadata.publisher),
        ('publicationMonth', as_int(metadata.month)),
        ('publicationYear', as_int(metadata.year)),
        ('numberOfIssues', as_int(metadata.issueCount)),
        ('comments', metadata.comments),
        ('genre', metadata.genre),
        ('volume', as_int(metadata.volume)),
        ('numberOfVolumes', as_int(metadata.volumeCount)),
        ('language', utils.getLanguageFromISO(metadata.language)),
        ('country', metadata.country),
        ('rating', metadata.criticalRating),
        ('credits', metadata.credits),
        ('tags', metadata.tags),
    ]
    for cbi_key, value in entries:
        if value is not None:
            cbi[cbi_key] = value

    return cbi_container
def writeToExternalFile(self, filename, metadata):
    """Write metadata to filename as ComicBookInfo JSON, indented 4 spaces.

    filename -- path of the file to create or overwrite
    metadata -- passed to createJSONDictionary() to build the content

    Returns None. Errors from open()/write() propagate to the caller.
    """
    cbi_container = self.createJSONDictionary(metadata)

    # Serialize before opening, so a failure here cannot leave an existing
    # file truncated.
    json_text = json.dumps(cbi_container, indent=4)

    # The previous code ended with a bare `f.close` (attribute access, not
    # a call), so the file was never explicitly closed. The with-block
    # closes it even when write() raises.
    with open(filename, 'w') as f:
        f.write(json_text)

View File

@ -1,111 +1,110 @@
""" """A class to encapsulate ComicRack's ComicInfo.xml data"""
A python class to encapsulate ComicRack's ComicInfo.xml data
"""
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
limitations under the License. # limitations under the License.
"""
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
#from datetime import datetime
#from pprint import pprint
#import zipfile
from genericmetadata import GenericMetadata from genericmetadata import GenericMetadata
import utils import utils
class ComicInfoXml: class ComicInfoXml:
writer_synonyms = ['writer', 'plotter', 'scripter'] writer_synonyms = ['writer', 'plotter', 'scripter']
penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ] penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns']
inker_synonyms = [ 'inker', 'artist', 'finishes' ] inker_synonyms = ['inker', 'artist', 'finishes']
colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ] colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer']
letterer_synonyms = [ 'letterer'] letterer_synonyms = ['letterer']
cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ] cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist']
editor_synonyms = [ 'editor'] editor_synonyms = ['editor']
def getParseableCredits(self):
def getParseableCredits( self ):
parsable_credits = [] parsable_credits = []
parsable_credits.extend( self.writer_synonyms ) parsable_credits.extend(self.writer_synonyms)
parsable_credits.extend( self.penciller_synonyms ) parsable_credits.extend(self.penciller_synonyms)
parsable_credits.extend( self.inker_synonyms ) parsable_credits.extend(self.inker_synonyms)
parsable_credits.extend( self.colorist_synonyms ) parsable_credits.extend(self.colorist_synonyms)
parsable_credits.extend( self.letterer_synonyms ) parsable_credits.extend(self.letterer_synonyms)
parsable_credits.extend( self.cover_synonyms ) parsable_credits.extend(self.cover_synonyms)
parsable_credits.extend( self.editor_synonyms ) parsable_credits.extend(self.editor_synonyms)
return parsable_credits return parsable_credits
def metadataFromString(self, string):
    """Parse ComicInfo XML text and convert it to metadata.

    The text is parsed with ET.fromstring(), wrapped in an ElementTree and
    handed to convertXMLToMetadata(); whatever that method returns is
    returned unchanged. Parse errors from ET.fromstring() propagate to
    the caller.
    """
    root_element = ET.fromstring(string)
    return self.convertXMLToMetadata(ET.ElementTree(root_element))
def stringFromMetadata(self, metadata):
    """Serialize metadata to ComicInfo XML text.

    Returns an XML declaration line followed by ET.tostring() of the root
    element of the tree built by convertMetadataToXML().
    """
    # convertMetadataToXML() takes a filename argument ahead of the
    # metadata; self is passed in that slot, exactly as before.
    xml_tree = self.convertMetadataToXML(self, metadata)
    declaration = '<?xml version="1.0"?>\n'
    return declaration + ET.tostring(xml_tree.getroot())
def indent( self, elem, level=0 ): def indent(self, elem, level=0):
# for making the XML output readable # for making the XML output readable
i = "\n" + level*" " i = "\n" + level * " "
if len(elem): if len(elem):
if not elem.text or not elem.text.strip(): if not elem.text or not elem.text.strip():
elem.text = i + " " elem.text = i + " "
if not elem.tail or not elem.tail.strip(): if not elem.tail or not elem.tail.strip():
elem.tail = i elem.tail = i
for elem in elem: for elem in elem:
self.indent( elem, level+1 ) self.indent(elem, level + 1)
if not elem.tail or not elem.tail.strip(): if not elem.tail or not elem.tail.strip():
elem.tail = i elem.tail = i
else: else:
if level and (not elem.tail or not elem.tail.strip()): if level and (not elem.tail or not elem.tail.strip()):
elem.tail = i elem.tail = i
def convertMetadataToXML( self, filename, metadata ): def convertMetadataToXML(self, filename, metadata):
#shorthand for the metadata # shorthand for the metadata
md = metadata md = metadata
# build a tree structure # build a tree structure
root = ET.Element("ComicInfo") root = ET.Element("ComicInfo")
root.attrib['xmlns:xsi']="http://www.w3.org/2001/XMLSchema-instance" root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xmlns:xsd']="http://www.w3.org/2001/XMLSchema" root.attrib['xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema"
#helper func # helper func
def assign( cix_entry, md_entry):
def assign(cix_entry, md_entry):
if md_entry is not None: if md_entry is not None:
ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry) ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry)
assign( 'Title', md.title ) assign('Title', md.title)
assign( 'Series', md.series ) assign('Series', md.series)
assign( 'Number', md.issue ) assign('Number', md.issue)
assign( 'Count', md.issueCount ) assign('Count', md.issueCount)
assign( 'Volume', md.volume ) assign('Volume', md.volume)
assign( 'AlternateSeries', md.alternateSeries ) assign('AlternateSeries', md.alternateSeries)
assign( 'AlternateNumber', md.alternateNumber ) assign('AlternateNumber', md.alternateNumber)
assign( 'StoryArc', md.storyArc ) assign('StoryArc', md.storyArc)
assign( 'SeriesGroup', md.seriesGroup ) assign('SeriesGroup', md.seriesGroup)
assign( 'AlternateCount', md.alternateCount ) assign('AlternateCount', md.alternateCount)
assign( 'Summary', md.comments ) assign('Summary', md.comments)
assign( 'Notes', md.notes ) assign('Notes', md.notes)
assign( 'Year', md.year ) assign('Year', md.year)
assign( 'Month', md.month ) assign('Month', md.month)
assign( 'Day', md.day ) assign('Day', md.day)
# need to specially process the credits, since they are structured differently than CIX # need to specially process the credits, since they are structured
# differently than CIX
credit_writer_list = list() credit_writer_list = list()
credit_penciller_list = list() credit_penciller_list = list()
credit_inker_list = list() credit_inker_list = list()
@ -114,77 +113,78 @@ class ComicInfoXml:
credit_cover_list = list() credit_cover_list = list()
credit_editor_list = list() credit_editor_list = list()
# first, loop thru credits, and build a list for each role that CIX supports # first, loop thru credits, and build a list for each role that CIX
# supports
for credit in metadata.credits: for credit in metadata.credits:
if credit['role'].lower() in set( self.writer_synonyms ): if credit['role'].lower() in set(self.writer_synonyms):
credit_writer_list.append(credit['person'].replace(",","")) credit_writer_list.append(credit['person'].replace(",", ""))
if credit['role'].lower() in set( self.penciller_synonyms ): if credit['role'].lower() in set(self.penciller_synonyms):
credit_penciller_list.append(credit['person'].replace(",","")) credit_penciller_list.append(credit['person'].replace(",", ""))
if credit['role'].lower() in set( self.inker_synonyms ): if credit['role'].lower() in set(self.inker_synonyms):
credit_inker_list.append(credit['person'].replace(",","")) credit_inker_list.append(credit['person'].replace(",", ""))
if credit['role'].lower() in set( self.colorist_synonyms ): if credit['role'].lower() in set(self.colorist_synonyms):
credit_colorist_list.append(credit['person'].replace(",","")) credit_colorist_list.append(credit['person'].replace(",", ""))
if credit['role'].lower() in set( self.letterer_synonyms ): if credit['role'].lower() in set(self.letterer_synonyms):
credit_letterer_list.append(credit['person'].replace(",","")) credit_letterer_list.append(credit['person'].replace(",", ""))
if credit['role'].lower() in set( self.cover_synonyms ): if credit['role'].lower() in set(self.cover_synonyms):
credit_cover_list.append(credit['person'].replace(",","")) credit_cover_list.append(credit['person'].replace(",", ""))
if credit['role'].lower() in set( self.editor_synonyms ): if credit['role'].lower() in set(self.editor_synonyms):
credit_editor_list.append(credit['person'].replace(",","")) credit_editor_list.append(credit['person'].replace(",", ""))
# second, convert each list to string, and add to XML struct # second, convert each list to string, and add to XML struct
if len( credit_writer_list ) > 0: if len(credit_writer_list) > 0:
node = ET.SubElement(root, 'Writer') node = ET.SubElement(root, 'Writer')
node.text = utils.listToString( credit_writer_list ) node.text = utils.listToString(credit_writer_list)
if len( credit_penciller_list ) > 0: if len(credit_penciller_list) > 0:
node = ET.SubElement(root, 'Penciller') node = ET.SubElement(root, 'Penciller')
node.text = utils.listToString( credit_penciller_list ) node.text = utils.listToString(credit_penciller_list)
if len( credit_inker_list ) > 0: if len(credit_inker_list) > 0:
node = ET.SubElement(root, 'Inker') node = ET.SubElement(root, 'Inker')
node.text = utils.listToString( credit_inker_list ) node.text = utils.listToString(credit_inker_list)
if len( credit_colorist_list ) > 0: if len(credit_colorist_list) > 0:
node = ET.SubElement(root, 'Colorist') node = ET.SubElement(root, 'Colorist')
node.text = utils.listToString( credit_colorist_list ) node.text = utils.listToString(credit_colorist_list)
if len( credit_letterer_list ) > 0: if len(credit_letterer_list) > 0:
node = ET.SubElement(root, 'Letterer') node = ET.SubElement(root, 'Letterer')
node.text = utils.listToString( credit_letterer_list ) node.text = utils.listToString(credit_letterer_list)
if len( credit_cover_list ) > 0: if len(credit_cover_list) > 0:
node = ET.SubElement(root, 'CoverArtist') node = ET.SubElement(root, 'CoverArtist')
node.text = utils.listToString( credit_cover_list ) node.text = utils.listToString(credit_cover_list)
if len( credit_editor_list ) > 0: if len(credit_editor_list) > 0:
node = ET.SubElement(root, 'Editor') node = ET.SubElement(root, 'Editor')
node.text = utils.listToString( credit_editor_list ) node.text = utils.listToString(credit_editor_list)
assign( 'Publisher', md.publisher ) assign('Publisher', md.publisher)
assign( 'Imprint', md.imprint ) assign('Imprint', md.imprint)
assign( 'Genre', md.genre ) assign('Genre', md.genre)
assign( 'Web', md.webLink ) assign('Web', md.webLink)
assign( 'PageCount', md.pageCount ) assign('PageCount', md.pageCount)
assign( 'LanguageISO', md.language ) assign('LanguageISO', md.language)
assign( 'Format', md.format ) assign('Format', md.format)
assign( 'AgeRating', md.maturityRating ) assign('AgeRating', md.maturityRating)
if md.blackAndWhite is not None and md.blackAndWhite: if md.blackAndWhite is not None and md.blackAndWhite:
ET.SubElement(root, 'BlackAndWhite').text = "Yes" ET.SubElement(root, 'BlackAndWhite').text = "Yes"
assign( 'Manga', md.manga ) assign('Manga', md.manga)
assign( 'Characters', md.characters ) assign('Characters', md.characters)
assign( 'Teams', md.teams ) assign('Teams', md.teams)
assign( 'Locations', md.locations ) assign('Locations', md.locations)
assign( 'ScanInformation', md.scanInfo ) assign('ScanInformation', md.scanInfo)
# loop and add the page entries under pages node # loop and add the page entries under pages node
if len( md.pages ) > 0: if len(md.pages) > 0:
pages_node = ET.SubElement(root, 'Pages') pages_node = ET.SubElement(root, 'Pages')
for page_dict in md.pages: for page_dict in md.pages:
page_node = ET.SubElement(pages_node, 'Page') page_node = ET.SubElement(pages_node, 'Page')
@ -197,8 +197,7 @@ class ComicInfoXml:
tree = ET.ElementTree(root) tree = ET.ElementTree(root)
return tree return tree
def convertXMLToMetadata(self, tree):
def convertXMLToMetadata( self, tree ):
root = tree.getroot() root = tree.getroot()
@ -209,51 +208,50 @@ class ComicInfoXml:
metadata = GenericMetadata() metadata = GenericMetadata()
md = metadata md = metadata
# Helper function # Helper function
def xlate( tag ): def xlate(tag):
node = root.find( tag ) node = root.find(tag)
if node is not None: if node is not None:
return node.text return node.text
else: else:
return None return None
md.series = xlate( 'Series' ) md.series = xlate('Series')
md.title = xlate( 'Title' ) md.title = xlate('Title')
md.issue = xlate( 'Number' ) md.issue = xlate('Number')
md.issueCount = xlate( 'Count' ) md.issueCount = xlate('Count')
md.volume = xlate( 'Volume' ) md.volume = xlate('Volume')
md.alternateSeries = xlate( 'AlternateSeries' ) md.alternateSeries = xlate('AlternateSeries')
md.alternateNumber = xlate( 'AlternateNumber' ) md.alternateNumber = xlate('AlternateNumber')
md.alternateCount = xlate( 'AlternateCount' ) md.alternateCount = xlate('AlternateCount')
md.comments = xlate( 'Summary' ) md.comments = xlate('Summary')
md.notes = xlate( 'Notes' ) md.notes = xlate('Notes')
md.year = xlate( 'Year' ) md.year = xlate('Year')
md.month = xlate( 'Month' ) md.month = xlate('Month')
md.day = xlate( 'Day' ) md.day = xlate('Day')
md.publisher = xlate( 'Publisher' ) md.publisher = xlate('Publisher')
md.imprint = xlate( 'Imprint' ) md.imprint = xlate('Imprint')
md.genre = xlate( 'Genre' ) md.genre = xlate('Genre')
md.webLink = xlate( 'Web' ) md.webLink = xlate('Web')
md.language = xlate( 'LanguageISO' ) md.language = xlate('LanguageISO')
md.format = xlate( 'Format' ) md.format = xlate('Format')
md.manga = xlate( 'Manga' ) md.manga = xlate('Manga')
md.characters = xlate( 'Characters' ) md.characters = xlate('Characters')
md.teams = xlate( 'Teams' ) md.teams = xlate('Teams')
md.locations = xlate( 'Locations' ) md.locations = xlate('Locations')
md.pageCount = xlate( 'PageCount' ) md.pageCount = xlate('PageCount')
md.scanInfo = xlate( 'ScanInformation' ) md.scanInfo = xlate('ScanInformation')
md.storyArc = xlate( 'StoryArc' ) md.storyArc = xlate('StoryArc')
md.seriesGroup = xlate( 'SeriesGroup' ) md.seriesGroup = xlate('SeriesGroup')
md.maturityRating = xlate( 'AgeRating' ) md.maturityRating = xlate('AgeRating')
tmp = xlate( 'BlackAndWhite' ) tmp = xlate('BlackAndWhite')
md.blackAndWhite = False md.blackAndWhite = False
if tmp is not None and tmp.lower() in [ "yes", "true", "1" ]: if tmp is not None and tmp.lower() in ["yes", "true", "1"]:
md.blackAndWhite = True md.blackAndWhite = True
# Now extract the credit info # Now extract the credit info
for n in root: for n in root:
if ( n.tag == 'Writer' or if (n.tag == 'Writer' or
n.tag == 'Penciller' or n.tag == 'Penciller' or
n.tag == 'Inker' or n.tag == 'Inker' or
n.tag == 'Colorist' or n.tag == 'Colorist' or
@ -262,32 +260,31 @@ class ComicInfoXml:
): ):
if n.text is not None: if n.text is not None:
for name in n.text.split(','): for name in n.text.split(','):
metadata.addCredit( name.strip(), n.tag ) metadata.addCredit(name.strip(), n.tag)
if n.tag == 'CoverArtist': if n.tag == 'CoverArtist':
if n.text is not None: if n.text is not None:
for name in n.text.split(','): for name in n.text.split(','):
metadata.addCredit( name.strip(), "Cover" ) metadata.addCredit(name.strip(), "Cover")
# parse page data now # parse page data now
pages_node = root.find( "Pages" ) pages_node = root.find("Pages")
if pages_node is not None: if pages_node is not None:
for page in pages_node: for page in pages_node:
metadata.pages.append( page.attrib ) metadata.pages.append(page.attrib)
#print page.attrib # print page.attrib
metadata.isEmpty = False metadata.isEmpty = False
return metadata return metadata
def writeToExternalFile( self, filename, metadata ): def writeToExternalFile(self, filename, metadata):
tree = self.convertMetadataToXML( self, metadata ) tree = self.convertMetadataToXML(self, metadata)
#ET.dump(tree) # ET.dump(tree)
tree.write(filename, encoding='utf-8') tree.write(filename, encoding='utf-8')
def readFromExternalFile( self, filename ): def readFromExternalFile(self, filename):
tree = ET.parse( filename )
return self.convertXMLToMetadata( tree )
tree = ET.parse(filename)
return self.convertXMLToMetadata(tree)

View File

@ -1,26 +1,21 @@
""" """Functions for parsing comic info from filename
Functions for parsing comic info from filename
This should probably be re-written, but, well, it mostly works! This should probably be re-written, but, well, it mostly works!
""" """
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Some portions of this code were modified from pyComicMetaThis project # Some portions of this code were modified from pyComicMetaThis project
# http://code.google.com/p/pycomicmetathis/ # http://code.google.com/p/pycomicmetathis/
@ -29,27 +24,27 @@ import re
import os import os
from urllib import unquote from urllib import unquote
class FileNameParser: class FileNameParser:
def repl(self, m): def repl(self, m):
return ' ' * len(m.group()) return ' ' * len(m.group())
def fixSpaces( self, string, remove_dashes=True ): def fixSpaces(self, string, remove_dashes=True):
if remove_dashes: if remove_dashes:
placeholders = ['[-_]',' +'] placeholders = ['[-_]', ' +']
else: else:
placeholders = ['[_]',' +'] placeholders = ['[_]', ' +']
for ph in placeholders: for ph in placeholders:
string = re.sub(ph, self.repl, string ) string = re.sub(ph, self.repl, string)
return string #.strip() return string # .strip()
def getIssueCount(self, filename, issue_end):
def getIssueCount( self,filename, issue_end ):
count = "" count = ""
filename = filename[issue_end:] filename = filename[issue_end:]
# replace any name seperators with spaces # replace any name separators with spaces
tmpstr = self.fixSpaces(filename) tmpstr = self.fixSpaces(filename)
found = False found = False
@ -64,43 +59,46 @@ class FileNameParser:
count = match.group() count = match.group()
found = True found = True
count = count.lstrip("0") count = count.lstrip("0")
return count return count
def getIssueNumber( self, filename ): def getIssueNumber(self, filename):
"""Returns a tuple of issue number string, and start and end indexes in the filename
# Returns a tuple of issue number string, and start and end indexs in the filename (The indexes will be used to split the string up for further parsing)
# (The indexes will be used to split the string up for further parsing) """
found = False found = False
issue = '' issue = ''
start = 0 start = 0
end = 0 end = 0
# first, look for multiple "--", this means it's formatted differently from most: # first, look for multiple "--", this means it's formatted differently
# from most:
if "--" in filename: if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue # the pattern seems to be that anything to left of the first "--"
# is the series name followed by issue
filename = re.sub("--.*", self.repl, filename) filename = re.sub("--.*", self.repl, filename)
elif "__" in filename: elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue # the pattern seems to be that anything to left of the first "__"
# is the series name followed by issue
filename = re.sub("__.*", self.repl, filename) filename = re.sub("__.*", self.repl, filename)
filename = filename.replace("+", " ") filename = filename.replace("+", " ")
# replace parenthetical phrases with spaces # replace parenthetical phrases with spaces
filename = re.sub( "\(.*?\)", self.repl, filename) filename = re.sub("\(.*?\)", self.repl, filename)
filename = re.sub( "\[.*?\]", self.repl, filename) filename = re.sub("\[.*?\]", self.repl, filename)
# replace any name seperators with spaces # replace any name separators with spaces
filename = self.fixSpaces(filename) filename = self.fixSpaces(filename)
# remove any "of NN" phrase with spaces (problem: this could break on some titles) # remove any "of NN" phrase with spaces (problem: this could break on
filename = re.sub( "of [\d]+", self.repl, filename) # some titles)
filename = re.sub("of [\d]+", self.repl, filename)
#print u"[{0}]".format(filename) # print u"[{0}]".format(filename)
# we should now have a cleaned up filename version with all the words in # we should now have a cleaned up filename version with all the words in
# the same positions as original filename # the same positions as original filename
@ -108,25 +106,26 @@ class FileNameParser:
# make a list of each word and its position # make a list of each word and its position
word_list = list() word_list = list()
for m in re.finditer("\S+", filename): for m in re.finditer("\S+", filename):
word_list.append( (m.group(0), m.start(), m.end()) ) word_list.append((m.group(0), m.start(), m.end()))
# remove the first word, since it can't be the issue number # remove the first word, since it can't be the issue number
if len(word_list) > 1: if len(word_list) > 1:
word_list = word_list[1:] word_list = word_list[1:]
else: else:
#only one word?? just bail. # only one word?? just bail.
return issue, start, end return issue, start, end
# Now try to search for the likely issue number word in the list # Now try to search for the likely issue number word in the list
# first look for a word with "#" followed by digits with optional sufix # first look for a word with "#" followed by digits with optional suffix
# this is almost certainly the issue number # this is almost certainly the issue number
for w in reversed(word_list): for w in reversed(word_list):
if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
found = True found = True
break break
# same as above but w/o a '#', and only look at the last word in the list # same as above but w/o a '#', and only look at the last word in the
# list
if not found: if not found:
w = word_list[-1] w = word_list[-1]
if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
@ -148,20 +147,21 @@ class FileNameParser:
return issue, start, end return issue, start, end
def getSeriesName(self, filename, issue_start ): def getSeriesName(self, filename, issue_start):
"""Use the issue number string index to split the filename string"""
# use the issue number string index to split the filename string
if issue_start != 0: if issue_start != 0:
filename = filename[:issue_start] filename = filename[:issue_start]
# in case there is no issue number, remove some obvious stuff # in case there is no issue number, remove some obvious stuff
if "--" in filename: if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue # the pattern seems to be that anything to left of the first "--"
# is the series name followed by issue
filename = re.sub("--.*", self.repl, filename) filename = re.sub("--.*", self.repl, filename)
elif "__" in filename: elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue # the pattern seems to be that anything to left of the first "__"
# is the series name followed by issue
filename = re.sub("__.*", self.repl, filename) filename = re.sub("__.*", self.repl, filename)
filename = filename.replace("+", " ") filename = filename.replace("+", " ")
@ -170,14 +170,14 @@ class FileNameParser:
series = tmpstr series = tmpstr
volume = "" volume = ""
#save the last word # save the last word
try: try:
last_word = series.split()[-1] last_word = series.split()[-1]
except: except:
last_word = "" last_word = ""
# remove any parenthetical phrases # remove any parenthetical phrases
series = re.sub( "\(.*?\)", "", series) series = re.sub("\(.*?\)", "", series)
# search for volume number # search for volume number
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series) match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
@ -188,7 +188,7 @@ class FileNameParser:
# if a volume wasn't found, see if the last word is a year in parentheses # if a volume wasn't found, see if the last word is a year in parentheses
# since that's a common way to designate the volume # since that's a common way to designate the volume
if volume == "": if volume == "":
#match either (YEAR), (YEAR-), or (YEAR-YEAR2) # match either (YEAR), (YEAR-), or (YEAR-YEAR2)
match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word) match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
if match: if match:
volume = match.group(2) volume = match.group(2)
@ -199,7 +199,7 @@ class FileNameParser:
# for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might # for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might
# be removed to help search online # be removed to help search online
if issue_start == 0: if issue_start == 0:
one_shot_words = [ "tpb", "os", "one-shot", "ogn", "gn" ] one_shot_words = ["tpb", "os", "one-shot", "ogn", "gn"]
try: try:
last_word = series.split()[-1] last_word = series.split()[-1]
if last_word.lower() in one_shot_words: if last_word.lower() in one_shot_words:
@ -209,7 +209,7 @@ class FileNameParser:
return series, volume.strip() return series, volume.strip()
def getYear( self,filename, issue_end): def getYear(self, filename, issue_end):
filename = filename[issue_end:] filename = filename[issue_end:]
@ -218,36 +218,38 @@ class FileNameParser:
match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename) match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
if match: if match:
year = match.group() year = match.group()
# remove non-numerics # remove non-digits
year = re.sub("[^0-9]", "", year) year = re.sub("[^0-9]", "", year)
return year return year
def getRemainder( self, filename, year, count, volume, issue_end ): def getRemainder(self, filename, year, count, volume, issue_end):
"""Make a guess at where the the non-interesting stuff begins"""
#make a guess at where the the non-interesting stuff begins
remainder = "" remainder = ""
if "--" in filename: if "--" in filename:
remainder = filename.split("--",1)[1] remainder = filename.split("--", 1)[1]
elif "__" in filename: elif "__" in filename:
remainder = filename.split("__",1)[1] remainder = filename.split("__", 1)[1]
elif issue_end != 0: elif issue_end != 0:
remainder = filename[issue_end:] remainder = filename[issue_end:]
remainder = self.fixSpaces(remainder, remove_dashes=False) remainder = self.fixSpaces(remainder, remove_dashes=False)
if volume != "": if volume != "":
remainder = remainder.replace("Vol."+volume,"",1) remainder = remainder.replace("Vol." + volume, "", 1)
if year != "": if year != "":
remainder = remainder.replace(year,"",1) remainder = remainder.replace(year, "", 1)
if count != "": if count != "":
remainder = remainder.replace("of "+count,"",1) remainder = remainder.replace("of " + count, "", 1)
remainder = remainder.replace("()","") remainder = remainder.replace("()", "")
remainder = remainder.replace(" "," ") # cleans some whitespace mess remainder = remainder.replace(
" ",
" ") # cleans some whitespace mess
return remainder.strip() return remainder.strip()
def parseFilename( self, filename ): def parseFilename(self, filename):
# remove the path # remove the path
filename = os.path.basename(filename) filename = os.path.basename(filename)
@ -255,10 +257,10 @@ class FileNameParser:
# remove the extension # remove the extension
filename = os.path.splitext(filename)[0] filename = os.path.splitext(filename)[0]
#url decode, just in case # url decode, just in case
filename = unquote(filename) filename = unquote(filename)
# sometimes archives get messed up names from too many decodings # sometimes archives get messed up names from too many decodes
# often url encodings will break and leave "_28" and "_29" in place # often url encodings will break and leave "_28" and "_29" in place
# of "(" and ")" see if there are a number of these, and replace them # of "(" and ")" see if there are a number of these, and replace them
if filename.count("_28") > 1 and filename.count("_29") > 1: if filename.count("_28") > 1 and filename.count("_29") > 1:
@ -268,14 +270,18 @@ class FileNameParser:
self.issue, issue_start, issue_end = self.getIssueNumber(filename) self.issue, issue_start, issue_end = self.getIssueNumber(filename)
self.series, self.volume = self.getSeriesName(filename, issue_start) self.series, self.volume = self.getSeriesName(filename, issue_start)
# provides proper value when the filename doesn't have a issue number # provides proper value when the filename doesn't have a issue number
if issue_end == 0: if issue_end == 0:
issue_end=len(self.series) issue_end = len(self.series)
self.year = self.getYear(filename, issue_end) self.year = self.getYear(filename, issue_end)
self.issue_count = self.getIssueCount(filename, issue_end) self.issue_count = self.getIssueCount(filename, issue_end)
self.remainder = self.getRemainder( filename, self.year, self.issue_count, self.volume, issue_end ) self.remainder = self.getRemainder(
filename,
self.year,
self.issue_count,
self.volume,
issue_end)
if self.issue != "": if self.issue != "":
# strip off leading zeros # strip off leading zeros

View File

@ -1,32 +1,35 @@
""" """A class for internal metadata storage
A python class for internal metadata storage
The goal of this class is to handle ALL the data that might come from various The goal of this class is to handle ALL the data that might come from various
tagging schemes and databases, such as ComicVine or GCD. This makes conversion tagging schemes and databases, such as ComicVine or GCD. This makes conversion
possible, however lossy it might be possible, however lossy it might be
""" """
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
limitations under the License. # limitations under the License.
"""
import utils import utils
# These page info classes are exactly the same as the CIX scheme, since it's unique
class PageType: class PageType:
"""
These page info classes are exactly the same as the CIX scheme, since
it's unique
"""
FrontCover = "FrontCover" FrontCover = "FrontCover"
InnerCover = "InnerCover" InnerCover = "InnerCover"
Roundup = "Roundup" Roundup = "Roundup"
@ -50,6 +53,7 @@ class PageInfo:
ImageHeight = 0 ImageHeight = 0
""" """
class GenericMetadata: class GenericMetadata:
def __init__(self): def __init__(self):
@ -106,14 +110,16 @@ class GenericMetadata:
self.lastMark = None self.lastMark = None
self.coverImage = None self.coverImage = None
def overlay( self, new_md ): def overlay(self, new_md):
# Overlay a metadata object on this one """Overlay a metadata object on this one
# that is, when the new object has non-None
# values, over-write them to this one
def assign( cur, new ): That is, when the new object has non-None values, over-write them
to this one.
"""
def assign(cur, new):
if new is not None: if new is not None:
if type(new) == str and len(new) == 0: if isinstance(new, str) and len(new) == 0:
setattr(self, cur, None) setattr(self, cur, None)
else: else:
setattr(self, cur, new) setattr(self, cur, new)
@ -121,45 +127,45 @@ class GenericMetadata:
if not new_md.isEmpty: if not new_md.isEmpty:
self.isEmpty = False self.isEmpty = False
assign( 'series', new_md.series ) assign('series', new_md.series)
assign( "issue", new_md.issue ) assign("issue", new_md.issue)
assign( "issueCount", new_md.issueCount ) assign("issueCount", new_md.issueCount)
assign( "title", new_md.title ) assign("title", new_md.title)
assign( "publisher", new_md.publisher ) assign("publisher", new_md.publisher)
assign( "day", new_md.day ) assign("day", new_md.day)
assign( "month", new_md.month ) assign("month", new_md.month)
assign( "year", new_md.year ) assign("year", new_md.year)
assign( "volume", new_md.volume ) assign("volume", new_md.volume)
assign( "volumeCount", new_md.volumeCount ) assign("volumeCount", new_md.volumeCount)
assign( "genre", new_md.genre ) assign("genre", new_md.genre)
assign( "language", new_md.language ) assign("language", new_md.language)
assign( "country", new_md.country ) assign("country", new_md.country)
assign( "criticalRating", new_md.criticalRating ) assign("criticalRating", new_md.criticalRating)
assign( "alternateSeries", new_md.alternateSeries ) assign("alternateSeries", new_md.alternateSeries)
assign( "alternateNumber", new_md.alternateNumber ) assign("alternateNumber", new_md.alternateNumber)
assign( "alternateCount", new_md.alternateCount ) assign("alternateCount", new_md.alternateCount)
assign( "imprint", new_md.imprint ) assign("imprint", new_md.imprint)
assign( "webLink", new_md.webLink ) assign("webLink", new_md.webLink)
assign( "format", new_md.format ) assign("format", new_md.format)
assign( "manga", new_md.manga ) assign("manga", new_md.manga)
assign( "blackAndWhite", new_md.blackAndWhite ) assign("blackAndWhite", new_md.blackAndWhite)
assign( "maturityRating", new_md.maturityRating ) assign("maturityRating", new_md.maturityRating)
assign( "storyArc", new_md.storyArc ) assign("storyArc", new_md.storyArc)
assign( "seriesGroup", new_md.seriesGroup ) assign("seriesGroup", new_md.seriesGroup)
assign( "scanInfo", new_md.scanInfo ) assign("scanInfo", new_md.scanInfo)
assign( "characters", new_md.characters ) assign("characters", new_md.characters)
assign( "teams", new_md.teams ) assign("teams", new_md.teams)
assign( "locations", new_md.locations ) assign("locations", new_md.locations)
assign( "comments", new_md.comments ) assign("comments", new_md.comments)
assign( "notes", new_md.notes ) assign("notes", new_md.notes)
assign( "price", new_md.price ) assign("price", new_md.price)
assign( "isVersionOf", new_md.isVersionOf ) assign("isVersionOf", new_md.isVersionOf)
assign( "rights", new_md.rights ) assign("rights", new_md.rights)
assign( "identifier", new_md.identifier ) assign("identifier", new_md.identifier)
assign( "lastMark", new_md.lastMark ) assign("lastMark", new_md.lastMark)
self.overlayCredits( new_md.credits ) self.overlayCredits(new_md.credits)
# TODO # TODO
# not sure if the tags and pages should broken down, or treated # not sure if the tags and pages should broken down, or treated
@ -168,15 +174,14 @@ class GenericMetadata:
# For now, go the easy route, where any overlay # For now, go the easy route, where any overlay
# value wipes out the whole list # value wipes out the whole list
if len(new_md.tags) > 0: if len(new_md.tags) > 0:
assign( "tags", new_md.tags ) assign("tags", new_md.tags)
if len(new_md.pages) > 0: if len(new_md.pages) > 0:
assign( "pages", new_md.pages ) assign("pages", new_md.pages)
def overlayCredits(self, new_credits):
def overlayCredits( self, new_credits ):
for c in new_credits: for c in new_credits:
if c.has_key('primary') and c['primary']: if 'primary' in c and c['primary']:
primary = True primary = True
else: else:
primary = False primary = False
@ -188,37 +193,38 @@ class GenericMetadata:
self.credits.remove(r) self.credits.remove(r)
# otherwise, add it! # otherwise, add it!
else: else:
self.addCredit( c['person'], c['role'], primary ) self.addCredit(c['person'], c['role'], primary)
def setDefaultPageList( self, count ): def setDefaultPageList(self, count):
# generate a default page list, with the first page marked as the cover # generate a default page list, with the first page marked as the cover
for i in range(count): for i in range(count):
page_dict = dict() page_dict = dict()
page_dict['Image'] = str(i) page_dict['Image'] = str(i)
if i == 0: if i == 0:
page_dict['Type'] = PageType.FrontCover page_dict['Type'] = PageType.FrontCover
self.pages.append( page_dict ) self.pages.append(page_dict)
def getArchivePageIndex( self, pagenum ): def getArchivePageIndex(self, pagenum):
# convert the displayed page number to the page index of the file in the archive # convert the displayed page number to the page index of the file in
if pagenum < len( self.pages ): # the archive
return int( self.pages[pagenum]['Image'] ) if pagenum < len(self.pages):
return int(self.pages[pagenum]['Image'])
else: else:
return 0 return 0
def getCoverPageIndexList( self ): def getCoverPageIndexList(self):
# return a list of archive page indices of cover pages # return a list of archive page indices of cover pages
coverlist = [] coverlist = []
for p in self.pages: for p in self.pages:
if 'Type' in p and p['Type'] == PageType.FrontCover: if 'Type' in p and p['Type'] == PageType.FrontCover:
coverlist.append( int(p['Image'])) coverlist.append(int(p['Image']))
if len(coverlist) == 0: if len(coverlist) == 0:
coverlist.append( 0 ) coverlist.append(0)
return coverlist return coverlist
def addCredit( self, person, role, primary = False ): def addCredit(self, person, role, primary=False):
credit = dict() credit = dict()
credit['person'] = person credit['person'] = person
@ -229,8 +235,8 @@ class GenericMetadata:
# look to see if it's not already there... # look to see if it's not already there...
found = False found = False
for c in self.credits: for c in self.credits:
if ( c['person'].lower() == person.lower() and if (c['person'].lower() == person.lower() and
c['role'].lower() == role.lower() ): c['role'].lower() == role.lower()):
# no need to add it. just adjust the "primary" flag as needed # no need to add it. just adjust the "primary" flag as needed
c['primary'] = primary c['primary'] = primary
found = True found = True
@ -239,78 +245,77 @@ class GenericMetadata:
if not found: if not found:
self.credits.append(credit) self.credits.append(credit)
def __str__(self):
def __str__( self ):
vals = [] vals = []
if self.isEmpty: if self.isEmpty:
return "No metadata" return "No metadata"
def add_string( tag, val ): def add_string(tag, val):
if val is not None and u"{0}".format(val) != "": if val is not None and u"{0}".format(val) != "":
vals.append( (tag, val) ) vals.append((tag, val))
def add_attr_string( tag ): def add_attr_string(tag):
val = getattr(self,tag) val = getattr(self, tag)
add_string( tag, getattr(self,tag) ) add_string(tag, getattr(self, tag))
add_attr_string( "series" ) add_attr_string("series")
add_attr_string( "issue" ) add_attr_string("issue")
add_attr_string( "issueCount" ) add_attr_string("issueCount")
add_attr_string( "title" ) add_attr_string("title")
add_attr_string( "publisher" ) add_attr_string("publisher")
add_attr_string( "year" ) add_attr_string("year")
add_attr_string( "month" ) add_attr_string("month")
add_attr_string( "day" ) add_attr_string("day")
add_attr_string( "volume" ) add_attr_string("volume")
add_attr_string( "volumeCount" ) add_attr_string("volumeCount")
add_attr_string( "genre" ) add_attr_string("genre")
add_attr_string( "language" ) add_attr_string("language")
add_attr_string( "country" ) add_attr_string("country")
add_attr_string( "criticalRating" ) add_attr_string("criticalRating")
add_attr_string( "alternateSeries" ) add_attr_string("alternateSeries")
add_attr_string( "alternateNumber" ) add_attr_string("alternateNumber")
add_attr_string( "alternateCount" ) add_attr_string("alternateCount")
add_attr_string( "imprint" ) add_attr_string("imprint")
add_attr_string( "webLink" ) add_attr_string("webLink")
add_attr_string( "format" ) add_attr_string("format")
add_attr_string( "manga" ) add_attr_string("manga")
add_attr_string( "price" ) add_attr_string("price")
add_attr_string( "isVersionOf" ) add_attr_string("isVersionOf")
add_attr_string( "rights" ) add_attr_string("rights")
add_attr_string( "identifier" ) add_attr_string("identifier")
add_attr_string( "lastMark" ) add_attr_string("lastMark")
if self.blackAndWhite: if self.blackAndWhite:
add_attr_string( "blackAndWhite" ) add_attr_string("blackAndWhite")
add_attr_string( "maturityRating" ) add_attr_string("maturityRating")
add_attr_string( "storyArc" ) add_attr_string("storyArc")
add_attr_string( "seriesGroup" ) add_attr_string("seriesGroup")
add_attr_string( "scanInfo" ) add_attr_string("scanInfo")
add_attr_string( "characters" ) add_attr_string("characters")
add_attr_string( "teams" ) add_attr_string("teams")
add_attr_string( "locations" ) add_attr_string("locations")
add_attr_string( "comments" ) add_attr_string("comments")
add_attr_string( "notes" ) add_attr_string("notes")
add_string( "tags", utils.listToString( self.tags ) ) add_string("tags", utils.listToString(self.tags))
for c in self.credits: for c in self.credits:
primary = "" primary = ""
if c.has_key('primary') and c['primary']: if 'primary' in c and c['primary']:
primary = " [P]" primary = " [P]"
add_string( "credit", c['role']+": "+c['person'] + primary) add_string("credit", c['role'] + ": " + c['person'] + primary)
# find the longest field name # find the longest field name
flen = 0 flen = 0
for i in vals: for i in vals:
flen = max( flen, len(i[0]) ) flen = max(flen, len(i[0]))
flen += 1 flen += 1
#format the data nicely # format the data nicely
outstr = "" outstr = ""
fmt_str = u"{0: <" + str(flen) + "} {1}\n" fmt_str = u"{0: <" + str(flen) + "} {1}\n"
for i in vals: for i in vals:
outstr += fmt_str.format( i[0]+":", i[1] ) outstr += fmt_str.format(i[0] + ":", i[1])
return outstr return outstr

View File

@ -1,43 +1,36 @@
# coding=utf-8 # coding=utf-8
""" """Support for mixed digit/string type Issue field
Class for handling the odd permutations of an 'issue number' that the comics industry throws at us
e.g.:
"12"
"12.1"
"0"
"-1"
"5AU"
"100-2"
Class for handling the odd permutations of an 'issue number' that the
comics industry throws at us.
e.g.: "12", "12.1", "0", "-1", "5AU", "100-2"
""" """
""" # Copyright 2012-2014 Anthony Beville
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
You may obtain a copy of the License at # You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
limitations under the License. # limitations under the License.
"""
#import utils
#import math
#import re
import utils
import math
import re
class IssueString: class IssueString:
def __init__(self, text): def __init__(self, text):
# break up the issue number string into 2 parts: the numeric and suffix string. # break up the issue number string into 2 parts: the numeric and suffix string.
# ( assumes that the numeric portion is always first ) # (assumes that the numeric portion is always first)
self.num = None self.num = None
self.suffix = "" self.suffix = ""
@ -45,7 +38,7 @@ class IssueString:
if text is None: if text is None:
return return
if type(text) == int: if isinstance(text, int):
text = str(text) text = str(text)
if len(text) == 0: if len(text) == 0:
@ -53,7 +46,7 @@ class IssueString:
text = unicode(text) text = unicode(text)
#skip the minus sign if it's first # skip the minus sign if it's first
if text[0] == '-': if text[0] == '-':
start = 1 start = 1
else: else:
@ -61,9 +54,10 @@ class IssueString:
# if it's still not numeric at start skip it # if it's still not numeric at start skip it
if text[start].isdigit() or text[start] == ".": if text[start].isdigit() or text[start] == ".":
# walk through the string, look for split point (the first non-numeric) # walk through the string, look for split point (the first
# non-numeric)
decimal_count = 0 decimal_count = 0
for idx in range( start, len(text) ): for idx in range(start, len(text)):
if text[idx] not in "0123456789.": if text[idx] not in "0123456789.":
break break
# special case: also split on second "." # special case: also split on second "."
@ -76,10 +70,11 @@ class IssueString:
# move trailing numeric decimal to suffix # move trailing numeric decimal to suffix
# (only if there is other junk after ) # (only if there is other junk after )
if text[idx-1] == "." and len(text) != idx: if text[idx - 1] == "." and len(text) != idx:
idx = idx -1 idx = idx - 1
# if there is no numeric after the minus, make the minus part of the suffix # if there is no numeric after the minus, make the minus part of
# the suffix
if idx == 1 and start == 1: if idx == 1 and start == 1:
idx = 0 idx = 0
@ -87,15 +82,15 @@ class IssueString:
part2 = text[idx:len(text)] part2 = text[idx:len(text)]
if part1 != "": if part1 != "":
self.num = float( part1 ) self.num = float(part1)
self.suffix = part2 self.suffix = part2
else: else:
self.suffix = text self.suffix = text
#print "num: {0} suf: {1}".format(self.num, self.suffix) # print "num: {0} suf: {1}".format(self.num, self.suffix)
def asString( self, pad = 0 ): def asString(self, pad=0):
#return the float, left side zero-padded, with suffix attached # return the float, left side zero-padded, with suffix attached
if self.num is None: if self.num is None:
return self.suffix return self.suffix
@ -103,17 +98,17 @@ class IssueString:
num_f = abs(self.num) num_f = abs(self.num)
num_int = int( num_f ) num_int = int(num_f)
num_s = str( num_int ) num_s = str(num_int)
if float( num_int ) != num_f: if float(num_int) != num_f:
num_s = str( num_f ) num_s = str(num_f)
num_s += self.suffix num_s += self.suffix
# create padding # create padding
padding = "" padding = ""
l = len( str(num_int)) l = len(str(num_int))
if l < pad : if l < pad:
padding = "0" * (pad - l) padding = "0" * (pad - l)
num_s = padding + num_s num_s = padding + num_s
@ -122,8 +117,8 @@ class IssueString:
return num_s return num_s
def asFloat( self ): def asFloat(self):
#return the float, with no suffix # return the float, with no suffix
if self.suffix == u"½": if self.suffix == u"½":
if self.num is not None: if self.num is not None:
return self.num + .5 return self.num + .5
@ -131,10 +126,8 @@ class IssueString:
return .5 return .5
return self.num return self.num
def asInt(self):
    """Return the numeric part of the issue truncated to an int.

    Returns None when the issue has no numeric part (self.num is None).
    Truncation is toward zero, as performed by int().
    """
    return None if self.num is None else int(self.num)

View File

@ -1,25 +1,20 @@
# coding=utf-8 # coding=utf-8
"""Some generic utilities"""
""" # Copyright 2012-2014 Anthony Beville
Some generic utilities
"""
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
""" # http://www.apache.org/licenses/LICENSE-2.0
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License"); # Unless required by applicable law or agreed to in writing, software
you may not use this file except in compliance with the License. # distributed under the License is distributed on an "AS IS" BASIS,
You may obtain a copy of the License at # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys import sys
import os import os
import re import re
@ -31,13 +26,15 @@ import codecs
class UtilsVars:
    """Holder for module-wide state shared by the utility functions."""

    # Guard flag checked by fix_output_encoding(): the function does its
    # work only while this is False, then sets it to True.
    already_fixed_encoding = False
def get_actual_preferred_encoding():
    """Return the encoding name to use for console output.

    The locale's preferred encoding is used everywhere except on
    macOS ("Darwin"), where "utf-8" is returned regardless of what the
    locale reports.
    """
    detected = locale.getpreferredencoding()
    return "utf-8" if platform.system() == "Darwin" else detected
def fix_output_encoding( ):
def fix_output_encoding():
if not UtilsVars.already_fixed_encoding: if not UtilsVars.already_fixed_encoding:
# this reads the environment and inits the right locale # this reads the environment and inits the right locale
locale.setlocale(locale.LC_ALL, "") locale.setlocale(locale.LC_ALL, "")
@ -48,37 +45,38 @@ def fix_output_encoding( ):
sys.stderr = codecs.getwriter(preferred_encoding)(sys.stderr) sys.stderr = codecs.getwriter(preferred_encoding)(sys.stderr)
UtilsVars.already_fixed_encoding = True UtilsVars.already_fixed_encoding = True
def _to_text(value, encoding, errors="strict"):
    """Coerce a path-like value to a text (unicode) string.

    Byte strings are decoded with the given encoding and error policy;
    any other non-text object (probably a QString) is converted with the
    text type's constructor; text strings are returned unchanged.
    """
    # `unicode` on Python 2, `str` on Python 3 -- avoids naming a builtin
    # that only exists on one of them.
    text_type = type(u"")
    if isinstance(value, bytes):
        # `bytes` is `str` on Python 2, so this is the old "str" branch
        return value.decode(encoding, errors)
    if not isinstance(value, text_type):
        # it's probably a QString
        return text_type(value)
    return value


def get_recursive_filelist(pathlist):
    """Get a recursive list of all files under all path items in the list.

    pathlist: iterable of paths (byte strings, text strings, or objects
        convertible to text such as QString).

    Returns a list of text paths.  Every item that is a directory is
    walked recursively and replaced by the files found underneath it; any
    other item (including a path that does not exist) is passed through
    as-is after conversion to text.
    """
    filename_encoding = sys.getfilesystemencoding()

    filelist = []
    for p in pathlist:
        # Top-level paths are decoded strictly: an undecodable path raises
        # instead of being silently altered.
        p = _to_text(p, filename_encoding)

        if os.path.isdir(p):
            # path is a folder: walk it recursively, collecting all files
            # underneath
            for root, _dirs, files in os.walk(p):
                for f in files:
                    # Names found while walking are decoded leniently so
                    # one odd file name cannot abort the whole scan.
                    f = _to_text(f, filename_encoding, 'replace')
                    filelist.append(os.path.join(root, f))
        else:
            filelist.append(p)

    return filelist
def listToString( l ):
def listToString(l):
string = "" string = ""
if l is not None: if l is not None:
for item in l: for item in l:
@ -87,19 +85,23 @@ def listToString( l ):
string += item string += item
return string return string
def addtopath(dirname):
    """Prepend dirname to the PATH environment variable if it is missing.

    dirname: directory to add.  None and "" are ignored.

    PATH is left untouched when dirname is already one of its
    components.  An unset PATH is treated as empty instead of raising
    KeyError, and when PATH is empty it is set to dirname alone so that
    no trailing separator (an empty component) is introduced.
    """
    if dirname is None or dirname == "":
        return

    current_path = os.environ.get('PATH', '')

    # verify that path doesn't already contain the given dirname as a
    # whole component (at the start, at the end, in the middle, or alone)
    tmpdirname = re.escape(dirname)
    pattern = r"{sep}{dir}$|^{dir}{sep}|{sep}{dir}{sep}|^{dir}$".format(
        dir=tmpdirname,
        sep=os.pathsep)
    if re.search(pattern, current_path):
        return

    if current_path:
        os.environ['PATH'] = dirname + os.pathsep + current_path
    else:
        os.environ['PATH'] = dirname
# returns executable path, if it exists
def which(program): def which(program):
"""Returns path of the executable, if it exists"""
def is_exe(fpath): def is_exe(fpath):
return os.path.isfile(fpath) and os.access(fpath, os.X_OK) return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
@ -116,13 +118,14 @@ def which(program):
return None return None
def removearticles( text ):
def removearticles(text):
text = text.lower() text = text.lower()
articles = ['and', 'the', 'a', '&', 'issue' ] articles = ['and', 'the', 'a', '&', 'issue']
newText = '' newText = ''
for word in text.split(' '): for word in text.split(' '):
if word not in articles: if word not in articles:
newText += word+' ' newText += word + ' '
newText = newText[:-1] newText = newText[:-1]
@ -131,8 +134,8 @@ def removearticles( text ):
newText = newText.replace(",", "") newText = newText.replace(",", "")
newText = newText.replace("-", " ") newText = newText.replace("-", " ")
# since the CV api changed, searches for series names with periods # since the CV API changed, searches for series names with periods
# now explicity require the period to be in the search key, # now explicitly require the period to be in the search key,
# so the line below is removed (for now) # so the line below is removed (for now)
#newText = newText.replace(".", "") #newText = newText.replace(".", "")
@ -141,17 +144,19 @@ def removearticles( text ):
def unique_file(file_name):
    """Return a path based on file_name that does not exist yet.

    If nothing exists at file_name (broken symlinks count as existing),
    it is returned unchanged.  Otherwise " (1)", " (2)", ... is inserted
    before the extension until an unused name is found.
    """
    # splitext gives ('/path/file', '.ext')
    stem, extension = os.path.splitext(file_name)

    candidate = file_name
    attempt = 0
    while os.path.lexists(candidate):
        attempt += 1
        candidate = stem + ' (' + str(attempt) + ')' + extension
    return candidate
# -o- coding: utf-8 -o- # -o- coding: utf-8 -o-
# ISO639 python dict # ISO639 python dict
# oficial list in http://www.loc.gov/standards/iso639-2/php/code_list.php # official list in http://www.loc.gov/standards/iso639-2/php/code_list.php
lang_dict = { lang_dict = {
'ab': 'Abkhaz', 'ab': 'Abkhaz',
@ -576,22 +581,12 @@ countries = [
] ]
def getLanguageDict():
    """Return the module-level lang_dict mapping of language codes to names.

    The dict object itself is returned, not a copy.
    """
    return lang_dict
def getLanguageFromISO(iso):
    """Return the language name for the given code from lang_dict.

    Returns None when iso is None.  A code that is not a key of
    lang_dict raises KeyError.
    """
    return None if iso is None else lang_dict[iso]