Merge branch 'fcanc-master'

This commit is contained in:
davide-romanini 2015-03-01 15:44:11 +01:00
commit d84110ccb2
8 changed files with 2037 additions and 1960 deletions

View File

@ -1,30 +1,28 @@
"""
A python class to encapsulate CoMet data
"""
"""A class to encapsulate CoMet data"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET
#from datetime import datetime
#from pprint import pprint
#import zipfile
from genericmetadata import GenericMetadata
import utils
class CoMet:
writer_synonyms = ['writer', 'plotter', 'scripter']
@ -72,7 +70,8 @@ class CoMet:
root = ET.Element("comet")
root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/"
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd"
root.attrib[
'xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd"
# helper func
def assign(comet_entry, md_entry):
@ -116,7 +115,8 @@ class CoMet:
assign('coverImage', md.coverImage)
# need to specially process the credits, since they are structured differently than CIX
# need to specially process the credits, since they are structured
# differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
@ -129,26 +129,46 @@ class CoMet:
for credit in metadata.credits:
if credit['role'].lower() in set(self.writer_synonyms):
ET.SubElement(root, 'writer').text = u"{0}".format(credit['person'])
ET.SubElement(
root,
'writer').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set(self.penciller_synonyms):
ET.SubElement(root, 'penciller').text = u"{0}".format(credit['person'])
ET.SubElement(
root,
'penciller').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set(self.inker_synonyms):
ET.SubElement(root, 'inker').text = u"{0}".format(credit['person'])
ET.SubElement(
root,
'inker').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set(self.colorist_synonyms):
ET.SubElement(root, 'colorist').text = u"{0}".format(credit['person'])
ET.SubElement(
root,
'colorist').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set(self.letterer_synonyms):
ET.SubElement(root, 'letterer').text = u"{0}".format(credit['person'])
ET.SubElement(
root,
'letterer').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set(self.cover_synonyms):
ET.SubElement(root, 'coverDesigner').text = u"{0}".format(credit['person'])
ET.SubElement(
root,
'coverDesigner').text = u"{0}".format(
credit['person'])
if credit['role'].lower() in set(self.editor_synonyms):
ET.SubElement(root, 'editor').text = u"{0}".format(credit['person'])
ET.SubElement(
root,
'editor').text = u"{0}".format(
credit['person'])
# self pretty-print
self.indent(root)
@ -157,7 +177,6 @@ class CoMet:
tree = ET.ElementTree(root)
return tree
def convertXMLToMetadata(self, tree):
root = tree.getroot()
@ -229,7 +248,6 @@ class CoMet:
if n.tag == 'coverDesigner':
metadata.addCredit(n.text.strip(), "Cover")
metadata.isEmpty = False
return metadata
@ -246,7 +264,6 @@ class CoMet:
return True
def writeToExternalFile(self, filename, metadata):
tree = self.convertMetadataToXML(self, metadata)
@ -257,4 +274,3 @@ class CoMet:
tree = ET.parse(filename)
return self.convertXMLToMetadata(tree)

View File

@ -1,22 +1,18 @@
"""
A python class to represent a single comic, be it file or folder of images
"""
"""A class to represent a single comic, be it file or folder of images"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import zipfile
import os
@ -25,27 +21,57 @@ import sys
import tempfile
import subprocess
import platform
import locale
import shutil
import ctypes
import time
import StringIO
#import io
#import locale
#import shutil
from natsort import natsorted
from PyPDF2 import PdfFileReader
from unrar import rarfile
from unrar import unrarlib
import unrar.constants
import ctypes
import io
from unrar import constants
#import UnRAR2
#from UnRAR2.rar_exceptions import *
if platform.system() == "Windows":
import _subprocess
try:
import Image
pil_available = True
except ImportError:
pil_available = False
from comicinfoxml import ComicInfoXml
from comicbookinfo import ComicBookInfo
from comet import CoMet
from genericmetadata import GenericMetadata, PageType
from filenameparser import FileNameParser
#from settings import ComicTaggerSettings
sys.path.insert(0, os.path.abspath("."))
class OpenableRarFile(rarfile.RarFile):
def open(self, member):
# print "opening %s..." % member
# based on https://github.com/matiasb/python-unrar/pull/4/files
res = []
if isinstance(member, rarfile.RarInfo):
member = member.filename
archive = unrarlib.RAROpenArchiveDataEx(self.filename, mode=constants.RAR_OM_EXTRACT)
archive = unrarlib.RAROpenArchiveDataEx(
self.filename,
mode=constants.RAR_OM_EXTRACT)
handle = self._open(archive)
found, buf = False, []
def _callback(msg, UserData, P1, P2):
if msg == constants.UCM_PROCESSDATA:
data = (ctypes.c_char * P2).from_address(P1).raw
@ -56,7 +82,8 @@ class OpenableRarFile(rarfile.RarFile):
try:
rarinfo = self._read_header(handle)
while rarinfo is not None:
#print "checking rar archive %s against %s" % (rarinfo.filename, member)
# print "checking rar archive %s against %s" % (
# rarinfo.filename, member)
if rarinfo.filename == member:
self._process_current(handle, constants.RAR_TEST)
found = True
@ -72,37 +99,17 @@ class OpenableRarFile(rarfile.RarFile):
return ''.join(buf)
if platform.system() == "Windows":
import _subprocess
import time
import StringIO
try:
import Image
pil_available = True
except ImportError:
pil_available = False
sys.path.insert(0, os.path.abspath(".") )
#import UnRAR2
#from UnRAR2.rar_exceptions import *
#from settings import ComicTaggerSettings
from comicinfoxml import ComicInfoXml
from comicbookinfo import ComicBookInfo
from comet import CoMet
from genericmetadata import GenericMetadata, PageType
from filenameparser import FileNameParser
from PyPDF2 import PdfFileReader
class MetaDataStyle:

    """Integer identifiers for the metadata styles handled in this module.

    Each identifier doubles as the index of the matching label in ``name``.
    """

    # Identifiers are consecutive so they can index directly into ``name``.
    CBI, CIX, COMET = 0, 1, 2

    # Labels, positioned to line up with the identifiers above.
    name = ['ComicBookLover', 'ComicRack', 'CoMet']
class ZipArchiver:
"""ZIP implementation"""
def __init__(self, path):
self.path = path
@ -122,12 +129,14 @@ class ZipArchiver:
try:
data = zf.read(archive_file)
except zipfile.BadZipfile as e:
print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file)
print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(
e, self.path, archive_file)
zf.close()
raise IOError
except Exception as e:
zf.close()
print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file)
print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(
e, self.path, archive_file)
raise IOError
finally:
zf.close()
@ -149,7 +158,10 @@ class ZipArchiver:
self.rebuildZipFile([archive_file])
# now just add the archive file as a new one
zf = zipfile.ZipFile(self.path, mode='a', compression=zipfile.ZIP_DEFLATED )
zf = zipfile.ZipFile(
self.path,
mode='a',
compression=zipfile.ZIP_DEFLATED)
zf.writestr(archive_file, data)
zf.close()
return True
@ -163,14 +175,18 @@ class ZipArchiver:
zf.close()
return namelist
except Exception as e:
print >> sys.stderr, u"Unable to get zipfile list [{0}]: {1}".format(e, self.path)
print >> sys.stderr, u"Unable to get zipfile list [{0}]: {1}".format(
e, self.path)
return []
# zip helper func
def rebuildZipFile(self, exclude_list):
"""Zip helper func
# this recompresses the zip archive, without the files in the exclude_list
#print ">> sys.stderr, Rebuilding zip {0} without {1}".format( self.path, exclude_list )
This recompresses the zip archive, without the files in the exclude_list
"""
# print ">> sys.stderr, Rebuilding zip {0} without {1}".format(
# self.path, exclude_list )
# generate temp file
tmp_fd, tmp_name = tempfile.mkstemp(dir=os.path.dirname(self.path))
@ -193,7 +209,6 @@ class ZipArchiver:
os.remove(self.path)
os.rename(tmp_name, self.path)
def writeZipComment(self, filename, comment):
"""
This is a custom function for writing a comment to a zip file,
@ -240,7 +255,9 @@ class ZipArchiver:
# Pack the length of the comment string
format = "H" # one 2-byte integer
comment_length = struct.pack(format, len(comment)) # pack integer in a binary string
comment_length = struct.pack(
format,
len(comment)) # pack integer in a binary string
# write out the length
fo.write(comment_length)
@ -258,7 +275,8 @@ class ZipArchiver:
return True
def copyFromArchive(self, otherArchive):
# Replace the current zip with one copied from another archive
"""Replace the current zip with one copied from another archive"""
try:
zout = zipfile.ZipFile(self.path, 'w')
for fname in otherArchive.getArchiveFilenameList():
@ -273,18 +291,21 @@ class ZipArchiver:
if not self.writeZipComment(self.path, comment):
return False
except Exception as e:
print >> sys.stderr, u"Error while copying to {0}: {1}".format(self.path, e)
print >> sys.stderr, u"Error while copying to {0}: {1}".format(
self.path, e)
return False
else:
return True
#------------------------------------------
# RAR implementation
class RarArchiver:
"""RAR implementation"""
devnull = None
def __init__(self, path, rar_exe_path):
self.path = path
self.rar_exe_path = rar_exe_path
@ -321,7 +342,12 @@ class RarArchiver:
working_dir = os.path.dirname(os.path.abspath(self.path))
# use external program to write comment to Rar archive
subprocess.call([self.rar_exe_path, 'c', '-w' + working_dir , '-c-', '-z' + tmp_name, self.path],
subprocess.call([self.rar_exe_path,
'c',
'-w' + working_dir,
'-c-',
'-z' + tmp_name,
self.path],
startupinfo=self.startupinfo,
stdout=RarArchiver.devnull)
@ -356,28 +382,31 @@ class RarArchiver:
#data = open(tmp_file).read()
entries = [(rarc.getinfo(archive_file), data)]
#shutil.rmtree(tmp_folder, ignore_errors=True)
#entries = rarc.read_files( archive_file )
if entries[0][0].file_size != len(entries[0][1]):
print >> sys.stderr, u"readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format(
entries[0][0].file_size,len(entries[0][1]), self.path, archive_file, tries)
entries[0][0].file_size, len(
entries[0][1]), self.path, archive_file, tries)
continue
except (OSError, IOError) as e:
print >> sys.stderr, u"readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries)
print >> sys.stderr, u"readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(
str(e), self.path, archive_file, tries)
time.sleep(1)
except Exception as e:
print >> sys.stderr, u"Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries)
print >> sys.stderr, u"Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(
str(e), self.path, archive_file, tries)
break
else:
# Success
# entries is a list of tuples: (rarinfo, filedata)
if tries > 1:
print >> sys.stderr, u"Attempted read_files() {0} times".format(tries)
print >> sys.stderr, u"Attempted read_files() {0} times".format(
tries)
if (len(entries) == 1):
return entries[0][1]
else:
@ -385,8 +414,6 @@ class RarArchiver:
raise IOError
def writeArchiveFile(self, archive_file, data):
if self.rar_exe_path is not None:
@ -404,7 +431,13 @@ class RarArchiver:
f.close()
# use external program to write file to Rar archive
subprocess.call([self.rar_exe_path, 'a', '-w' + working_dir ,'-c-', '-ep', self.path, tmp_file],
subprocess.call([self.rar_exe_path,
'a',
'-w' + working_dir,
'-c-',
'-ep',
self.path,
tmp_file],
startupinfo=self.startupinfo,
stdout=RarArchiver.devnull)
@ -423,7 +456,11 @@ class RarArchiver:
if self.rar_exe_path is not None:
try:
# use external program to remove file from Rar archive
subprocess.call([self.rar_exe_path, 'd','-c-', self.path, archive_file],
subprocess.call([self.rar_exe_path,
'd',
'-c-',
self.path,
archive_file],
startupinfo=self.startupinfo,
stdout=RarArchiver.devnull)
@ -453,7 +490,8 @@ class RarArchiver:
namelist.append(item.filename)
except (OSError, IOError) as e:
print >> sys.stderr, u"getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries)
print >> sys.stderr, u"getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(
str(e), self.path, tries)
time.sleep(1)
else:
@ -462,7 +500,6 @@ class RarArchiver:
raise e
def getRARObj(self):
tries = 0
while tries < 7:
@ -472,7 +509,8 @@ class RarArchiver:
rarc = OpenableRarFile(self.path)
except (OSError, IOError) as e:
print >> sys.stderr, u"getRARObj(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries)
print >> sys.stderr, u"getRARObj(): [{0}] {1} attempt#{2}".format(
str(e), self.path, tries)
time.sleep(1)
else:
@ -481,10 +519,11 @@ class RarArchiver:
raise e
#------------------------------------------
# Folder implementation
class FolderArchiver:
"""Folder implementation"""
def __init__(self, path):
self.path = path
self.comment_file_name = "ComicTaggerFolderComment.txt"
@ -544,40 +583,54 @@ class FolderArchiver:
return itemlist
#------------------------------------------
# Unknown implementation
class UnknownArchiver:

    """Null-object archiver: reads yield empty results, writes report failure.

    It exposes the same method names as ``PdfArchiver`` in this file, but
    every method returns a constant; ``path`` is stored and never opened.
    """

    def __init__(self, path):
        # Stored only so the attribute exists; no method here reads it.
        self.path = path

    def getArchiveComment(self):
        """Return an empty comment string."""
        return ""

    def setArchiveComment(self, comment):
        """Ignore ``comment`` and return False, since nothing is written."""
        return False

    def readArchiveFile(self, archive_file=None):
        """Return an empty string for any requested member.

        ``archive_file`` is accepted and ignored so that this method can be
        called with a member name, matching ``writeArchiveFile`` and
        ``removeArchiveFile`` below and ``PdfArchiver.readArchiveFile``.
        Without the parameter, passing a name raised TypeError. It defaults
        to None so zero-argument calls keep working.
        """
        return ""

    def writeArchiveFile(self, archive_file, data):
        """Ignore the arguments and return False, since nothing is written."""
        return False

    def removeArchiveFile(self, archive_file):
        """Ignore ``archive_file`` and return False, since nothing is removed."""
        return False

    def getArchiveFilenameList(self):
        """Return an empty list of member names."""
        return []
class PdfArchiver:
def __init__(self, path):
self.path = path
def getArchiveComment(self):
return ""
def setArchiveComment(self, comment):
return False
def readArchiveFile(self, page_num):
return subprocess.check_output(['mudraw', '-o','-', self.path, str(int(os.path.basename(page_num)[:-4]))])
return subprocess.check_output(
['mudraw', '-o', '-', self.path, str(int(os.path.basename(page_num)[:-4]))])
def writeArchiveFile(self, archive_file, data):
return False
def removeArchiveFile(self, archive_file):
return False
def getArchiveFilenameList(self):
out = []
pdf = PdfFileReader(open(self.path, 'rb'))
@ -586,6 +639,8 @@ class PdfArchiver:
return out
#------------------------------------------------------------------
class ComicArchive:
logo_data = None
@ -611,7 +666,9 @@ class ComicArchive:
if ext == ".cbr" or ext == ".rar":
if self.rarTest():
self.archive_type = self.ArchiveType.Rar
self.archiver = RarArchiver( self.path, rar_exe_path=self.rar_exe_path )
self.archiver = RarArchiver(
self.path,
rar_exe_path=self.rar_exe_path)
elif self.zipTest():
self.archive_type = self.ArchiveType.Zip
@ -623,7 +680,9 @@ class ComicArchive:
elif self.rarTest():
self.archive_type = self.ArchiveType.Rar
self.archiver = RarArchiver( self.path, rar_exe_path=self.rar_exe_path )
self.archiver = RarArchiver(
self.path,
rar_exe_path=self.rar_exe_path)
elif os.path.basename(self.path)[-3:] == 'pdf':
self.archive_type = self.ArchiveType.Pdf
self.archiver = PdfArchiver(self.path)
@ -634,8 +693,9 @@ class ComicArchive:
with open(fname, 'rb') as fd:
ComicArchive.logo_data = fd.read()
# Clears the cached data
def resetCache(self):
"""Clears the cached data"""
self.has_cix = None
self.has_cbi = None
self.has_comet = None
@ -665,14 +725,15 @@ class ComicArchive:
else:
return True
def isZip(self):
    """Return whether ``self.archive_type`` equals ``ArchiveType.Zip``."""
    return self.archive_type == self.ArchiveType.Zip
def isRar(self):
    """Return whether ``self.archive_type`` equals ``ArchiveType.Rar``."""
    return self.archive_type == self.ArchiveType.Rar
def isPdf(self):
    """Return whether ``self.archive_type`` equals ``ArchiveType.Pdf``."""
    return self.archive_type == self.ArchiveType.Pdf
def isFolder(self):
    """Return whether ``self.archive_type`` equals ``ArchiveType.Folder``."""
    return self.archive_type == self.ArchiveType.Folder
@ -705,7 +766,8 @@ class ComicArchive:
ext = os.path.splitext(self.path)[1].lower()
if (
( self.isZip() or self.isRar() or self.isPdf()) #or self.isFolder() )
# or self.isFolder() )
(self.isZip() or self.isRar() or self.isPdf())
and
(self.getNumberOfPages() > 0)
@ -736,7 +798,6 @@ class ComicArchive:
retcode = self.writeCoMet(metadata)
return retcode
def hasMetadata(self, style):
if style == MetaDataStyle.CIX:
@ -803,13 +864,18 @@ class ComicArchive:
for name in name_list:
fname = os.path.split(name)[1]
length = len(fname)
if length_buckets.has_key( length ):
if length in length_buckets:
length_buckets[length] += 1
else:
length_buckets[length] = 1
# sort by most common
sorted_buckets = sorted(length_buckets.iteritems(), key=lambda (k,v): (v,k), reverse=True)
sorted_buckets = sorted(
length_buckets.iteritems(),
key=lambda k_v: (
k_v[1],
k_v[0]),
reverse=True)
# statistical mode occurrence is first
mode_length = sorted_buckets[0][0]
@ -829,20 +895,21 @@ class ComicArchive:
if len(final_name) > mode_length:
scanner_page_index = count - 1
# see if the last page doesn't start with the same prefix as most others
# see if the last page doesn't start with the same prefix as most
# others
elif not final_name.startswith(prefix):
scanner_page_index = count - 1
return scanner_page_index
def getPageNameList(self, sort_list=True):
if self.page_list is None:
# get the list file names in the archive, and sort
files = self.archiver.getArchiveFilenameList()
# seems like some archive creators are on Windows, and don't know about case-sensitivity!
# seems like some archive creators are on Windows, and don't know
# about case-sensitivity!
if sort_list:
def keyfunc(k):
# hack to account for some weird scanner ID pages
@ -856,7 +923,11 @@ class ComicArchive:
# make a sub-list of image files
self.page_list = []
for name in files:
if ( name[-4:].lower() in [ ".jpg", "jpeg", ".png", ".gif", "webp" ] and os.path.basename(name)[0] != "." ):
if (name[-4:].lower() in [".jpg",
"jpeg",
".png",
".gif",
"webp"] and os.path.basename(name)[0] != "."):
self.page_list.append(name)
return self.page_list
@ -888,7 +959,8 @@ class ComicArchive:
def hasCBI(self):
if self.has_cbi is None:
#if ( not ( self.isZip() or self.isRar()) or not self.seemsToBeAComicArchive() ):
# if ( not ( self.isZip() or self.isRar()) or not
# self.seemsToBeAComicArchive() ):
if not self.seemsToBeAComicArchive():
self.has_cbi = False
else:
@ -955,7 +1027,9 @@ class ComicArchive:
if metadata is not None:
self.applyArchiveInfoToMetadata(metadata, calc_page_sizes=True)
cix_string = ComicInfoXml().stringFromMetadata(metadata)
write_success = self.archiver.writeArchiveFile( self.ci_xml_filename, cix_string )
write_success = self.archiver.writeArchiveFile(
self.ci_xml_filename,
cix_string)
if write_success:
self.has_cix = True
self.cix_md = metadata
@ -966,7 +1040,8 @@ class ComicArchive:
def removeCIX(self):
if self.hasCIX():
write_success = self.archiver.removeArchiveFile( self.ci_xml_filename )
write_success = self.archiver.removeArchiveFile(
self.ci_xml_filename)
if write_success:
self.has_cix = False
self.cix_md = None
@ -974,7 +1049,6 @@ class ComicArchive:
return write_success
return True
def hasCIX(self):
if self.has_cix is None:
@ -986,7 +1060,6 @@ class ComicArchive:
self.has_cix = False
return self.has_cix
def readCoMet(self):
if self.comet_md is None:
raw_comet = self.readRawCoMet()
@ -1007,7 +1080,8 @@ class ComicArchive:
break
if cover_idx != 0:
del (self.comet_md.pages[0]['Type'])
self.comet_md.pages[ cover_idx ]['Type'] = PageType.FrontCover
self.comet_md.pages[cover_idx][
'Type'] = PageType.FrontCover
return self.comet_md
@ -1036,7 +1110,9 @@ class ComicArchive:
metadata.coverImage = self.getPageName(cover_idx)
comet_string = CoMet().stringFromMetadata(metadata)
write_success = self.archiver.writeArchiveFile( self.comet_filename, comet_string )
write_success = self.archiver.writeArchiveFile(
self.comet_filename,
comet_string)
if write_success:
self.has_comet = True
self.comet_md = metadata
@ -1047,7 +1123,8 @@ class ComicArchive:
def removeCoMet(self):
if self.hasCoMet():
write_success = self.archiver.removeArchiveFile( self.comet_filename )
write_success = self.archiver.removeArchiveFile(
self.comet_filename)
if write_success:
self.has_comet = False
self.comet_md = None
@ -1061,7 +1138,8 @@ class ComicArchive:
if not self.seemsToBeAComicArchive():
return self.has_comet
#look at all xml files in root, and search for CoMet data, get first
# look at all xml files in root, and search for CoMet data, get
# first
for n in self.archiver.getArchiveFilenameList():
if (os.path.dirname(n) == "" and
os.path.splitext(n)[1].lower() == '.xml'):
@ -1079,8 +1157,6 @@ class ComicArchive:
return self.has_comet
def applyArchiveInfoToMetadata(self, md, calc_page_sizes=False):
md.pageCount = self.getNumberOfPages()
@ -1106,8 +1182,6 @@ class ComicArchive:
data = self.getPage(idx)
p['ImageSize'] = str(len(data))
def metadataFromFilename(self, parse_scan_info=True):
metadata = GenericMetadata()
@ -1140,4 +1214,3 @@ class ComicArchive:
zip_archiver = ZipArchiver(zipfilename)
return zip_archiver.copyFromArchive(self.archiver)

View File

@ -1,34 +1,29 @@
"""
A python class to encapsulate the ComicBookInfo data
"""
"""A class to encapsulate the ComicBookInfo data"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from datetime import datetime
import zipfile
#import zipfile
from genericmetadata import GenericMetadata
import utils
#import ctversion
class ComicBookInfo:
class ComicBookInfo:
def metadataFromString(self, string):
@ -88,8 +83,8 @@ class ComicBookInfo:
cbi_container = self.createJSONDictionary(metadata)
return json.dumps(cbi_container)
#verify that the string actually contains CBI data in JSON format
def validateString(self, string):
"""Verify that the string actually contains CBI data in JSON format"""
try:
cbi_container = json.loads(string)
@ -98,10 +93,9 @@ class ComicBookInfo:
return ('ComicBookInfo/1.0' in cbi_container)
def createJSONDictionary(self, metadata):
"""Create the dictionary that we will convert to JSON text"""
# Create the dictionary that we will convert to JSON text
cbi = dict()
cbi_container = {'appID': 'ComicTagger/' + '1.0.0', # ctversion.version,
'lastModified': str(datetime.now()),
@ -141,7 +135,6 @@ class ComicBookInfo:
return cbi_container
def writeToExternalFile(self, filename, metadata):
cbi_container = self.createJSONDictionary(metadata)
@ -149,4 +142,3 @@ class ComicBookInfo:
f = open(filename, 'w')
f.write(json.dumps(cbi_container, indent=4))
f.close

View File

@ -1,30 +1,28 @@
"""
A python class to encapsulate ComicRack's ComicInfo.xml data
"""
"""A class to encapsulate ComicRack's ComicInfo.xml data"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET
#from datetime import datetime
#from pprint import pprint
#import zipfile
from genericmetadata import GenericMetadata
import utils
class ComicInfoXml:
writer_synonyms = ['writer', 'plotter', 'scripter']
@ -35,7 +33,6 @@ class ComicInfoXml:
cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist']
editor_synonyms = ['editor']
def getParseableCredits(self):
parsable_credits = []
parsable_credits.extend(self.writer_synonyms)
@ -85,6 +82,7 @@ class ComicInfoXml:
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema"
# helper func
def assign(cix_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry)
@ -105,7 +103,8 @@ class ComicInfoXml:
assign('Month', md.month)
assign('Day', md.day)
# need to specially process the credits, since they are structured differently than CIX
# need to specially process the credits, since they are structured
# differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
@ -114,7 +113,8 @@ class ComicInfoXml:
credit_cover_list = list()
credit_editor_list = list()
# first, loop thru credits, and build a list for each role that CIX supports
# first, loop thru credits, and build a list for each role that CIX
# supports
for credit in metadata.credits:
if credit['role'].lower() in set(self.writer_synonyms):
@ -197,7 +197,6 @@ class ComicInfoXml:
tree = ET.ElementTree(root)
return tree
def convertXMLToMetadata(self, tree):
root = tree.getroot()
@ -209,7 +208,6 @@ class ComicInfoXml:
metadata = GenericMetadata()
md = metadata
# Helper function
def xlate(tag):
node = root.find(tag)
@ -290,4 +288,3 @@ class ComicInfoXml:
tree = ET.parse(filename)
return self.convertXMLToMetadata(tree)

View File

@ -1,26 +1,21 @@
"""
Functions for parsing comic info from filename
"""Functions for parsing comic info from filename
This should probably be re-written, but, well, it mostly works!
"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Some portions of this code were modified from pyComicMetaThis project
# http://code.google.com/p/pycomicmetathis/
@ -29,6 +24,7 @@ import re
import os
from urllib import unquote
class FileNameParser:
def repl(self, m):
@ -43,13 +39,12 @@ class FileNameParser:
string = re.sub(ph, self.repl, string)
return string # .strip()
def getIssueCount(self, filename, issue_end):
count = ""
filename = filename[issue_end:]
# replace any name seperators with spaces
# replace any name separators with spaces
tmpstr = self.fixSpaces(filename)
found = False
@ -64,28 +59,30 @@ class FileNameParser:
count = match.group()
found = True
count = count.lstrip("0")
return count
def getIssueNumber(self, filename):
# Returns a tuple of issue number string, and start and end indexs in the filename
# (The indexes will be used to split the string up for further parsing)
"""Returns a tuple of issue number string, and start and end indexes in the filename
(The indexes will be used to split the string up for further parsing)
"""
found = False
issue = ''
start = 0
end = 0
# first, look for multiple "--", this means it's formatted differently from most:
# first, look for multiple "--", this means it's formatted differently
# from most:
if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
# the pattern seems to be that anything to left of the first "--"
# is the series name followed by issue
filename = re.sub("--.*", self.repl, filename)
elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
# the pattern seems to be that anything to left of the first "__"
# is the series name followed by issue
filename = re.sub("__.*", self.repl, filename)
filename = filename.replace("+", " ")
@ -94,10 +91,11 @@ class FileNameParser:
filename = re.sub("\(.*?\)", self.repl, filename)
filename = re.sub("\[.*?\]", self.repl, filename)
# replace any name seperators with spaces
# replace any name separators with spaces
filename = self.fixSpaces(filename)
# remove any "of NN" phrase with spaces (problem: this could break on some titles)
# remove any "of NN" phrase with spaces (problem: this could break on
# some titles)
filename = re.sub("of [\d]+", self.repl, filename)
# print u"[{0}]".format(filename)
@ -119,14 +117,15 @@ class FileNameParser:
# Now try to search for the likely issue number word in the list
# first look for a word with "#" followed by digits with optional sufix
# first look for a word with "#" followed by digits with optional suffix
# this is almost certainly the issue number
for w in reversed(word_list):
if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
found = True
break
# same as above but w/o a '#', and only look at the last word in the list
# same as above but w/o a '#', and only look at the last word in the
# list
if not found:
w = word_list[-1]
if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
@ -149,19 +148,20 @@ class FileNameParser:
return issue, start, end
def getSeriesName(self, filename, issue_start):
# use the issue number string index to split the filename string
"""Use the issue number string index to split the filename string"""
if issue_start != 0:
filename = filename[:issue_start]
# in case there is no issue number, remove some obvious stuff
if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
# the pattern seems to be that anything to left of the first "--"
# is the series name followed by issue
filename = re.sub("--.*", self.repl, filename)
elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
# the pattern seems to be that anything to left of the first "__"
# is the series name followed by issue
filename = re.sub("__.*", self.repl, filename)
filename = filename.replace("+", " ")
@ -218,13 +218,13 @@ class FileNameParser:
match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
if match:
year = match.group()
# remove non-numerics
# remove non-digits
year = re.sub("[^0-9]", "", year)
return year
def getRemainder(self, filename, year, count, volume, issue_end):
"""Make a guess at where the non-interesting stuff begins"""
#make a guess at where the non-interesting stuff begins
remainder = ""
if "--" in filename:
@ -243,7 +243,9 @@ class FileNameParser:
remainder = remainder.replace("of " + count, "", 1)
remainder = remainder.replace("()", "")
remainder = remainder.replace(" "," ") # cleans some whitespace mess
remainder = remainder.replace(
" ",
" ") # cleans some whitespace mess
return remainder.strip()
@ -258,7 +260,7 @@ class FileNameParser:
# url decode, just in case
filename = unquote(filename)
# sometimes archives get messed up names from too many decodings
# sometimes archives get messed up names from too many decodes
# often url encodings will break and leave "_28" and "_29" in place
# of "(" and ")" see if there are a number of these, and replace them
if filename.count("_28") > 1 and filename.count("_29") > 1:
@ -268,14 +270,18 @@ class FileNameParser:
self.issue, issue_start, issue_end = self.getIssueNumber(filename)
self.series, self.volume = self.getSeriesName(filename, issue_start)
# provides proper value when the filename doesn't have an issue number
if issue_end == 0:
issue_end = len(self.series)
self.year = self.getYear(filename, issue_end)
self.issue_count = self.getIssueCount(filename, issue_end)
self.remainder = self.getRemainder( filename, self.year, self.issue_count, self.volume, issue_end )
self.remainder = self.getRemainder(
filename,
self.year,
self.issue_count,
self.volume,
issue_end)
if self.issue != "":
# strip off leading zeros

View File

@ -1,5 +1,4 @@
"""
A python class for internal metadata storage
"""A class for internal metadata storage
The goal of this class is to handle ALL the data that might come from various
tagging schemes and databases, such as ComicVine or GCD. This makes conversion
@ -7,26 +6,30 @@
"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import utils
# These page info classes are exactly the same as the CIX scheme, since it's unique
class PageType:
"""
These page info classes are exactly the same as the CIX scheme, since
it's unique
"""
FrontCover = "FrontCover"
InnerCover = "InnerCover"
Roundup = "Roundup"
@ -50,6 +53,7 @@ class PageInfo:
ImageHeight = 0
"""
class GenericMetadata:
def __init__(self):
@ -107,13 +111,15 @@ class GenericMetadata:
self.coverImage = None
def overlay(self, new_md):
# Overlay a metadata object on this one
# that is, when the new object has non-None
# values, over-write them to this one
"""Overlay a metadata object on this one
That is, when the new object has non-None values, over-write them
to this one.
"""
def assign(cur, new):
if new is not None:
if type(new) == str and len(new) == 0:
if isinstance(new, str) and len(new) == 0:
setattr(self, cur, None)
else:
setattr(self, cur, new)
@ -173,10 +179,9 @@ class GenericMetadata:
if len(new_md.pages) > 0:
assign("pages", new_md.pages)
def overlayCredits(self, new_credits):
for c in new_credits:
if c.has_key('primary') and c['primary']:
if 'primary' in c and c['primary']:
primary = True
else:
primary = False
@ -200,7 +205,8 @@ class GenericMetadata:
self.pages.append(page_dict)
def getArchivePageIndex(self, pagenum):
# convert the displayed page number to the page index of the file in the archive
# convert the displayed page number to the page index of the file in
# the archive
if pagenum < len(self.pages):
return int(self.pages[pagenum]['Image'])
else:
@ -239,7 +245,6 @@ class GenericMetadata:
if not found:
self.credits.append(credit)
def __str__(self):
vals = []
if self.isEmpty:
@ -297,7 +302,7 @@ class GenericMetadata:
for c in self.credits:
primary = ""
if c.has_key('primary') and c['primary']:
if 'primary' in c and c['primary']:
primary = " [P]"
add_string("credit", c['role'] + ": " + c['person'] + primary)

View File

@ -1,39 +1,32 @@
# coding=utf-8
"""
Class for handling the odd permutations of an 'issue number' that the comics industry throws at us
e.g.:
"12"
"12.1"
"0"
"-1"
"5AU"
"100-2"
"""Support for mixed digit/string type Issue field
Class for handling the odd permutations of an 'issue number' that the
comics industry throws at us.
e.g.: "12", "12.1", "0", "-1", "5AU", "100-2"
"""
"""
Copyright 2012-2014 Anthony Beville
# Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#import utils
#import math
#import re
import utils
import math
import re
class IssueString:
def __init__(self, text):
# break up the issue number string into 2 parts: the numeric and suffix string.
@ -45,7 +38,7 @@ class IssueString:
if text is None:
return
if type(text) == int:
if isinstance(text, int):
text = str(text)
if len(text) == 0:
@ -61,7 +54,8 @@ class IssueString:
# if it's still not numeric at start skip it
if text[start].isdigit() or text[start] == ".":
# walk through the string, look for split point (the first non-numeric)
# walk through the string, look for split point (the first
# non-numeric)
decimal_count = 0
for idx in range(start, len(text)):
if text[idx] not in "0123456789.":
@ -79,7 +73,8 @@ class IssueString:
if text[idx - 1] == "." and len(text) != idx:
idx = idx - 1
# if there is no numeric after the minus, make the minus part of the suffix
# if there is no numeric after the minus, make the minus part of
# the suffix
if idx == 1 and start == 1:
idx = 0
@ -136,5 +131,3 @@ class IssueString:
if self.num is None:
return None
return int(self.num)

View File

@ -1,25 +1,20 @@
# coding=utf-8
"""Some generic utilities"""
"""
Some generic utilities
"""
# Copyright 2012-2014 Anthony Beville
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
"""
Copyright 2012-2014 Anthony Beville
# http://www.apache.org/licenses/LICENSE-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
import os
import re
@ -31,12 +26,14 @@ import codecs
class UtilsVars:
    """Holder for module-wide state shared by the helpers in this file."""

    # Checked by fix_output_encoding() before it does any work and set to
    # True once it has run, so the output streams are only wrapped once.
    already_fixed_encoding = False
def get_actual_preferred_encoding():
    """Return the name of the text encoding to use on this platform.

    On macOS (``platform.system() == "Darwin"``) this is always "utf-8";
    everywhere else it is whatever ``locale.getpreferredencoding()``
    reports.
    """
    # NOTE(review): presumably the locale-reported value is not reliable on
    # macOS, hence the hard-coded override -- confirm before changing.
    if platform.system() == "Darwin":
        return "utf-8"
    return locale.getpreferredencoding()
def fix_output_encoding():
if not UtilsVars.already_fixed_encoding:
# this reads the environment and inits the right locale
@ -48,28 +45,28 @@ def fix_output_encoding( ):
sys.stderr = codecs.getwriter(preferred_encoding)(sys.stderr)
UtilsVars.already_fixed_encoding = True
def get_recursive_filelist(pathlist):
"""
Get a recursive list of all files under all path items in the list
"""
"""Get a recursive list of all files under all path items in the list"""
filename_encoding = sys.getfilesystemencoding()
filelist = []
for p in pathlist:
# if path is a folder, walk it recursivly, and all files underneath
if type(p) == str:
# if path is a folder, walk it recursively, and all files underneath
if isinstance(p, str):
# make sure string is unicode
p = p.decode(filename_encoding) # , 'replace')
elif type(p) != unicode:
elif not isinstance(p, unicode):
# it's probably a QString
p = unicode(p)
if os.path.isdir(p):
for root, dirs, files in os.walk(p):
for f in files:
if type(f) == str:
if isinstance(f, str):
# make sure string is unicode
f = f.decode(filename_encoding, 'replace')
elif type(f) != unicode:
elif not isinstance(f, unicode):
# it's probably a QString
f = unicode(f)
filelist.append(os.path.join(root, f))
@ -78,6 +75,7 @@ def get_recursive_filelist( pathlist ):
return filelist
def listToString(l):
string = ""
if l is not None:
@ -87,19 +85,23 @@ def listToString( l ):
string += item
return string
def addtopath(dirname):
    """Prepend *dirname* to the PATH environment variable if it is missing.

    Nothing happens when *dirname* is None or empty, or when PATH already
    contains *dirname* as one of its ``os.pathsep``-separated entries.
    A missing or empty PATH is treated as having no entries, so PATH
    becomes just *dirname*.
    """
    if dirname is None or dirname == "":
        return

    current = os.environ.get('PATH', '')
    # Compare whole entries, so "/usr/bin" is not mistaken for being present
    # just because "/usr/bin/extra" is.
    entries = current.split(os.pathsep) if current else []
    if dirname not in entries:
        # Joining (rather than concatenating) avoids a trailing separator
        # when PATH was empty; an empty PATH entry means "current directory"
        # on POSIX systems.
        os.environ['PATH'] = os.pathsep.join([dirname] + entries)
# returns executable path, if it exists
def which(program):
"""Returns path of the executable, if it exists"""
def is_exe(fpath):
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
@ -116,6 +118,7 @@ def which(program):
return None
def removearticles(text):
text = text.lower()
articles = ['and', 'the', 'a', '&', 'issue']
@ -131,8 +134,8 @@ def removearticles( text ):
newText = newText.replace(",", "")
newText = newText.replace("-", " ")
# since the CV api changed, searches for series names with periods
# now explicity require the period to be in the search key,
# since the CV API changed, searches for series names with periods
# now explicitly require the period to be in the search key,
# so the line below is removed (for now)
#newText = newText.replace(".", "")
@ -141,17 +144,19 @@ def removearticles( text ):
def unique_file(file_name):
    """Return a path derived from *file_name* that does not exist yet.

    If nothing exists at *file_name* it is returned unchanged.  Otherwise
    " (1)", " (2)", ... is inserted between the base name and the extension
    until an unused path is found, e.g. "/path/file.ext" becomes
    "/path/file (1).ext".
    """
    # splitext returns ('/path/file', '.ext')
    root, ext = os.path.splitext(file_name)
    counter = 1
    # lexists() rather than exists(): a broken symlink still occupies the
    # name and must count as taken.
    while os.path.lexists(file_name):
        file_name = root + ' (' + str(counter) + ')' + ext
        counter += 1
    return file_name
# -o- coding: utf-8 -o-
# ISO639 python dict
# oficial list in http://www.loc.gov/standards/iso639-2/php/code_list.php
# official list in http://www.loc.gov/standards/iso639-2/php/code_list.php
lang_dict = {
'ab': 'Abkhaz',
@ -576,22 +581,12 @@ countries = [
]
def getLanguageDict():
    """Return the module-level ``lang_dict`` mapping of ISO 639 codes to names.

    The dict object itself is returned, not a copy.
    """
    return lang_dict
def getLanguageFromISO(iso):
    """Return the language name for the ISO 639 code *iso*.

    Returns None when *iso* is None.  Any other value is looked up in
    ``lang_dict``; a code that is not in the table raises ``KeyError``.
    """
    if iso is None:
        return None
    return lang_dict[iso]