Integrated cbt/tar file support

Added logging capability
Code cosmetics
This commit is contained in:
Ozzieisaacs 2020-05-23 16:09:10 +02:00
parent 15dff9ce4e
commit 3e15b950b7
11 changed files with 327 additions and 239 deletions

View File

@ -2,4 +2,4 @@
comicapi originates [here](https://github.com/davide-romanini/comicapi), was integrated into [ComicStreamer](https://github.com/davide-romanini/ComicStreamer), was modified in [this fork](https://github.com/kounch/ComicStreamer), and has now been extracted and packaged by yours truly (Iris W).
# Installation
you can use pip to install this. cbr support is off by default—you'll need to do `pip install unrar` as well as having libunrar.so available.
You can install this with pip. CBR support is off by default; to enable it, run `pip install rarfile` and make sure the `unrar` tool is available.

View File

@ -1 +1,3 @@
__author__ = 'dromanin'
__version__ = '2.1.1'

26
comicapi/comet.py Executable file → Normal file
View File

@ -1,5 +1,5 @@
"""
A python class to encapsulate CoMet data
A python class to encapsulate CoMet data
Copyright 2012-2014 Anthony Beville
@ -7,7 +7,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@ -16,9 +16,6 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET
from comicapi.genericmetadata import GenericMetadata
import comicapi.utils
@ -64,7 +61,7 @@ class CoMet:
def convertMetadataToXML(self, filename, metadata):
#shorthand for the metadata
# shorthand for the metadata
md = metadata
# build a tree structure
@ -74,7 +71,7 @@ class CoMet:
root.attrib[
'xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd"
#helper func
# helper func
def assign(comet_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry)
@ -84,7 +81,7 @@ class CoMet:
md.title = ""
assign('title', md.title)
assign('series', md.series)
assign('issue', md.issue) #must be int??
assign('issue', md.issue) # must be int??
assign('volume', md.volume)
assign('description', md.comments)
assign('publisher', md.publisher)
@ -116,15 +113,6 @@ class CoMet:
assign('coverImage', md.coverImage)
# need to specially process the credits, since they are structured differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
# loop thru credits, and build a list for each role that CoMet supports
for credit in metadata.credits:
@ -169,7 +157,6 @@ class CoMet:
if root.tag != 'comet':
raise KeyError("Not a comet XML!")
#return None
metadata = GenericMetadata()
md = metadata
@ -234,7 +221,7 @@ class CoMet:
return metadata
#verify that the string actually contains CoMet data in XML format
# verify that the string actually contains CoMet data in XML format
def validateString(self, string):
try:
tree = ET.ElementTree(ET.fromstring(string))
@ -249,7 +236,6 @@ class CoMet:
def writeToExternalFile(self, filename, metadata):
tree = self.convertMetadataToXML(self, metadata)
#ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):

380
comicapi/comicarchive.py Executable file → Normal file
View File

@ -17,84 +17,36 @@ limitations under the License.
"""
import zipfile
import tarfile
import os
import struct
import sys
import tempfile
import subprocess
import platform
import locale
import shutil
import logging
from natsort import natsorted
try:
import rarfile
#from unrar import unrarlib
#import unrar.constants
#from unrar import constants
rarsupport = True
except ImportError:
rarsupport = False
import ctypes
import io
'''if rarsupport:
class OpenableRarFile(rarfile.RarFile):
def open(self, member):
#print "opening %s..." % member
# based on https://github.com/matiasb/python-unrar/pull/4/files
if isinstance(member, rarfile.RarInfo):
member = member.filename
archive = unrarlib.RAROpenArchiveDataEx(
self.filename, mode=constants.RAR_OM_EXTRACT)
handle = self._open(archive)
found, buf = False, []
def _callback(msg, UserData, P1, P2):
if msg == constants.UCM_PROCESSDATA:
data = (ctypes.c_char * P2).from_address(P1).raw
buf.append(data)
return 1
c_callback = unrarlib.UNRARCALLBACK(_callback)
unrarlib.RARSetCallback(handle, c_callback, 1)
try:
rarinfo = self._read_header(handle)
while rarinfo is not None:
#print "checking rar archive %s against %s" % (rarinfo.filename, member)
if rarinfo.filename == member:
self._process_current(handle, constants.RAR_TEST)
found = True
else:
self._process_current(handle, constants.RAR_SKIP)
rarinfo = self._read_header(handle)
except unrarlib.UnrarException:
raise rarfile.BadRarFile("Bad RAR archive data.")
finally:
self._close(handle)
if not found:
raise KeyError('There is no item named %r in the archive' % member)
return b''.join(buf)'''
# if platform.system() == "Windows":
# import _subprocess
import time
from io import StringIO
from io import BytesIO
try:
from PIL import Image
pil_available = True
except ImportError:
pil_available = False
sys.path.insert(0, os.path.abspath("."))
#import UnRAR2
#from UnRAR2.rar_exceptions import *
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
sys.path.insert(0, os.path.abspath("."))
#from settings import ComicTaggerSettings
from comicapi.comicinfoxml import ComicInfoXml
from comicapi.comicbookinfo import ComicBookInfo
from comicapi.comet import CoMet
@ -124,7 +76,6 @@ class ZipArchiver:
return self.writeZipComment(self.path, comment)
def readArchiveFile(self, archive_file):
data = ""
zf = zipfile.ZipFile(self.path, 'r')
try:
@ -132,14 +83,14 @@ class ZipArchiver:
except zipfile.BadZipfile as e:
errMsg = u"bad zipfile [{0}]: {1} :: {2}".format(
e, self.path, archive_file)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
zf.close()
raise IOError
except Exception as e:
zf.close()
errMsg = u"bad zipfile [{0}]: {1} :: {2}".format(
e, self.path, archive_file)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
raise IOError
finally:
zf.close()
@ -160,7 +111,7 @@ class ZipArchiver:
try:
self.rebuildZipFile([archive_file])
#now just add the archive file as a new one
# now just add the archive file as a new one
zf = zipfile.ZipFile(
self.path, mode='a', compression=zipfile.ZIP_DEFLATED)
zf.writestr(archive_file, data)
@ -178,14 +129,14 @@ class ZipArchiver:
except Exception as e:
errMsg = u"Unable to get zipfile list [{0}]: {1}".format(
e, self.path)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
return []
# zip helper func
def rebuildZipFile(self, exclude_list):
# this recompresses the zip archive, without the files in the exclude_list
#errMsg=u"Rebuilding zip {0} without {1}".format( self.path, exclude_list )
# errMsg=u"Rebuilding zip {0} without {1}".format( self.path, exclude_list )
# generate temp file
tmp_fd, tmp_name = tempfile.mkstemp(dir=os.path.dirname(self.path))
@ -195,10 +146,10 @@ class ZipArchiver:
zout = zipfile.ZipFile(tmp_name, 'w')
for item in zin.infolist():
buffer = zin.read(item.filename)
if (item.filename not in exclude_list):
if item.filename not in exclude_list:
zout.writestr(item, buffer)
#preserve the old comment
# preserve the old comment
zout.comment = zin.comment
zout.close()
@ -218,14 +169,13 @@ class ZipArchiver:
see: http://en.wikipedia.org/wiki/Zip_(file_format)#Structure
"""
#get file size
statinfo = os.stat(filename)
file_length = statinfo.st_size
try:
fo = open(filename, "r+b")
#the starting position, relative to EOF
# the starting position, relative to EOF
pos = -4
found = False
@ -238,13 +188,13 @@ class ZipArchiver:
value = fo.read(4)
#look for the end of central directory signature
# look for the end of central directory signature
if bytearray(value) == bytearray([0x50, 0x4b, 0x05, 0x06]):
found = True
else:
# not found, step back another byte
pos = pos - 1
#print pos,"{1} int: {0:x}".format(bytearray(value)[0], value)
# print pos,"{1} int: {0:x}".format(bytearray(value)[0], value)
if found:
@ -282,22 +232,200 @@ class ZipArchiver:
zout.writestr(fname, data)
zout.close()
#preserve the old comment
# preserve the old comment
comment = otherArchive.getArchiveComment()
if comment is not None:
if not self.writeZipComment(self.path, comment):
return False
except Exception as e:
errMsg = u"Error while copying to {0}: {1}".format(self.path, e)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
return False
else:
return True
#------------------------------------------
class TarArchiver:
    """Archiver backend for tar-based comic archives (e.g. ``.cbt``).

    Offers the same method names as the other archiver classes in this
    module (comment access, member read/write/remove, listing, copy).
    The tar format has no archive-level comment, so the comment methods
    never touch the file.  Archives are opened with ``tarfile.open`` so
    gzip/bzip2/xz compressed tars are read transparently, and rewrites
    keep the compression of the existing file.
    """

    def __init__(self, path):
        """Remember the path of the tar archive; the file is not opened here."""
        self.path = path

    def getArchiveComment(self):
        """Return the archive comment.

        Tar archives cannot carry an archive-level comment, so this is
        always the empty string.
        """
        return ""

    def setArchiveComment(self, comment):
        """Try to store ``comment``; see ``writeTarComment`` for the result."""
        return self.writeTarComment(self.path, comment)

    def readArchiveFile(self, archive_file):
        """Return the raw bytes of the member named ``archive_file``.

        Raises:
            IOError: if the archive cannot be opened, the member does not
                exist, or the member is not a regular file.
        """
        try:
            with tarfile.open(self.path, 'r') as tf:
                member = tf.extractfile(archive_file)
                # extractfile() returns None for directories, links to
                # nothing, devices etc. -- there is no data to hand back.
                if member is None:
                    raise tarfile.TarError("not a regular file")
                return member.read()
        except Exception as e:
            errMsg = u"bad tarfile [{0}]: {1} :: {2}".format(
                e, self.path, archive_file)
            logger.info(errMsg)
            raise IOError(errMsg) from e

    def removeArchiveFile(self, archive_file):
        """Remove ``archive_file`` from the archive.

        Returns True on success (also when the member was not present),
        False if the archive could not be rewritten.
        """
        try:
            self.rebuildTarFile([archive_file])
        except Exception as e:
            errMsg = u"Unable to remove {0} from tarfile [{1}]: {2}".format(
                archive_file, e, self.path)
            logger.info(errMsg)
            return False
        return True

    def writeArchiveFile(self, archive_file, data):
        """Add or replace the member ``archive_file`` with ``data``.

        ``data`` may be ``bytes`` or ``str`` (encoded as UTF-8).  A tar
        member cannot be replaced in place, so the whole archive is
        rewritten in a single pass: every member except ``archive_file``
        is copied, then the new content is appended.

        Returns True on success, False otherwise.
        """
        try:
            self._rebuild([archive_file], [(archive_file, data)])
        except Exception as e:
            errMsg = u"Unable to write {0} to tarfile [{1}]: {2}".format(
                archive_file, e, self.path)
            logger.info(errMsg)
            return False
        return True

    def getArchiveFilenameList(self):
        """Return the names of all regular-file members.

        Directories and other special members are left out so that every
        returned name can be passed to ``readArchiveFile``.  Returns an
        empty list if the archive cannot be read.
        """
        try:
            with tarfile.open(self.path, 'r') as tf:
                return [m.name for m in tf.getmembers() if m.isreg()]
        except Exception as e:
            errMsg = u"Unable to get tarfile list [{0}]: {1}".format(
                e, self.path)
            logger.info(errMsg)
            return []

    # tar helper func
    def rebuildTarFile(self, exclude_list):
        """Rewrite the archive without the members named in ``exclude_list``.

        Raises whatever ``tarfile``/``os`` raise on failure; the original
        archive is left untouched in that case.
        """
        self._rebuild(exclude_list)

    def writeTarComment(self, filename, comment):
        """Report whether ``comment`` could be stored in ``filename``.

        The tar format has no comment field, so nothing is ever written.
        The result is True only when there is nothing to store (empty or
        ``None`` comment) and False otherwise.  The file is never
        modified.
        """
        return not comment

    def copyFromArchive(self, otherArchive):
        """Replace this archive with the files of ``otherArchive``.

        The new tar is assembled in a temporary file next to ``self.path``
        and only moved into place once complete, so a failed copy never
        destroys an existing archive.  A comment on the source archive
        cannot be carried over and is dropped with a log message.

        Returns True on success, False otherwise.
        """
        tmp_name = None
        try:
            comment = otherArchive.getArchiveComment()
            tmp_name = self._makeTempFile()
            with tarfile.open(tmp_name, self._writeMode()) as tout:
                for fname in otherArchive.getArchiveFilenameList():
                    # zip-style directory entries; directories are
                    # implicit in the member paths of a tar archive
                    if fname.endswith('/'):
                        continue
                    data = otherArchive.readArchiveFile(fname)
                    if data is not None:
                        self._addMember(tout, fname, data)
            os.replace(tmp_name, self.path)
            tmp_name = None
            if comment:
                logger.info(
                    u"Archive comment dropped while copying to {0}: "
                    u"tar archives cannot store comments".format(self.path))
        except Exception as e:
            errMsg = u"Error while copying to {0}: {1}".format(self.path, e)
            logger.info(errMsg)
            return False
        finally:
            if tmp_name is not None:
                self._discard(tmp_name)
        return True

    # ---- private helpers ----

    def _makeTempFile(self):
        """Create an empty temp file beside the archive and return its name."""
        tmp_fd, tmp_name = tempfile.mkstemp(
            dir=os.path.dirname(os.path.abspath(self.path)))
        os.close(tmp_fd)
        return tmp_name

    @staticmethod
    def _discard(tmp_name):
        """Best-effort removal of a leftover temp file."""
        try:
            os.remove(tmp_name)
        except OSError:
            pass

    def _writeMode(self):
        """Return the ``tarfile.open`` write mode matching the current file.

        The compression is detected from the file's magic bytes; a plain
        ``'w'`` is used for uncompressed, unreadable or missing files.
        """
        try:
            with open(self.path, 'rb') as fh:
                magic = fh.read(6)
        except OSError:
            return 'w'
        if magic.startswith(b'\x1f\x8b'):
            return 'w:gz'
        if magic.startswith(b'BZh'):
            return 'w:bz2'
        if magic.startswith(b'\xfd7zXZ\x00'):
            return 'w:xz'
        return 'w'

    @staticmethod
    def _addMember(tout, name, data):
        """Append ``data`` (bytes or str) to ``tout`` as regular file ``name``."""
        # imported locally so this class does not depend on which
        # module-level imports are present
        import io
        import time

        if isinstance(data, str):
            data = data.encode("utf-8")
        info = tarfile.TarInfo(name=name)
        info.size = len(data)
        info.mtime = int(time.time())
        tout.addfile(info, io.BytesIO(data))

    def _rebuild(self, exclude_list, additions=()):
        """Rewrite the archive, dropping and/or adding members.

        ``exclude_list`` holds member names to leave out; ``additions`` is
        an iterable of ``(name, data)`` pairs appended after the copy.
        The result is written to a temp file which replaces the archive
        only when everything succeeded.
        """
        excluded = set(exclude_list)
        tmp_name = self._makeTempFile()
        try:
            with tarfile.open(self.path, 'r') as tin, \
                    tarfile.open(tmp_name, self._writeMode()) as tout:
                for member in tin:
                    if member.name in excluded:
                        continue
                    if member.isreg():
                        tout.addfile(member, tin.extractfile(member))
                    else:
                        # directories, links, ...: header only
                        tout.addfile(member)
                for name, data in additions:
                    self._addMember(tout, name, data)
            # replace with the new file
            os.replace(tmp_name, self.path)
        except Exception:
            self._discard(tmp_name)
            raise
# ------------------------------------------
# RAR implementation
if rarsupport:
class RarArchiver:
@ -319,7 +447,7 @@ if rarsupport:
self.startupinfo = None
def __del__(self):
#RarArchiver.devnull.close()
# RarArchiver.devnull.close()
pass
def getArchiveComment(self):
@ -363,8 +491,6 @@ if rarsupport:
# Make sure to escape brackets, since some funky stuff is going on
# underneath with "fnmatch"
#archive_file = archive_file.replace("[", '[[]')
entries = []
rarc = self.getRARObj()
@ -376,30 +502,30 @@ if rarsupport:
entries = [(rarc.getinfo(archive_file), data)]
if entries[0][0].file_size != len(entries[0][1]):
errMsg = u"readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format(
entries[0][0].file_size, len(entries[0][1]), self.path,
archive_file, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
errMsg = u"readArchiveFile(): " \
u"[file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]\n".format(
entries[0][0].file_size, len(entries[0][1]), self.path, archive_file, tries)
logger.info(errMsg)
continue
except (OSError, IOError) as e:
errMsg = u"readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(
str(e), self.path, archive_file, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
time.sleep(1)
except Exception as e:
errMsg = u"Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(
str(e), self.path, archive_file, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
break
else:
#Success"
#entries is a list of of tuples: ( rarinfo, filedata)
# Success"
# entries is a list of of tuples: ( rarinfo, filedata)
if tries > 1:
errMsg = u"Attempted read_files() {0} times".format(tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
if (len(entries) == 1):
logger.info(errMsg)
if len(entries) == 1:
return entries[0][1]
else:
raise IOError
@ -463,14 +589,14 @@ if rarsupport:
def getArchiveFilenameList(self):
rarc = self.getRARObj()
#namelist = [ item.filename for item in rarc.infolist() ]
#return namelist
# namelist = [ item.filename for item in rarc.infolist() ]
# return namelist
tries = 0
while tries < 7:
try:
tries = tries + 1
#namelist = [ item.filename for item in rarc.infolist() ]
# namelist = [ item.filename for item in rarc.infolist() ]
namelist = []
for item in rarc.infolist():
if item.file_size != 0:
@ -479,11 +605,11 @@ if rarsupport:
except (OSError, IOError) as e:
errMsg = u"getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(
str(e), self.path, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
time.sleep(1)
else:
#Success"
# Success"
return namelist
raise e
@ -493,24 +619,22 @@ if rarsupport:
while tries < 7:
try:
tries = tries + 1
#rarc = UnRAR2.RarFile( self.path )
rarc = rarfile.RarFile(self.path)
#rarc = OpenableRarFile(self.path)
except (OSError, IOError) as e:
errMsg = u"getRARObj(): [{0}] {1} attempt#{2}".format(
str(e), self.path, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
time.sleep(1)
else:
#Success"
# Success"
return rarc
raise e
#------------------------------------------
# ------------------------------------------
# Folder implementation
class FolderArchiver:
def __init__(self, path):
@ -531,7 +655,7 @@ class FolderArchiver:
with open(fname, 'rb') as f:
data = f.read()
f.close()
except IOError as e:
except IOError:
pass
return data
@ -573,7 +697,7 @@ class FolderArchiver:
return itemlist
#------------------------------------------
# ------------------------------------------
# Unknown implementation
class UnknownArchiver:
def __init__(self, path):
@ -628,13 +752,13 @@ class PdfArchiver:
return out
#------------------------------------------------------------------
# ------------------------------------------------------------------
class ComicArchive:
logo_data = None
class ArchiveType:
Zip, Rar, Folder, Pdf, Unknown = range(5)
Zip, Rar, Tar, Folder, Pdf, Unknown = range(6)
def __init__(self, path, rar_exe_path=None, default_image_path=None):
self.path = path
@ -665,6 +789,10 @@ class ComicArchive:
self.archive_type = self.ArchiveType.Zip
self.archiver = ZipArchiver(self.path)
if self.tarTest():
self.archive_type = self.ArchiveType.Tar
self.archiver = TarArchiver(self.path)
elif self.rarTest():
self.archive_type = self.ArchiveType.Rar
self.archiver = RarArchiver(
@ -674,7 +802,6 @@ class ComicArchive:
self.archiver = PdfArchiver(self.path)
if ComicArchive.logo_data is None and self.default_image_path:
#fname = ComicTaggerSettings.getGraphic('nocover.png')
fname = self.default_image_path
with open(fname, 'rb') as fd:
ComicArchive.logo_data = fd.read()
@ -702,6 +829,9 @@ class ComicArchive:
def zipTest(self):
    """Return whether ``zipfile.is_zipfile`` recognises ``self.path`` as a zip file."""
    return zipfile.is_zipfile(self.path)
def tarTest(self):
    """Return True if ``self.path`` is a readable tar archive.

    ``tarfile.is_tarfile`` lets ``OSError`` escape when the path cannot
    be opened as a file (missing file, directory, no permission).  Such
    a path is simply "not a tar archive", so it is reported as False
    instead of raising.
    """
    try:
        return tarfile.is_tarfile(self.path)
    except OSError:
        return False
def rarTest(self):
try:
rarc = rarfile.RarFile(self.path)
@ -713,6 +843,9 @@ class ComicArchive:
def isZip(self):
    """Return True if this archive's type is ``ArchiveType.Zip``."""
    return self.archive_type == self.ArchiveType.Zip
def isTar(self):
    """Return True if this archive's type is ``ArchiveType.Tar``."""
    return self.archive_type == self.ArchiveType.Tar
def isRar(self):
    """Return True if this archive's type is ``ArchiveType.Rar``."""
    return self.archive_type == self.ArchiveType.Rar
@ -748,11 +881,8 @@ class ComicArchive:
def seemsToBeAComicArchive(self):
# Do we even care about extensions??
ext = os.path.splitext(self.path)[1].lower()
if ((self.isZip() or self.isRar() or self.isPdf()
) #or self.isFolder() )
if ((self.isZip() or self.isRar() or self.isPdf() or self.isTar()
)
and (self.getNumberOfPages() > 0)):
return True
else:
@ -811,8 +941,8 @@ class ComicArchive:
try:
image_data = self.archiver.readArchiveFile(filename)
except IOError:
errMsg = u"Error reading in page. Substituting logo page."
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
errMsg = u"Error reading in page. Substituting logo page."
logger.info(errMsg)
image_data = ComicArchive.logo_data
return image_data
@ -834,11 +964,11 @@ class ComicArchive:
scanner_page_index = None
#make a guess at the scanner page
# make a guess at the scanner page
name_list = self.getPageNameList()
count = self.getNumberOfPages()
#too few pages to really know
# too few pages to really know
if count < 5:
return None
@ -870,7 +1000,7 @@ class ComicArchive:
prefix = os.path.commonprefix(common_length_list)
if mode_length <= 7 and prefix == "":
#probably all numbers
# probably all numbers
if len(final_name) > mode_length:
scanner_page_index = count - 1
@ -890,13 +1020,13 @@ class ComicArchive:
if sort_list:
def keyfunc(k):
#hack to account for some weird scanner ID pages
#basename=os.path.split(k)[1]
#if basename < '0':
# hack to account for some weird scanner ID pages
# basename=os.path.split(k)[1]
# if basename < '0':
# k = os.path.join(os.path.split(k)[0], "z" + basename)
return k.lower()
files = natsorted(files, key=keyfunc) #, signed=False)
files = natsorted(files, key=keyfunc) #, signed=False)
# make a sub-list of image files
self.page_list = []
@ -927,15 +1057,15 @@ class ComicArchive:
return self.cbi_md
def readRawCBI(self):
if (not self.hasCBI()):
if not self.hasCBI():
return None
return self.archiver.getArchiveComment()
else:
return self.archiver.getArchiveComment()
def hasCBI(self):
if self.has_cbi is None:
#if ( not ( self.isZip() or self.isRar()) or not self.seemsToBeAComicArchive() ):
# if ( not ( self.isZip() or self.isRar()) or not self.seemsToBeAComicArchive() ):
if not self.seemsToBeAComicArchive():
self.has_cbi = False
else:
@ -975,7 +1105,7 @@ class ComicArchive:
else:
self.cix_md = ComicInfoXml().metadataFromString(raw_cix)
#validate the existing page list (make sure count is correct)
# validate the existing page list (make sure count is correct)
if len(self.cix_md.pages) != 0:
if len(self.cix_md.pages) != self.getNumberOfPages():
# pages array doesn't match the actual number of images we're seeing
@ -1044,7 +1174,7 @@ class ComicArchive:
self.comet_md = CoMet().metadataFromString(raw_comet)
self.comet_md.setDefaultPageList(self.getNumberOfPages())
#use the coverImage value from the comet_data to mark the cover in this struct
# use the coverImage value from the comet_data to mark the cover in this struct
# walk through list of images in file, and find the matching one for md.coverImage
# need to remove the existing one in the default
if self.comet_md.coverImage is not None:
@ -1063,14 +1193,14 @@ class ComicArchive:
def readRawCoMet(self):
if not self.hasCoMet():
errMsg = u"{} doesn't have CoMet data!".format(self.path)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
return None
try:
raw_comet = self.archiver.readArchiveFile(self.comet_filename)
except IOError:
errMsg = u"Error reading in raw CoMet!"
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
raw_comet = ""
return raw_comet
@ -1114,7 +1244,7 @@ class ComicArchive:
if not self.seemsToBeAComicArchive():
return self.has_comet
#look at all xml files in root, and search for CoMet data, get first
# look at all xml files in root, and search for CoMet data, get first
for n in self.archiver.getArchiveFilenameList():
if (os.path.dirname(n) == ""
and os.path.splitext(n)[1].lower() == '.xml'):
@ -1124,7 +1254,7 @@ class ComicArchive:
except:
data = ""
errMsg = u"Error reading in Comet XML for validation!"
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
if CoMet().validateString(data):
# since we found it, save it!
self.comet_filename = n
@ -1182,11 +1312,3 @@ class ComicArchive:
metadata.isEmpty = False
return metadata
def exportAsZip(self, zipfilename):
if self.archive_type == self.ArchiveType.Zip:
# nothing to do, we're already a zip
return True
zip_archiver = ZipArchiver(zipfilename)
return zip_archiver.copyFromArchive(self.archiver)

23
comicapi/comicbookinfo.py Executable file → Normal file
View File

@ -1,5 +1,5 @@
"""
A python class to encapsulate the ComicBookInfo data
A python class to encapsulate the ComicBookInfo data
Copyright 2012-2014 Anthony Beville
@ -7,7 +7,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@ -18,13 +18,10 @@ limitations under the License.
import json
from datetime import datetime
import zipfile
from comicapi.genericmetadata import GenericMetadata
import comicapi.utils
#import ctversion
class ComicBookInfo:
def metadataFromString(self, string):
@ -35,7 +32,7 @@ class ComicBookInfo:
cbi = cbi_container['ComicBookInfo/1.0']
#helper func
# helper func
# If item is not in CBI, return None
def xlate(cbi_entry):
if cbi_entry in cbi:
@ -66,7 +63,7 @@ class ComicBookInfo:
if metadata.tags is None:
metadata.tags = []
#need to massage the language string to be ISO
# need to massage the language string to be ISO
if metadata.language is not None:
# reverse look-up
pattern = metadata.language
@ -86,7 +83,7 @@ class ComicBookInfo:
cbi_container = self.createJSONDictionary(metadata)
return json.dumps(cbi_container)
#verify that the string actually contains CBI data in JSON format
# verify that the string actually contains CBI data in JSON format
def validateString(self, string):
try:
@ -94,24 +91,24 @@ class ComicBookInfo:
except:
return False
return ('ComicBookInfo/1.0' in cbi_container)
return 'ComicBookInfo/1.0' in cbi_container
def createJSONDictionary(self, metadata):
# Create the dictionary that we will convert to JSON text
cbi = dict()
cbi_container = {
'appID': 'ComicTagger/' + '1.0.0', #ctversion.version,
'appID': 'ComicTagger/' + '1.0.0', # ctversion.version,
'lastModified': str(datetime.now()),
'ComicBookInfo/1.0': cbi
}
#helper func
# helper func
def assign(cbi_entry, md_entry):
if md_entry is not None:
cbi[cbi_entry] = md_entry
#helper func
# helper func
def toInt(s):
i = None
if type(s) in [str, int]:
@ -147,4 +144,4 @@ class ComicBookInfo:
f = open(filename, 'w')
f.write(json.dumps(cbi_container, indent=4))
f.close
f.close()

14
comicapi/comicinfoxml.py Executable file → Normal file
View File

@ -1,5 +1,5 @@
"""
A python class to encapsulate ComicRack's ComicInfo.xml data
A python class to encapsulate ComicRack's ComicInfo.xml data
Copyright 2012-2014 Anthony Beville
@ -7,7 +7,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@ -16,9 +16,6 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET
from comicapi.genericmetadata import GenericMetadata
import comicapi.utils
@ -75,7 +72,7 @@ class ComicInfoXml:
def convertMetadataToXML(self, filename, metadata):
#shorthand for the metadata
# shorthand for the metadata
md = metadata
# build a tree structure
@ -83,7 +80,7 @@ class ComicInfoXml:
root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
root.attrib['xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema"
#helper func
# helper func
def assign(cix_entry, md_entry):
if md_entry is not None:
ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry)
@ -267,7 +264,7 @@ class ComicInfoXml:
if pages_node is not None:
for page in pages_node:
metadata.pages.append(page.attrib)
#print page.attrib
# print page.attrib
metadata.isEmpty = False
@ -276,7 +273,6 @@ class ComicInfoXml:
def writeToExternalFile(self, filename, metadata):
tree = self.convertMetadataToXML(self, metadata)
#ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):

19
comicapi/filenameparser.py Executable file → Normal file
View File

@ -1,5 +1,5 @@
"""
Functions for parsing comic info from filename
Functions for parsing comic info from filename
This should probably be re-written, but, well, it mostly works!
@ -9,7 +9,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@ -37,7 +37,7 @@ class FileNameParser:
placeholders = ['[_]', ' +']
for ph in placeholders:
string = re.sub(ph, self.repl, string)
return string #.strip()
return string
def getIssueCount(self, filename, issue_end):
@ -57,7 +57,6 @@ class FileNameParser:
match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
if match:
count = match.group()
found = True
count = count.lstrip("0")
@ -94,8 +93,6 @@ class FileNameParser:
# remove any "of NN" phrase with spaces (problem: this could break on some titles)
filename = re.sub("of [\d]+", self.repl, filename)
#print u"[{0}]".format(filename)
# we should now have a cleaned up filename version with all the words in
# the same positions as original filename
@ -108,7 +105,7 @@ class FileNameParser:
if len(word_list) > 1:
word_list = word_list[1:]
else:
#only one word?? just bail.
# only one word?? just bail.
return issue, start, end
# Now try to search for the likely issue number word in the list
@ -164,7 +161,7 @@ class FileNameParser:
series = tmpstr
volume = ""
#save the last word
# save the last word
try:
last_word = series.split()[-1]
except:
@ -182,7 +179,7 @@ class FileNameParser:
# if a volume wasn't found, see if the last word is a year in parentheses
# since that's a common way to designate the volume
if volume == "":
#match either (YEAR), (YEAR-), or (YEAR-YEAR2)
# match either (YEAR), (YEAR-), or (YEAR-YEAR2)
match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
if match:
volume = match.group(2)
@ -218,7 +215,7 @@ class FileNameParser:
def getRemainder(self, filename, year, count, issue_end):
#make a guess at where the the non-interesting stuff begins
# make a guess at where the the non-interesting stuff begins
remainder = ""
if "--" in filename:
@ -246,7 +243,7 @@ class FileNameParser:
# remove the extension
filename = os.path.splitext(filename)[0]
#url decode, just in case
# url decode, just in case
filename = unquote(filename)
# sometimes archives get messed up names from too many decodings

26
comicapi/genericmetadata.py Executable file → Normal file
View File

@ -1,17 +1,17 @@
"""
A python class for internal metadata storage
The goal of this class is to handle ALL the data that might come from various
tagging schemes and databases, such as ComicVine or GCD. This makes conversion
tagging schemes and databases, such as ComicVine or GCD. This makes conversion
possible, however lossy it might be
Copyright 2012-2014 Anthony Beville
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@ -38,18 +38,6 @@ class PageType:
Deleted = "Deleted"
"""
class PageInfo:
Image = 0
Type = PageType.Story
DoublePage = False
ImageSize = 0
Key = ""
ImageWidth = 0
ImageHeight = 0
"""
class GenericMetadata:
def __init__(self):
@ -174,8 +162,8 @@ class GenericMetadata:
def overlayCredits(self, new_credits):
for c in new_credits:
if 'primary' in c:
# if c.has_key('primary') and c['primary']:
if 'primary' in c:
primary = True
else:
primary = False
@ -295,8 +283,8 @@ class GenericMetadata:
for c in self.credits:
primary = ""
if 'primary' in c:
# if c.has_key('primary') and c['primary']:
if 'primary' in c:
primary = " [P]"
add_string("credit", c['role'] + ": " + c['person'] + primary)
@ -306,7 +294,7 @@ class GenericMetadata:
flen = max(flen, len(i[0]))
flen += 1
#format the data nicely
# format the data nicely
outstr = ""
fmt_str = u"{0: <" + str(flen) + "} {1}\n"
for i in vals:

16
comicapi/issuestring.py Executable file → Normal file
View File

@ -17,7 +17,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@ -26,10 +26,6 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import comicapi.utils
import math
import re
class IssueString:
def __init__(self, text):
@ -49,7 +45,7 @@ class IssueString:
if len(text) == 0:
return
#skip the minus sign if it's first
# skip the minus sign if it's first
if text[0] == '-':
start = 1
else:
@ -88,10 +84,8 @@ class IssueString:
else:
self.suffix = text
#print "num: {0} suf: {1}".format(self.num, self.suffix)
def asString(self, pad=0):
#return the float, left side zero-padded, with suffix attached
# return the float, left side zero-padded, with suffix attached
if self.num is None:
return self.suffix
@ -119,7 +113,7 @@ class IssueString:
return num_s
def asFloat(self):
#return the float, with no suffix
# return the float, with no suffix
if self.suffix == u"½":
if self.num is not None:
return self.num + .5
@ -128,7 +122,7 @@ class IssueString:
return self.num
def asInt(self):
    """Return the numeric part of the issue as an int.

    Returns None when no numeric part is stored (self.num is None);
    otherwise the stored number is converted with int(), which drops
    any fractional part.
    """
    number = self.num
    return None if number is None else int(number)

54
comicapi/utils.py Executable file → Normal file
View File

@ -9,7 +9,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
@ -28,13 +28,15 @@ import codecs
class UtilsVars:
already_fixed_encoding = False
def get_actual_preferred_encoding():
    """Return the text encoding to use for console output.

    The value reported by locale.getpreferredencoding() is used on every
    platform except macOS ("Darwin"), where "utf-8" is returned instead.
    """
    detected = locale.getpreferredencoding()
    on_mac = platform.system() == "Darwin"
    return "utf-8" if on_mac else detected
def fix_output_encoding( ):
def fix_output_encoding():
if not UtilsVars.already_fixed_encoding:
# this reads the environment and inits the right locale
locale.setlocale(locale.LC_ALL, "")
@ -45,37 +47,39 @@ def fix_output_encoding( ):
sys.stderr = codecs.getwriter(preferred_encoding)(sys.stderr)
UtilsVars.already_fixed_encoding = True
def _path_to_text(value, encoding, errors='strict'):
    """Return *value* as str: bytes are decoded, other non-str objects go through str()."""
    if isinstance(value, bytes):
        return value.decode(encoding, errors)
    if not isinstance(value, str):
        # it's probably a QString (or another path-like object)
        return str(value)
    return value


def get_recursive_filelist(pathlist):
    """
    Get a recursive list of all files under all path items in the list.

    Every entry that is a directory is walked recursively and each file
    found below it is added as os.path.join(root, name). Any other entry
    is added unchanged (after conversion to str), whether or not it
    exists on disk.

    Entries given as bytes are decoded with the filesystem encoding;
    entries that are neither bytes nor str are converted with str().
    str entries are used as they are -- calling .decode() on them, as the
    Python 2 era code did, raises AttributeError on Python 3.

    :param pathlist: iterable of file and/or directory paths
    :return: list of str paths
    """
    filename_encoding = sys.getfilesystemencoding()
    filelist = []
    for p in pathlist:
        p = _path_to_text(p, filename_encoding)

        if os.path.isdir(p):
            # if path is a folder, walk it recursively, and all files underneath
            for root, dirs, files in os.walk(p):
                for f in files:
                    # undecodable file names are kept with replacement
                    # characters rather than aborting the whole walk
                    f = _path_to_text(f, filename_encoding, 'replace')
                    filelist.append(os.path.join(root, f))
        else:
            filelist.append(p)

    return filelist
def listToString( l ):
def listToString(l):
string = ""
if l is not None:
for item in l:
@ -84,17 +88,19 @@ def listToString( l ):
string += item
return string
def addtopath(dirname):
    """
    Prepend *dirname* to the PATH environment variable.

    Nothing happens when *dirname* is None or empty, or when it is
    already one of the PATH entries (exact, case-sensitive comparison of
    the os.pathsep-separated entries).

    A missing or empty PATH is handled explicitly: PATH then becomes just
    *dirname*. Appending os.pathsep to it would leave a trailing empty
    entry, which POSIX shells interpret as the current directory.

    :param dirname: directory to add, or None / "" for a no-op
    """
    if dirname is None or dirname == "":
        return

    current = os.environ.get('PATH', '')

    # verify that path doesn't already contain the given dirname
    if dirname in current.split(os.pathsep):
        return

    if current:
        os.environ['PATH'] = dirname + os.pathsep + current
    else:
        os.environ['PATH'] = dirname
# returns executable path, if it exists
def which(program):
@ -113,9 +119,10 @@ def which(program):
return None
def removearticles( text ):
def removearticles(text):
text = text.lower()
articles = ['and', 'the', 'a', '&', 'issue' ]
articles = ['and', 'the', 'a', '&', 'issue']
newText = ''
for word in text.split(' '):
if word not in articles:
@ -131,16 +138,15 @@ def removearticles( text ):
# since the CV api changed, searches for series names with periods
# now explicitly require the period to be in the search key,
# so the line below is removed (for now)
#newText = newText.replace(".", "")
return newText
def unique_file(file_name):
    """Return a path based on *file_name* that does not exist yet.

    If nothing exists at *file_name* (checked with os.path.lexists, so a
    broken symlink counts as existing) it is returned unchanged.
    Otherwise " (1)", " (2)", ... is inserted before the extension until
    an unused name is found.
    """
    # os.path.splitext returns ('/path/file', '.ext')
    stem, extension = os.path.splitext(file_name)
    candidate = file_name
    attempt = 0
    while os.path.lexists(candidate):
        attempt += 1
        candidate = stem + ' (' + str(attempt) + ')' + extension
    return candidate
@ -573,12 +579,12 @@ countries = [
]
def getLanguageDict():
    """Return the module-level lang_dict mapping (the object itself, not a copy)."""
    return lang_dict
def getLanguageFromISO(iso):
    """
    Look up *iso* in the module-level lang_dict.

    :param iso: key into lang_dict, or None
    :return: None when *iso* is None, otherwise lang_dict[iso]
    :raises KeyError: when *iso* is not None and not a key of lang_dict
    """
    # identity test: None is a singleton, and "== None" can be fooled by
    # objects that override __eq__
    if iso is None:
        return None
    return lang_dict[iso]

View File

@ -1,8 +1,8 @@
from setuptools import setup
setup(
name = 'comicapi',
version = '2.1',
description = 'Comic archive (cbr/cbz) and metadata utilities. Extracted from the comictagger project.',
version = '2.1.1',
description = 'Comic archive (cbr/cbz/cbt) and metadata utilities. Extracted from the comictagger project.',
author = 'Iris W',
packages = ['comicapi'],
install_requires = ['natsort>=3.5.2'],