Integrated cbt/tar file support

Added logging capability
Code cosmetics
This commit is contained in:
Ozzieisaacs 2020-05-23 16:09:10 +02:00
parent 15dff9ce4e
commit 3e15b950b7
11 changed files with 327 additions and 239 deletions

View File

@ -2,4 +2,4 @@
comicapi originates [here](https://github.com/davide-romanini/comicapi), was integrated into [ComicStreamer](https://github.com/davide-romanini/ComicStreamer), was modified in [this fork](https://github.com/kounch/ComicStreamer), and has now been extracted and packaged by yours truly (Iris W).
# Installation
you can use pip to install this. cbr support is off by default—you'll need to do `pip install unrar` as well as having libunrar.so available.
You can install this package with pip. CBR support is off by default: to enable it, run `pip install rarfile` and make sure the `unrar` executable is available.

View File

@ -1 +1,3 @@
__author__ = 'dromanin'
__version__ = '2.1.1'

14
comicapi/comet.py Executable file → Normal file
View File

@ -16,9 +16,6 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET
from comicapi.genericmetadata import GenericMetadata
import comicapi.utils
@ -116,15 +113,6 @@ class CoMet:
assign('coverImage', md.coverImage)
# need to specially process the credits, since they are structured differently than CIX
credit_writer_list = list()
credit_penciller_list = list()
credit_inker_list = list()
credit_colorist_list = list()
credit_letterer_list = list()
credit_cover_list = list()
credit_editor_list = list()
# loop thru credits, and build a list for each role that CoMet supports
for credit in metadata.credits:
@ -169,7 +157,6 @@ class CoMet:
if root.tag != 'comet':
raise KeyError("Not a comet XML!")
#return None
metadata = GenericMetadata()
md = metadata
@ -249,7 +236,6 @@ class CoMet:
def writeToExternalFile(self, filename, metadata):
tree = self.convertMetadataToXML(self, metadata)
#ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):

316
comicapi/comicarchive.py Executable file → Normal file
View File

@ -17,84 +17,36 @@ limitations under the License.
"""
import zipfile
import tarfile
import os
import struct
import sys
import tempfile
import subprocess
import platform
import locale
import shutil
import logging
from natsort import natsorted
try:
import rarfile
#from unrar import unrarlib
#import unrar.constants
#from unrar import constants
rarsupport = True
except ImportError:
rarsupport = False
import ctypes
import io
'''if rarsupport:
class OpenableRarFile(rarfile.RarFile):
def open(self, member):
#print "opening %s..." % member
# based on https://github.com/matiasb/python-unrar/pull/4/files
if isinstance(member, rarfile.RarInfo):
member = member.filename
archive = unrarlib.RAROpenArchiveDataEx(
self.filename, mode=constants.RAR_OM_EXTRACT)
handle = self._open(archive)
found, buf = False, []
def _callback(msg, UserData, P1, P2):
if msg == constants.UCM_PROCESSDATA:
data = (ctypes.c_char * P2).from_address(P1).raw
buf.append(data)
return 1
c_callback = unrarlib.UNRARCALLBACK(_callback)
unrarlib.RARSetCallback(handle, c_callback, 1)
try:
rarinfo = self._read_header(handle)
while rarinfo is not None:
#print "checking rar archive %s against %s" % (rarinfo.filename, member)
if rarinfo.filename == member:
self._process_current(handle, constants.RAR_TEST)
found = True
else:
self._process_current(handle, constants.RAR_SKIP)
rarinfo = self._read_header(handle)
except unrarlib.UnrarException:
raise rarfile.BadRarFile("Bad RAR archive data.")
finally:
self._close(handle)
if not found:
raise KeyError('There is no item named %r in the archive' % member)
return b''.join(buf)'''
# if platform.system() == "Windows":
# import _subprocess
import time
from io import StringIO
from io import BytesIO
try:
from PIL import Image
pil_available = True
except ImportError:
pil_available = False
sys.path.insert(0, os.path.abspath("."))
#import UnRAR2
#from UnRAR2.rar_exceptions import *
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
sys.path.insert(0, os.path.abspath("."))
#from settings import ComicTaggerSettings
from comicapi.comicinfoxml import ComicInfoXml
from comicapi.comicbookinfo import ComicBookInfo
from comicapi.comet import CoMet
@ -124,7 +76,6 @@ class ZipArchiver:
return self.writeZipComment(self.path, comment)
def readArchiveFile(self, archive_file):
data = ""
zf = zipfile.ZipFile(self.path, 'r')
try:
@ -132,14 +83,14 @@ class ZipArchiver:
except zipfile.BadZipfile as e:
errMsg = u"bad zipfile [{0}]: {1} :: {2}".format(
e, self.path, archive_file)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
zf.close()
raise IOError
except Exception as e:
zf.close()
errMsg = u"bad zipfile [{0}]: {1} :: {2}".format(
e, self.path, archive_file)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
raise IOError
finally:
zf.close()
@ -178,7 +129,7 @@ class ZipArchiver:
except Exception as e:
errMsg = u"Unable to get zipfile list [{0}]: {1}".format(
e, self.path)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
return []
# zip helper func
@ -195,7 +146,7 @@ class ZipArchiver:
zout = zipfile.ZipFile(tmp_name, 'w')
for item in zin.infolist():
buffer = zin.read(item.filename)
if (item.filename not in exclude_list):
if item.filename not in exclude_list:
zout.writestr(item, buffer)
# preserve the old comment
@ -218,7 +169,6 @@ class ZipArchiver:
see: http://en.wikipedia.org/wiki/Zip_(file_format)#Structure
"""
#get file size
statinfo = os.stat(filename)
file_length = statinfo.st_size
@ -289,15 +239,193 @@ class ZipArchiver:
return False
except Exception as e:
errMsg = u"Error while copying to {0}: {1}".format(self.path, e)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
return False
else:
return True
class TarArchiver:
    """Archiver for tar-based comic archives (``.cbt``).

    Offers the same methods as the other archivers in this module, built on
    the real ``tarfile`` API.  Two limits of the tar format shape the
    implementation:

    * tar has no archive-level comment, so the comment is always empty and
      cannot be stored;
    * a tar member cannot be deleted or replaced in place, so every
      modification rewrites the archive into a temporary file that then
      replaces the original.

    Archives are opened with ``tarfile.open`` so compressed tars can be read
    as well.  A rewritten archive is always stored as a plain, uncompressed
    tar.
    """

    def __init__(self, path):
        self.path = path

    def getArchiveComment(self):
        """Return the archive comment, which is always ``""`` for tar."""
        return ""

    def setArchiveComment(self, comment):
        """Try to store ``comment``; returns False (see writeTarComment)."""
        return self.writeTarComment(self.path, comment)

    def readArchiveFile(self, archive_file):
        """Return the raw bytes of the member named ``archive_file``.

        Raises:
            IOError: the archive cannot be opened, the member is missing, or
                the member is not a regular file.
        """
        try:
            with tarfile.open(self.path, 'r') as tf:
                member = tf.extractfile(archive_file)
                # extractfile() yields None for directories and other
                # members that carry no data.
                if member is None:
                    raise tarfile.TarError("member is not a regular file")
                return member.read()
        except Exception as e:
            errMsg = u"bad tarfile [{0}]: {1} :: {2}".format(
                e, self.path, archive_file)
            logger.info(errMsg)
            raise IOError(errMsg) from e

    def removeArchiveFile(self, archive_file):
        """Remove ``archive_file`` from the archive; return True on success."""
        try:
            self.rebuildTarFile([archive_file])
        except Exception as e:
            logger.info(u"Unable to remove {0} from {1}: {2}".format(
                archive_file, self.path, e))
            return False
        return True

    def writeArchiveFile(self, archive_file, data):
        """Add or replace the member ``archive_file`` with ``data``.

        ``data`` may be ``bytes`` or ``str`` (stored as UTF-8).  The archive
        is rewritten in one pass: the old copy of the member, if any, is
        dropped and the new one is appended.  Returns True on success and
        False on any failure.
        """
        try:
            self._rewrite([archive_file], [(archive_file, data)])
        except Exception as e:
            logger.info(u"Unable to write {0} to {1}: {2}".format(
                archive_file, self.path, e))
            return False
        return True

    def getArchiveFilenameList(self):
        """Return the names of all members, or ``[]`` if unreadable."""
        try:
            with tarfile.open(self.path, 'r') as tf:
                return tf.getnames()
        except Exception as e:
            errMsg = u"Unable to get tarfile list [{0}]: {1}".format(
                e, self.path)
            logger.info(errMsg)
            return []

    # tar helper func
    def rebuildTarFile(self, exclude_list):
        """Rewrite the archive without the members named in ``exclude_list``.

        Raises whatever the underlying tar or file operation raises; the
        original archive is left untouched in that case.
        """
        self._rewrite(exclude_list, [])

    def _rewrite(self, exclude_list, additions):
        """Copy the archive to a temp file, filter and extend it, swap it in.

        ``additions`` is a list of ``(name, data)`` pairs appended after the
        surviving members.
        """
        excluded = set(exclude_list)
        # Create the temp file beside the archive so the final replace stays
        # on one filesystem.
        tmp_fd, tmp_name = tempfile.mkstemp(
            dir=os.path.dirname(os.path.abspath(self.path)))
        os.close(tmp_fd)
        try:
            with tarfile.open(self.path, 'r') as tin, \
                    tarfile.open(tmp_name, 'w') as tout:
                for member in tin.getmembers():
                    if member.name in excluded:
                        continue
                    if member.isreg():
                        tout.addfile(member, tin.extractfile(member))
                    else:
                        # directories, links, ...: header only, no payload
                        tout.addfile(member)
                for name, data in additions:
                    info, payload = self._make_member(name, data)
                    tout.addfile(info, payload)
            # replace with the new file
            os.replace(tmp_name, self.path)
        except Exception:
            if os.path.exists(tmp_name):
                os.remove(tmp_name)
            raise

    @staticmethod
    def _make_member(name, data):
        """Build the ``(TarInfo, file object)`` pair for an in-memory member."""
        import io
        import time

        if isinstance(data, str):
            data = data.encode("utf-8")
        info = tarfile.TarInfo(name=name)
        info.size = len(data)
        info.mtime = int(time.time())
        return info, io.BytesIO(data)

    def writeTarComment(self, filename, comment):
        """Report that a comment cannot be stored in a tar archive.

        The tar format has no comment field.  Searching the file for a ZIP
        "End of Central Directory" signature, as is done for zip archives,
        can only match bytes that belong to a member's payload and would
        overwrite and truncate them.  The file is therefore never touched and
        the result is always False.
        """
        return False

    def copyFromArchive(self, otherArchive):
        """Replace this archive with the members of ``otherArchive``.

        Every name reported by ``otherArchive.getArchiveFilenameList()`` is
        read and written to a fresh tar at ``self.path``.  A comment on the
        source archive cannot be carried over and is dropped with a log
        entry.  Returns True on success, False on any failure.
        """
        try:
            with tarfile.open(self.path, 'w') as tout:
                for fname in otherArchive.getArchiveFilenameList():
                    data = otherArchive.readArchiveFile(fname)
                    if data is not None:
                        info, payload = self._make_member(fname, data)
                        tout.addfile(info, payload)

            if otherArchive.getArchiveComment():
                logger.info(
                    u"Archive comment not copied to {0}: "
                    u"tar archives cannot store a comment".format(self.path))
        except Exception as e:
            errMsg = u"Error while copying to {0}: {1}".format(self.path, e)
            logger.info(errMsg)
            return False
        return True
# ------------------------------------------
# RAR implementation
if rarsupport:
class RarArchiver:
@ -363,8 +491,6 @@ if rarsupport:
# Make sure to escape brackets, since some funky stuff is going on
# underneath with "fnmatch"
#archive_file = archive_file.replace("[", '[[]')
entries = []
rarc = self.getRARObj()
@ -376,21 +502,21 @@ if rarsupport:
entries = [(rarc.getinfo(archive_file), data)]
if entries[0][0].file_size != len(entries[0][1]):
errMsg = u"readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format(
entries[0][0].file_size, len(entries[0][1]), self.path,
archive_file, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
errMsg = u"readArchiveFile(): " \
u"[file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]\n".format(
entries[0][0].file_size, len(entries[0][1]), self.path, archive_file, tries)
logger.info(errMsg)
continue
except (OSError, IOError) as e:
errMsg = u"readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(
str(e), self.path, archive_file, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
time.sleep(1)
except Exception as e:
errMsg = u"Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(
str(e), self.path, archive_file, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
break
else:
@ -398,8 +524,8 @@ if rarsupport:
# entries is a list of tuples: (rarinfo, filedata)
if tries > 1:
errMsg = u"Attempted read_files() {0} times".format(tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
if (len(entries) == 1):
logger.info(errMsg)
if len(entries) == 1:
return entries[0][1]
else:
raise IOError
@ -479,7 +605,7 @@ if rarsupport:
except (OSError, IOError) as e:
errMsg = u"getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(
str(e), self.path, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
time.sleep(1)
else:
@ -493,14 +619,12 @@ if rarsupport:
while tries < 7:
try:
tries = tries + 1
#rarc = UnRAR2.RarFile( self.path )
rarc = rarfile.RarFile(self.path)
#rarc = OpenableRarFile(self.path)
except (OSError, IOError) as e:
errMsg = u"getRARObj(): [{0}] {1} attempt#{2}".format(
str(e), self.path, tries)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
time.sleep(1)
else:
@ -531,7 +655,7 @@ class FolderArchiver:
with open(fname, 'rb') as f:
data = f.read()
f.close()
except IOError as e:
except IOError:
pass
return data
@ -634,7 +758,7 @@ class ComicArchive:
logo_data = None
class ArchiveType:
Zip, Rar, Folder, Pdf, Unknown = range(5)
Zip, Rar, Tar, Folder, Pdf, Unknown = range(6)
def __init__(self, path, rar_exe_path=None, default_image_path=None):
self.path = path
@ -665,6 +789,10 @@ class ComicArchive:
self.archive_type = self.ArchiveType.Zip
self.archiver = ZipArchiver(self.path)
if self.tarTest():
self.archive_type = self.ArchiveType.Tar
self.archiver = TarArchiver(self.path)
elif self.rarTest():
self.archive_type = self.ArchiveType.Rar
self.archiver = RarArchiver(
@ -674,7 +802,6 @@ class ComicArchive:
self.archiver = PdfArchiver(self.path)
if ComicArchive.logo_data is None and self.default_image_path:
#fname = ComicTaggerSettings.getGraphic('nocover.png')
fname = self.default_image_path
with open(fname, 'rb') as fd:
ComicArchive.logo_data = fd.read()
@ -702,6 +829,9 @@ class ComicArchive:
# Probe used when choosing the archiver: reports whether self.path is a ZIP
# archive, as judged by zipfile.is_zipfile().
def zipTest(self):
return zipfile.is_zipfile(self.path)
def tarTest(self):
    """Return True when ``self.path`` is a readable tar archive.

    ``tarfile.is_tarfile`` only swallows tar-format errors.  A missing file,
    a directory or an unreadable path makes it raise ``OSError``; those
    cases are reported as "not a tar" so this probe never raises.
    """
    try:
        return tarfile.is_tarfile(self.path)
    except OSError:
        return False
def rarTest(self):
try:
rarc = rarfile.RarFile(self.path)
@ -713,6 +843,9 @@ class ComicArchive:
# True when the stored archive_type is ArchiveType.Zip.
def isZip(self):
return self.archive_type == self.ArchiveType.Zip
# True when the stored archive_type is ArchiveType.Tar.
def isTar(self):
return self.archive_type == self.ArchiveType.Tar
# True when the stored archive_type is ArchiveType.Rar.
def isRar(self):
return self.archive_type == self.ArchiveType.Rar
@ -748,11 +881,8 @@ class ComicArchive:
def seemsToBeAComicArchive(self):
# Do we even care about extensions??
ext = os.path.splitext(self.path)[1].lower()
if ((self.isZip() or self.isRar() or self.isPdf()
) #or self.isFolder() )
if ((self.isZip() or self.isRar() or self.isPdf() or self.isTar()
)
and (self.getNumberOfPages() > 0)):
return True
else:
@ -812,7 +942,7 @@ class ComicArchive:
image_data = self.archiver.readArchiveFile(filename)
except IOError:
errMsg = u"Error reading in page. Substituting logo page."
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
image_data = ComicArchive.logo_data
return image_data
@ -927,9 +1057,9 @@ class ComicArchive:
return self.cbi_md
def readRawCBI(self):
if (not self.hasCBI()):
if not self.hasCBI():
return None
else:
return self.archiver.getArchiveComment()
def hasCBI(self):
@ -1063,14 +1193,14 @@ class ComicArchive:
def readRawCoMet(self):
if not self.hasCoMet():
errMsg = u"{} doesn't have CoMet data!".format(self.path)
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
return None
try:
raw_comet = self.archiver.readArchiveFile(self.comet_filename)
except IOError:
errMsg = u"Error reading in raw CoMet!"
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
raw_comet = ""
return raw_comet
@ -1124,7 +1254,7 @@ class ComicArchive:
except:
data = ""
errMsg = u"Error reading in Comet XML for validation!"
sys.stderr.buffer.write(bytes(errMsg, "UTF-8"))
logger.info(errMsg)
if CoMet().validateString(data):
# since we found it, save it!
self.comet_filename = n
@ -1182,11 +1312,3 @@ class ComicArchive:
metadata.isEmpty = False
return metadata
def exportAsZip(self, zipfilename):
if self.archive_type == self.ArchiveType.Zip:
# nothing to do, we're already a zip
return True
zip_archiver = ZipArchiver(zipfilename)
return zip_archiver.copyFromArchive(self.archiver)

7
comicapi/comicbookinfo.py Executable file → Normal file
View File

@ -18,13 +18,10 @@ limitations under the License.
import json
from datetime import datetime
import zipfile
from comicapi.genericmetadata import GenericMetadata
import comicapi.utils
#import ctversion
class ComicBookInfo:
def metadataFromString(self, string):
@ -94,7 +91,7 @@ class ComicBookInfo:
except:
return False
return ('ComicBookInfo/1.0' in cbi_container)
return 'ComicBookInfo/1.0' in cbi_container
def createJSONDictionary(self, metadata):
@ -147,4 +144,4 @@ class ComicBookInfo:
f = open(filename, 'w')
f.write(json.dumps(cbi_container, indent=4))
f.close
f.close()

4
comicapi/comicinfoxml.py Executable file → Normal file
View File

@ -16,9 +16,6 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
from datetime import datetime
import zipfile
from pprint import pprint
import xml.etree.ElementTree as ET
from comicapi.genericmetadata import GenericMetadata
import comicapi.utils
@ -276,7 +273,6 @@ class ComicInfoXml:
def writeToExternalFile(self, filename, metadata):
tree = self.convertMetadataToXML(self, metadata)
#ET.dump(tree)
tree.write(filename, encoding='utf-8')
def readFromExternalFile(self, filename):

5
comicapi/filenameparser.py Executable file → Normal file
View File

@ -37,7 +37,7 @@ class FileNameParser:
placeholders = ['[_]', ' +']
for ph in placeholders:
string = re.sub(ph, self.repl, string)
return string #.strip()
return string
def getIssueCount(self, filename, issue_end):
@ -57,7 +57,6 @@ class FileNameParser:
match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
if match:
count = match.group()
found = True
count = count.lstrip("0")
@ -94,8 +93,6 @@ class FileNameParser:
# remove any "of NN" phrase with spaces (problem: this could break on some titles)
filename = re.sub("of [\d]+", self.repl, filename)
#print u"[{0}]".format(filename)
# we should now have a cleaned up filename version with all the words in
# the same positions as original filename

16
comicapi/genericmetadata.py Executable file → Normal file
View File

@ -38,18 +38,6 @@ class PageType:
Deleted = "Deleted"
"""
class PageInfo:
Image = 0
Type = PageType.Story
DoublePage = False
ImageSize = 0
Key = ""
ImageWidth = 0
ImageHeight = 0
"""
class GenericMetadata:
def __init__(self):
@ -174,8 +162,8 @@ class GenericMetadata:
def overlayCredits(self, new_credits):
for c in new_credits:
if 'primary' in c:
# if c.has_key('primary') and c['primary']:
if 'primary' in c:
primary = True
else:
primary = False
@ -295,8 +283,8 @@ class GenericMetadata:
for c in self.credits:
primary = ""
if 'primary' in c:
# if c.has_key('primary') and c['primary']:
if 'primary' in c:
primary = " [P]"
add_string("credit", c['role'] + ": " + c['person'] + primary)

6
comicapi/issuestring.py Executable file → Normal file
View File

@ -26,10 +26,6 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
import comicapi.utils
import math
import re
class IssueString:
def __init__(self, text):
@ -88,8 +84,6 @@ class IssueString:
else:
self.suffix = text
#print "num: {0} suf: {1}".format(self.num, self.suffix)
def asString(self, pad=0):
# return the float, left side zero-padded, with suffix attached
if self.num is None:

12
comicapi/utils.py Executable file → Normal file
View File

@ -28,12 +28,14 @@ import codecs
# Holder for module-wide state. fix_output_encoding() checks this flag before
# doing its work and sets it to True afterwards, so the work happens only once.
class UtilsVars:
already_fixed_encoding = False
def get_actual_preferred_encoding():
    """Return the preferred text encoding, forcing UTF-8 on macOS.

    The locale's preferred encoding is looked up first; when
    ``platform.system()`` reports "Darwin" the answer is replaced by
    "utf-8".
    """
    locale_default = locale.getpreferredencoding()
    return "utf-8" if platform.system() == "Darwin" else locale_default
def fix_output_encoding():
if not UtilsVars.already_fixed_encoding:
# this reads the environment and inits the right locale
@ -45,6 +47,7 @@ def fix_output_encoding( ):
sys.stderr = codecs.getwriter(preferred_encoding)(sys.stderr)
UtilsVars.already_fixed_encoding = True
def get_recursive_filelist(pathlist):
"""
Get a recursive list of all files under all path items in the list
@ -55,7 +58,7 @@ def get_recursive_filelist( pathlist ):
# if path is a folder, walk it recursively, including all files underneath
if type(p) == str:
# make sure string is unicode
p = p.decode(filename_encoding) #, 'replace')
p = p.decode(filename_encoding)
elif type(p) != str:
# it's probably a QString
p = str(p)
@ -75,6 +78,7 @@ def get_recursive_filelist( pathlist ):
return filelist
def listToString(l):
string = ""
if l is not None:
@ -84,6 +88,7 @@ def listToString( l ):
string += item
return string
def addtopath(dirname):
if dirname is not None and dirname != "":
@ -95,6 +100,7 @@ def addtopath( dirname ):
if not match:
os.environ['PATH'] = dirname + os.pathsep + os.environ['PATH']
# returns executable path, if it exists
def which(program):
@ -113,6 +119,7 @@ def which(program):
return None
def removearticles(text):
text = text.lower()
articles = ['and', 'the', 'a', '&', 'issue']
@ -131,7 +138,6 @@ def removearticles( text ):
# since the CV api changed, searches for series names with periods
# now explicitly require the period to be in the search key,
# so the line below is removed (for now)
#newText = newText.replace(".", "")
return newText
@ -573,10 +579,10 @@ countries = [
]
# Accessor for the module-level lang_dict object; returns that same object,
# not a copy.
def getLanguageDict():
return lang_dict
def getLanguageFromISO(iso):
if iso == None:
return None

View File

@ -1,8 +1,8 @@
from setuptools import setup
setup(
name = 'comicapi',
version = '2.1',
description = 'Comic archive (cbr/cbz) and metadata utilities. Extracted from the comictagger project.',
version = '2.1.1',
description = 'Comic archive (cbr/cbz/cbt) and metadata utilities. Extracted from the comictagger project.',
author = 'Iris W',
packages = ['comicapi'],
install_requires = ['natsort>=3.5.2'],