diff --git a/comicapi/__init__.py b/comicapi/__init__.py index 0d9bd7c..06d5141 100644 --- a/comicapi/__init__.py +++ b/comicapi/__init__.py @@ -1 +1 @@ -__author__ = 'dromanin' +__author__ = "dromanin" diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..04c22e1 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -15,175 +15,121 @@ # limitations under the License. import xml.etree.ElementTree as ET -#from datetime import datetime -#from pprint import pprint -#import zipfile -from .genericmetadata import GenericMetadata -from . import utils +from comicapi import utils +from comicapi.genericmetadata import GenericMetadata class CoMet: - writer_synonyms = ['writer', 'plotter', 'scripter'] - penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns'] - inker_synonyms = ['inker', 'artist', 'finishes'] - colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer'] - letterer_synonyms = ['letterer'] - cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist'] - editor_synonyms = ['editor'] + writer_synonyms = ["writer", "plotter", "scripter"] + penciller_synonyms = ["artist", "penciller", "penciler", "breakdowns"] + inker_synonyms = ["inker", "artist", "finishes"] + colorist_synonyms = ["colorist", "colourist", "colorer", "colourer"] + letterer_synonyms = ["letterer"] + cover_synonyms = ["cover", "covers", "coverartist", "cover artist"] + editor_synonyms = ["editor"] - def metadataFromString(self, string): + def metadata_from_string(self, string): tree = ET.ElementTree(ET.fromstring(string)) - return self.convertXMLToMetadata(tree) + return self.convert_xml_to_metadata(tree) - def stringFromMetadata(self, metadata): + def string_from_metadata(self, metadata): header = '\n' - tree = self.convertMetadataToXML(self, metadata) + tree = self.convert_metadata_to_xml(metadata) return header + ET.tostring(tree.getroot()) - def indent(self, elem, level=0): - # for making the XML output readable - i = "\n" + level * " " - if 
len(elem): - if not elem.text or not elem.text.strip(): - elem.text = i + " " - if not elem.tail or not elem.tail.strip(): - elem.tail = i - for elem in elem: - self.indent(elem, level + 1) - if not elem.tail or not elem.tail.strip(): - elem.tail = i - else: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = i - - def convertMetadataToXML(self, filename, metadata): + def convert_metadata_to_xml(self, metadata): # shorthand for the metadata md = metadata # build a tree structure root = ET.Element("comet") - root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/" - root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" - root.attrib[ - 'xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd" + root.attrib["xmlns:comet"] = "http://www.denvog.com/comet/" + root.attrib["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance" + root.attrib["xsi:schemaLocation"] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd" # helper func def assign(comet_entry, md_entry): if md_entry is not None: - ET.SubElement(root, comet_entry).text = "{0}".format(md_entry) + ET.SubElement(root, comet_entry).text = str(md_entry) # title is manditory if md.title is None: md.title = "" - assign('title', md.title) - assign('series', md.series) - assign('issue', md.issue) # must be int?? - assign('volume', md.volume) - assign('description', md.comments) - assign('publisher', md.publisher) - assign('pages', md.pageCount) - assign('format', md.format) - assign('language', md.language) - assign('rating', md.maturityRating) - assign('price', md.price) - assign('isVersionOf', md.isVersionOf) - assign('rights', md.rights) - assign('identifier', md.identifier) - assign('lastMark', md.lastMark) - assign('genre', md.genre) # TODO repeatable + assign("title", md.title) + assign("series", md.series) + assign("issue", md.issue) # must be int?? 
+ assign("volume", md.volume) + assign("description", md.comments) + assign("publisher", md.publisher) + assign("pages", md.page_count) + assign("format", md.format) + assign("language", md.language) + assign("rating", md.maturity_rating) + assign("price", md.price) + assign("isVersionOf", md.is_version_of) + assign("rights", md.rights) + assign("identifier", md.identifier) + assign("lastMark", md.last_mark) + assign("genre", md.genre) # TODO repeatable if md.characters is not None: - char_list = [c.strip() for c in md.characters.split(',')] + char_list = [c.strip() for c in md.characters.split(",")] for c in char_list: - assign('character', c) + assign("character", c) if md.manga is not None and md.manga == "YesAndRightToLeft": - assign('readingDirection', "rtl") + assign("readingDirection", "rtl") - date_str = "" if md.year is not None: date_str = str(md.year).zfill(4) if md.month is not None: date_str += "-" + str(md.month).zfill(2) - assign('date', date_str) + assign("date", date_str) - assign('coverImage', md.coverImage) - - # need to specially process the credits, since they are structured - # differently than CIX - credit_writer_list = list() - credit_penciller_list = list() - credit_inker_list = list() - credit_colorist_list = list() - credit_letterer_list = list() - credit_cover_list = list() - credit_editor_list = list() + assign("coverImage", md.cover_image) # loop thru credits, and build a list for each role that CoMet supports for credit in metadata.credits: - if credit['role'].lower() in set(self.writer_synonyms): - ET.SubElement( - root, - 'writer').text = "{0}".format( - credit['person']) + if credit["role"].lower() in set(self.writer_synonyms): + ET.SubElement(root, "writer").text = str(credit["person"]) - if credit['role'].lower() in set(self.penciller_synonyms): - ET.SubElement( - root, - 'penciller').text = "{0}".format( - credit['person']) + if credit["role"].lower() in set(self.penciller_synonyms): + ET.SubElement(root, "penciller").text = 
str(credit["person"]) - if credit['role'].lower() in set(self.inker_synonyms): - ET.SubElement( - root, - 'inker').text = "{0}".format( - credit['person']) + if credit["role"].lower() in set(self.inker_synonyms): + ET.SubElement(root, "inker").text = str(credit["person"]) - if credit['role'].lower() in set(self.colorist_synonyms): - ET.SubElement( - root, - 'colorist').text = "{0}".format( - credit['person']) + if credit["role"].lower() in set(self.colorist_synonyms): + ET.SubElement(root, "colorist").text = str(credit["person"]) - if credit['role'].lower() in set(self.letterer_synonyms): - ET.SubElement( - root, - 'letterer').text = "{0}".format( - credit['person']) + if credit["role"].lower() in set(self.letterer_synonyms): + ET.SubElement(root, "letterer").text = str(credit["person"]) - if credit['role'].lower() in set(self.cover_synonyms): - ET.SubElement( - root, - 'coverDesigner').text = "{0}".format( - credit['person']) + if credit["role"].lower() in set(self.cover_synonyms): + ET.SubElement(root, "coverDesigner").text = str(credit["person"]) - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) - # self pretty-print - self.indent(root) + utils.indent(root) # wrap it in an ElementTree instance, and save as XML tree = ET.ElementTree(root) return tree - def convertXMLToMetadata(self, tree): + def convert_xml_to_metadata(self, tree): root = tree.getroot() - if root.tag != 'comet': - raise 1 - return None + if root.tag != "comet": + raise Exception("Not a CoMet XML document") metadata = GenericMetadata() md = metadata @@ -193,84 +139,85 @@ class CoMet: node = root.find(tag) if node is not None: return node.text - else: - return None + return None - md.series = xlate('series') - md.title = xlate('title') - md.issue = xlate('issue') - md.volume = xlate('volume') - md.comments = xlate('description') - 
md.publisher = xlate('publisher') - md.language = xlate('language') - md.format = xlate('format') - md.pageCount = xlate('pages') - md.maturityRating = xlate('rating') - md.price = xlate('price') - md.isVersionOf = xlate('isVersionOf') - md.rights = xlate('rights') - md.identifier = xlate('identifier') - md.lastMark = xlate('lastMark') - md.genre = xlate('genre') # TODO - repeatable field + md.series = xlate("series") + md.title = xlate("title") + md.issue = xlate("issue") + md.volume = xlate("volume") + md.comments = xlate("description") + md.publisher = xlate("publisher") + md.language = xlate("language") + md.format = xlate("format") + md.page_count = xlate("pages") + md.maturity_rating = xlate("rating") + md.price = xlate("price") + md.is_version_of = xlate("isVersionOf") + md.rights = xlate("rights") + md.identifier = xlate("identifier") + md.last_mark = xlate("lastMark") + md.genre = xlate("genre") # TODO - repeatable field - date = xlate('date') + date = xlate("date") if date is not None: - parts = date.split('-') + parts = date.split("-") if len(parts) > 0: md.year = parts[0] if len(parts) > 1: md.month = parts[1] - md.coverImage = xlate('coverImage') + md.cover_image = xlate("coverImage") - readingDirection = xlate('readingDirection') - if readingDirection is not None and readingDirection == "rtl": + reading_direction = xlate("readingDirection") + if reading_direction is not None and reading_direction == "rtl": md.manga = "YesAndRightToLeft" # loop for character tags char_list = [] for n in root: - if n.tag == 'character': + if n.tag == "character": char_list.append(n.text.strip()) - md.characters = utils.listToString(char_list) + md.characters = utils.list_to_string(char_list) # Now extract the credit info for n in root: - if (n.tag == 'writer' or - n.tag == 'penciller' or - n.tag == 'inker' or - n.tag == 'colorist' or - n.tag == 'letterer' or - n.tag == 'editor' - ): - metadata.addCredit(n.text.strip(), n.tag.title()) + if any( + [ + n.tag == "writer", + 
n.tag == "penciller", + n.tag == "inker", + n.tag == "colorist", + n.tag == "letterer", + n.tag == "editor", + ] + ): + metadata.add_credit(n.text.strip(), n.tag.title()) - if n.tag == 'coverDesigner': - metadata.addCredit(n.text.strip(), "Cover") + if n.tag == "coverDesigner": + metadata.add_credit(n.text.strip(), "Cover") - metadata.isEmpty = False + metadata.is_empty = False return metadata # verify that the string actually contains CoMet data in XML format - def validateString(self, string): + def validate_string(self, string): try: tree = ET.ElementTree(ET.fromstring(string)) root = tree.getroot() - if root.tag != 'comet': + if root.tag != "comet": raise Exception except: return False return True - def writeToExternalFile(self, filename, metadata): + def write_to_external_file(self, filename, metadata): - tree = self.convertMetadataToXML(self, metadata) - # ET.dump(tree) - tree.write(filename, encoding='utf-8') + tree = self.convert_metadata_to_xml(metadata) + tree.write(filename, encoding="utf-8") - def readFromExternalFile(self, filename): + def read_from_external_file(self, filename): tree = ET.parse(filename) - return self.convertXMLToMetadata(tree) + return self.convert_xml_to_metadata(tree) diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py index 91d5b77..02d84ab 100644 --- a/comicapi/comicarchive.py +++ b/comicapi/comicarchive.py @@ -14,42 +14,48 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import py7zr -import zipfile +import io +import logging import os +import platform import struct +import subprocess import sys import tempfile -import subprocess -import platform import time -import io +import zipfile import natsort +import py7zr + try: from unrar.cffi import rarfile except: pass try: - import Image + from PIL import Image + pil_available = True except ImportError: pil_available = False -from .comicinfoxml import ComicInfoXml -from .comicbookinfo import ComicBookInfo -from .comet import CoMet -from .genericmetadata import GenericMetadata, PageType -from .filenameparser import FileNameParser +from comicapi.comet import CoMet +from comicapi.comicbookinfo import ComicBookInfo +from comicapi.comicinfoxml import ComicInfoXml +from comicapi.filenameparser import FileNameParser +from comicapi.genericmetadata import GenericMetadata, PageType +logger = logging.getLogger(__name__) sys.path.insert(0, os.path.abspath(".")) + class MetaDataStyle: CBI = 0 CIX = 1 COMET = 2 - name = ['ComicBookLover', 'ComicRack', 'CoMet'] + name = ["ComicBookLover", "ComicRack", "CoMet"] + class SevenZipArchiver: @@ -68,15 +74,13 @@ class SevenZipArchiver: def readArchiveFile(self, archive_file): data = "" try: - with py7zr.SevenZipFile(self.path, 'r') as zf: + with py7zr.SevenZipFile(self.path, "r") as zf: data = zf.read(archive_file)[archive_file].read() except py7zr.Bad7zFile as e: - print("bad 7zip file [{0}]: {1} :: {2}".format(e, self.path, - archive_file), file=sys.stderr) + logger.warning("bad 7zip file [%s]: %s :: %s", e, self.path, archive_file) raise IOError except Exception as e: - print("bad 7zip file [{0}]: {1} :: {2}".format(e, self.path, - archive_file), file=sys.stderr) + logger.warning("bad 7zip file [%s]: %s :: %s", e, self.path, archive_file) raise IOError return data @@ -99,7 +103,7 @@ class SevenZipArchiver: self.rebuildSevenZipFile([archive_file]) # now just add the archive file as a new one - with py7zr.SevenZipFile(self.path, 'a') as zf: + with 
py7zr.SevenZipFile(self.path, "a") as zf: zf.writestr(data, archive_file) return True except: @@ -107,13 +111,12 @@ class SevenZipArchiver: def getArchiveFilenameList(self): try: - with py7zr.SevenZipFile(self.path, 'r') as zf: + with py7zr.SevenZipFile(self.path, "r") as zf: namelist = zf.getnames() return namelist except Exception as e: - print("Unable to get zipfile list [{0}]: {1}".format( - e, self.path), file=sys.stderr) + logger.warning("Unable to get 7zip file list [%s]: %s", e, self.path) return [] def rebuildSevenZipFile(self, exclude_list): @@ -125,14 +128,14 @@ class SevenZipArchiver: os.close(tmp_fd) try: - with py7zr.SevenZipFile(self.path, 'r') as zip: + with py7zr.SevenZipFile(self.path, "r") as zip: targets = [f for f in zip.getnames() if f not in exclude_list] - with py7zr.SevenZipFile(self.path, 'r') as zin: - with py7zr.SevenZipFile(tmp_name, 'w') as zout: + with py7zr.SevenZipFile(self.path, "r") as zin: + with py7zr.SevenZipFile(tmp_name, "w") as zout: for fname, bio in zin.read(targets).items(): zout.writef(bio, fname) except Exception as e: - print("Exception[{0}]: {1}".format(e, self.path)) + logger.warning("Exception[%s]: %s", e, self.path) return [] # replace with the new file @@ -142,18 +145,18 @@ class SevenZipArchiver: def copyFromArchive(self, otherArchive): """Replace the current zip with one copied from another archive""" try: - with py7zr.SevenZipFile(self.path, 'w') as zout: + with py7zr.SevenZipFile(self.path, "w") as zout: for fname in otherArchive.getArchiveFilenameList(): data = otherArchive.readArchiveFile(fname) if data is not None: zout.writestr(data, fname) except Exception as e: - print("Error while copying to {0}: {1}".format( - self.path, e), file=sys.stderr) + logger.warning("Error while copying to %s: %s", self.path, e) return False else: return True + class ZipArchiver: """ZIP implementation""" @@ -161,78 +164,64 @@ class ZipArchiver: def __init__(self, path): self.path = path - def getArchiveComment(self): - zf = 
zipfile.ZipFile(self.path, 'r') - comment = zf.comment - zf.close() + def get_comment(self): + with zipfile.ZipFile(self.path, "r") as zf: + comment = zf.comment return comment - def setArchiveComment(self, comment): - zf = zipfile.ZipFile(self.path, 'a') - zf.comment = bytes(comment, 'utf-8') - zf.close() + def set_comment(self, comment): + with zipfile.ZipFile(self.path, "a") as zf: + zf.comment = bytes(comment, "utf-8") return True - def readArchiveFile(self, archive_file): - data = "" - zf = zipfile.ZipFile(self.path, 'r') + def read_file(self, archive_file): + with zipfile.ZipFile(self.path, "r") as zf: - try: - data = zf.read(archive_file) - except zipfile.BadZipfile as e: - print("bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file), file=sys.stderr) - zf.close() - raise IOError - except Exception as e: - zf.close() - print("bad zipfile [{0}]: {1} :: {2}".format( - e, self.path, archive_file), file=sys.stderr) - raise IOError - finally: - zf.close() + try: + data = zf.read(archive_file) + except zipfile.BadZipfile as e: + logger.warning("bad zipfile [%s]: %s :: %s", e, self.path, archive_file) + raise IOError from e + except Exception as e: + logger.warning("bad zipfile [%s]: %s :: %s", e, self.path, archive_file) + raise IOError from e return data - def removeArchiveFile(self, archive_file): + def remove_file(self, archive_file): try: - self.rebuildZipFile([archive_file]) + self.rebuild_zip_file([archive_file]) except: return False else: return True - def writeArchiveFile(self, archive_file, data): + def write_file(self, archive_file, data): # At the moment, no other option but to rebuild the whole # zip archive w/o the indicated file. 
Very sucky, but maybe # another solution can be found try: - files = self.getArchiveFilenameList() + files = self.get_filename_list() if archive_file in files: - self.rebuildZipFile([archive_file]) + self.rebuild_zip_file([archive_file]) # now just add the archive file as a new one - zf = zipfile.ZipFile( - self.path, - mode='a', - allowZip64=True, - compression=zipfile.ZIP_DEFLATED) - zf.writestr(archive_file, data) - zf.close() + with zipfile.ZipFile(self.path, mode="a", allowZip64=True, compression=zipfile.ZIP_DEFLATED) as zf: + zf.writestr(archive_file, data) return True - except: + except Exception as e: + logger.warning("writing zip file failed [%s]: %s", e, self.path) return False - def getArchiveFilenameList(self): + def get_filename_list(self): try: - zf = zipfile.ZipFile(self.path, 'r') - namelist = zf.namelist() - zf.close() + with zipfile.ZipFile(self.path, "r") as zf: + namelist = zf.namelist() return namelist except Exception as e: - print("Unable to get zipfile list [{0}]: {1}".format( - e, self.path), file=sys.stderr) + logger.warning("Unable to get zipfile list [%s]: %s", e, self.path) return [] - def rebuildZipFile(self, exclude_list): + def rebuild_zip_file(self, exclude_list): """Zip helper func This recompresses the zip archive, without the files in the exclude_list @@ -240,24 +229,21 @@ class ZipArchiver: tmp_fd, tmp_name = tempfile.mkstemp(dir=os.path.dirname(self.path)) os.close(tmp_fd) - zin = zipfile.ZipFile(self.path, 'r') - zout = zipfile.ZipFile(tmp_name, 'w', allowZip64=True) - for item in zin.infolist(): - buffer = zin.read(item.filename) - if (item.filename not in exclude_list): - zout.writestr(item, buffer) + with zipfile.ZipFile(self.path, "r") as zin: + with zipfile.ZipFile(tmp_name, "w", allowZip64=True) as zout: + for item in zin.infolist(): + buffer = zin.read(item.filename) + if item.filename not in exclude_list: + zout.writestr(item, buffer) - # preserve the old comment - zout.comment = zin.comment - - zout.close() - 
zin.close() + # preserve the old comment + zout.comment = zin.comment # replace with the new file os.remove(self.path) os.rename(tmp_name, self.path) - def writeZipComment(self, filename, comment): + def write_zip_comment(self, filename, comment): """ This is a custom function for writing a comment to a zip file, since the built-in one doesn't seem to work on Windows and Mac OS/X @@ -272,80 +258,75 @@ class ZipArchiver: file_length = statinfo.st_size try: - fo = open(filename, "r+b") + with open(filename, "r+b") as fo: - # the starting position, relative to EOF - pos = -4 + # the starting position, relative to EOF + pos = -4 - found = False - value = bytearray() + found = False - # walk backwards to find the "End of Central Directory" record - while (not found) and (-pos != file_length): - # seek, relative to EOF - fo.seek(pos, 2) + # walk backwards to find the "End of Central Directory" record + while (not found) and (-pos != file_length): + # seek, relative to EOF + fo.seek(pos, 2) - value = fo.read(4) + value = fo.read(4) - # look for the end of central directory signature - if bytearray(value) == bytearray([0x50, 0x4b, 0x05, 0x06]): - found = True + # look for the end of central directory signature + if bytearray(value) == bytearray([0x50, 0x4B, 0x05, 0x06]): + found = True + else: + # not found, step back another byte + pos = pos - 1 + + if found: + + # now skip forward 20 bytes to the comment length word + pos += 20 + fo.seek(pos, 2) + + # Pack the length of the comment string + fmt = "H" # one 2-byte integer + comment_length = struct.pack(fmt, len(comment)) # pack integer in a binary string + + # write out the length + fo.write(comment_length) + fo.seek(pos + 2, 2) + + # write out the comment itself + fo.write(bytes(comment)) + fo.truncate() else: - # not found, step back another byte - pos = pos - 1 - # print pos,"{1} int: {0:x}".format(bytearray(value)[0], value) - - if found: - - # now skip forward 20 bytes to the comment length word - pos += 20 - 
fo.seek(pos, 2) - - # Pack the length of the comment string - format = "H" # one 2-byte integer - comment_length = struct.pack( - format, - len(comment)) # pack integer in a binary string - - # write out the length - fo.write(comment_length) - fo.seek(pos + 2, 2) - - # write out the comment itself - fo.write(bytes(comment)) - fo.truncate() - fo.close() - else: - raise Exception('Failed to write comment to zip file!') - except Exception as e: + raise Exception("Failed to write comment to zip file!") + except Exception: return False else: return True - def copyFromArchive(self, otherArchive): + def copy_from_archive(self, other_archive): """Replace the current zip with one copied from another archive""" try: - zout = zipfile.ZipFile(self.path, 'w', allowZip64=True) - for fname in otherArchive.getArchiveFilenameList(): - data = otherArchive.readArchiveFile(fname) - if data is not None: - zout.writestr(fname, data) - zout.close() + with zipfile.ZipFile(self.path, "w", allowZip64=True) as zout: + for fname in other_archive.get_filename_list(): + data = other_archive.read_file(fname) + if data is not None: + zout.writestr(fname, data) # preserve the old comment - comment = otherArchive.getArchiveComment() + comment = other_archive.get_comment() if comment is not None: - if not self.writeZipComment(self.path, comment): + if not self.write_zip_comment(self.path, comment): return False except Exception as e: - print("Error while copying to {0}: {1}".format( - self.path, e), file=sys.stderr) + logger.warning("Error while copying to %s: %s", self.path, e) return False else: return True + class RarArchiver: """RAR implementation""" + devnull = None def __init__(self, path, rar_exe_path): @@ -362,49 +343,45 @@ class RarArchiver: else: self.startupinfo = None - def getArchiveComment(self): - rarc = self.getRARObj() + def get_comment(self): + rarc = self.get_rar_obj() return rarc.comment - def setArchiveComment(self, comment): + def set_comment(self, comment): if 
self.rar_exe_path is not None: try: # write comment to temp file tmp_fd, tmp_name = tempfile.mkstemp() - f = os.fdopen(tmp_fd, 'w+') + f = os.fdopen(tmp_fd, "w+") f.write(comment) f.close() working_dir = os.path.dirname(os.path.abspath(self.path)) # use external program to write comment to Rar archive - proc_args = [self.rar_exe_path, - 'c', - '-w' + working_dir, - '-c-', - '-z' + tmp_name, - self.path] - subprocess.call(proc_args, - startupinfo=self.startupinfo, - stdout=RarArchiver.devnull, - stdin=RarArchiver.devnull, - stderr=RarArchiver.devnull) + proc_args = [self.rar_exe_path, "c", "-w" + working_dir, "-c-", "-z" + tmp_name, self.path] + subprocess.call( + proc_args, + startupinfo=self.startupinfo, + stdout=RarArchiver.devnull, + stdin=RarArchiver.devnull, + stderr=RarArchiver.devnull, + ) if platform.system() == "Darwin": time.sleep(1) os.remove(tmp_name) except Exception as e: - print(e) + logger.warning(e) return False else: return True else: return False - def readArchiveFile(self, archive_file): - entries = [] + def read_file(self, archive_file): - rarc = self.getRARObj() + rarc = self.get_rar_obj() tries = 0 while tries < 7: @@ -414,33 +391,37 @@ class RarArchiver: entries = [(rarc.getinfo(archive_file), data)] if entries[0][0].file_size != len(entries[0][1]): - print("readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format( - entries[0][0].file_size, len( - entries[0][1]), self.path, archive_file, tries), file=sys.stderr) + logger.info( + "read_file(): [file is not expected size: %d vs %d] %s:%s [attempt # %d]", + entries[0][0].file_size, + len(entries[0][1]), + self.path, + archive_file, + tries, + ) continue except (OSError, IOError) as e: - print("readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format( - str(e), self.path, archive_file, tries), file=sys.stderr) + logger.warning("read_file(): [%s] %s:%s attempt #%d", e, self.path, archive_file, tries) time.sleep(1) except Exception as e: - print("Unexpected exception 
in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format( - str(e), self.path, archive_file, tries), file=sys.stderr) + logger.warning( + "Unexpected exception in read_file(): [%s] for %s:%s attempt #%d", e, self.path, archive_file, tries + ) break else: # Success" # entries is a list of of tuples: ( rarinfo, filedata) if tries > 1: - print("Attempted read_files() {0} times".format( - tries), file=sys.stderr) - if (len(entries) == 1): + logger.info("Attempted read_files() {%d} times", tries) + if len(entries) == 1: return entries[0][1] - else: - raise IOError + + raise IOError raise IOError - def writeArchiveFile(self, archive_file, data): + def write_file(self, archive_file, data): if self.rar_exe_path is not None: try: @@ -452,22 +433,17 @@ class RarArchiver: # TODO: will this break if 'archive_file' is in a subfolder. i.e. "foo/bar.txt" # will need to create the subfolder above, I guess... - f = open(tmp_file, 'w') - f.write(data) - f.close() + with open(tmp_file, "w") as f: + f.write(data) # use external program to write file to Rar archive - subprocess.call([self.rar_exe_path, - 'a', - '-w' + working_dir, - '-c-', - '-ep', - self.path, - tmp_file], - startupinfo=self.startupinfo, - stdout=RarArchiver.devnull, - stdin=RarArchiver.devnull, - stderr=RarArchiver.devnull) + subprocess.call( + [self.rar_exe_path, "a", "-w" + working_dir, "-c-", "-ep", self.path, tmp_file], + startupinfo=self.startupinfo, + stdout=RarArchiver.devnull, + stdin=RarArchiver.devnull, + stderr=RarArchiver.devnull, + ) if platform.system() == "Darwin": time.sleep(1) @@ -480,19 +456,17 @@ class RarArchiver: else: return False - def removeArchiveFile(self, archive_file): + def remove_file(self, archive_file): if self.rar_exe_path is not None: try: # use external program to remove file from Rar archive - subprocess.call([self.rar_exe_path, - 'd', - '-c-', - self.path, - archive_file], - startupinfo=self.startupinfo, - stdout=RarArchiver.devnull, - stdin=RarArchiver.devnull, - 
stderr=RarArchiver.devnull) + subprocess.call( + [self.rar_exe_path, "d", "-c-", self.path, archive_file], + startupinfo=self.startupinfo, + stdout=RarArchiver.devnull, + stdin=RarArchiver.devnull, + stderr=RarArchiver.devnull, + ) if platform.system() == "Darwin": time.sleep(1) @@ -503,45 +477,41 @@ class RarArchiver: else: return False - def getArchiveFilenameList(self): - rarc = self.getRARObj() + def get_filename_list(self): + rarc = self.get_rar_obj() tries = 0 - while tries < 7: - try: - tries = tries + 1 - namelist = [] - for item in rarc.infolist(): - if item.file_size != 0: - namelist.append(item.filename) + # while tries < 7: + try: + tries = tries + 1 + namelist = [] + for item in rarc.infolist(): + if item.file_size != 0: + namelist.append(item.filename) - except (OSError, IOError) as e: - print("getArchiveFilenameList(): [{0}] {1} attempt#{2}".format( - str(e), self.path, tries), file=sys.stderr) - time.sleep(1) + except (OSError, IOError) as e: + logger.warning(f"get_filename_list(): [{e}] {self.path} attempt #{tries}".format(str(e), self.path, tries)) + time.sleep(1) - else: - # Success" - return namelist + else: + # Success + return namelist - raise e + return None - def getRARObj(self): + def get_rar_obj(self): tries = 0 - while tries < 7: - try: - tries = tries + 1 - rarc = rarfile.RarFile(self.path) + try: + tries = tries + 1 + rarc = rarfile.RarFile(self.path) - except (OSError, IOError) as e: - print("getRARObj(): [{0}] {1} attempt#{2}".format( - str(e), self.path, tries), file=sys.stderr) - time.sleep(1) + except (OSError, IOError) as e: + logger.warning("getRARObj(): [%s] %s attempt #%s", e, self.path, tries) + time.sleep(1) - else: - # Success" - return rarc + else: + return rarc - raise e + return None class FolderArchiver: @@ -552,30 +522,30 @@ class FolderArchiver: self.path = path self.comment_file_name = "ComicTaggerFolderComment.txt" - def getArchiveComment(self): - return self.readArchiveFile(self.comment_file_name) + def 
get_comment(self): + return self.read_file(self.comment_file_name) - def setArchiveComment(self, comment): - return self.writeArchiveFile(self.comment_file_name, comment) + def set_comment(self, comment): + return self.write_file(self.comment_file_name, comment) - def readArchiveFile(self, archive_file): + def read_file(self, archive_file): data = "" fname = os.path.join(self.path, archive_file) try: - with open(fname, 'rb') as f: + with open(fname, "rb") as f: data = f.read() f.close() - except IOError as e: + except IOError: pass return data - def writeArchiveFile(self, archive_file, data): + def write_file(self, archive_file, data): fname = os.path.join(self.path, archive_file) try: - with open(fname, 'w+') as f: + with open(fname, "w+") as f: f.write(data) f.close() except: @@ -583,7 +553,7 @@ class FolderArchiver: else: return True - def removeArchiveFile(self, archive_file): + def remove_file(self, archive_file): fname = os.path.join(self.path, archive_file) try: @@ -593,21 +563,22 @@ class FolderArchiver: else: return True - def getArchiveFilenameList(self): - return self.listFiles(self.path) + def get_filename_list(self): + return self.list_files(self.path) - def listFiles(self, folder): + def list_files(self, folder): - itemlist = list() + itemlist = [] for item in os.listdir(folder): itemlist.append(item) if os.path.isdir(item): - itemlist.extend(self.listFiles(os.path.join(folder, item))) + itemlist.extend(self.list_files(os.path.join(folder, item))) return itemlist +# noinspection PyUnusedLocal class UnknownArchiver: """Unknown implementation""" @@ -615,36 +586,47 @@ class UnknownArchiver: def __init__(self, path): self.path = path - def getArchiveComment(self): + def get_comment(self): return "" - def setArchiveComment(self, comment): + def set_comment(self, comment): return False - def readArchiveFile(self): + def read_file(self, archive_file): return "" - def writeArchiveFile(self, archive_file, data): + def write_file(self, archive_file, data): 
return False - def removeArchiveFile(self, archive_file): + def remove_file(self, archive_file): return False - def getArchiveFilenameList(self): + def get_filename_list(self): return [] + class ComicArchive: logo_data = None + class ArchiveType: SevenZip, Zip, Rar, Folder, Pdf, Unknown = list(range(6)) def __init__(self, path, rar_exe_path=None, default_image_path=None): + self.cbi_md = None + self.cix_md = None + self.comet_filename = None + self.comet_md = None + self.has__cbi = None + self.has__cix = None + self.has__comet = None self.path = path + self.page_count = None + self.page_list = None self.rar_exe_path = rar_exe_path - self.ci_xml_filename = 'ComicInfo.xml' - self.comet_default_filename = 'CoMet.xml' - self.resetCache() + self.ci_xml_filename = "ComicInfo.xml" + self.comet_default_filename = "CoMet.xml" + self.reset_cache() self.default_image_path = default_image_path # Use file extension to decide which archive test we do first @@ -653,43 +635,38 @@ class ComicArchive: self.archive_type = self.ArchiveType.Unknown self.archiver = UnknownArchiver(self.path) - if ext == ".cbr" or ext == ".rar": - if self.rarTest(): + if ext in [".cbr", ".rar"]: + if self.rar_test(): self.archive_type = self.ArchiveType.Rar - self.archiver = RarArchiver( - self.path, - rar_exe_path=self.rar_exe_path) + self.archiver = RarArchiver(self.path, rar_exe_path=self.rar_exe_path) - elif self.zipTest(): + elif self.zip_test(): self.archive_type = self.ArchiveType.Zip self.archiver = ZipArchiver(self.path) else: - if self.sevenZipTest(): + if self.sevenzip_test(): self.archive_type = self.ArchiveType.SevenZip self.archiver = SevenZipArchiver(self.path) - elif self.zipTest(): + elif self.zip_test(): self.archive_type = self.ArchiveType.Zip self.archiver = ZipArchiver(self.path) - elif self.rarTest(): + elif self.rar_test(): self.archive_type = self.ArchiveType.Rar - self.archiver = RarArchiver( - self.path, - rar_exe_path=self.rar_exe_path) + self.archiver = RarArchiver(self.path, 
rar_exe_path=self.rar_exe_path) if ComicArchive.logo_data is None: - #fname = ComicTaggerSettings.getGraphic('nocover.png') fname = self.default_image_path - with open(fname, 'rb') as fd: + with open(fname, "rb") as fd: ComicArchive.logo_data = fd.read() - def resetCache(self): + def reset_cache(self): """Clears the cached data""" - self.has_cix = None - self.has_cbi = None - self.has_comet = None + self.has__cix = None + self.has__cbi = None + self.has__comet = None self.comet_filename = None self.page_count = None self.page_list = None @@ -697,139 +674,129 @@ class ComicArchive: self.cbi_md = None self.comet_md = None - def loadCache(self, style_list): + def load_cache(self, style_list): for style in style_list: - self.readMetadata(style) + self.read_metadata(style) def rename(self, path): self.path = path self.archiver.path = path - def sevenZipTest(self): + def sevenzip_test(self): return py7zr.is_7zfile(self.path) - def zipTest(self): + def zip_test(self): return zipfile.is_zipfile(self.path) - def rarTest(self): + def rar_test(self): try: return rarfile.is_rarfile(self.path) except: return False - def isSevenZip(self): + def is_sevenzip(self): return self.archive_type == self.ArchiveType.SevenZip - def isZip(self): + def is_zip(self): return self.archive_type == self.ArchiveType.Zip - def isRar(self): + def is_rar(self): return self.archive_type == self.ArchiveType.Rar - def isPdf(self): + def is_pdf(self): return self.archive_type == self.ArchiveType.Pdf - def isFolder(self): + def is_folder(self): return self.archive_type == self.ArchiveType.Folder - def isWritable(self, check_rar_status=True): + def is_writable(self, check_rar_status=True): if self.archive_type == self.ArchiveType.Unknown: return False - elif check_rar_status and self.isRar() and not self.rar_exe_path: + if check_rar_status and self.is_rar() and not self.rar_exe_path: return False - elif not os.access(self.path, os.W_OK): + if not os.access(self.path, os.W_OK): return False - elif 
((self.archive_type != self.ArchiveType.Folder) and - (not os.access(os.path.dirname(os.path.abspath(self.path)), os.W_OK))): + if (self.archive_type != self.ArchiveType.Folder) and ( + not os.access(os.path.dirname(os.path.abspath(self.path)), os.W_OK) + ): return False return True - def isWritableForStyle(self, data_style): + def is_writable_for_style(self, data_style): - if self.isRar() and data_style == MetaDataStyle.CBI: + if (self.is_rar() or self.is_sevenzip()) and data_style == MetaDataStyle.CBI: return False - return self.isWritable() + return self.is_writable() - def seemsToBeAComicArchive(self): - # Do we even care about extensions?? - ext = os.path.splitext(self.path)[1].lower() - - if ( - # or self.isFolder() ) - (self.isSevenZip() or self.isZip() or self.isRar()) - and - (self.getNumberOfPages() > 0) - - ): + def seems_to_be_a_comic_archive(self): + if (self.is_zip() or self.is_rar() or self.is_sevenzip()) and (self.get_number_of_pages() > 0): return True - else: - return False - def readMetadata(self, style): + return False + + def read_metadata(self, style): if style == MetaDataStyle.CIX: - return self.readCIX() - elif style == MetaDataStyle.CBI: - return self.readCBI() - elif style == MetaDataStyle.COMET: - return self.readCoMet() - else: - return GenericMetadata() + return self.read_cix() + if style == MetaDataStyle.CBI: + return self.read_cbi() + if style == MetaDataStyle.COMET: + return self.read_comet() + return GenericMetadata() - def writeMetadata(self, metadata, style): + def write_metadata(self, metadata, style): retcode = None if style == MetaDataStyle.CIX: - retcode = self.writeCIX(metadata) - elif style == MetaDataStyle.CBI: - retcode = self.writeCBI(metadata) - elif style == MetaDataStyle.COMET: - retcode = self.writeCoMet(metadata) + retcode = self.write_cix(metadata) + if style == MetaDataStyle.CBI: + retcode = self.write_cbi(metadata) + if style == MetaDataStyle.COMET: + retcode = self.write_comet(metadata) return retcode - def 
hasMetadata(self, style): + def has_metadata(self, style): if style == MetaDataStyle.CIX: - return self.hasCIX() - elif style == MetaDataStyle.CBI: - return self.hasCBI() - elif style == MetaDataStyle.COMET: - return self.hasCoMet() - else: - return False + return self.has_cix() + if style == MetaDataStyle.CBI: + return self.has_cbi() + if style == MetaDataStyle.COMET: + return self.has_comet() + return False - def removeMetadata(self, style): + def remove_metadata(self, style): retcode = True if style == MetaDataStyle.CIX: - retcode = self.removeCIX() + retcode = self.remove_cix() elif style == MetaDataStyle.CBI: - retcode = self.removeCBI() + retcode = self.remove_cbi() elif style == MetaDataStyle.COMET: - retcode = self.removeCoMet() + retcode = self.remove_co_met() return retcode - def getPage(self, index): + def get_page(self, index): image_data = None - filename = self.getPageName(index) + filename = self.get_page_name(index) if filename is not None: try: - image_data = self.archiver.readArchiveFile(filename) + image_data = self.archiver.read_file(filename) except IOError: - print("Error reading in page. Substituting logo page.", file=sys.stderr) + logger.warning("Error reading in page. 
Substituting logo page.") image_data = ComicArchive.logo_data return image_data - def getPageName(self, index): + def get_page_name(self, index): if index is None: return None - page_list = self.getPageNameList() + page_list = self.get_page_name_list() num_pages = len(page_list) if num_pages == 0 or index >= num_pages: @@ -837,19 +804,19 @@ class ComicArchive: return page_list[index] - def getScannerPageIndex(self): + def get_scanner_page_index(self): scanner_page_index = None # make a guess at the scanner page - name_list = self.getPageNameList() - count = self.getNumberOfPages() + name_list = self.get_page_name_list() + count = self.get_number_of_pages() # too few pages to really know if count < 5: return None # count the length of every filename, and count occurences - length_buckets = dict() + length_buckets = {} for name in name_list: fname = os.path.split(name)[1] length = len(fname) @@ -859,12 +826,7 @@ class ComicArchive: length_buckets[length] = 1 # sort by most common - sorted_buckets = sorted( - iter(length_buckets.items()), - key=lambda k_v: ( - k_v[1], - k_v[0]), - reverse=True) + sorted_buckets = sorted(iter(length_buckets.items()), key=lambda k_v: (k_v[1], k_v[0]), reverse=True) # statistical mode occurence is first mode_length = sorted_buckets[0][0] @@ -872,7 +834,7 @@ class ComicArchive: # we are only going to consider the final image file: final_name = os.path.split(name_list[count - 1])[1] - common_length_list = list() + common_length_list = [] for name in name_list: if len(os.path.split(name)[1]) == mode_length: common_length_list.append(os.path.split(name)[1]) @@ -884,294 +846,284 @@ class ComicArchive: if len(final_name) > mode_length: scanner_page_index = count - 1 - # see if the last page doesn't start with the same prefix as most - # others + # see if the last page doesn't start with the same prefix as most others elif not final_name.startswith(prefix): scanner_page_index = count - 1 return scanner_page_index - def getPageNameList(self, 
sort_list=True): + def get_page_name_list(self, sort_list=True): if self.page_list is None: # get the list file names in the archive, and sort - files = self.archiver.getArchiveFilenameList() + files = self.archiver.get_filename_list() - # seems like some archive creators are on Windows, and don't know - # about case-sensitivity! + # seems like some archive creators are on Windows, and don't know about case-sensitivity! if sort_list: - def keyfunc(k): - return k.lower() files = natsort.natsorted(files, alg=natsort.ns.IC | natsort.ns.I | natsort.ns.U) # make a sub-list of image files self.page_list = [] for name in files: - if (name[-4:].lower() in [".jpg", - "jpeg", - ".png", - ".gif", - "webp"] and os.path.basename(name)[0] != "."): + if ( + os.path.splitext(name)[1].lower() in [".jpg", "jpeg", ".png", ".gif", ".webp"] + and os.path.basename(name)[0] != "." + ): self.page_list.append(name) return self.page_list - def getNumberOfPages(self): + def get_number_of_pages(self): if self.page_count is None: - self.page_count = len(self.getPageNameList()) + self.page_count = len(self.get_page_name_list()) return self.page_count - def readCBI(self): + def read_cbi(self): if self.cbi_md is None: - raw_cbi = self.readRawCBI() + raw_cbi = self.read_raw_cbi() if raw_cbi is None: self.cbi_md = GenericMetadata() else: - self.cbi_md = ComicBookInfo().metadataFromString(raw_cbi) + self.cbi_md = ComicBookInfo().metadata_from_string(raw_cbi) - self.cbi_md.setDefaultPageList(self.getNumberOfPages()) + self.cbi_md.set_default_page_list(self.get_number_of_pages()) return self.cbi_md - def readRawCBI(self): - if (not self.hasCBI()): + def read_raw_cbi(self): + if not self.has_cbi(): return None - return self.archiver.getArchiveComment() + return self.archiver.get_comment() - def hasCBI(self): - if self.has_cbi is None: - - # if ( not (self.isSevenZip() or self.isZip() or self.isRar()) or not - # self.seemsToBeAComicArchive() ): - if not self.seemsToBeAComicArchive(): - self.has_cbi = 
False + def has_cbi(self): + if self.has__cbi is None: + if not self.seems_to_be_a_comic_archive(): + self.has__cbi = False else: - comment = self.archiver.getArchiveComment() - self.has_cbi = ComicBookInfo().validateString(comment) + comment = self.archiver.get_comment() + self.has__cbi = ComicBookInfo().validate_string(comment) - return self.has_cbi + return self.has__cbi - def writeCBI(self, metadata): + def write_cbi(self, metadata): if metadata is not None: - self.applyArchiveInfoToMetadata(metadata) - cbi_string = ComicBookInfo().stringFromMetadata(metadata) - write_success = self.archiver.setArchiveComment(cbi_string) + self.apply_archive_info_to_metadata(metadata) + cbi_string = ComicBookInfo().string_from_metadata(metadata) + write_success = self.archiver.set_comment(cbi_string) if write_success: - self.has_cbi = True + self.has__cbi = True self.cbi_md = metadata - self.resetCache() + self.reset_cache() return write_success - else: - return False - def removeCBI(self): - if self.hasCBI(): - write_success = self.archiver.setArchiveComment("") + return False + + def remove_cbi(self): + if self.has_cbi(): + write_success = self.archiver.set_comment("") if write_success: - self.has_cbi = False + self.has__cbi = False self.cbi_md = None - self.resetCache() + self.reset_cache() return write_success return True - def readCIX(self): + def read_cix(self): if self.cix_md is None: - raw_cix = self.readRawCIX() + raw_cix = self.read_raw_cix() if raw_cix is None or raw_cix == "": self.cix_md = GenericMetadata() else: - self.cix_md = ComicInfoXml().metadataFromString(raw_cix) + self.cix_md = ComicInfoXml().metadata_from_string(raw_cix) # validate the existing page list (make sure count is correct) if len(self.cix_md.pages) != 0: - if len(self.cix_md.pages) != self.getNumberOfPages(): + if len(self.cix_md.pages) != self.get_number_of_pages(): # pages array doesn't match the actual number of images we're seeing # in the archive, so discard the data self.cix_md.pages = [] 
if len(self.cix_md.pages) == 0: - self.cix_md.setDefaultPageList(self.getNumberOfPages()) + self.cix_md.set_default_page_list(self.get_number_of_pages()) return self.cix_md - def readRawCIX(self): - if not self.hasCIX(): + def read_raw_cix(self): + if not self.has_cix(): return None try: - raw_cix = self.archiver.readArchiveFile(self.ci_xml_filename) - except IOError: - print("Error reading in raw CIX!") + raw_cix = self.archiver.read_file(self.ci_xml_filename) + except IOError as e: + logger.warning("Error reading in raw CIX!: %s", e) raw_cix = "" return raw_cix - def writeCIX(self, metadata): + def write_cix(self, metadata): if metadata is not None: - self.applyArchiveInfoToMetadata(metadata, calc_page_sizes=True) - rawCIX = self.readRawCIX() - if rawCIX == "": - rawCIX = None - cix_string = ComicInfoXml().stringFromMetadata(metadata, xml=rawCIX) - write_success = self.archiver.writeArchiveFile( - self.ci_xml_filename, - cix_string) + self.apply_archive_info_to_metadata(metadata, calc_page_sizes=True) + raw_cix = self.read_raw_cix() + if raw_cix == "": + raw_cix = None + cix_string = ComicInfoXml().string_from_metadata(metadata, xml=raw_cix) + write_success = self.archiver.write_file(self.ci_xml_filename, cix_string) if write_success: - self.has_cix = True + self.has__cix = True self.cix_md = metadata - self.resetCache() + self.reset_cache() return write_success - else: - return False - def removeCIX(self): - if self.hasCIX(): - write_success = self.archiver.removeArchiveFile( - self.ci_xml_filename) + return False + + def remove_cix(self): + if self.has_cix(): + write_success = self.archiver.remove_file(self.ci_xml_filename) if write_success: - self.has_cix = False + self.has__cix = False self.cix_md = None - self.resetCache() + self.reset_cache() return write_success return True - def hasCIX(self): - if self.has_cix is None: + def has_cix(self): + if self.has__cix is None: - if not self.seemsToBeAComicArchive(): - self.has_cix = False - elif 
self.ci_xml_filename in self.archiver.getArchiveFilenameList(): - self.has_cix = True + if not self.seems_to_be_a_comic_archive(): + self.has__cix = False + elif self.ci_xml_filename in self.archiver.get_filename_list(): + self.has__cix = True else: - self.has_cix = False - return self.has_cix + self.has__cix = False + return self.has__cix - def readCoMet(self): + def read_comet(self): if self.comet_md is None: - raw_comet = self.readRawCoMet() + raw_comet = self.read_raw_comet() if raw_comet is None or raw_comet == "": self.comet_md = GenericMetadata() else: - self.comet_md = CoMet().metadataFromString(raw_comet) + self.comet_md = CoMet().metadata_from_string(raw_comet) - self.comet_md.setDefaultPageList(self.getNumberOfPages()) + self.comet_md.set_default_page_list(self.get_number_of_pages()) # use the coverImage value from the comet_data to mark the cover in this struct # walk through list of images in file, and find the matching one for md.coverImage # need to remove the existing one in the default - if self.comet_md.coverImage is not None: + if self.comet_md.cover_image is not None: cover_idx = 0 - for idx, f in enumerate(self.getPageNameList()): - if self.comet_md.coverImage == f: + for idx, f in enumerate(self.get_page_name_list()): + if self.comet_md.cover_image == f: cover_idx = idx break if cover_idx != 0: - del (self.comet_md.pages[0]['Type']) - self.comet_md.pages[cover_idx][ - 'Type'] = PageType.FrontCover + del self.comet_md.pages[0]["Type"] + self.comet_md.pages[cover_idx]["Type"] = PageType.FrontCover return self.comet_md - def readRawCoMet(self): - if not self.hasCoMet(): - print(self.path, "doesn't have CoMet data!", file=sys.stderr) + def read_raw_comet(self): + if not self.has_comet(): + err_msg = self.path + " doesn't have CoMet data!" 
+ logger.info(err_msg) return None try: - raw_comet = self.archiver.readArchiveFile(self.comet_filename) - except IOError: - print("Error reading in raw CoMet!", file=sys.stderr) + raw_comet = self.archiver.read_file(self.comet_filename) + except IOError as e: + err_msg = f"Error reading in raw CoMet!: {e}" + logger.warning(err_msg) raw_comet = "" return raw_comet - def writeCoMet(self, metadata): + def write_comet(self, metadata): if metadata is not None: - if not self.hasCoMet(): + if not self.has_comet(): self.comet_filename = self.comet_default_filename - self.applyArchiveInfoToMetadata(metadata) + self.apply_archive_info_to_metadata(metadata) # Set the coverImage value, if it's not the first page - cover_idx = int(metadata.getCoverPageIndexList()[0]) + cover_idx = int(metadata.get_cover_page_index_list()[0]) if cover_idx != 0: - metadata.coverImage = self.getPageName(cover_idx) + metadata.cover_image = self.get_page_name(cover_idx) - comet_string = CoMet().stringFromMetadata(metadata) - write_success = self.archiver.writeArchiveFile( - self.comet_filename, - comet_string) + comet_string = CoMet().string_from_metadata(metadata) + write_success = self.archiver.write_file(self.comet_filename, comet_string) if write_success: - self.has_comet = True + self.has__comet = True self.comet_md = metadata - self.resetCache() + self.reset_cache() return write_success - else: - return False - def removeCoMet(self): - if self.hasCoMet(): - write_success = self.archiver.removeArchiveFile( - self.comet_filename) + return False + + def remove_co_met(self): + if self.has_comet(): + write_success = self.archiver.remove_file(self.comet_filename) if write_success: - self.has_comet = False + self.has__comet = False self.comet_md = None - self.resetCache() + self.reset_cache() return write_success return True - def hasCoMet(self): - if self.has_comet is None: - self.has_comet = False - if not self.seemsToBeAComicArchive(): - return self.has_comet + def has_comet(self): + if 
self.has__comet is None: + self.has__comet = False + if not self.seems_to_be_a_comic_archive(): + return self.has__comet - # look at all xml files in root, and search for CoMet data, get - # first - for n in self.archiver.getArchiveFilenameList(): - if (os.path.dirname(n) == "" and - os.path.splitext(n)[1].lower() == '.xml'): + # look at all xml files in root, and search for CoMet data, get first + for n in self.archiver.get_filename_list(): + if os.path.dirname(n) == "" and os.path.splitext(n)[1].lower() == ".xml": # read in XML file, and validate it try: - data = self.archiver.readArchiveFile(n) - except: + data = self.archiver.read_file(n) + except Exception as e: data = "" - print("Error reading in Comet XML for validation!", file=sys.stderr) - if CoMet().validateString(data): + err_msg = f"Error reading in Comet XML for validation!: {e}" + logger.warning(err_msg) + if CoMet().validate_string(data): # since we found it, save it! self.comet_filename = n - self.has_comet = True + self.has__comet = True break - return self.has_comet + return self.has__comet - def applyArchiveInfoToMetadata(self, md, calc_page_sizes=False): - md.pageCount = self.getNumberOfPages() + def apply_archive_info_to_metadata(self, md, calc_page_sizes=False): + md.page_count = self.get_number_of_pages() if calc_page_sizes: for p in md.pages: - idx = int(p['Image']) + idx = int(p["Image"]) if pil_available: - if 'ImageSize' not in p or 'ImageHeight' not in p or 'ImageWidth' not in p: - data = self.getPage(idx) + if "ImageSize" not in p or "ImageHeight" not in p or "ImageWidth" not in p: + data = self.get_page(idx) if data is not None: try: - im = Image.open(io.StringIO(data)) + if isinstance(data, bytes): + im = Image.open(io.BytesIO(data)) + else: + im = Image.open(io.StringIO(data)) w, h = im.size - p['ImageSize'] = str(len(data)) - p['ImageHeight'] = str(h) - p['ImageWidth'] = str(w) + p["ImageSize"] = str(len(data)) + p["ImageHeight"] = str(h) + p["ImageWidth"] = str(w) except IOError: - 
p['ImageSize'] = str(len(data)) + p["ImageSize"] = str(len(data)) else: - if 'ImageSize' not in p: - data = self.getPage(idx) - p['ImageSize'] = str(len(data)) + if "ImageSize" not in p: + data = self.get_page(idx) + p["ImageSize"] = str(len(data)) + + def metadata_from_filename(self, parse_scan_info=True): - def metadataFromFilename(self, parse_scan_info=True): metadata = GenericMetadata() fnp = FileNameParser() - fnp.parseFilename(self.path) + fnp.parse_filename(self.path) if fnp.issue != "": metadata.issue = fnp.issue @@ -1182,19 +1134,19 @@ class ComicArchive: if fnp.year != "": metadata.year = fnp.year if fnp.issue_count != "": - metadata.issueCount = fnp.issue_count + metadata.issue_count = fnp.issue_count if parse_scan_info: if fnp.remainder != "": - metadata.scanInfo = fnp.remainder + metadata.scan_info = fnp.remainder - metadata.isEmpty = False + metadata.is_empty = False return metadata - def exportAsZip(self, zipfilename): + def export_as_zip(self, zipfilename): if self.archive_type == self.ArchiveType.Zip: # nothing to do, we're already a zip return True zip_archiver = ZipArchiver(zipfilename) - return zip_archiver.copyFromArchive(self.archiver) + return zip_archiver.copy_from_archive(self.archiver) diff --git a/comicapi/comicbookinfo.py b/comicapi/comicbookinfo.py index e55fac9..f408ac8 100644 --- a/comicapi/comicbookinfo.py +++ b/comicapi/comicbookinfo.py @@ -15,42 +15,39 @@ # limitations under the License. import json +from collections import defaultdict from datetime import datetime -#import zipfile -from .genericmetadata import GenericMetadata -from . 
import utils -#import ctversion +from comicapi import utils +from comicapi.genericmetadata import GenericMetadata class ComicBookInfo: - def metadataFromString(self, string): - class Default(dict): - def __missing__(self, key): - return None - cbi_container = json.loads(str(string, 'utf-8')) + def metadata_from_string(self, string): + + cbi_container = json.loads(str(string, "utf-8")) metadata = GenericMetadata() - cbi = Default(cbi_container['ComicBookInfo/1.0']) + cbi = defaultdict(lambda: None, cbi_container["ComicBookInfo/1.0"]) - metadata.series = utils.xlate(cbi['series']) - metadata.title = utils.xlate(cbi['title']) - metadata.issue = utils.xlate(cbi['issue']) - metadata.publisher = utils.xlate(cbi['publisher']) - metadata.month = utils.xlate(cbi['publicationMonth'], True) - metadata.year = utils.xlate(cbi['publicationYear'], True) - metadata.issueCount = utils.xlate(cbi['numberOfIssues'], True) - metadata.comments = utils.xlate(cbi['comments']) - metadata.genre = utils.xlate(cbi['genre']) - metadata.volume = utils.xlate(cbi['volume'], True) - metadata.volumeCount = utils.xlate(cbi['numberOfVolumes'], True) - metadata.language = utils.xlate(cbi['language']) - metadata.country = utils.xlate(cbi['country']) - metadata.criticalRating = utils.xlate(cbi['rating']) + metadata.series = utils.xlate(cbi["series"]) + metadata.title = utils.xlate(cbi["title"]) + metadata.issue = utils.xlate(cbi["issue"]) + metadata.publisher = utils.xlate(cbi["publisher"]) + metadata.month = utils.xlate(cbi["publicationMonth"], True) + metadata.year = utils.xlate(cbi["publicationYear"], True) + metadata.issue_count = utils.xlate(cbi["numberOfIssues"], True) + metadata.comments = utils.xlate(cbi["comments"]) + metadata.genre = utils.xlate(cbi["genre"]) + metadata.volume = utils.xlate(cbi["volume"], True) + metadata.volume_count = utils.xlate(cbi["numberOfVolumes"], True) + metadata.language = utils.xlate(cbi["language"]) + metadata.country = utils.xlate(cbi["country"]) + 
metadata.critical_rating = utils.xlate(cbi["rating"]) - metadata.credits = cbi['credits'] - metadata.tags = cbi['tags'] + metadata.credits = cbi["credits"] + metadata.tags = cbi["tags"] # make sure credits and tags are at least empty lists and not None if metadata.credits is None: @@ -58,26 +55,20 @@ class ComicBookInfo: if metadata.tags is None: metadata.tags = [] - # need to massage the language string to be ISO + # need the language string to be ISO if metadata.language is not None: - # reverse look-up - pattern = metadata.language - metadata.language = None - for key in utils.getLanguageDict(): - if utils.getLanguageDict()[key] == pattern.encode('utf-8'): - metadata.language = key - break + metadata.language = utils.get_language(metadata.language) - metadata.isEmpty = False + metadata.is_empty = False return metadata - def stringFromMetadata(self, metadata): + def string_from_metadata(self, metadata): - cbi_container = self.createJSONDictionary(metadata) + cbi_container = self.create_json_dictionary(metadata) return json.dumps(cbi_container) - def validateString(self, string): + def validate_string(self, string): """Verify that the string actually contains CBI data in JSON format""" try: @@ -85,44 +76,45 @@ class ComicBookInfo: except: return False - return ('ComicBookInfo/1.0' in cbi_container) + return "ComicBookInfo/1.0" in cbi_container - def createJSONDictionary(self, metadata): + def create_json_dictionary(self, metadata): """Create the dictionary that we will convert to JSON text""" - cbi = dict() - cbi_container = {'appID': 'ComicTagger/' + '1.0.0', # ctversion.version, - 'lastModified': str(datetime.now()), - 'ComicBookInfo/1.0': cbi} + cbi = {} + cbi_container = { + "appID": "ComicTagger/" + "1.0.0", + "lastModified": str(datetime.now()), + "ComicBookInfo/1.0": cbi, + } # TODO: ctversion.version, # helper func def assign(cbi_entry, md_entry): if md_entry is not None or isinstance(md_entry, str) and md_entry != "": cbi[cbi_entry] = md_entry - 
assign('series', utils.xlate(metadata.series)) - assign('title', utils.xlate(metadata.title)) - assign('issue', utils.xlate(metadata.issue)) - assign('publisher', utils.xlate(metadata.publisher)) - assign('publicationMonth', utils.xlate(metadata.month, True)) - assign('publicationYear', utils.xlate(metadata.year, True)) - assign('numberOfIssues', utils.xlate(metadata.issueCount, True)) - assign('comments', utils.xlate(metadata.comments)) - assign('genre', utils.xlate(metadata.genre)) - assign('volume', utils.xlate(metadata.volume, True)) - assign('numberOfVolumes', utils.xlate(metadata.volumeCount, True)) - assign('language', utils.xlate(utils.getLanguageFromISO(metadata.language))) - assign('country', utils.xlate(metadata.country)) - assign('rating', utils.xlate(metadata.criticalRating)) - assign('credits', metadata.credits) - assign('tags', metadata.tags) + assign("series", utils.xlate(metadata.series)) + assign("title", utils.xlate(metadata.title)) + assign("issue", utils.xlate(metadata.issue)) + assign("publisher", utils.xlate(metadata.publisher)) + assign("publicationMonth", utils.xlate(metadata.month, True)) + assign("publicationYear", utils.xlate(metadata.year, True)) + assign("numberOfIssues", utils.xlate(metadata.issue_count, True)) + assign("comments", utils.xlate(metadata.comments)) + assign("genre", utils.xlate(metadata.genre)) + assign("volume", utils.xlate(metadata.volume, True)) + assign("numberOfVolumes", utils.xlate(metadata.volume_count, True)) + assign("language", utils.xlate(utils.get_language_from_iso(metadata.language))) + assign("country", utils.xlate(metadata.country)) + assign("rating", utils.xlate(metadata.critical_rating)) + assign("credits", metadata.credits) + assign("tags", metadata.tags) return cbi_container - def writeToExternalFile(self, filename, metadata): + def write_to_external_file(self, filename, metadata): - cbi_container = self.createJSONDictionary(metadata) + cbi_container = self.create_json_dictionary(metadata) - f = 
open(filename, 'w') - f.write(json.dumps(cbi_container, indent=4)) - f.close + with open(filename, "w") as f: + f.write(json.dumps(cbi_container, indent=4)) diff --git a/comicapi/comicinfoxml.py b/comicapi/comicinfoxml.py index d9b724a..13b96e5 100644 --- a/comicapi/comicinfoxml.py +++ b/comicapi/comicinfoxml.py @@ -15,26 +15,23 @@ # limitations under the License. import xml.etree.ElementTree as ET -#from datetime import datetime -#from pprint import pprint -#import zipfile -from .genericmetadata import GenericMetadata -from .issuestring import IssueString -from . import utils +from comicapi import utils +from comicapi.genericmetadata import GenericMetadata +from comicapi.issuestring import IssueString class ComicInfoXml: - writer_synonyms = ['writer', 'plotter', 'scripter'] - penciller_synonyms = ['artist', 'penciller', 'penciler', 'breakdowns'] - inker_synonyms = ['inker', 'artist', 'finishes'] - colorist_synonyms = ['colorist', 'colourist', 'colorer', 'colourer'] - letterer_synonyms = ['letterer'] - cover_synonyms = ['cover', 'covers', 'coverartist', 'cover artist'] - editor_synonyms = ['editor'] + writer_synonyms = ["writer", "plotter", "scripter"] + penciller_synonyms = ["artist", "penciller", "penciler", "breakdowns"] + inker_synonyms = ["inker", "artist", "finishes"] + colorist_synonyms = ["colorist", "colourist", "colorer", "colourer"] + letterer_synonyms = ["letterer"] + cover_synonyms = ["cover", "covers", "coverartist", "cover artist"] + editor_synonyms = ["editor"] - def getParseableCredits(self): + def get_parseable_credits(self): parsable_credits = [] parsable_credits.extend(self.writer_synonyms) parsable_credits.extend(self.penciller_synonyms) @@ -45,33 +42,17 @@ class ComicInfoXml: parsable_credits.extend(self.editor_synonyms) return parsable_credits - def metadataFromString(self, string): + def metadata_from_string(self, string): tree = ET.ElementTree(ET.fromstring(string)) - return self.convertXMLToMetadata(tree) + return 
self.convert_xml_to_metadata(tree) - def stringFromMetadata(self, metadata, xml=None): - tree = self.convertMetadataToXML(self, metadata, xml) + def string_from_metadata(self, metadata, xml=None): + tree = self.convert_metadata_to_xml(self, metadata, xml) tree_str = ET.tostring(tree.getroot(), encoding="utf-8", xml_declaration=True).decode() return tree_str - def indent(self, elem, level=0): - # for making the XML output readable - i = "\n" + level * " " - if len(elem): - if not elem.text or not elem.text.strip(): - elem.text = i + " " - if not elem.tail or not elem.tail.strip(): - elem.tail = i - for elem in elem: - self.indent(elem, level + 1) - if not elem.tail or not elem.tail.strip(): - elem.tail = i - else: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = i - - def convertMetadataToXML(self, filename, metadata, xml=None): + def convert_metadata_to_xml(self, filename, metadata, xml=None): # shorthand for the metadata md = metadata @@ -81,125 +62,123 @@ class ComicInfoXml: else: # build a tree structure root = ET.Element("ComicInfo") - root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" - root.attrib['xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema" + root.attrib["xmlns:xsi"] = "http://www.w3.org/2001/XMLSchema-instance" + root.attrib["xmlns:xsd"] = "http://www.w3.org/2001/XMLSchema" # helper func def assign(cix_entry, md_entry): if md_entry is not None: et_entry = root.find(cix_entry) if et_entry is not None: - et_entry.text = "{0}".format(md_entry) + et_entry.text = str(md_entry) else: - ET.SubElement(root, cix_entry).text = "{0}".format(md_entry) + ET.SubElement(root, cix_entry).text = str(md_entry) - assign('Title', md.title) - assign('Series', md.series) - assign('Number', md.issue) - assign('Count', md.issueCount) - assign('Volume', md.volume) - assign('AlternateSeries', md.alternateSeries) - assign('AlternateNumber', md.alternateNumber) - assign('StoryArc', md.storyArc) - assign('SeriesGroup', md.seriesGroup) - 
assign('AlternateCount', md.alternateCount) - assign('Summary', md.comments) - assign('Notes', md.notes) - assign('Year', md.year) - assign('Month', md.month) - assign('Day', md.day) + assign("Title", md.title) + assign("Series", md.series) + assign("Number", md.issue) + assign("Count", md.issue_count) + assign("Volume", md.volume) + assign("AlternateSeries", md.alternate_series) + assign("AlternateNumber", md.alternate_number) + assign("StoryArc", md.story_arc) + assign("SeriesGroup", md.series_group) + assign("AlternateCount", md.alternate_count) + assign("Summary", md.comments) + assign("Notes", md.notes) + assign("Year", md.year) + assign("Month", md.month) + assign("Day", md.day) # need to specially process the credits, since they are structured # differently than CIX - credit_writer_list = list() - credit_penciller_list = list() - credit_inker_list = list() - credit_colorist_list = list() - credit_letterer_list = list() - credit_cover_list = list() - credit_editor_list = list() + credit_writer_list = [] + credit_penciller_list = [] + credit_inker_list = [] + credit_colorist_list = [] + credit_letterer_list = [] + credit_cover_list = [] + credit_editor_list = [] # first, loop thru credits, and build a list for each role that CIX # supports for credit in metadata.credits: - if credit['role'].lower() in set(self.writer_synonyms): - credit_writer_list.append(credit['person'].replace(",", "")) + if credit["role"].lower() in set(self.writer_synonyms): + credit_writer_list.append(credit["person"].replace(",", "")) - if credit['role'].lower() in set(self.penciller_synonyms): - credit_penciller_list.append(credit['person'].replace(",", "")) + if credit["role"].lower() in set(self.penciller_synonyms): + credit_penciller_list.append(credit["person"].replace(",", "")) - if credit['role'].lower() in set(self.inker_synonyms): - credit_inker_list.append(credit['person'].replace(",", "")) + if credit["role"].lower() in set(self.inker_synonyms): + 
credit_inker_list.append(credit["person"].replace(",", "")) - if credit['role'].lower() in set(self.colorist_synonyms): - credit_colorist_list.append(credit['person'].replace(",", "")) + if credit["role"].lower() in set(self.colorist_synonyms): + credit_colorist_list.append(credit["person"].replace(",", "")) - if credit['role'].lower() in set(self.letterer_synonyms): - credit_letterer_list.append(credit['person'].replace(",", "")) + if credit["role"].lower() in set(self.letterer_synonyms): + credit_letterer_list.append(credit["person"].replace(",", "")) - if credit['role'].lower() in set(self.cover_synonyms): - credit_cover_list.append(credit['person'].replace(",", "")) + if credit["role"].lower() in set(self.cover_synonyms): + credit_cover_list.append(credit["person"].replace(",", "")) - if credit['role'].lower() in set(self.editor_synonyms): - credit_editor_list.append(credit['person'].replace(",", "")) + if credit["role"].lower() in set(self.editor_synonyms): + credit_editor_list.append(credit["person"].replace(",", "")) # second, convert each list to string, and add to XML struct - assign('Writer', utils.listToString(credit_writer_list)) + assign("Writer", utils.list_to_string(credit_writer_list)) - assign('Penciller', utils.listToString(credit_penciller_list)) + assign("Penciller", utils.list_to_string(credit_penciller_list)) - assign('Inker', utils.listToString(credit_inker_list)) + assign("Inker", utils.list_to_string(credit_inker_list)) - assign('Colorist', utils.listToString(credit_colorist_list)) + assign("Colorist", utils.list_to_string(credit_colorist_list)) - assign('Letterer', utils.listToString(credit_letterer_list)) + assign("Letterer", utils.list_to_string(credit_letterer_list)) - assign('CoverArtist', utils.listToString(credit_cover_list)) + assign("CoverArtist", utils.list_to_string(credit_cover_list)) - assign('Editor', utils.listToString(credit_editor_list)) + assign("Editor", utils.list_to_string(credit_editor_list)) - assign('Publisher', 
md.publisher) - assign('Imprint', md.imprint) - assign('Genre', md.genre) - assign('Web', md.webLink) - assign('PageCount', md.pageCount) - assign('LanguageISO', md.language) - assign('Format', md.format) - assign('AgeRating', md.maturityRating) - if md.blackAndWhite is not None and md.blackAndWhite: - assign('BlackAndWhite', "Yes") - assign('Manga', md.manga) - assign('Characters', md.characters) - assign('Teams', md.teams) - assign('Locations', md.locations) - assign('ScanInformation', md.scanInfo) + assign("Publisher", md.publisher) + assign("Imprint", md.imprint) + assign("Genre", md.genre) + assign("Web", md.web_link) + assign("PageCount", md.page_count) + assign("LanguageISO", md.language) + assign("Format", md.format) + assign("AgeRating", md.maturity_rating) + if md.black_and_white is not None and md.black_and_white: + ET.SubElement(root, "BlackAndWhite").text = "Yes" + assign("Manga", md.manga) + assign("Characters", md.characters) + assign("Teams", md.teams) + assign("Locations", md.locations) + assign("ScanInformation", md.scan_info) # loop and add the page entries under pages node - pages_node = root.find('Pages') + pages_node = root.find("Pages") if pages_node is not None: pages_node.clear() else: - pages_node = ET.SubElement(root, 'Pages') + pages_node = ET.SubElement(root, "Pages") for page_dict in md.pages: - page_node = ET.SubElement(pages_node, 'Page') + page_node = ET.SubElement(pages_node, "Page") page_node.attrib = page_dict - # self pretty-print - self.indent(root) + utils.indent(root) # wrap it in an ElementTree instance, and save as XML tree = ET.ElementTree(root) return tree - def convertXMLToMetadata(self, tree): + def convert_xml_to_metadata(self, tree): root = tree.getroot() - if root.tag != 'ComicInfo': - raise 1 - return None + if root.tag != "ComicInfo": + raise "1" def get(name): tag = root.find(name) @@ -209,74 +188,75 @@ class ComicInfoXml: md = GenericMetadata() - md.series = utils.xlate(get('Series')) - md.title = 
utils.xlate(get('Title')) - md.issue = IssueString(utils.xlate(get('Number'))).asString() - md.issueCount = utils.xlate(get('Count'), True) - md.volume = utils.xlate(get('Volume'), True) - md.alternateSeries = utils.xlate(get('AlternateSeries')) - md.alternateNumber = IssueString(utils.xlate(get('AlternateNumber'))).asString() - md.alternateCount = utils.xlate(get('AlternateCount'), True) - md.comments = utils.xlate(get('Summary')) - md.notes = utils.xlate(get('Notes')) - md.year = utils.xlate(get('Year'), True) - md.month = utils.xlate(get('Month'), True) - md.day = utils.xlate(get('Day'), True) - md.publisher = utils.xlate(get('Publisher')) - md.imprint = utils.xlate(get('Imprint')) - md.genre = utils.xlate(get('Genre')) - md.webLink = utils.xlate(get('Web')) - md.language = utils.xlate(get('LanguageISO')) - md.format = utils.xlate(get('Format')) - md.manga = utils.xlate(get('Manga')) - md.characters = utils.xlate(get('Characters')) - md.teams = utils.xlate(get('Teams')) - md.locations = utils.xlate(get('Locations')) - md.pageCount = utils.xlate(get('PageCount'), True) - md.scanInfo = utils.xlate(get('ScanInformation')) - md.storyArc = utils.xlate(get('StoryArc')) - md.seriesGroup = utils.xlate(get('SeriesGroup')) - md.maturityRating = utils.xlate(get('AgeRating')) + md.series = utils.xlate(get("Series")) + md.title = utils.xlate(get("Title")) + md.issue = IssueString(utils.xlate(get("Number"))).as_string() + md.issue_count = utils.xlate(get("Count"), True) + md.volume = utils.xlate(get("Volume"), True) + md.alternate_series = utils.xlate(get("AlternateSeries")) + md.alternate_number = IssueString(utils.xlate(get("AlternateNumber"))).as_string() + md.alternate_count = utils.xlate(get("AlternateCount"), True) + md.comments = utils.xlate(get("Summary")) + md.notes = utils.xlate(get("Notes")) + md.year = utils.xlate(get("Year"), True) + md.month = utils.xlate(get("Month"), True) + md.day = utils.xlate(get("Day"), True) + md.publisher = utils.xlate(get("Publisher")) 
+ md.imprint = utils.xlate(get("Imprint")) + md.genre = utils.xlate(get("Genre")) + md.web_link = utils.xlate(get("Web")) + md.language = utils.xlate(get("LanguageISO")) + md.format = utils.xlate(get("Format")) + md.manga = utils.xlate(get("Manga")) + md.characters = utils.xlate(get("Characters")) + md.teams = utils.xlate(get("Teams")) + md.locations = utils.xlate(get("Locations")) + md.page_count = utils.xlate(get("PageCount"), True) + md.scan_info = utils.xlate(get("ScanInformation")) + md.story_arc = utils.xlate(get("StoryArc")) + md.series_group = utils.xlate(get("SeriesGroup")) + md.maturity_rating = utils.xlate(get("AgeRating")) - tmp = utils.xlate(get('BlackAndWhite')) + tmp = utils.xlate(get("BlackAndWhite")) if tmp is not None and tmp.lower() in ["yes", "true", "1"]: - md.blackAndWhite = True + md.black_and_white = True # Now extract the credit info for n in root: - if (n.tag == 'Writer' or - n.tag == 'Penciller' or - n.tag == 'Inker' or - n.tag == 'Colorist' or - n.tag == 'Letterer' or - n.tag == 'Editor' - ): + if any( + [ + n.tag == "Writer", + n.tag == "Penciller", + n.tag == "Inker", + n.tag == "Colorist", + n.tag == "Letterer", + n.tag == "Editor", + ] + ): if n.text is not None: - for name in n.text.split(','): - md.addCredit(name.strip(), n.tag) + for name in n.text.split(","): + md.add_credit(name.strip(), n.tag) - if n.tag == 'CoverArtist': + if n.tag == "CoverArtist": if n.text is not None: - for name in n.text.split(','): - md.addCredit(name.strip(), "Cover") + for name in n.text.split(","): + md.add_credit(name.strip(), "Cover") # parse page data now pages_node = root.find("Pages") if pages_node is not None: for page in pages_node: md.pages.append(page.attrib) - # print page.attrib - md.isEmpty = False + md.is_empty = False return md - def writeToExternalFile(self, filename, metadata, xml=None): + def write_to_external_file(self, filename, metadata, xml=None): - tree = self.convertMetadataToXML(self, metadata, xml) - # ET.dump(tree) + tree = 
self.convert_metadata_to_xml(self, metadata, xml) tree.write(filename, encoding="utf-8", xml_declaration=True) - def readFromExternalFile(self, filename): + def read_from_external_file(self, filename): tree = ET.parse(filename) - return self.convertXMLToMetadata(tree) + return self.convert_xml_to_metadata(tree) diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py index 476d14b..9e85ccd 100644 --- a/comicapi/filenameparser.py +++ b/comicapi/filenameparser.py @@ -20,56 +20,62 @@ This should probably be re-written, but, well, it mostly works! # Some portions of this code were modified from pyComicMetaThis project # http://code.google.com/p/pycomicmetathis/ -import re import os +import re from urllib.parse import unquote class FileNameParser: + def __init__(self): + self.series = "" + self.volume = "" + self.year = "" + self.issue_count = "" + self.remainder = "" + self.issue = "" def repl(self, m): - return ' ' * len(m.group()) + return " " * len(m.group()) - def fixSpaces(self, string, remove_dashes=True): + def fix_spaces(self, string, remove_dashes=True): if remove_dashes: - placeholders = ['[-_]', ' +'] + placeholders = [r"[-_]", r" +"] else: - placeholders = ['[_]', ' +'] + placeholders = [r"[_]", r" +"] for ph in placeholders: string = re.sub(ph, self.repl, string) return string # .strip() - def getIssueCount(self, filename, issue_end): + def get_issue_count(self, filename, issue_end): count = "" filename = filename[issue_end:] # replace any name separators with spaces - tmpstr = self.fixSpaces(filename) + tmpstr = self.fix_spaces(filename) found = False - match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE) + match = re.search(r"(?<=\sof\s)\d+(?=\s)", tmpstr, re.IGNORECASE) if match: count = match.group() found = True if not found: - match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE) + match = re.search(r"(?<=\(of\s)\d+(?=\))", tmpstr, re.IGNORECASE) if match: count = match.group() - found = True count = 
count.lstrip("0") return count - def getIssueNumber(self, filename): + def get_issue_number(self, filename): """Returns a tuple of issue number string, and start and end indexes in the filename (The indexes will be used to split the string up for further parsing) """ found = False - issue = '' + issue = "" start = 0 end = 0 @@ -78,25 +84,25 @@ class FileNameParser: if "--" in filename: # the pattern seems to be that anything to left of the first "--" # is the series name followed by issue - filename = re.sub("--.*", self.repl, filename) + filename = re.sub(r"--.*", self.repl, filename) elif "__" in filename: # the pattern seems to be that anything to left of the first "__" # is the series name followed by issue - filename = re.sub("__.*", self.repl, filename) + filename = re.sub(r"__.*", self.repl, filename) filename = filename.replace("+", " ") # replace parenthetical phrases with spaces - filename = re.sub("\(.*?\)", self.repl, filename) - filename = re.sub("\[.*?\]", self.repl, filename) + filename = re.sub(r"\(.*?\)", self.repl, filename) + filename = re.sub(r"\[.*?]", self.repl, filename) # replace any name separators with spaces - filename = self.fixSpaces(filename) + filename = self.fix_spaces(filename) # remove any "of NN" phrase with spaces (problem: this could break on # some titles) - filename = re.sub("of [\d]+", self.repl, filename) + filename = re.sub(r"of [\d]+", self.repl, filename) # print u"[{0}]".format(filename) @@ -104,8 +110,8 @@ class FileNameParser: # the same positions as original filename # make a list of each word and its position - word_list = list() - for m in re.finditer("\S+", filename): + word_list = [] + for m in re.finditer(r"\S+", filename): word_list.append((m.group(0), m.start(), m.end())) # remove the first word, since it can't be the issue number @@ -120,7 +126,7 @@ class FileNameParser: # first look for a word with "#" followed by digits with optional suffix # this is almost certainly the issue number for w in 
reversed(word_list): - if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): + if re.match(r"#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): found = True break @@ -128,13 +134,13 @@ class FileNameParser: # list if not found: w = word_list[-1] - if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): + if re.match(r"[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): found = True # now try to look for a # followed by any characters if not found: for w in reversed(word_list): - if re.match("#\S+", w[0]): + if re.match(r"#\S+", w[0]): found = True break @@ -142,12 +148,12 @@ class FileNameParser: issue = w[0] start = w[1] end = w[2] - if issue[0] == '#': + if issue[0] == "#": issue = issue[1:] return issue, start, end - def getSeriesName(self, filename, issue_start): + def get_series_name(self, filename, issue_start): """Use the issue number string index to split the filename string""" if issue_start != 0: @@ -157,15 +163,15 @@ class FileNameParser: if "--" in filename: # the pattern seems to be that anything to left of the first "--" # is the series name followed by issue - filename = re.sub("--.*", self.repl, filename) + filename = re.sub(r"--.*", self.repl, filename) elif "__" in filename: # the pattern seems to be that anything to left of the first "__" # is the series name followed by issue - filename = re.sub("__.*", self.repl, filename) + filename = re.sub(r"__.*", self.repl, filename) filename = filename.replace("+", " ") - tmpstr = self.fixSpaces(filename, remove_dashes=False) + tmpstr = self.fix_spaces(filename, remove_dashes=False) series = tmpstr volume = "" @@ -177,10 +183,10 @@ class FileNameParser: last_word = "" # remove any parenthetical phrases - series = re.sub("\(.*?\)", "", series) + series = re.sub(r"\(.*?\)", "", series) # search for volume number - match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series) + match = re.search(r"(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$", series) if match: series = match.group(1) volume = match.group(3) @@ 
-189,7 +195,7 @@ class FileNameParser: # since that's a common way to designate the volume if volume == "": # match either (YEAR), (YEAR-), or (YEAR-YEAR2) - match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word) + match = re.search(r"(\()(\d{4})(-(\d{4}|)|)(\))", last_word) if match: volume = match.group(2) @@ -203,26 +209,26 @@ class FileNameParser: try: last_word = series.split()[-1] if last_word.lower() in one_shot_words: - series = series.rsplit(' ', 1)[0] + series = series.rsplit(" ", 1)[0] except: pass return series, volume.strip() - def getYear(self, filename, issue_end): + def get_year(self, filename, issue_end): filename = filename[issue_end:] year = "" # look for four digit number with "(" ")" or "--" around it - match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename) + match = re.search(r"(\(\d\d\d\d\))|(--\d\d\d\d--)", filename) if match: year = match.group() # remove non-digits - year = re.sub("[^0-9]", "", year) + year = re.sub(r"[^0-9]", "", year) return year - def getRemainder(self, filename, year, count, volume, issue_end): + def get_remainder(self, filename, year, count, volume, issue_end): """Make a guess at where the the non-interesting stuff begins""" remainder = "" @@ -234,7 +240,7 @@ class FileNameParser: elif issue_end != 0: remainder = filename[issue_end:] - remainder = self.fixSpaces(remainder, remove_dashes=False) + remainder = self.fix_spaces(remainder, remove_dashes=False) if volume != "": remainder = remainder.replace("Vol." 
+ volume, "", 1) if year != "": @@ -243,13 +249,11 @@ class FileNameParser: remainder = remainder.replace("of " + count, "", 1) remainder = remainder.replace("()", "") - remainder = remainder.replace( - " ", - " ") # cleans some whitespace mess + remainder = remainder.replace(" ", " ") # cleans some whitespace mess return remainder.strip() - def parseFilename(self, filename): + def parse_filename(self, filename): # remove the path filename = os.path.basename(filename) @@ -267,21 +271,16 @@ class FileNameParser: filename = filename.replace("_28", "(") filename = filename.replace("_29", ")") - self.issue, issue_start, issue_end = self.getIssueNumber(filename) - self.series, self.volume = self.getSeriesName(filename, issue_start) + self.issue, issue_start, issue_end = self.get_issue_number(filename) + self.series, self.volume = self.get_series_name(filename, issue_start) # provides proper value when the filename doesn't have a issue number if issue_end == 0: issue_end = len(self.series) - self.year = self.getYear(filename, issue_end) - self.issue_count = self.getIssueCount(filename, issue_end) - self.remainder = self.getRemainder( - filename, - self.year, - self.issue_count, - self.volume, - issue_end) + self.year = self.get_year(filename, issue_end) + self.issue_count = self.get_issue_count(filename, issue_end) + self.remainder = self.get_remainder(filename, self.year, self.issue_count, self.volume, issue_end) if self.issue != "": # strip off leading zeros diff --git a/comicapi/genericmetadata.py b/comicapi/genericmetadata.py index b3679a2..e375d7a 100644 --- a/comicapi/genericmetadata.py +++ b/comicapi/genericmetadata.py @@ -20,7 +20,9 @@ possible, however lossy it might be # See the License for the specific language governing permissions and # limitations under the License. -from . 
import utils +from typing import List, TypedDict + +from comicapi import utils class PageType: @@ -42,24 +44,34 @@ class PageType: Other = "Other" Deleted = "Deleted" -""" -class PageInfo: - Image = 0 - Type = PageType.Story - DoublePage = False - ImageSize = 0 - Key = "" - ImageWidth = 0 - ImageHeight = 0 -""" + +class ImageMetadata(TypedDict): + Type: PageType + Image: int + ImageSize: str + ImageHeight: str + ImageWidth: str + + +class CreditMetadata(TypedDict): + person: str + role: str + primary: bool class GenericMetadata: + writer_synonyms = ["writer", "plotter", "scripter"] + penciller_synonyms = ["artist", "penciller", "penciler", "breakdowns"] + inker_synonyms = ["inker", "artist", "finishes"] + colorist_synonyms = ["colorist", "colourist", "colorer", "colourer"] + letterer_synonyms = ["letterer"] + cover_synonyms = ["cover", "covers", "coverartist", "cover artist"] + editor_synonyms = ["editor"] def __init__(self): - self.isEmpty = True - self.tagOrigin = None + self.is_empty = True + self.tag_origin = None self.series = None self.issue = None @@ -68,47 +80,47 @@ class GenericMetadata: self.month = None self.year = None self.day = None - self.issueCount = None + self.issue_count = None self.volume = None self.genre = None self.language = None # 2 letter iso code self.comments = None # use same way as Summary in CIX - self.volumeCount = None - self.criticalRating = None + self.volume_count = None + self.critical_rating = None self.country = None - self.alternateSeries = None - self.alternateNumber = None - self.alternateCount = None + self.alternate_series = None + self.alternate_number = None + self.alternate_count = None self.imprint = None self.notes = None - self.webLink = None + self.web_link = None self.format = None self.manga = None - self.blackAndWhite = None - self.pageCount = None - self.maturityRating = None + self.black_and_white = None + self.page_count = None + self.maturity_rating = None - self.storyArc = None - self.seriesGroup = None - 
self.scanInfo = None + self.story_arc = None + self.series_group = None + self.scan_info = None self.characters = None self.teams = None self.locations = None - self.credits = list() - self.tags = list() - self.pages = list() + self.credits: List[CreditMetadata] = [] + self.tags: List[str] = [] + self.pages: List[ImageMetadata] = [] # Some CoMet-only items self.price = None - self.isVersionOf = None + self.is_version_of = None self.rights = None self.identifier = None - self.lastMark = None - self.coverImage = None + self.last_mark = None + self.cover_image = None def overlay(self, new_md): """Overlay a metadata object on this one @@ -124,35 +136,36 @@ class GenericMetadata: else: setattr(self, cur, new) - if not new_md.isEmpty: - self.isEmpty = False + new_md: GenericMetadata + if not new_md.is_empty: + self.is_empty = False - assign('series', new_md.series) + assign("series", new_md.series) assign("issue", new_md.issue) - assign("issueCount", new_md.issueCount) + assign("issue_count", new_md.issue_count) assign("title", new_md.title) assign("publisher", new_md.publisher) assign("day", new_md.day) assign("month", new_md.month) assign("year", new_md.year) assign("volume", new_md.volume) - assign("volumeCount", new_md.volumeCount) + assign("volume_count", new_md.volume_count) assign("genre", new_md.genre) assign("language", new_md.language) assign("country", new_md.country) - assign("criticalRating", new_md.criticalRating) - assign("alternateSeries", new_md.alternateSeries) - assign("alternateNumber", new_md.alternateNumber) - assign("alternateCount", new_md.alternateCount) + assign("critical_rating", new_md.critical_rating) + assign("alternate_series", new_md.alternate_series) + assign("alternate_number", new_md.alternate_number) + assign("alternate_count", new_md.alternate_count) assign("imprint", new_md.imprint) - assign("webLink", new_md.webLink) + assign("web_link", new_md.web_link) assign("format", new_md.format) assign("manga", new_md.manga) - 
assign("blackAndWhite", new_md.blackAndWhite) - assign("maturityRating", new_md.maturityRating) - assign("storyArc", new_md.storyArc) - assign("seriesGroup", new_md.seriesGroup) - assign("scanInfo", new_md.scanInfo) + assign("black_and_white", new_md.black_and_white) + assign("maturity_rating", new_md.maturity_rating) + assign("story_arc", new_md.story_arc) + assign("series_group", new_md.series_group) + assign("scan_info", new_md.scan_info) assign("characters", new_md.characters) assign("teams", new_md.teams) assign("locations", new_md.locations) @@ -160,12 +173,12 @@ class GenericMetadata: assign("notes", new_md.notes) assign("price", new_md.price) - assign("isVersionOf", new_md.isVersionOf) + assign("is_version_of", new_md.is_version_of) assign("rights", new_md.rights) assign("identifier", new_md.identifier) - assign("lastMark", new_md.lastMark) + assign("last_mark", new_md.last_mark) - self.overlayCredits(new_md.credits) + self.overlay_credits(new_md.credits) # TODO # not sure if the tags and pages should broken down, or treated @@ -179,66 +192,62 @@ class GenericMetadata: if len(new_md.pages) > 0: assign("pages", new_md.pages) - def overlayCredits(self, new_credits): + def overlay_credits(self, new_credits): for c in new_credits: - if 'primary' in c and c['primary']: - primary = True - else: - primary = False + primary = bool("primary" in c and c["primary"]) # Remove credit role if person is blank - if c['person'] == "": + if c["person"] == "": for r in reversed(self.credits): - if r['role'].lower() == c['role'].lower(): + if r["role"].lower() == c["role"].lower(): self.credits.remove(r) # otherwise, add it! 
else: - self.addCredit(c['person'], c['role'], primary) + self.add_credit(c["person"], c["role"], primary) - def setDefaultPageList(self, count): + def set_default_page_list(self, count): # generate a default page list, with the first page marked as the cover for i in range(count): - page_dict = dict() - page_dict['Image'] = str(i) + page_dict = {} + page_dict["Image"] = str(i) if i == 0: - page_dict['Type'] = PageType.FrontCover + page_dict["Type"] = PageType.FrontCover self.pages.append(page_dict) - def getArchivePageIndex(self, pagenum): + def get_archive_page_index(self, pagenum): # convert the displayed page number to the page index of the file in # the archive if pagenum < len(self.pages): - return int(self.pages[pagenum]['Image']) - else: - return 0 + return int(self.pages[pagenum]["Image"]) - def getCoverPageIndexList(self): + return 0 + + def get_cover_page_index_list(self): # return a list of archive page indices of cover pages coverlist = [] for p in self.pages: - if 'Type' in p and p['Type'] == PageType.FrontCover: - coverlist.append(int(p['Image'])) + if "Type" in p and p["Type"] == PageType.FrontCover: + coverlist.append(int(p["Image"])) if len(coverlist) == 0: coverlist.append(0) return coverlist - def addCredit(self, person, role, primary=False): + def add_credit(self, person, role, primary=False): - credit = dict() - credit['person'] = person - credit['role'] = role + credit = {} + credit["person"] = person + credit["role"] = role if primary: - credit['primary'] = primary + credit["primary"] = primary # look to see if it's not already there... found = False for c in self.credits: - if (c['person'].lower() == person.lower() and - c['role'].lower() == role.lower()): + if c["person"].lower() == person.lower() and c["role"].lower() == role.lower(): # no need to add it. 
just adjust the "primary" flag as needed - c['primary'] = primary + c["primary"] = primary found = True break @@ -247,64 +256,63 @@ class GenericMetadata: def __str__(self): vals = [] - if self.isEmpty: + if self.is_empty: return "No metadata" def add_string(tag, val): - if val is not None and "{0}".format(val) != "": + if val is not None and str(val) != "": vals.append((tag, val)) def add_attr_string(tag): - val = getattr(self, tag) add_string(tag, getattr(self, tag)) add_attr_string("series") add_attr_string("issue") - add_attr_string("issueCount") + add_attr_string("issue_count") add_attr_string("title") add_attr_string("publisher") add_attr_string("year") add_attr_string("month") add_attr_string("day") add_attr_string("volume") - add_attr_string("volumeCount") + add_attr_string("volume_count") add_attr_string("genre") add_attr_string("language") add_attr_string("country") - add_attr_string("criticalRating") - add_attr_string("alternateSeries") - add_attr_string("alternateNumber") - add_attr_string("alternateCount") + add_attr_string("critical_rating") + add_attr_string("alternate_series") + add_attr_string("alternate_number") + add_attr_string("alternate_count") add_attr_string("imprint") - add_attr_string("webLink") + add_attr_string("web_link") add_attr_string("format") add_attr_string("manga") add_attr_string("price") - add_attr_string("isVersionOf") + add_attr_string("is_version_of") add_attr_string("rights") add_attr_string("identifier") - add_attr_string("lastMark") + add_attr_string("last_mark") - if self.blackAndWhite: - add_attr_string("blackAndWhite") - add_attr_string("maturityRating") - add_attr_string("storyArc") - add_attr_string("seriesGroup") - add_attr_string("scanInfo") + if self.black_and_white: + add_attr_string("black_and_white") + add_attr_string("maturity_rating") + add_attr_string("story_arc") + add_attr_string("series_group") + add_attr_string("scan_info") add_attr_string("characters") add_attr_string("teams") 
add_attr_string("locations") add_attr_string("comments") add_attr_string("notes") - add_string("tags", utils.listToString(self.tags)) + add_string("tags", utils.list_to_string(self.tags)) for c in self.credits: primary = "" - if 'primary' in c and c['primary']: + if "primary" in c and c["primary"]: primary = " [P]" - add_string("credit", c['role'] + ": " + c['person'] + primary) + add_string("credit", c["role"] + ": " + c["person"] + primary) # find the longest field name flen = 0 diff --git a/comicapi/issuestring.py b/comicapi/issuestring.py index 62732a8..53f5a63 100644 --- a/comicapi/issuestring.py +++ b/comicapi/issuestring.py @@ -1,4 +1,3 @@ -# coding=utf-8 """Support for mixed digit/string type Issue field Class for handling the odd permutations of an 'issue number' that the @@ -20,13 +19,8 @@ comics industry throws at us. # See the License for the specific language governing permissions and # limitations under the License. -#import utils -#import math -#import re - class IssueString: - def __init__(self, text): # break up the issue number string into 2 parts: the numeric and suffix string. 
@@ -43,10 +37,8 @@ class IssueString: if len(text) == 0: return - text = str(text) - # skip the minus sign if it's first - if text[0] == '-': + if text[0] == "-": start = 1 else: start = 0 @@ -78,7 +70,7 @@ class IssueString: idx = 0 part1 = text[0:idx] - part2 = text[idx:len(text)] + part2 = text[idx : len(text)] if part1 != "": self.num = float(part1) @@ -86,9 +78,7 @@ class IssueString: else: self.suffix = text - # print "num: {0} suf: {1}".format(self.num, self.suffix) - - def asString(self, pad=0): + def as_string(self, pad=0): # return the float, left side zero-padded, with suffix attached if self.num is None: return self.suffix @@ -106,9 +96,9 @@ class IssueString: # create padding padding = "" - l = len(str(num_int)) - if l < pad: - padding = "0" * (pad - l) + length = len(str(num_int)) + if length < pad: + padding = "0" * (pad - length) num_s = padding + num_s if negative: @@ -116,16 +106,16 @@ class IssueString: return num_s - def asFloat(self): + def as_float(self): # return the float, with no suffix if self.suffix == "½": if self.num is not None: - return self.num + .5 - else: - return .5 + return self.num + 0.5 + + return 0.5 return self.num - def asInt(self): + def as_int(self): # return the int version of the float if self.num is None: return None diff --git a/comicapi/utils.py b/comicapi/utils.py index 05bacc2..e88bc4e 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -1,4 +1,3 @@ -# coding=utf-8 """Some generic utilities""" # Copyright 2012-2014 Anthony Beville @@ -15,19 +14,39 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys -import os -import re -import platform -import locale import codecs +import locale +import os +import platform +import re +import sys import unicodedata +from collections import defaultdict + +import pycountry class UtilsVars: already_fixed_encoding = False +def indent(elem, level=0): + # for making the XML output readable + i = "\n" + level * " " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for ele in elem: + indent(ele, level + 1) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + def get_actual_preferred_encoding(): preferred_encoding = locale.getpreferredencoding() if platform.system() == "Darwin": @@ -50,26 +69,17 @@ def fix_output_encoding(): def get_recursive_filelist(pathlist): """Get a recursive list of of all files under all path items in the list""" - filename_encoding = sys.getfilesystemencoding() filelist = [] for p in pathlist: # if path is a folder, walk it recursively, and all files underneath - if isinstance(p, str): - # make sure string is unicode - #p = p.decode(filename_encoding) # , 'replace') - pass - elif not isinstance(p, str): + if not isinstance(p, str): # it's probably a QString p = str(p) if os.path.isdir(p): - for root, dirs, files in os.walk(p): + for root, _, files in os.walk(p): for f in files: - if isinstance(f, str): - # make sure string is unicode - #f = f.decode(filename_encoding, 'replace') - pass - elif not isinstance(f, str): + if not isinstance(f, str): # it's probably a QString f = str(f) filelist.append(os.path.join(root, f)) @@ -79,28 +89,26 @@ def get_recursive_filelist(pathlist): return filelist -def listToString(l): +def list_to_string(lst): string = "" - if l is not None: - for item in l: + if lst is not None: + for item in lst: if len(string) > 0: string += ", " string += item return string -def 
addtopath(dirname): +def add_to_path(dirname): if dirname is not None and dirname != "": # verify that path doesn't already contain the given dirname tmpdirname = re.escape(dirname) - pattern = r"{sep}{dir}$|^{dir}{sep}|{sep}{dir}{sep}|^{dir}$".format( - dir=tmpdirname, - sep=os.pathsep) + pattern = r"(^|{sep}){dir}({sep}|$)".format(dir=tmpdirname, sep=os.pathsep) - match = re.search(pattern, os.environ['PATH']) + match = re.search(pattern, os.environ["PATH"]) if not match: - os.environ['PATH'] = dirname + os.pathsep + os.environ['PATH'] + os.environ["PATH"] = dirname + os.pathsep + os.environ["PATH"] def which(program): @@ -109,7 +117,7 @@ def which(program): def is_exe(fpath): return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - fpath, fname = os.path.split(program) + fpath, _ = os.path.split(program) if fpath: if is_exe(program): return program @@ -122,496 +130,109 @@ def which(program): return None -def xlate(data, isInt=False): - class Default(dict): - def __missing__(self, key): - return None +def xlate(data, is_int=False): if data is None or data == "": return None - if isInt: - i = str(data).translate(Default(zip((ord(c) for c in "1234567890"),"1234567890"))) + if is_int: + i = str(data).translate(defaultdict(lambda: None, zip((ord(c) for c in "1234567890"), "1234567890"))) if i == "0": return "0" - if i is "": + if i == "": return None return int(i) - else: - return str(data) + + return str(data) -def removearticles(text): +def remove_articles(text): text = text.lower() - articles = ['and', 'a', '&', 'issue', 'the'] - newText = '' - for word in text.split(' '): + articles = [ + "&", + "a", + "am", + "an", + "and", + "as", + "at", + "be", + "but", + "by", + "for", + "if", + "is", + "issue", + "it", + "it's", + "its", + "itself", + "of", + "or", + "so", + "the", + "the", + "with", + ] + new_text = "" + for word in text.split(" "): if word not in articles: - newText += word + ' ' + new_text += word + " " - newText = newText[:-1] + new_text = 
new_text[:-1] - return newText + return new_text def sanitize_title(text): # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2 # this will probably cause issues with titles in other character sets e.g. chinese, japanese - text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii') + text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii") # comicvine keeps apostrophes a part of the word text = text.replace("'", "") - text = text.replace("\"", "") + text = text.replace('"', "") # comicvine ignores punctuation and accents - text = re.sub(r'[^A-Za-z0-9]+',' ', text) + text = re.sub(r"[^A-Za-z0-9]+", " ", text) # remove extra space and articles and all lower case - text = removearticles(text).lower().strip() + text = remove_articles(text).lower().strip() return text def unique_file(file_name): counter = 1 - # returns ('/path/file', '.ext') file_name_parts = os.path.splitext(file_name) while True: if not os.path.lexists(file_name): return file_name - file_name = file_name_parts[ - 0] + ' (' + str(counter) + ')' + file_name_parts[1] + file_name = file_name_parts[0] + " (" + str(counter) + ")" + file_name_parts[1] counter += 1 -# -o- coding: utf-8 -o- -# ISO639 python dict -# official list in http://www.loc.gov/standards/iso639-2/php/code_list.php +languages = defaultdict(lambda: None) -lang_dict = { - 'ab': 'Abkhaz', - 'aa': 'Afar', - 'af': 'Afrikaans', - 'ak': 'Akan', - 'sq': 'Albanian', - 'am': 'Amharic', - 'ar': 'Arabic', - 'an': 'Aragonese', - 'hy': 'Armenian', - 'as': 'Assamese', - 'av': 'Avaric', - 'ae': 'Avestan', - 'ay': 'Aymara', - 'az': 'Azerbaijani', - 'bm': 'Bambara', - 'ba': 'Bashkir', - 'eu': 'Basque', - 'be': 'Belarusian', - 'bn': 'Bengali', - 'bh': 'Bihari', - 'bi': 'Bislama', - 'bs': 'Bosnian', - 'br': 'Breton', - 'bg': 'Bulgarian', - 'my': 'Burmese', - 'ca': 'Catalan; Valencian', - 'ch': 'Chamorro', - 'ce': 'Chechen', - 'ny': 'Chichewa; Chewa; Nyanja', - 'zh': 
'Chinese', - 'cv': 'Chuvash', - 'kw': 'Cornish', - 'co': 'Corsican', - 'cr': 'Cree', - 'hr': 'Croatian', - 'cs': 'Czech', - 'da': 'Danish', - 'dv': 'Divehi; Maldivian;', - 'nl': 'Dutch', - 'dz': 'Dzongkha', - 'en': 'English', - 'eo': 'Esperanto', - 'et': 'Estonian', - 'ee': 'Ewe', - 'fo': 'Faroese', - 'fj': 'Fijian', - 'fi': 'Finnish', - 'fr': 'French', - 'ff': 'Fula', - 'gl': 'Galician', - 'ka': 'Georgian', - 'de': 'German', - 'el': 'Greek, Modern', - 'gn': 'Guaraní', - 'gu': 'Gujarati', - 'ht': 'Haitian', - 'ha': 'Hausa', - 'he': 'Hebrew (modern)', - 'hz': 'Herero', - 'hi': 'Hindi', - 'ho': 'Hiri Motu', - 'hu': 'Hungarian', - 'ia': 'Interlingua', - 'id': 'Indonesian', - 'ie': 'Interlingue', - 'ga': 'Irish', - 'ig': 'Igbo', - 'ik': 'Inupiaq', - 'io': 'Ido', - 'is': 'Icelandic', - 'it': 'Italian', - 'iu': 'Inuktitut', - 'ja': 'Japanese', - 'jv': 'Javanese', - 'kl': 'Kalaallisut', - 'kn': 'Kannada', - 'kr': 'Kanuri', - 'ks': 'Kashmiri', - 'kk': 'Kazakh', - 'km': 'Khmer', - 'ki': 'Kikuyu, Gikuyu', - 'rw': 'Kinyarwanda', - 'ky': 'Kirghiz, Kyrgyz', - 'kv': 'Komi', - 'kg': 'Kongo', - 'ko': 'Korean', - 'ku': 'Kurdish', - 'kj': 'Kwanyama, Kuanyama', - 'la': 'Latin', - 'lb': 'Luxembourgish', - 'lg': 'Luganda', - 'li': 'Limburgish', - 'ln': 'Lingala', - 'lo': 'Lao', - 'lt': 'Lithuanian', - 'lu': 'Luba-Katanga', - 'lv': 'Latvian', - 'gv': 'Manx', - 'mk': 'Macedonian', - 'mg': 'Malagasy', - 'ms': 'Malay', - 'ml': 'Malayalam', - 'mt': 'Maltese', - 'mi': 'Māori', - 'mr': 'Marathi (Marāṭhī)', - 'mh': 'Marshallese', - 'mn': 'Mongolian', - 'na': 'Nauru', - 'nv': 'Navajo, Navaho', - 'nb': 'Norwegian Bokmål', - 'nd': 'North Ndebele', - 'ne': 'Nepali', - 'ng': 'Ndonga', - 'nn': 'Norwegian Nynorsk', - 'no': 'Norwegian', - 'ii': 'Nuosu', - 'nr': 'South Ndebele', - 'oc': 'Occitan', - 'oj': 'Ojibwe, Ojibwa', - 'cu': 'Old Church Slavonic', - 'om': 'Oromo', - 'or': 'Oriya', - 'os': 'Ossetian, Ossetic', - 'pa': 'Panjabi, Punjabi', - 'pi': 'Pāli', - 'fa': 'Persian', - 'pl': 'Polish', - 'ps': 
'Pashto, Pushto', - 'pt': 'Portuguese', - 'qu': 'Quechua', - 'rm': 'Romansh', - 'rn': 'Kirundi', - 'ro': 'Romanian, Moldavan', - 'ru': 'Russian', - 'sa': 'Sanskrit (Saṁskṛta)', - 'sc': 'Sardinian', - 'sd': 'Sindhi', - 'se': 'Northern Sami', - 'sm': 'Samoan', - 'sg': 'Sango', - 'sr': 'Serbian', - 'gd': 'Scottish Gaelic', - 'sn': 'Shona', - 'si': 'Sinhala, Sinhalese', - 'sk': 'Slovak', - 'sl': 'Slovene', - 'so': 'Somali', - 'st': 'Southern Sotho', - 'es': 'Spanish; Castilian', - 'su': 'Sundanese', - 'sw': 'Swahili', - 'ss': 'Swati', - 'sv': 'Swedish', - 'ta': 'Tamil', - 'te': 'Telugu', - 'tg': 'Tajik', - 'th': 'Thai', - 'ti': 'Tigrinya', - 'bo': 'Tibetan', - 'tk': 'Turkmen', - 'tl': 'Tagalog', - 'tn': 'Tswana', - 'to': 'Tonga', - 'tr': 'Turkish', - 'ts': 'Tsonga', - 'tt': 'Tatar', - 'tw': 'Twi', - 'ty': 'Tahitian', - 'ug': 'Uighur, Uyghur', - 'uk': 'Ukrainian', - 'ur': 'Urdu', - 'uz': 'Uzbek', - 've': 'Venda', - 'vi': 'Vietnamese', - 'vo': 'Volapük', - 'wa': 'Walloon', - 'cy': 'Welsh', - 'wo': 'Wolof', - 'fy': 'Western Frisian', - 'xh': 'Xhosa', - 'yi': 'Yiddish', - 'yo': 'Yoruba', - 'za': 'Zhuang, Chuang', - 'zu': 'Zulu', -} +countries = defaultdict(lambda: None) + +for c in pycountry.countries: + if "alpha_2" in c._fields: + countries[c.alpha_2] = c.name + +for lng in pycountry.languages: + if "alpha_2" in lng._fields: + languages[lng.alpha_2] = lng.name -countries = [ - ('AF', 'Afghanistan'), - ('AL', 'Albania'), - ('DZ', 'Algeria'), - ('AS', 'American Samoa'), - ('AD', 'Andorra'), - ('AO', 'Angola'), - ('AI', 'Anguilla'), - ('AQ', 'Antarctica'), - ('AG', 'Antigua And Barbuda'), - ('AR', 'Argentina'), - ('AM', 'Armenia'), - ('AW', 'Aruba'), - ('AU', 'Australia'), - ('AT', 'Austria'), - ('AZ', 'Azerbaijan'), - ('BS', 'Bahamas'), - ('BH', 'Bahrain'), - ('BD', 'Bangladesh'), - ('BB', 'Barbados'), - ('BY', 'Belarus'), - ('BE', 'Belgium'), - ('BZ', 'Belize'), - ('BJ', 'Benin'), - ('BM', 'Bermuda'), - ('BT', 'Bhutan'), - ('BO', 'Bolivia'), - ('BA', 'Bosnia And 
Herzegowina'), - ('BW', 'Botswana'), - ('BV', 'Bouvet Island'), - ('BR', 'Brazil'), - ('BN', 'Brunei Darussalam'), - ('BG', 'Bulgaria'), - ('BF', 'Burkina Faso'), - ('BI', 'Burundi'), - ('KH', 'Cambodia'), - ('CM', 'Cameroon'), - ('CA', 'Canada'), - ('CV', 'Cape Verde'), - ('KY', 'Cayman Islands'), - ('CF', 'Central African Rep'), - ('TD', 'Chad'), - ('CL', 'Chile'), - ('CN', 'China'), - ('CX', 'Christmas Island'), - ('CC', 'Cocos Islands'), - ('CO', 'Colombia'), - ('KM', 'Comoros'), - ('CG', 'Congo'), - ('CK', 'Cook Islands'), - ('CR', 'Costa Rica'), - ('CI', 'Cote D`ivoire'), - ('HR', 'Croatia'), - ('CU', 'Cuba'), - ('CY', 'Cyprus'), - ('CZ', 'Czech Republic'), - ('DK', 'Denmark'), - ('DJ', 'Djibouti'), - ('DM', 'Dominica'), - ('DO', 'Dominican Republic'), - ('TP', 'East Timor'), - ('EC', 'Ecuador'), - ('EG', 'Egypt'), - ('SV', 'El Salvador'), - ('GQ', 'Equatorial Guinea'), - ('ER', 'Eritrea'), - ('EE', 'Estonia'), - ('ET', 'Ethiopia'), - ('FK', 'Falkland Islands (Malvinas)'), - ('FO', 'Faroe Islands'), - ('FJ', 'Fiji'), - ('FI', 'Finland'), - ('FR', 'France'), - ('GF', 'French Guiana'), - ('PF', 'French Polynesia'), - ('TF', 'French S. 
Territories'), - ('GA', 'Gabon'), - ('GM', 'Gambia'), - ('GE', 'Georgia'), - ('DE', 'Germany'), - ('GH', 'Ghana'), - ('GI', 'Gibraltar'), - ('GR', 'Greece'), - ('GL', 'Greenland'), - ('GD', 'Grenada'), - ('GP', 'Guadeloupe'), - ('GU', 'Guam'), - ('GT', 'Guatemala'), - ('GN', 'Guinea'), - ('GW', 'Guinea-bissau'), - ('GY', 'Guyana'), - ('HT', 'Haiti'), - ('HN', 'Honduras'), - ('HK', 'Hong Kong'), - ('HU', 'Hungary'), - ('IS', 'Iceland'), - ('IN', 'India'), - ('ID', 'Indonesia'), - ('IR', 'Iran'), - ('IQ', 'Iraq'), - ('IE', 'Ireland'), - ('IL', 'Israel'), - ('IT', 'Italy'), - ('JM', 'Jamaica'), - ('JP', 'Japan'), - ('JO', 'Jordan'), - ('KZ', 'Kazakhstan'), - ('KE', 'Kenya'), - ('KI', 'Kiribati'), - ('KP', 'Korea (North)'), - ('KR', 'Korea (South)'), - ('KW', 'Kuwait'), - ('KG', 'Kyrgyzstan'), - ('LA', 'Laos'), - ('LV', 'Latvia'), - ('LB', 'Lebanon'), - ('LS', 'Lesotho'), - ('LR', 'Liberia'), - ('LY', 'Libya'), - ('LI', 'Liechtenstein'), - ('LT', 'Lithuania'), - ('LU', 'Luxembourg'), - ('MO', 'Macau'), - ('MK', 'Macedonia'), - ('MG', 'Madagascar'), - ('MW', 'Malawi'), - ('MY', 'Malaysia'), - ('MV', 'Maldives'), - ('ML', 'Mali'), - ('MT', 'Malta'), - ('MH', 'Marshall Islands'), - ('MQ', 'Martinique'), - ('MR', 'Mauritania'), - ('MU', 'Mauritius'), - ('YT', 'Mayotte'), - ('MX', 'Mexico'), - ('FM', 'Micronesia'), - ('MD', 'Moldova'), - ('MC', 'Monaco'), - ('MN', 'Mongolia'), - ('MS', 'Montserrat'), - ('MA', 'Morocco'), - ('MZ', 'Mozambique'), - ('MM', 'Myanmar'), - ('NA', 'Namibia'), - ('NR', 'Nauru'), - ('NP', 'Nepal'), - ('NL', 'Netherlands'), - ('AN', 'Netherlands Antilles'), - ('NC', 'New Caledonia'), - ('NZ', 'New Zealand'), - ('NI', 'Nicaragua'), - ('NE', 'Niger'), - ('NG', 'Nigeria'), - ('NU', 'Niue'), - ('NF', 'Norfolk Island'), - ('MP', 'Northern Mariana Islands'), - ('NO', 'Norway'), - ('OM', 'Oman'), - ('PK', 'Pakistan'), - ('PW', 'Palau'), - ('PA', 'Panama'), - ('PG', 'Papua New Guinea'), - ('PY', 'Paraguay'), - ('PE', 'Peru'), - ('PH', 'Philippines'), - 
('PN', 'Pitcairn'),
-    ('PL', 'Poland'),
-    ('PT', 'Portugal'),
-    ('PR', 'Puerto Rico'),
-    ('QA', 'Qatar'),
-    ('RE', 'Reunion'),
-    ('RO', 'Romania'),
-    ('RU', 'Russian Federation'),
-    ('RW', 'Rwanda'),
-    ('KN', 'Saint Kitts And Nevis'),
-    ('LC', 'Saint Lucia'),
-    ('VC', 'St Vincent/Grenadines'),
-    ('WS', 'Samoa'),
-    ('SM', 'San Marino'),
-    ('ST', 'Sao Tome'),
-    ('SA', 'Saudi Arabia'),
-    ('SN', 'Senegal'),
-    ('SC', 'Seychelles'),
-    ('SL', 'Sierra Leone'),
-    ('SG', 'Singapore'),
-    ('SK', 'Slovakia'),
-    ('SI', 'Slovenia'),
-    ('SB', 'Solomon Islands'),
-    ('SO', 'Somalia'),
-    ('ZA', 'South Africa'),
-    ('ES', 'Spain'),
-    ('LK', 'Sri Lanka'),
-    ('SH', 'St. Helena'),
-    ('PM', 'St.Pierre'),
-    ('SD', 'Sudan'),
-    ('SR', 'Suriname'),
-    ('SZ', 'Swaziland'),
-    ('SE', 'Sweden'),
-    ('CH', 'Switzerland'),
-    ('SY', 'Syrian Arab Republic'),
-    ('TW', 'Taiwan'),
-    ('TJ', 'Tajikistan'),
-    ('TZ', 'Tanzania'),
-    ('TH', 'Thailand'),
-    ('TG', 'Togo'),
-    ('TK', 'Tokelau'),
-    ('TO', 'Tonga'),
-    ('TT', 'Trinidad And Tobago'),
-    ('TN', 'Tunisia'),
-    ('TR', 'Turkey'),
-    ('TM', 'Turkmenistan'),
-    ('TV', 'Tuvalu'),
-    ('UG', 'Uganda'),
-    ('UA', 'Ukraine'),
-    ('AE', 'United Arab Emirates'),
-    ('UK', 'United Kingdom'),
-    ('US', 'United States'),
-    ('UY', 'Uruguay'),
-    ('UZ', 'Uzbekistan'),
-    ('VU', 'Vanuatu'),
-    ('VA', 'Vatican City State'),
-    ('VE', 'Venezuela'),
-    ('VN', 'Viet Nam'),
-    ('VG', 'Virgin Islands (British)'),
-    ('VI', 'Virgin Islands (U.S.)'),
-    ('EH', 'Western Sahara'),
-    ('YE', 'Yemen'),
-    ('YU', 'Yugoslavia'),
-    ('ZR', 'Zaire'),
-    ('ZM', 'Zambia'),
-    ('ZW', 'Zimbabwe')
-]
+def get_language_from_iso(iso: str):
+    return languages[iso]
 
 
-def getLanguageDict():
-    return lang_dict
-
-
-def getLanguageFromISO(iso):
-    if iso is None:
+def get_language(string):
+    if string is None:
         return None
-    else:
-        return lang_dict[iso]
+
+    lang = get_language_from_iso(string)
+
+    if lang is None:
+        try:
+            return pycountry.languages.lookup(string).name
+        except LookupError:
+            return None
+    return lang