diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b40db82 --- /dev/null +++ b/.gitignore @@ -0,0 +1,127 @@ +*.egg-info +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don’t work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +#pycharm +/.idea diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..234843c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,50 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.2.0 + hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=.gitignore] + - id: end-of-file-fixer + - id: check-yaml + - id: debug-statements + - id: name-tests-test + - id: requirements-txt-fixer +- repo: https://github.com/tekwizely/pre-commit-golang + rev: v1.0.0-beta.5 + hooks: + - id: go-mod-tidy + - id: go-imports + args: [-w] +- repo: https://github.com/golangci/golangci-lint + rev: v1.46.2 + hooks: + - id: golangci-lint +- repo: https://github.com/asottile/setup-cfg-fmt + rev: v1.20.1 + hooks: + - id: setup-cfg-fmt +- repo: https://github.com/asottile/reorder_python_imports + rev: v3.1.0 + hooks: + - id: reorder-python-imports + exclude: ^(pre_commit/resources/|testing/resources/python3_hooks_repo/) + args: [--py37-plus, --add-import, 'from __future__ import annotations'] +- repo: https://github.com/asottile/pyupgrade + rev: v2.32.1 + hooks: + - id: pyupgrade + args: [--py39-plus] +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black + args: [--line-length=120] +- repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 + hooks: + - id: flake8 +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.950 + hooks: + - id: mypy + additional_dependencies: [types-Flask, types-requests] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ 
-0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..c02309c --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# History +comicapi originated [here](https://github.com/davide-romanini/comicapi), was integrated into [ComicStreamer](https://github.com/davide-romanini/ComicStreamer), was modified in [this fork](https://github.com/kounch/ComicStreamer), and has now been extracted and packaged by yours truly (Iris W). + +# Installation +You can install this package with pip. CBR support is off by default — to enable it, run `pip install rarfile` and make sure "unrar" is available. 
diff --git a/comicapi/__init__.py b/comicapi/__init__.py index 06d5141..1d18349 100644 --- a/comicapi/__init__.py +++ b/comicapi/__init__.py @@ -1 +1,5 @@ +from __future__ import annotations + __author__ = "dromanin" + +__version__ = "2.2.0" diff --git a/comicapi/comet.py b/comicapi/comet.py index 686cc2e..8962708 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -1,18 +1,19 @@ """A class to encapsulate CoMet data""" - +# # Copyright 2012-2014 Anthony Beville - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # http://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations import logging import xml.etree.ElementTree as ET @@ -35,7 +36,6 @@ class CoMet: editor_synonyms = ["editor"] def metadata_from_string(self, string: str) -> GenericMetadata: - tree = ET.ElementTree(ET.fromstring(string)) return self.convert_xml_to_metadata(tree) @@ -126,7 +126,6 @@ class CoMet: return tree def convert_xml_to_metadata(self, tree: ET.ElementTree) -> GenericMetadata: - root = tree.getroot() if root.tag != "comet": @@ -142,24 +141,24 @@ class CoMet: return node.text return None - md.series = get("series") - md.title = get("title") - md.issue = get("issue") - md.volume = get("volume") - md.comments = get("description") - md.publisher = get("publisher") - md.language = get("language") - md.format = get("format") - md.page_count = get("pages") - md.maturity_rating = get("rating") - md.price = get("price") - md.is_version_of = get("isVersionOf") - md.rights = get("rights") - md.identifier = get("identifier") - md.last_mark = get("lastMark") - md.genre = get("genre") # TODO - repeatable field + md.series = utils.xlate(get("series")) + md.title = utils.xlate(get("title")) + md.issue = utils.xlate(get("issue")) + md.volume = utils.xlate(get("volume")) + md.comments = utils.xlate(get("description")) + md.publisher = utils.xlate(get("publisher")) + md.language = utils.xlate(get("language")) + md.format = utils.xlate(get("format")) + md.page_count = utils.xlate(get("pages")) + md.maturity_rating = utils.xlate(get("rating")) + md.price = utils.xlate(get("price")) + md.is_version_of = utils.xlate(get("isVersionOf")) + md.rights = utils.xlate(get("rights")) + md.identifier = utils.xlate(get("identifier")) + md.last_mark = utils.xlate(get("lastMark")) + md.genre = utils.xlate(get("genre")) # TODO - repeatable field - date = get("date") + date = utils.xlate(get("date")) if date is not None: parts = date.split("-") if len(parts) > 0: @@ -167,9 +166,9 @@ class CoMet: if len(parts) > 1: md.month = parts[1] - md.cover_image = 
get("coverImage") + md.cover_image = utils.xlate(get("coverImage")) - reading_direction = get("readingDirection") + reading_direction = utils.xlate(get("readingDirection")) if reading_direction is not None and reading_direction == "rtl": md.manga = "YesAndRightToLeft" @@ -214,11 +213,9 @@ class CoMet: return True def write_to_external_file(self, filename: str, metadata: GenericMetadata) -> None: - tree = self.convert_metadata_to_xml(metadata) tree.write(filename, encoding="utf-8") def read_from_external_file(self, filename: str) -> GenericMetadata: - tree = ET.parse(filename) return self.convert_xml_to_metadata(tree) diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py index 59510c8..83b943a 100644 --- a/comicapi/comicarchive.py +++ b/comicapi/comicarchive.py @@ -1,18 +1,18 @@ """A class to represent a single comic, be it file or folder of images""" - # Copyright 2012-2014 Anthony Beville - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # http://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations import io import logging @@ -22,14 +22,28 @@ import platform import struct import subprocess import sys +import tarfile import tempfile import time import zipfile +from typing import cast +from typing import List +from typing import Optional +from typing import Union import natsort import py7zr import wordninja +from comicapi import filenamelexer +from comicapi import filenameparser +from comicapi import utils +from comicapi.comet import CoMet +from comicapi.comicbookinfo import ComicBookInfo +from comicapi.comicinfoxml import ComicInfoXml +from comicapi.genericmetadata import GenericMetadata +from comicapi.genericmetadata import PageType + try: from unrar.cffi import rarfile @@ -44,13 +58,6 @@ try: except ImportError: pil_available = False -from typing import List, Optional, Union, cast - -from comicapi import filenamelexer, filenameparser, utils -from comicapi.comet import CoMet -from comicapi.comicbookinfo import ComicBookInfo -from comicapi.comicinfoxml import ComicInfoXml -from comicapi.genericmetadata import GenericMetadata, PageType logger = logging.getLogger(__name__) if not pil_available: @@ -70,7 +77,7 @@ class UnknownArchiver: """Unknown implementation""" - def __init__(self, path: Union[pathlib.Path, str]) -> None: + def __init__(self, path: pathlib.Path | str) -> None: self.path = path def get_comment(self) -> str: @@ -79,7 +86,7 @@ class UnknownArchiver: def set_comment(self, comment: str) -> bool: return False - def read_file(self, archive_file: str) -> Optional[bytes]: + def read_file(self, archive_file: str) -> bytes | None: return None def write_file(self, archive_file: str, data: bytes) -> bool: @@ -92,112 +99,11 @@ class UnknownArchiver: return [] -class SevenZipArchiver(UnknownArchiver): - - """7Z implementation""" - - def __init__(self, path: Union[pathlib.Path, str]) -> None: - self.path = pathlib.Path(path) - - # @todo: Implement Comment? 
- def get_comment(self) -> str: - return "" - - def set_comment(self, comment: str) -> bool: - return False - - def read_file(self, archive_file: str) -> bytes: - data = bytes() - try: - with py7zr.SevenZipFile(self.path, "r") as zf: - data = zf.read(archive_file)[archive_file].read() - except py7zr.Bad7zFile as e: - logger.error("bad 7zip file [%s]: %s :: %s", e, self.path, archive_file) - raise IOError from e - except Exception as e: - logger.error("bad 7zip file [%s]: %s :: %s", e, self.path, archive_file) - raise IOError from e - - return data - - def remove_file(self, archive_file: str) -> bool: - try: - self.rebuild_zip_file([archive_file]) - except: - logger.exception("Failed to remove %s from 7zip archive", archive_file) - return False - else: - return True - - def write_file(self, archive_file: str, data: bytes) -> bool: - # At the moment, no other option but to rebuild the whole - # zip archive w/o the indicated file. Very sucky, but maybe - # another solution can be found - try: - files = self.get_filename_list() - if archive_file in files: - self.rebuild_zip_file([archive_file]) - - # now just add the archive file as a new one - with py7zr.SevenZipFile(self.path, "a") as zf: - zf.writestr(data, archive_file) - return True - except: - logger.exception("Writing zip file failed") - return False - - def get_filename_list(self) -> list[str]: - try: - with py7zr.SevenZipFile(self.path, "r") as zf: - namelist: list[str] = zf.getnames() - - return namelist - except Exception as e: - logger.error("Unable to get 7zip file list [%s]: %s", e, self.path) - return [] - - def rebuild_zip_file(self, exclude_list: list[str]) -> None: - """Zip helper func - - This recompresses the zip archive, without the files in the exclude_list - """ - tmp_fd, tmp_name = tempfile.mkstemp(dir=os.path.dirname(self.path)) - os.close(tmp_fd) - - try: - with py7zr.SevenZipFile(self.path, "r") as zin: - targets = [f for f in zin.getnames() if f not in exclude_list] - with 
py7zr.SevenZipFile(self.path, "r") as zin: - with py7zr.SevenZipFile(tmp_name, "w") as zout: - for fname, bio in zin.read(targets).items(): - zout.writef(bio, fname) - except Exception: - logger.exception("Error rebuilding 7zip file: %s", self.path) - - # replace with the new file - os.remove(self.path) - os.rename(tmp_name, self.path) - - def copy_from_archive(self, otherArchive: UnknownArchiver) -> bool: - """Replace the current zip with one copied from another archive""" - try: - with py7zr.SevenZipFile(self.path, "w") as zout: - for fname in otherArchive.get_filename_list(): - data = otherArchive.read_file(fname) - if data is not None: - zout.writestr(data, fname) - except Exception as e: - logger.exception("Error while copying to %s: %s", self.path, e) - return False - else: - return True - - class ZipArchiver(UnknownArchiver): """ZIP implementation""" - def __init__(self, path: Union[pathlib.Path, str]) -> None: + def __init__(self, path: pathlib.Path | str) -> None: self.path = pathlib.Path(path) def get_comment(self) -> str: @@ -216,10 +122,10 @@ class ZipArchiver(UnknownArchiver): data = zf.read(archive_file) except zipfile.BadZipfile as e: logger.error("bad zipfile [%s]: %s :: %s", e, self.path, archive_file) - raise IOError from e + raise OSError from e except Exception as e: logger.error("bad zipfile [%s]: %s :: %s", e, self.path, archive_file) - raise IOError from e + raise OSError from e return data def remove_file(self, archive_file: str) -> bool: @@ -248,7 +154,7 @@ class ZipArchiver(UnknownArchiver): logger.error("writing zip file failed [%s]: %s", e, self.path) return False - def get_filename_list(self) -> List[str]: + def get_filename_list(self) -> list[str]: try: with zipfile.ZipFile(self.path, "r") as zf: namelist = zf.namelist() @@ -257,7 +163,7 @@ class ZipArchiver(UnknownArchiver): logger.error("Unable to get zipfile list [%s]: %s", e, self.path) return [] - def rebuild_zip_file(self, exclude_list: List[str]) -> None: + def 
class TarArchiver(UnknownArchiver):

    """TAR implementation"""

    def __init__(self, path: pathlib.Path | str) -> None:
        # Normalise to a Path so every archiver exposes the same attribute type.
        self.path = pathlib.Path(path)

    def get_comment(self) -> str:
        """Return the archive comment.

        The tar format has no archive-level comment, so this is always "".
        """
        return ""

    def set_comment(self, comment: str) -> bool:
        """Report failure: a tar archive has nowhere to store a comment."""
        return False

    def read_file(self, archive_file: str) -> bytes | None:
        """Return the raw bytes of the member named ``archive_file``.

        Raises:
            OSError: the archive is unreadable, the member is missing, or the
                member is not a regular file.
        """
        try:
            # tarfile.open() detects compression; the bare TarFile constructor does not.
            with tarfile.open(self.path, "r") as tf:
                member = tf.extractfile(archive_file)
                if member is None:
                    # extractfile() returns None for directories and other non-file members.
                    raise OSError(f"{archive_file} is not a regular file")
                return member.read()
        except Exception as e:
            logger.error("bad tarfile [%s]: %s :: %s", e, self.path, archive_file)
            raise OSError from e

    def remove_file(self, archive_file: str) -> bool:
        """Remove ``archive_file`` from the archive; return True on success."""
        try:
            self.rebuild_tar_file([archive_file])
        except Exception:
            logger.exception("Failed to remove %s from tar archive", archive_file)
            return False
        else:
            return True

    def write_file(self, archive_file: str, data: bytes) -> bool:
        """Store ``data`` as ``archive_file``, replacing any existing member.

        Returns True on success, False on any failure (which is logged).
        """
        try:
            # A tar member cannot be replaced in place, so an existing archive is
            # first rewritten without it. The rewrite is also always uncompressed,
            # which is what append mode requires.
            if os.path.exists(self.path):
                self.rebuild_tar_file([archive_file])

            # now just add the archive file as a new one
            with tarfile.open(self.path, "a") as tf:
                self._add_bytes(tf, archive_file, data)
            return True
        except Exception:
            logger.exception("Writing tar file failed: %s", self.path)
            return False

    def get_filename_list(self) -> list[str]:
        """Return the names of all members, or [] if the archive is unreadable."""
        try:
            with tarfile.open(self.path, "r") as tf:
                return tf.getnames()
        except Exception as e:
            logger.error("Unable to get tarfile list [%s]: %s", e, self.path)
            return []

    def rebuild_tar_file(self, exclude_list: list[str]) -> None:
        """Tar helper func

        This re-creates the tar archive without the files in the exclude list.
        The new archive is built in a temp file next to the original and only
        swapped in after a successful rebuild; on failure the original is left
        untouched, the temp file is deleted and the exception propagates.
        """
        tmp_fd, tmp_name = tempfile.mkstemp(dir=os.path.dirname(self.path))
        os.close(tmp_fd)

        try:
            with tarfile.open(self.path, "r") as tin, tarfile.open(tmp_name, "w") as tout:
                for item in tin.getmembers():
                    if item.name in exclude_list:
                        continue
                    # Only regular files carry a payload; other members are header-only.
                    payload = tin.extractfile(item) if item.isfile() else None
                    tout.addfile(item, payload)
        except Exception:
            logger.exception("Error rebuilding tar file: %s", self.path)
            os.remove(tmp_name)
            raise

        # replace with the new file
        os.replace(tmp_name, self.path)

    def copy_from_archive(self, other_archive: UnknownArchiver) -> bool:
        """Replace the current tar with one copied from another archive"""
        try:
            with tarfile.open(self.path, "w") as tout:
                for fname in other_archive.get_filename_list():
                    data = other_archive.read_file(fname)
                    if data is not None:
                        self._add_bytes(tout, fname, data)
        except Exception:
            logger.exception("Error while copying to %s", self.path)
            return False
        else:
            return True

    @staticmethod
    def _add_bytes(tf: tarfile.TarFile, name: str, data: bytes) -> None:
        """Append ``data`` to the open archive ``tf`` as a regular file called ``name``."""
        info = tarfile.TarInfo(name)
        info.size = len(data)  # addfile() copies exactly this many bytes
        info.mtime = int(time.time())
        tf.addfile(info, io.BytesIO(data))


class SevenZipArchiver(UnknownArchiver):

    """7Z implementation"""

    def __init__(self, path: pathlib.Path | str) -> None:
        self.path = pathlib.Path(path)

    # @todo: Implement Comment?
    def get_comment(self) -> str:
        """Comments are not implemented for 7z; always returns ""."""
        return ""

    def set_comment(self, comment: str) -> bool:
        """Comments are not implemented for 7z; always returns False."""
        return False

    def read_file(self, archive_file: str) -> bytes:
        """Return the raw bytes of the member named ``archive_file``.

        Raises:
            OSError: the archive is unreadable or the member cannot be read.
        """
        try:
            with py7zr.SevenZipFile(self.path, "r") as zf:
                # read() takes a collection of names and returns {name: file-like}.
                return zf.read([archive_file])[archive_file].read()
        except Exception as e:
            logger.error("bad 7zip file [%s]: %s :: %s", e, self.path, archive_file)
            raise OSError from e

    def remove_file(self, archive_file: str) -> bool:
        """Remove ``archive_file`` from the archive; return True on success."""
        try:
            self.rebuild_zip_file([archive_file])
        except Exception:
            logger.exception("Failed to remove %s from 7zip archive", archive_file)
            return False
        else:
            return True

    def write_file(self, archive_file: str, data: bytes) -> bool:
        """Store ``data`` as ``archive_file``, replacing any existing member.

        Returns True on success, False on any failure (which is logged).
        """
        # At the moment, no other option but to rebuild the whole
        # archive w/o the indicated file. Very sucky, but maybe
        # another solution can be found
        try:
            if archive_file in self.get_filename_list():
                self.rebuild_zip_file([archive_file])

            # now just add the archive file as a new one
            with py7zr.SevenZipFile(self.path, "a") as zf:
                zf.writestr(data, archive_file)
            return True
        except Exception:
            logger.exception("Writing zip file failed")
            return False

    def get_filename_list(self) -> list[str]:
        """Return the names of all members, or [] if the archive is unreadable."""
        try:
            with py7zr.SevenZipFile(self.path, "r") as zf:
                namelist: list[str] = zf.getnames()

            return namelist
        except Exception as e:
            logger.error("Unable to get 7zip file list [%s]: %s", e, self.path)
            return []

    def rebuild_zip_file(self, exclude_list: list[str]) -> None:
        """Zip helper func

        This recompresses the 7z archive, without the files in the exclude_list.
        The new archive is built in a temp file next to the original and only
        swapped in after a successful rebuild; on failure the original is left
        untouched, the temp file is deleted and the exception propagates.
        """
        tmp_fd, tmp_name = tempfile.mkstemp(dir=os.path.dirname(self.path))
        os.close(tmp_fd)

        try:
            with py7zr.SevenZipFile(self.path, "r") as zin:
                targets = [f for f in zin.getnames() if f not in exclude_list]
            # The archive is reopened for the extraction pass, as in the original code
            # (presumably to start from a fresh reader state - TODO confirm).
            with py7zr.SevenZipFile(self.path, "r") as zin:
                with py7zr.SevenZipFile(tmp_name, "w") as zout:
                    for fname, bio in zin.read(targets).items():
                        zout.writef(bio, fname)
        except Exception:
            logger.exception("Error rebuilding 7zip file: %s", self.path)
            os.remove(tmp_name)
            raise

        # replace with the new file
        os.replace(tmp_name, self.path)

    def copy_from_archive(self, otherArchive: UnknownArchiver) -> bool:
        """Replace the current 7z archive with one copied from another archive"""
        try:
            with py7zr.SevenZipFile(self.path, "w") as zout:
                for fname in otherArchive.get_filename_list():
                    data = otherArchive.read_file(fname)
                    if data is not None:
                        zout.writestr(data, fname)
        except Exception as e:
            logger.exception("Error while copying to %s: %s", self.path, e)
            return False
        else:
            return True
-> bool: @@ -531,16 +643,16 @@ class RarArchiver(UnknownArchiver): if item.file_size != 0: namelist.append(item.filename) - except (OSError, IOError) as e: + except OSError as e: logger.error(f"get_filename_list(): [{e}] {self.path} attempt #{tries}".format(str(e), self.path, tries)) time.sleep(1) return namelist - def get_rar_obj(self) -> "Optional[rarfile.RarFile]": + def get_rar_obj(self) -> rarfile.RarFile | None: try: rarc = rarfile.RarFile(str(self.path)) - except (OSError, IOError) as e: + except OSError as e: logger.error("getRARObj(): [%s] %s", e, self.path) else: return rarc @@ -552,7 +664,7 @@ class FolderArchiver(UnknownArchiver): """Folder implementation""" - def __init__(self, path: Union[pathlib.Path, str]) -> None: + def __init__(self, path: pathlib.Path | str) -> None: self.path = pathlib.Path(path) self.comment_file_name = "ComicTaggerFolderComment.txt" @@ -569,7 +681,7 @@ class FolderArchiver(UnknownArchiver): try: with open(fname, "rb") as f: data = f.read() - except IOError: + except OSError: logger.exception("Failed to read: %s", fname) return data @@ -600,7 +712,7 @@ class FolderArchiver(UnknownArchiver): def get_filename_list(self) -> list[str]: return self.list_files(self.path) - def list_files(self, folder: Union[pathlib.Path, str]) -> list[str]: + def list_files(self, folder: pathlib.Path | str) -> list[str]: itemlist = [] @@ -616,23 +728,23 @@ class ComicArchive: logo_data = bytes() class ArchiveType: - SevenZip, Zip, Rar, Folder, Pdf, Unknown = list(range(6)) + Zip, Rar, SevenZip, Tar, Folder, Pdf, Unknown = list(range(6)) def __init__( self, - path: Union[pathlib.Path, str], + path: pathlib.Path | str, rar_exe_path: str = "", - default_image_path: Union[pathlib.Path, str, None] = None, + default_image_path: pathlib.Path | str | None = None, ) -> None: - self.cbi_md: Optional[GenericMetadata] = None - self.cix_md: Optional[GenericMetadata] = None - self.comet_filename: Optional[str] = None - self.comet_md: Optional[GenericMetadata] = 
None - self.has__cbi: Optional[bool] = None - self.has__cix: Optional[bool] = None - self.has__comet: Optional[bool] = None + self.cbi_md: GenericMetadata | None = None + self.cix_md: GenericMetadata | None = None + self.comet_filename: str | None = None + self.comet_md: GenericMetadata | None = None + self._has_cbi: bool | None = None + self._has_cix: bool | None = None + self._has_comet: bool | None = None self.path = pathlib.Path(path) - self.page_count: Optional[int] = None + self.page_count: int | None = None self.page_list: list[str] = [] self.rar_exe_path = rar_exe_path @@ -664,22 +776,24 @@ class ComicArchive: self.archive_type = self.ArchiveType.Zip self.archiver = ZipArchiver(self.path) + elif self.tar_test(): + self.archive_type = self.ArchiveType.Tar + self.archiver = TarArchiver(self.path) + elif self.rar_test(): self.archive_type = self.ArchiveType.Rar self.archiver = RarArchiver(self.path, rar_exe_path=self.rar_exe_path) - if not ComicArchive.logo_data: - fname = self.default_image_path - if fname: - with open(fname, "rb") as fd: - ComicArchive.logo_data = fd.read() + if not ComicArchive.logo_data and self.default_image_path: + with open(self.default_image_path, "rb") as fd: + ComicArchive.logo_data = fd.read() def reset_cache(self) -> None: """Clears the cached data""" - self.has__cix = None - self.has__cbi = None - self.has__comet = None + self._has_cix = None + self._has_cbi = None + self._has_comet = None self.comet_filename = None self.page_count = None self.page_list = [] @@ -687,11 +801,11 @@ class ComicArchive: self.cbi_md = None self.comet_md = None - def load_cache(self, style_list: List[int]) -> None: + def load_cache(self, style_list: list[int]) -> None: for style in style_list: self.read_metadata(style) - def rename(self, path: Union[pathlib.Path, str]) -> None: + def rename(self, path: pathlib.Path | str) -> None: self.path = pathlib.Path(path) self.archiver.path = pathlib.Path(path) @@ -701,6 +815,9 @@ class ComicArchive: def 
zip_test(self) -> bool: return zipfile.is_zipfile(self.path) + def tar_test(self): + return tarfile.is_tarfile(self.path) + def rar_test(self) -> bool: try: return bool(rarfile.is_rarfile(str(self.path))) @@ -713,6 +830,9 @@ class ComicArchive: def is_zip(self) -> bool: return self.archive_type == self.ArchiveType.Zip + def is_tar(self): + return self.archive_type == self.ArchiveType.Tar + def is_rar(self) -> bool: return self.archive_type == self.ArchiveType.Rar @@ -739,13 +859,13 @@ class ComicArchive: def is_writable_for_style(self, data_style: int) -> bool: - if (self.is_rar() or self.is_sevenzip()) and data_style == MetaDataStyle.CBI: + if (self.is_rar() or self.is_sevenzip() or self.is_tar()) and data_style == MetaDataStyle.CBI: return False return self.is_writable() def seems_to_be_a_comic_archive(self) -> bool: - if (self.is_zip() or self.is_rar() or self.is_sevenzip()) and (self.get_number_of_pages() > 0): + if (self.is_zip() or self.is_rar() or self.is_sevenzip() or self.is_tar()) and (self.get_number_of_pages() > 0): return True return False @@ -797,7 +917,7 @@ class ComicArchive: if filename: try: image_data = self.archiver.read_file(filename) or bytes() - except IOError: + except OSError: logger.exception("Error reading in page. 
Substituting logo page.") image_data = ComicArchive.logo_data @@ -815,7 +935,7 @@ class ComicArchive: return page_list[index] - def get_scanner_page_index(self) -> Optional[int]: + def get_scanner_page_index(self) -> int | None: scanner_page_index = None # make a guess at the scanner page @@ -837,7 +957,7 @@ class ComicArchive: length_buckets[length] = 1 # sort by most common - sorted_buckets = sorted(iter(length_buckets.items()), key=lambda k_v: (k_v[1], k_v[0]), reverse=True) + sorted_buckets = sorted(length_buckets.items(), key=lambda k, v: (v, k), reverse=True) # statistical mode occurrence is first mode_length = sorted_buckets[0][0] @@ -863,7 +983,7 @@ class ComicArchive: return scanner_page_index - def get_page_name_list(self, sort_list: bool = True) -> List[str]: + def get_page_name_list(self, sort_list: bool = True) -> list[str]: if not self.page_list: # get the list file names in the archive, and sort files: list[str] = self.archiver.get_filename_list() @@ -877,7 +997,7 @@ class ComicArchive: self.page_list = [] for name in files: if ( - os.path.splitext(name)[1].lower() in [".jpg", ".jpeg", ".png", ".gif", ".webp"] + os.path.splitext(name)[1].lower() in [".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"] and os.path.basename(name)[0] != "." 
): self.page_list.append(name) @@ -908,14 +1028,14 @@ class ComicArchive: return self.archiver.get_comment() def has_cbi(self) -> bool: - if self.has__cbi is None: + if self._has_cbi is None: if not self.seems_to_be_a_comic_archive(): - self.has__cbi = False + self._has_cbi = False else: comment = self.archiver.get_comment() - self.has__cbi = ComicBookInfo().validate_string(comment) + self._has_cbi = ComicBookInfo().validate_string(comment) - return self.has__cbi + return self._has_cbi def write_cbi(self, metadata: GenericMetadata) -> bool: if metadata is not None: @@ -923,7 +1043,7 @@ class ComicArchive: cbi_string = ComicBookInfo().string_from_metadata(metadata) write_success = self.archiver.set_comment(cbi_string) if write_success: - self.has__cbi = True + self._has_cbi = True self.cbi_md = metadata self.reset_cache() return write_success @@ -934,7 +1054,7 @@ class ComicArchive: if self.has_cbi(): write_success = self.archiver.set_comment("") if write_success: - self.has__cbi = False + self._has_cbi = False self.cbi_md = None self.reset_cache() return write_success @@ -965,7 +1085,7 @@ class ComicArchive: return b"" try: raw_cix = self.archiver.read_file(self.ci_xml_filename) or b"" - except IOError as e: + except OSError as e: logger.error("Error reading in raw CIX!: %s", e) raw_cix = bytes() return raw_cix @@ -977,7 +1097,7 @@ class ComicArchive: cix_string = ComicInfoXml().string_from_metadata(metadata, xml=raw_cix) write_success = self.archiver.write_file(self.ci_xml_filename, cix_string.encode("utf-8")) if write_success: - self.has__cix = True + self._has_cix = True self.cix_md = metadata self.reset_cache() return write_success @@ -988,22 +1108,22 @@ class ComicArchive: if self.has_cix(): write_success = self.archiver.remove_file(self.ci_xml_filename) if write_success: - self.has__cix = False + self._has_cix = False self.cix_md = None self.reset_cache() return write_success return True def has_cix(self) -> bool: - if self.has__cix is None: + if 
self._has_cix is None: if not self.seems_to_be_a_comic_archive(): - self.has__cix = False + self._has_cix = False elif self.ci_xml_filename in self.archiver.get_filename_list(): - self.has__cix = True + self._has_cix = True else: - self.has__cix = False - return self.has__cix + self._has_cix = False + return self._has_cix def read_comet(self) -> GenericMetadata: if self.comet_md is None: @@ -1058,7 +1178,7 @@ class ComicArchive: comet_string = CoMet().string_from_metadata(metadata) write_success = self.archiver.write_file(cast(str, self.comet_filename), comet_string.encode("utf-8")) if write_success: - self.has__comet = True + self._has_comet = True self.comet_md = metadata self.reset_cache() return write_success @@ -1069,17 +1189,17 @@ class ComicArchive: if self.has_comet(): write_success = self.archiver.remove_file(cast(str, self.comet_filename)) if write_success: - self.has__comet = False + self._has_comet = False self.comet_md = None self.reset_cache() return write_success return True def has_comet(self) -> bool: - if self.has__comet is None: - self.has__comet = False + if self._has_comet is None: + self._has_comet = False if not self.seems_to_be_a_comic_archive(): - return self.has__comet + return self._has_comet # look at all xml files in root, and search for CoMet data, get first for n in self.archiver.get_filename_list(): @@ -1095,10 +1215,10 @@ class ComicArchive: if CoMet().validate_string(data): # since we found it, save it! 
self.comet_filename = n - self.has__comet = True + self._has_comet = True break - return self.has__comet + return self._has_comet def apply_archive_info_to_metadata(self, md: GenericMetadata, calc_page_sizes: bool = False) -> None: md.page_count = self.get_number_of_pages() @@ -1147,7 +1267,10 @@ class ComicArchive: if complicated_parser: lex = filenamelexer.Lex(filename) p = filenameparser.Parse( - lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher + lex.items, + remove_c2c=remove_c2c, + remove_fcbd=remove_fcbd, + remove_publisher=remove_publisher, ) metadata.alternate_number = utils.xlate(p.filename_info["alternate"]) metadata.issue = utils.xlate(p.filename_info["issue"]) diff --git a/comicapi/comicbookinfo.py b/comicapi/comicbookinfo.py index 6bdcebb..47b3c45 100644 --- a/comicapi/comicbookinfo.py +++ b/comicapi/comicbookinfo.py @@ -1,24 +1,27 @@ """A class to encapsulate the ComicBookInfo data""" - # Copyright 2012-2014 Anthony Beville - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # http://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations import json import logging from collections import defaultdict from datetime import datetime -from typing import Any, Literal, TypedDict, Union +from typing import Any +from typing import Literal +from typing import TypedDict +from typing import Union from comicapi import utils from comicapi.genericmetadata import GenericMetadata @@ -115,11 +118,10 @@ class ComicBookInfo: return metadata def string_from_metadata(self, metadata: GenericMetadata) -> str: - cbi_container = self.create_json_dictionary(metadata) return json.dumps(cbi_container) - def validate_string(self, string: Union[bytes, str]) -> bool: + def validate_string(self, string: bytes | str) -> bool: """Verify that the string actually contains CBI data in JSON format""" try: @@ -165,7 +167,6 @@ class ComicBookInfo: return cbi_container def write_to_external_file(self, filename: str, metadata: GenericMetadata) -> None: - cbi_container = self.create_json_dictionary(metadata) with open(filename, "w", encoding="utf-8") as f: diff --git a/comicapi/comicinfoxml.py b/comicapi/comicinfoxml.py index fd619c2..1edeb6f 100644 --- a/comicapi/comicinfoxml.py +++ b/comicapi/comicinfoxml.py @@ -1,27 +1,31 @@ """A class to encapsulate ComicRack's ComicInfo.xml data""" - # Copyright 2012-2014 Anthony Beville - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # http://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations import logging import xml.etree.ElementTree as ET from collections import OrderedDict -from typing import Any, List, Optional, cast +from typing import Any +from typing import cast +from typing import List +from typing import Optional from xml.etree.ElementTree import ElementTree from comicapi import utils -from comicapi.genericmetadata import GenericMetadata, ImageMetadata +from comicapi.genericmetadata import GenericMetadata +from comicapi.genericmetadata import ImageMetadata from comicapi.issuestring import IssueString logger = logging.getLogger(__name__) @@ -37,7 +41,7 @@ class ComicInfoXml: cover_synonyms = ["cover", "covers", "coverartist", "cover artist"] editor_synonyms = ["editor"] - def get_parseable_credits(self) -> List[str]: + def get_parseable_credits(self) -> list[str]: parsable_credits = [] parsable_credits.extend(self.writer_synonyms) parsable_credits.extend(self.penciller_synonyms) @@ -59,7 +63,7 @@ class ComicInfoXml: return str(tree_str) def convert_metadata_to_xml( - self, filename: "ComicInfoXml", metadata: GenericMetadata, xml: bytes = b"" + self, filename: ComicInfoXml, metadata: GenericMetadata, xml: bytes = b"" ) -> ElementTree: # shorthand for the metadata @@ -192,7 +196,7 @@ class ComicInfoXml: if root.tag != "ComicInfo": raise Exception("Not a ComicInfo file") - def get(name: str) -> Optional[str]: + def get(name: str) -> str | None: tag = root.find(name) if tag is None: return None @@ -268,11 +272,9 @@ class ComicInfoXml: return md def write_to_external_file(self, filename: str, metadata: GenericMetadata, xml: bytes = b"") -> None: - tree = self.convert_metadata_to_xml(self, metadata, xml) tree.write(filename, encoding="utf-8", xml_declaration=True) def read_from_external_file(self, filename: str) -> GenericMetadata: - tree = ET.parse(filename) return self.convert_xml_to_metadata(tree) diff --git a/comicapi/data/publishers.json b/comicapi/data/publishers.json index e80915b..9e0a1a1 100644 --- 
a/comicapi/data/publishers.json +++ b/comicapi/data/publishers.json @@ -127,4 +127,4 @@ "red circle Comics": "Dark Circle Comics", "red circle": "Dark Circle Comics" } -} \ No newline at end of file +} diff --git a/comicapi/filenamelexer.py b/comicapi/filenamelexer.py index 7fce422..ac5a4b6 100644 --- a/comicapi/filenamelexer.py +++ b/comicapi/filenamelexer.py @@ -1,8 +1,14 @@ +from __future__ import annotations + import calendar import os import unicodedata -from enum import Enum, auto -from typing import Any, Callable, Optional, Set +from enum import auto +from enum import Enum +from typing import Any +from typing import Callable +from typing import Optional +from typing import Set class ItemType(Enum): @@ -86,7 +92,7 @@ class Item: class Lexer: def __init__(self, string: str) -> None: self.input: str = string # The string being scanned - self.state: Optional[Callable[[Lexer], Optional[Callable]]] = None # The next lexing function to enter + self.state: Callable[[Lexer], Callable | None] | None = None # The next lexing function to enter self.pos: int = -1 # Current position in the input self.start: int = 0 # Start position of this item self.lastPos: int = 0 # Position of most recent item returned by nextItem @@ -168,13 +174,13 @@ class Lexer: # Errorf returns an error token and terminates the scan by passing # Back a nil pointer that will be the next state, terminating self.nextItem. -def errorf(lex: Lexer, message: str) -> Optional[Callable[[Lexer], Optional[Callable]]]: +def errorf(lex: Lexer, message: str) -> Callable[[Lexer], Callable | None] | None: lex.items.append(Item(ItemType.Error, lex.start, message)) return None # Scans the elements inside action delimiters. 
-def lex_filename(lex: Lexer) -> Optional[Callable[[Lexer], Optional[Callable]]]: +def lex_filename(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: r = lex.get() if r == eof: if lex.paren_depth != 0: @@ -301,7 +307,7 @@ def lex_text(lex: Lexer) -> Callable: return lex_filename -def cal(value: str) -> Set[Any]: +def cal(value: str) -> set[Any]: month_abbr = [i for i, x in enumerate(calendar.month_abbr) if x == value.title()] month_name = [i for i, x in enumerate(calendar.month_name) if x == value.title()] day_abbr = [i for i, x in enumerate(calendar.day_abbr) if x == value.title()] @@ -309,7 +315,7 @@ def cal(value: str) -> Set[Any]: return set(month_abbr + month_name + day_abbr + day_name) -def lex_number(lex: Lexer) -> Optional[Callable[[Lexer], Optional[Callable]]]: +def lex_number(lex: Lexer) -> Callable[[Lexer], Callable | None] | None: if not lex.scan_number(): return errorf(lex, "bad number syntax: " + lex.input[lex.start : lex.pos]) # Complex number logic removed. Messes with math operations without space diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py index 54c3e9b..7268edf 100644 --- a/comicapi/filenameparser.py +++ b/comicapi/filenameparser.py @@ -2,34 +2,38 @@ This should probably be re-written, but, well, it mostly works! """ - # Copyright 2012-2014 Anthony Beville - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # http://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +# # Some portions of this code were modified from pyComicMetaThis project # http://code.google.com/p/pycomicmetathis/ +from __future__ import annotations import logging import os import re from operator import itemgetter -from typing import Callable, Match, Optional, TypedDict +from typing import Callable +from typing import Match +from typing import Optional +from typing import TypedDict from urllib.parse import unquote from text2digits import text2digits -from comicapi import filenamelexer, issuestring +from comicapi import filenamelexer +from comicapi import issuestring t2d = text2digits.Text2Digits(add_ordinal_ending=False) t2do = text2digits.Text2Digits(add_ordinal_ending=True) @@ -58,7 +62,7 @@ class FileNameParser: placeholders = [r"[_]", r" +"] for ph in placeholders: string = re.sub(ph, self.repl, string) - return string # .strip() + return string def get_issue_count(self, filename: str, issue_end: int) -> str: @@ -176,13 +180,11 @@ class FileNameParser: # in case there is no issue number, remove some obvious stuff if "--" in filename: - # the pattern seems to be that anything to left of the first "--" - # is the series name followed by issue + # the pattern seems to be that anything to left of the first "--" is the series name followed by issue filename = re.sub(r"--.*", self.repl, filename) elif "__" in filename: - # the pattern seems to be that anything to left of the first "__" - # is the series name followed by issue + # the pattern seems to be that anything to left of the first "__" is the series name followed by issue filename = re.sub(r"__.*", self.repl, filename) filename = filename.replace("+", " ") @@ -343,7 +345,7 @@ class Parser: remove_fcbd: bool = False, remove_publisher: bool = False, ) -> None: - self.state: Optional[Callable[[Parser], Optional[Callable]]] = None + self.state: Callable[[Parser], Callable | None] | None = None self.pos = -1 self.firstItem = True @@ -406,7 +408,7 @@ class Parser: self.state = self.state(self) -def 
parse(p: Parser) -> Optional[Callable[[Parser], Optional[Callable]]]: +def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: item: filenamelexer.Item = p.get() # We're done, time to do final processing @@ -656,7 +658,7 @@ def parse(p: Parser) -> Optional[Callable[[Parser], Optional[Callable]]]: # TODO: What about more esoteric numbers??? -def parse_issue_number(p: Parser) -> Optional[Callable[[Parser], Optional[Callable]]]: +def parse_issue_number(p: Parser) -> Callable[[Parser], Callable | None] | None: item = p.input[p.pos] if "issue" in p.filename_info: @@ -689,7 +691,7 @@ def parse_issue_number(p: Parser) -> Optional[Callable[[Parser], Optional[Callab return parse -def parse_series(p: Parser) -> Optional[Callable[[Parser], Optional[Callable]]]: +def parse_series(p: Parser) -> Callable[[Parser], Callable | None] | None: item = p.input[p.pos] series: list[list[filenamelexer.Item]] = [[]] @@ -854,7 +856,7 @@ def resolve_year(p: Parser) -> None: p.title_parts.remove(selected_year) -def parse_finish(p: Parser) -> Optional[Callable[[Parser], Optional[Callable]]]: +def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None: resolve_year(p) # If we don't have an issue try to find it in the series @@ -998,7 +1000,7 @@ def get_remainder(p: Parser) -> str: return remainder.strip() -def parse_info_specifier(p: Parser) -> Optional[Callable[[Parser], Optional[Callable]]]: +def parse_info_specifier(p: Parser) -> Callable[[Parser], Callable | None] | None: item = p.input[p.pos] index = p.pos @@ -1053,7 +1055,7 @@ def parse_info_specifier(p: Parser) -> Optional[Callable[[Parser], Optional[Call # Gets 03 in '03 of 6' -def get_number(p: Parser, index: int) -> Optional[filenamelexer.Item]: +def get_number(p: Parser, index: int) -> filenamelexer.Item | None: # Go backward through the filename to see if we can find what this is of eg '03 (of 6)' or '008 title 03 (of 6)' rev = p.input[:index] rev.reverse() diff --git a/comicapi/genericmetadata.py 
b/comicapi/genericmetadata.py index 9dba196..884aa94 100644 --- a/comicapi/genericmetadata.py +++ b/comicapi/genericmetadata.py @@ -5,23 +5,26 @@ tagging schemes and databases, such as ComicVine or GCD. This makes conversion possible, however lossy it might be """ - # Copyright 2012-2014 Anthony Beville - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # http://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import logging -from typing import Any, List, Optional, TypedDict +from typing import Any +from typing import List +from typing import Optional +from typing import TypedDict from comicapi import utils @@ -76,59 +79,59 @@ class GenericMetadata: def __init__(self) -> None: self.is_empty: bool = True - self.tag_origin: Optional[str] = None + self.tag_origin: str | None = None - self.series: Optional[str] = None - self.issue: Optional[str] = None - self.title: Optional[str] = None - self.publisher: Optional[str] = None - self.month: Optional[int] = None - self.year: Optional[int] = None - self.day: Optional[int] = None - self.issue_count: Optional[int] = None - self.volume: Optional[int] = None - self.genre: Optional[str] = None - self.language: Optional[str] = None # 2 letter iso code - self.comments: Optional[str] = None # use same way as Summary in CIX + self.series: str | None = None + self.issue: str | None = None + self.title: str | None = None + self.publisher: str | None = None + self.month: int | None = None + self.year: int | None = None + self.day: int | None = None + 
self.issue_count: int | None = None + self.volume: int | None = None + self.genre: str | None = None + self.language: str | None = None # 2 letter iso code + self.comments: str | None = None # use same way as Summary in CIX - self.volume_count: Optional[int] = None - self.critical_rating: Optional[str] = None - self.country: Optional[str] = None + self.volume_count: int | None = None + self.critical_rating: str | None = None + self.country: str | None = None - self.alternate_series: Optional[str] = None - self.alternate_number: Optional[str] = None - self.alternate_count: Optional[int] = None - self.imprint: Optional[str] = None - self.notes: Optional[str] = None - self.web_link: Optional[str] = None - self.format: Optional[str] = None - self.manga: Optional[str] = None - self.black_and_white: Optional[bool] = None - self.page_count: Optional[int] = None - self.maturity_rating: Optional[str] = None - self.community_rating: Optional[str] = None + self.alternate_series: str | None = None + self.alternate_number: str | None = None + self.alternate_count: int | None = None + self.imprint: str | None = None + self.notes: str | None = None + self.web_link: str | None = None + self.format: str | None = None + self.manga: str | None = None + self.black_and_white: bool | None = None + self.page_count: int | None = None + self.maturity_rating: str | None = None + self.community_rating: str | None = None - self.story_arc: Optional[str] = None - self.series_group: Optional[str] = None - self.scan_info: Optional[str] = None + self.story_arc: str | None = None + self.series_group: str | None = None + self.scan_info: str | None = None - self.characters: Optional[str] = None - self.teams: Optional[str] = None - self.locations: Optional[str] = None + self.characters: str | None = None + self.teams: str | None = None + self.locations: str | None = None - self.credits: List[CreditMetadata] = [] - self.tags: List[str] = [] - self.pages: List[ImageMetadata] = [] + self.credits: 
list[CreditMetadata] = [] + self.tags: list[str] = [] + self.pages: list[ImageMetadata] = [] # Some CoMet-only items - self.price: Optional[str] = None - self.is_version_of: Optional[str] = None - self.rights: Optional[str] = None - self.identifier: Optional[str] = None - self.last_mark: Optional[str] = None - self.cover_image: Optional[str] = None + self.price: str | None = None + self.is_version_of: str | None = None + self.rights: str | None = None + self.identifier: str | None = None + self.last_mark: str | None = None + self.cover_image: str | None = None - def overlay(self, new_md: "GenericMetadata") -> None: + def overlay(self, new_md: GenericMetadata) -> None: """Overlay a metadata object on this one That is, when the new object has non-None values, over-write them @@ -198,7 +201,7 @@ class GenericMetadata: if len(new_md.pages) > 0: assign("pages", new_md.pages) - def overlay_credits(self, new_credits: List[CreditMetadata]) -> None: + def overlay_credits(self, new_credits: list[CreditMetadata]) -> None: for c in new_credits: primary = bool("primary" in c and c["primary"]) @@ -220,8 +223,7 @@ class GenericMetadata: self.pages.append(page_dict) def get_archive_page_index(self, pagenum: int) -> int: - # convert the displayed page number to the page index of the file in - # the archive + # convert the displayed page number to the page index of the file in the archive if pagenum < len(self.pages): return int(self.pages[pagenum]["Image"]) diff --git a/comicapi/issuestring.py b/comicapi/issuestring.py index 2e425a6..5c863d2 100644 --- a/comicapi/issuestring.py +++ b/comicapi/issuestring.py @@ -4,21 +4,20 @@ Class for handling the odd permutations of an 'issue number' that the comics industry throws at us. e.g.: "12", "12.1", "0", "-1", "5AU", "100-2" """ - # Copyright 2012-2014 Anthony Beville - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at - +# # http://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from __future__ import annotations import logging import unicodedata @@ -28,7 +27,7 @@ logger = logging.getLogger(__name__) class IssueString: - def __init__(self, text: Optional[str]) -> None: + def __init__(self, text: str | None) -> None: # break up the issue number string into 2 parts: the numeric and suffix string. # (assumes that the numeric portion is always first) @@ -52,8 +51,7 @@ class IssueString: # if it's still not numeric at start skip it if text[start].isdigit() or text[start] == ".": - # walk through the string, look for split point (the first - # non-numeric) + # walk through the string, look for split point (the first non-numeric) decimal_count = 0 for idx in range(start, len(text)): if text[idx] not in "0123456789.": @@ -71,8 +69,7 @@ class IssueString: if text[idx - 1] == "." 
and len(text) != idx: idx = idx - 1 - # if there is no numeric after the minus, make the minus part of - # the suffix + # if there is no numeric after the minus, make the minus part of the suffix if idx == 1 and start == 1: idx = 0 @@ -113,7 +110,7 @@ class IssueString: return num_s - def as_float(self) -> Optional[float]: + def as_float(self) -> float | None: # return the float, with no suffix if len(self.suffix) == 1 and self.suffix.isnumeric(): return (self.num or 0) + unicodedata.numeric(self.suffix) diff --git a/comicapi/utils.py b/comicapi/utils.py index 01d953e..5e8d328 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -1,18 +1,18 @@ """Some generic utilities""" - # Copyright 2012-2014 Anthony Beville - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # http://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations import json import logging @@ -21,7 +21,10 @@ import pathlib import re import unicodedata from collections import defaultdict -from typing import Any, List, Optional, Union +from typing import Any +from typing import List +from typing import Optional +from typing import Union import pycountry @@ -32,7 +35,7 @@ class UtilsVars: already_fixed_encoding = False -def get_recursive_filelist(pathlist: List[str]) -> List[str]: +def get_recursive_filelist(pathlist: list[str]) -> list[str]: """Get a recursive list of of all files under all path items in the list""" filelist = [] @@ -55,7 +58,7 @@ def get_recursive_filelist(pathlist: List[str]) -> List[str]: return filelist -def list_to_string(lst: List[Union[str, Any]]) -> str: +def list_to_string(lst: list[str | Any]) -> str: string = "" if lst is not None: for item in lst: @@ -77,7 +80,7 @@ def add_to_path(dirname: str) -> None: os.environ["PATH"] = dirname + os.pathsep + os.environ["PATH"] -def which(program: str) -> Optional[str]: +def which(program: str) -> str | None: """Returns path of the executable, if it exists""" def is_exe(fpath: str) -> bool: @@ -173,9 +176,9 @@ def unique_file(file_name: str) -> str: counter += 1 -languages: dict[Optional[str], Optional[str]] = defaultdict(lambda: None) +languages: dict[str | None, str | None] = defaultdict(lambda: None) -countries: dict[Optional[str], Optional[str]] = defaultdict(lambda: None) +countries: dict[str | None, str | None] = defaultdict(lambda: None) for c in pycountry.countries: if "alpha_2" in c._fields: @@ -186,11 +189,11 @@ for lng in pycountry.languages: languages[lng.alpha_2] = lng.name -def get_language_from_iso(iso: Optional[str]) -> Optional[str]: +def get_language_from_iso(iso: str | None) -> str | None: return languages[iso] -def get_language(string: Optional[str]) -> Optional[str]: +def get_language(string: str | None) -> str | None: if string is None: return None @@ -249,7 +252,7 @@ class ImprintDict(dict): else: 
return (item, self.publisher, True) - def copy(self) -> "ImprintDict": + def copy(self) -> ImprintDict: return ImprintDict(self.publisher, super().copy()) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..8ccf4d0 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,31 @@ +[metadata] +name = comicapi +version = 2.2.1 +description = Comic archive (cbr/cbz/cbt) and metadata utilities. Extracted from the ComicTagger project. +long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/OzzieIsaacs/comicapi +author = Iris W +maintainer = @OzzieIsaacs +license = Apache-2.0 +license_file = LICENSE +classifiers = + License :: OSI Approved :: Apache Software License + Operating System :: OS Independent + Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + +[options] +packages = comicapi +install_requires = + natsort>=3.5.2 +python_requires = >=3.6 + +[options.extras_require] +cbr = + rarfile==2.7 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a03590f --- /dev/null +++ b/setup.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from setuptools import setup + +setup()