From 9c231d7e116e514cfff3245959258e9e90aa47c2 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Mon, 18 Dec 2023 02:37:34 -0800 Subject: [PATCH] Add better page info handling Rename set_default_page_list to apply_default_page_list and apply during read_metadata Add a filename attribute to the ImageMetadata class Mark image_index as required Always sort the page name list, a comic application will never need the unsorted list of names Assign the first result from get_cover_page_index_list to coverImage in CoMet tags Allow an Archiver to be passed to the ComicArchive constructor --- comicapi/comicarchive.py | 23 ++++++---- comicapi/genericmetadata.py | 81 +++++++++++++++++++++------------- comicapi/metadata/comet.py | 21 ++++++--- comicapi/metadata/comicrack.py | 10 ++--- comictaggerlib/cli.py | 2 +- comictaggerlib/taggerwindow.py | 29 ++++++------ tests/comicarchive_test.py | 8 +++- tests/genericmetadata_test.py | 4 +- tests/integration_test.py | 2 +- tests/metadata_test.py | 19 +++++++- 10 files changed, 124 insertions(+), 75 deletions(-) diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py index 125d351..ad7c1cf 100644 --- a/comicapi/comicarchive.py +++ b/comicapi/comicarchive.py @@ -81,16 +81,22 @@ class ComicArchive: logo_data = b"" pil_available = True - def __init__(self, path: pathlib.Path | str, default_image_path: pathlib.Path | str | None = None) -> None: + def __init__( + self, path: pathlib.Path | str | Archiver, default_image_path: pathlib.Path | str | None = None + ) -> None: self.md: dict[str, GenericMetadata] = {} - self.path = pathlib.Path(path).absolute() self.page_count: int | None = None self.page_list: list[str] = [] self.reset_cache() self.default_image_path = default_image_path - self.archiver: Archiver = UnknownArchiver.open(self.path) + if isinstance(path, Archiver): + self.path = path.path + self.archiver: Archiver = path + else: + self.path = pathlib.Path(path).absolute() + self.archiver = UnknownArchiver.open(self.path) load_archive_plugins() load_metadata_plugins() @@ -161,7 +167,9 @@ class ComicArchive: def read_metadata(self, style: str) -> GenericMetadata: if style in self.md: return self.md[style] - return metadata_styles[style].get_metadata(self.archiver) + md = metadata_styles[style].get_metadata(self.archiver) + md.apply_default_page_list(self.get_page_name_list()) + return md def read_metadata_string(self, style: str) -> str: return metadata_styles[style].get_metadata_string(self.archiver) @@ -258,14 +266,12 @@ class ComicArchive: return scanner_page_index - def get_page_name_list(self, sort_list: bool = True) -> list[str]: + def get_page_name_list(self) -> list[str]: if not self.page_list: # get the list file names in the archive, and sort files: list[str] = self.archiver.get_filename_list() - # seems like some archive creators are on Windows, and don't know about case-sensitivity! - if sort_list: - files = cast(list[str], utils.os_sorted(files)) + files = cast(list[str], utils.os_sorted(files)) # make a sub-list of image files self.page_list = [] @@ -289,6 +295,7 @@ class ComicArchive: if calc_page_sizes: for index, p in enumerate(md.pages): idx = int(p["image_index"]) + p["filename"] = self.get_page_name(idx) if self.pil_available: try: from PIL import Image diff --git a/comicapi/genericmetadata.py b/comicapi/genericmetadata.py index 506c461..22f89c7 100644 --- a/comicapi/genericmetadata.py +++ b/comicapi/genericmetadata.py @@ -26,7 +26,7 @@ import logging from collections.abc import Sequence from typing import Any, TypedDict -from typing_extensions import NamedTuple +from typing_extensions import NamedTuple, Required from comicapi import utils @@ -54,10 +54,11 @@ class PageType: class ImageMetadata(TypedDict, total=False): + filename: str type: str bookmark: str double_page: bool - image_index: int + image_index: Required[int] size: str height: str width: str @@ -286,13 +287,28 @@ class GenericMetadata: else: self.add_credit(c["person"], c["role"], primary) - def set_default_page_list(self, count: int) -> None: + def apply_default_page_list(self, page_list: Sequence[str]) -> None: # generate a default page list, with the first page marked as the cover - for i in range(count): - page_dict = ImageMetadata(image_index=i) - if i == 0: - page_dict["type"] = PageType.FrontCover - self.pages.append(page_dict) + # Create a dictionary of all pages in the metadata + pages = {p["image_index"]: p for p in self.pages} + cover_set = False + # Go through each page in the archive + # The indexes should always match up + # It might be a good idea to validate that each page in `pages` is found + for i, filename in enumerate(page_list): + if i not in pages: + pages[i] = ImageMetadata(image_index=i, filename=filename) + else: + pages[i]["filename"] = filename + + # Check if we know what the cover is + cover_set = pages[i].get("type", None) == PageType.FrontCover or cover_set + + self.pages = [p[1] for p in sorted(pages.items())] + + # Set the cover to the first image if we don't know what the cover is + if not cover_set: + self.pages[0]["type"] = PageType.FrontCover def get_archive_page_index(self, pagenum: int) -> int: # convert the displayed page number to the page index of the file in the archive @@ -486,29 +502,31 @@ md_test: GenericMetadata = GenericMetadata( ], tags=set(), pages=[ - ImageMetadata(image_index=0, height="1280", size="195977", width="800", type=PageType.FrontCover), - ImageMetadata(image_index=1, height="2039", size="611993", width="1327"), - ImageMetadata(image_index=2, height="2039", size="783726", width="1327"), - ImageMetadata(image_index=3, height="2039", size="679584", width="1327"), - ImageMetadata(image_index=4, height="2039", size="788179", width="1327"), - ImageMetadata(image_index=5, height="2039", size="864433", width="1327"), - ImageMetadata(image_index=6, height="2039", size="765606", width="1327"), - ImageMetadata(image_index=7, height="2039", size="876427", width="1327"), - ImageMetadata(image_index=8, height="2039", size="852622", width="1327"), - ImageMetadata(image_index=9, height="2039", size="800205", width="1327"), - ImageMetadata(image_index=10, height="2039", size="746243", width="1326"), - ImageMetadata(image_index=11, height="2039", size="718062", width="1327"), - ImageMetadata(image_index=12, height="2039", size="532179", width="1326"), - ImageMetadata(image_index=13, height="2039", size="686708", width="1327"), - ImageMetadata(image_index=14, height="2039", size="641907", width="1327"), - ImageMetadata(image_index=15, height="2039", size="805388", width="1327"), - ImageMetadata(image_index=16, height="2039", size="668927", width="1326"), - ImageMetadata(image_index=17, height="2039", size="710605", width="1327"), - ImageMetadata(image_index=18, height="2039", size="761398", width="1326"), - ImageMetadata(image_index=19, height="2039", size="743807", width="1327"), - ImageMetadata(image_index=20, height="2039", size="552911", width="1326"), - ImageMetadata(image_index=21, height="2039", size="556827", width="1327"), - ImageMetadata(image_index=22, height="2039", size="675078", width="1326"), + ImageMetadata( + image_index=0, height="1280", size="195977", width="800", type=PageType.FrontCover, filename="!cover.jpg" + ), + ImageMetadata(image_index=1, height="2039", size="611993", width="1327", filename="01.jpg"), + ImageMetadata(image_index=2, height="2039", size="783726", width="1327", filename="02.jpg"), + ImageMetadata(image_index=3, height="2039", size="679584", width="1327", filename="03.jpg"), + ImageMetadata(image_index=4, height="2039", size="788179", width="1327", filename="04.jpg"), + ImageMetadata(image_index=5, height="2039", size="864433", width="1327", filename="05.jpg"), + ImageMetadata(image_index=6, height="2039", size="765606", width="1327", filename="06.jpg"), + ImageMetadata(image_index=7, height="2039", size="876427", width="1327", filename="07.jpg"), + ImageMetadata(image_index=8, height="2039", size="852622", width="1327", filename="08.jpg"), + ImageMetadata(image_index=9, height="2039", size="800205", width="1327", filename="09.jpg"), + ImageMetadata(image_index=10, height="2039", size="746243", width="1326", filename="10.jpg"), + ImageMetadata(image_index=11, height="2039", size="718062", width="1327", filename="11.jpg"), + ImageMetadata(image_index=12, height="2039", size="532179", width="1326", filename="12.jpg"), + ImageMetadata(image_index=13, height="2039", size="686708", width="1327", filename="13.jpg"), + ImageMetadata(image_index=14, height="2039", size="641907", width="1327", filename="14.jpg"), + ImageMetadata(image_index=15, height="2039", size="805388", width="1327", filename="15.jpg"), + ImageMetadata(image_index=16, height="2039", size="668927", width="1326", filename="16.jpg"), + ImageMetadata(image_index=17, height="2039", size="710605", width="1327", filename="17.jpg"), + ImageMetadata(image_index=18, height="2039", size="761398", width="1326", filename="18.jpg"), + ImageMetadata(image_index=19, height="2039", size="743807", width="1327", filename="19.jpg"), + ImageMetadata(image_index=20, height="2039", size="552911", width="1326", filename="20.jpg"), + ImageMetadata(image_index=21, height="2039", size="556827", width="1327", filename="21.jpg"), + ImageMetadata(image_index=22, height="2039", size="675078", width="1326", filename="22.jpg"), ImageMetadata( bookmark="Interview", image_index=23, @@ -516,6 +534,7 @@ md_test: GenericMetadata = GenericMetadata( size="800965", width="1338", type=PageType.Letters, + filename="23.jpg", ), ], price=None, diff --git a/comicapi/metadata/comet.py b/comicapi/metadata/comet.py index 4e2106a..688a6e7 100644 --- a/comicapi/metadata/comet.py +++ b/comicapi/metadata/comet.py @@ -22,7 +22,8 @@ from typing import Any from comicapi import utils from comicapi.archivers import Archiver -from comicapi.genericmetadata import GenericMetadata +from comicapi.comicarchive import ComicArchive +from comicapi.genericmetadata import GenericMetadata, ImageMetadata, PageType from comicapi.metadata import Metadata logger = logging.getLogger(__name__) @@ -107,7 +108,7 @@ class CoMet(Metadata): if self.has_metadata(archive): metadata = archive.read_file(self.file) or b"" if self._validate_bytes(metadata): - return self._metadata_from_bytes(metadata) + return self._metadata_from_bytes(metadata, archive) return GenericMetadata() def get_metadata_string(self, archive: Archiver) -> str: @@ -140,9 +141,9 @@ class CoMet(Metadata): parsable_credits.extend(cls._editor_synonyms) return parsable_credits - def _metadata_from_bytes(self, string: bytes) -> GenericMetadata: + def _metadata_from_bytes(self, string: bytes, archive: Archiver) -> GenericMetadata: tree = ET.ElementTree(ET.fromstring(string)) - return self._convert_xml_to_metadata(tree) + return self._convert_xml_to_metadata(tree, archive) def _bytes_from_metadata(self, metadata: GenericMetadata) -> bytes: tree = self._convert_metadata_to_xml(metadata) @@ -197,7 +198,8 @@ class CoMet(Metadata): date_str += f"-{md.month:02}" assign("date", date_str) - assign("coverImage", md._cover_image) + page = md.get_cover_page_index_list()[0] + assign("coverImage", md.pages[page]["filename"]) # loop thru credits, and build a list for each role that CoMet supports for credit in metadata.credits: @@ -228,7 +230,7 @@ class CoMet(Metadata): tree = ET.ElementTree(root) return tree - def _convert_xml_to_metadata(self, tree: ET.ElementTree) -> GenericMetadata: + def _convert_xml_to_metadata(self, tree: ET.ElementTree, archive: Archiver) -> GenericMetadata: root = tree.getroot() if root.tag != "comet": @@ -262,7 +264,12 @@ class CoMet(Metadata): _, md.month, md.year = utils.parse_date_str(utils.xlate(get("date"))) - md._cover_image = utils.xlate(get("coverImage")) + ca = ComicArchive(archive) + cover_filename = utils.xlate(get("coverImage")) + page_list = ca.get_page_name_list() + if cover_filename in page_list: + cover_index = page_list.index(cover_filename) + md.pages = [ImageMetadata(image_index=cover_index, filename=cover_filename, type=PageType.FrontCover)] reading_direction = utils.xlate(get("readingDirection")) if reading_direction is not None and reading_direction == "rtl": diff --git a/comicapi/metadata/comicrack.py b/comicapi/metadata/comicrack.py index 01a79f6..5e4e3fc 100644 --- a/comicapi/metadata/comicrack.py +++ b/comicapi/metadata/comicrack.py @@ -17,7 +17,7 @@ from __future__ import annotations import logging import xml.etree.ElementTree as ET from collections import OrderedDict -from typing import Any, cast +from typing import Any from comicapi import utils from comicapi.archivers import Archiver @@ -352,16 +352,14 @@ class ComicRack(Metadata): # parse page data now pages_node = root.find("Pages") if pages_node is not None: - for page in pages_node: + for i, page in enumerate(pages_node): p: dict[str, Any] = page.attrib - md_page = ImageMetadata() + md_page = ImageMetadata(image_index=int(p.get("Image", i))) if "Bookmark" in p: md_page["bookmark"] = p["Bookmark"] if "DoublePage" in p: md_page["double_page"] = True if p["DoublePage"].casefold() in ("yes", "true", "1") else False - if "Image" in p: - md_page["image_index"] = int(p["Image"]) if "ImageHeight" in p: md_page["height"] = p["ImageHeight"] if "ImageSize" in p: @@ -371,7 +369,7 @@ class ComicRack(Metadata): if "Type" in p: md_page["type"] = p["Type"] - md.pages.append(cast(ImageMetadata, md_page)) + md.pages.append(md_page) md.is_empty = False diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 0e750f5..295e2a6 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -245,7 +245,7 @@ class CLI: def create_local_metadata(self, ca: ComicArchive) -> GenericMetadata: md = GenericMetadata() - md.set_default_page_list(ca.get_number_of_pages()) + md.apply_default_page_list(ca.get_page_name_list()) # now, overlay the parsed filename info if self.config.Runtime_Options__parse_filename: diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index 9bcf76e..3a09a8f 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -679,16 +679,17 @@ class TaggerWindow(QtWidgets.QMainWindow): self.fileSelectionList.add_path_list(self.droppedFiles) event.accept() - def actual_load_current_archive(self) -> None: - if self.metadata.is_empty and self.comic_archive is not None: - self.metadata = self.comic_archive.metadata_from_filename( - self.config[0].Filename_Parsing__complicated_parser, - self.config[0].Filename_Parsing__remove_c2c, - self.config[0].Filename_Parsing__remove_fcbd, - self.config[0].Filename_Parsing__remove_publisher, - ) - if len(self.metadata.pages) == 0 and self.comic_archive is not None: - self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages()) + def update_ui_for_archive(self, parse_filename: bool = True) -> None: + if self.comic_archive is not None: + if self.metadata.is_empty and parse_filename: + self.metadata = self.comic_archive.metadata_from_filename( + self.config[0].Filename_Parsing__complicated_parser, + self.config[0].Filename_Parsing__remove_c2c, + self.config[0].Filename_Parsing__remove_fcbd, + self.config[0].Filename_Parsing__remove_publisher, + ) + + self.metadata.apply_default_page_list(self.comic_archive.get_page_name_list()) self.update_cover_image() @@ -795,15 +796,13 @@ class TaggerWindow(QtWidgets.QMainWindow): def clear_form(self) -> None: # get a minty fresh metadata object self.metadata = GenericMetadata() - if self.comic_archive is not None: - self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages()) - self.page_list_editor.set_data(self.comic_archive, self.metadata.pages) # recursively clear the tab form self.clear_children(self.tabWidget) # clear the dirty flag, since there is nothing in there now to lose self.clear_dirty_flag() + self.update_ui_for_archive(parse_filename=False) def clear_children(self, widget: QtCore.QObject) -> None: if isinstance(widget, (QtWidgets.QLineEdit, QtWidgets.QTextEdit)): @@ -1172,7 +1171,7 @@ class TaggerWindow(QtWidgets.QMainWindow): self.fileSelectionList.update_current_row() self.metadata = self.comic_archive.read_metadata(self.load_data_style) - self.actual_load_current_archive() + self.update_ui_for_archive() else: QtWidgets.QMessageBox.information(self, "Whoops!", "No data to commit!") @@ -2086,7 +2085,7 @@ class TaggerWindow(QtWidgets.QMainWindow): self.exception(f"Failed to load metadata for {self.comic_archive.path}:\n\n{e}") self.metadata = GenericMetadata() - self.actual_load_current_archive() + self.update_ui_for_archive() def file_list_cleared(self) -> None: self.reset_app() diff --git a/tests/comicarchive_test.py b/tests/comicarchive_test.py index 1f4fb8e..996ae34 100644 --- a/tests/comicarchive_test.py +++ b/tests/comicarchive_test.py @@ -44,7 +44,7 @@ def test_metadata_read(cbz, md_saved): def test_save_cr(tmp_comic): md = tmp_comic.read_metadata("cr") - md.set_default_page_list(tmp_comic.get_number_of_pages()) + md.apply_default_page_list(tmp_comic.get_page_name_list()) assert tmp_comic.write_metadata(md, "cr") @@ -53,7 +53,7 @@ def test_save_cr(tmp_comic): def test_save_cbi(tmp_comic): md = tmp_comic.read_metadata("cr") - md.set_default_page_list(tmp_comic.get_number_of_pages()) + md.apply_default_page_list(tmp_comic.get_page_name_list()) assert tmp_comic.write_metadata(md, "cbi") @@ -70,6 +70,10 @@ def test_save_cr_rar(tmp_path, md_saved): assert tmp_comic.write_metadata(comicapi.genericmetadata.md_test, "cr") md = tmp_comic.read_metadata("cr") + + # This is a fake CBR we don't need to care about the pages for this test + md.pages = [] + md_saved.pages = [] assert md == md_saved diff --git a/tests/genericmetadata_test.py b/tests/genericmetadata_test.py index 30ee097..984a370 100644 --- a/tests/genericmetadata_test.py +++ b/tests/genericmetadata_test.py @@ -6,11 +6,11 @@ import comicapi.genericmetadata from testing.comicdata import credits, metadata -def test_set_default_page_list(tmp_path): +def test_apply_default_page_list(tmp_path): md = comicapi.genericmetadata.GenericMetadata() md.overlay(comicapi.genericmetadata.md_test) md.pages = [] - md.set_default_page_list(len(comicapi.genericmetadata.md_test.pages)) + md.apply_default_page_list(["testing"]) assert isinstance(md.pages[0]["image_index"], int) diff --git a/tests/integration_test.py b/tests/integration_test.py index ce751f4..ae6d0f0 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -87,7 +87,7 @@ def test_delete( # Currently we set the default page list on load empty_md = comicapi.genericmetadata.GenericMetadata() - # empty_md.set_default_page_list(tmp_comic.get_number_of_pages()) + empty_md.apply_default_page_list(tmp_comic.get_page_name_list()) # Validate that we got an empty metadata back assert md == empty_md diff --git a/tests/metadata_test.py b/tests/metadata_test.py index 635a59f..c59f205 100644 --- a/tests/metadata_test.py +++ b/tests/metadata_test.py @@ -17,9 +17,24 @@ for x in entry_points(group="comicapi.metadata"): @pytest.mark.parametrize("metadata", metadata_styles) -def test_metadata(mock_version, tmp_comic, md, metadata): +def test_metadata(mock_version, tmp_comic, md_saved, metadata): md_style = metadata(mock_version[0]) supported_attributes = md_style.supported_attributes md_style.set_metadata(comicapi.genericmetadata.md_test, tmp_comic.archiver) written_metadata = md_style.get_metadata(tmp_comic.archiver) - assert written_metadata.get_clean_metadata(*supported_attributes) == md.get_clean_metadata(*supported_attributes) + md = md_saved.get_clean_metadata(*supported_attributes) + + # Hack back in the pages variable because CoMet supports identifying the cover by the filename + if md_style.short_name == "comet": + md.pages = [ + comicapi.genericmetadata.ImageMetadata( + image_index=0, filename="!cover.jpg", type=comicapi.genericmetadata.PageType.FrontCover + ) + ] + written_metadata = written_metadata.get_clean_metadata(*supported_attributes).replace( + pages=written_metadata.pages + ) + else: + written_metadata = written_metadata.get_clean_metadata(*supported_attributes) + + assert written_metadata == md