Add better page info handling

Rename set_default_page_list to apply_default_page_list and apply it
 during read_metadata
Add a filename attribute to the ImageMetadata class
Mark image_index as required
Always sort the page name list; a comic application will never need the
 unsorted list of names
Assign the filename of the first page returned by
 get_cover_page_index_list to coverImage in CoMet tags
Allow an Archiver to be passed to the ComicArchive constructor
This commit is contained in:
Timmy Welch 2023-12-18 02:37:34 -08:00
parent 989470772f
commit 9c231d7e11
10 changed files with 124 additions and 75 deletions

View File

@ -81,16 +81,22 @@ class ComicArchive:
logo_data = b""
pil_available = True
def __init__(self, path: pathlib.Path | str, default_image_path: pathlib.Path | str | None = None) -> None:
def __init__(
self, path: pathlib.Path | str | Archiver, default_image_path: pathlib.Path | str | None = None
) -> None:
self.md: dict[str, GenericMetadata] = {}
self.path = pathlib.Path(path).absolute()
self.page_count: int | None = None
self.page_list: list[str] = []
self.reset_cache()
self.default_image_path = default_image_path
self.archiver: Archiver = UnknownArchiver.open(self.path)
if isinstance(path, Archiver):
self.path = path.path
self.archiver: Archiver = path
else:
self.path = pathlib.Path(path).absolute()
self.archiver = UnknownArchiver.open(self.path)
load_archive_plugins()
load_metadata_plugins()
@ -161,7 +167,9 @@ class ComicArchive:
def read_metadata(self, style: str) -> GenericMetadata:
if style in self.md:
return self.md[style]
return metadata_styles[style].get_metadata(self.archiver)
md = metadata_styles[style].get_metadata(self.archiver)
md.apply_default_page_list(self.get_page_name_list())
return md
def read_metadata_string(self, style: str) -> str:
return metadata_styles[style].get_metadata_string(self.archiver)
@ -258,14 +266,12 @@ class ComicArchive:
return scanner_page_index
def get_page_name_list(self, sort_list: bool = True) -> list[str]:
def get_page_name_list(self) -> list[str]:
if not self.page_list:
# get the list file names in the archive, and sort
files: list[str] = self.archiver.get_filename_list()
# seems like some archive creators are on Windows, and don't know about case-sensitivity!
if sort_list:
files = cast(list[str], utils.os_sorted(files))
files = cast(list[str], utils.os_sorted(files))
# make a sub-list of image files
self.page_list = []
@ -289,6 +295,7 @@ class ComicArchive:
if calc_page_sizes:
for index, p in enumerate(md.pages):
idx = int(p["image_index"])
p["filename"] = self.get_page_name(idx)
if self.pil_available:
try:
from PIL import Image

View File

@ -26,7 +26,7 @@ import logging
from collections.abc import Sequence
from typing import Any, TypedDict
from typing_extensions import NamedTuple
from typing_extensions import NamedTuple, Required
from comicapi import utils
@ -54,10 +54,11 @@ class PageType:
class ImageMetadata(TypedDict, total=False):
filename: str
type: str
bookmark: str
double_page: bool
image_index: int
image_index: Required[int]
size: str
height: str
width: str
@ -286,13 +287,28 @@ class GenericMetadata:
else:
self.add_credit(c["person"], c["role"], primary)
def apply_default_page_list(self, page_list: Sequence[str]) -> None:
    """Merge the archive's page file names into ``self.pages``.

    Each name in *page_list* is matched to the page entry whose
    ``image_index`` equals the name's position in the list. Existing
    entries get their ``filename`` refreshed and missing entries are
    created. If none of the pages matched this way is typed as the front
    cover, the first page of the resulting list is marked as the cover.

    Args:
        page_list: Page file names; position ``i`` is paired with the page
            whose ``image_index`` is ``i``.
    """
    # Index the pages already present in the metadata by their image_index
    pages = {p["image_index"]: p for p in self.pages}
    cover_set = False

    # Go through each page in the archive; the indexes should always match up.
    # NOTE(review): metadata pages with no matching archive entry are kept
    # as-is and are not validated here.
    for i, filename in enumerate(page_list):
        page = pages.get(i)
        if page is None:
            page = pages[i] = ImageMetadata(image_index=i, filename=filename)
        else:
            page["filename"] = filename

        # Check if we know what the cover is
        if page.get("type") == PageType.FrontCover:
            cover_set = True

    # Rebuild the list ordered by image_index
    self.pages = [pages[index] for index in sorted(pages)]

    # Set the cover to the first image if we don't know what the cover is.
    # With no archive pages and no page metadata there is nothing to mark,
    # and indexing an empty list would raise IndexError.
    if not cover_set and self.pages:
        self.pages[0]["type"] = PageType.FrontCover
def get_archive_page_index(self, pagenum: int) -> int:
# convert the displayed page number to the page index of the file in the archive
@ -486,29 +502,31 @@ md_test: GenericMetadata = GenericMetadata(
],
tags=set(),
pages=[
ImageMetadata(image_index=0, height="1280", size="195977", width="800", type=PageType.FrontCover),
ImageMetadata(image_index=1, height="2039", size="611993", width="1327"),
ImageMetadata(image_index=2, height="2039", size="783726", width="1327"),
ImageMetadata(image_index=3, height="2039", size="679584", width="1327"),
ImageMetadata(image_index=4, height="2039", size="788179", width="1327"),
ImageMetadata(image_index=5, height="2039", size="864433", width="1327"),
ImageMetadata(image_index=6, height="2039", size="765606", width="1327"),
ImageMetadata(image_index=7, height="2039", size="876427", width="1327"),
ImageMetadata(image_index=8, height="2039", size="852622", width="1327"),
ImageMetadata(image_index=9, height="2039", size="800205", width="1327"),
ImageMetadata(image_index=10, height="2039", size="746243", width="1326"),
ImageMetadata(image_index=11, height="2039", size="718062", width="1327"),
ImageMetadata(image_index=12, height="2039", size="532179", width="1326"),
ImageMetadata(image_index=13, height="2039", size="686708", width="1327"),
ImageMetadata(image_index=14, height="2039", size="641907", width="1327"),
ImageMetadata(image_index=15, height="2039", size="805388", width="1327"),
ImageMetadata(image_index=16, height="2039", size="668927", width="1326"),
ImageMetadata(image_index=17, height="2039", size="710605", width="1327"),
ImageMetadata(image_index=18, height="2039", size="761398", width="1326"),
ImageMetadata(image_index=19, height="2039", size="743807", width="1327"),
ImageMetadata(image_index=20, height="2039", size="552911", width="1326"),
ImageMetadata(image_index=21, height="2039", size="556827", width="1327"),
ImageMetadata(image_index=22, height="2039", size="675078", width="1326"),
ImageMetadata(
image_index=0, height="1280", size="195977", width="800", type=PageType.FrontCover, filename="!cover.jpg"
),
ImageMetadata(image_index=1, height="2039", size="611993", width="1327", filename="01.jpg"),
ImageMetadata(image_index=2, height="2039", size="783726", width="1327", filename="02.jpg"),
ImageMetadata(image_index=3, height="2039", size="679584", width="1327", filename="03.jpg"),
ImageMetadata(image_index=4, height="2039", size="788179", width="1327", filename="04.jpg"),
ImageMetadata(image_index=5, height="2039", size="864433", width="1327", filename="05.jpg"),
ImageMetadata(image_index=6, height="2039", size="765606", width="1327", filename="06.jpg"),
ImageMetadata(image_index=7, height="2039", size="876427", width="1327", filename="07.jpg"),
ImageMetadata(image_index=8, height="2039", size="852622", width="1327", filename="08.jpg"),
ImageMetadata(image_index=9, height="2039", size="800205", width="1327", filename="09.jpg"),
ImageMetadata(image_index=10, height="2039", size="746243", width="1326", filename="10.jpg"),
ImageMetadata(image_index=11, height="2039", size="718062", width="1327", filename="11.jpg"),
ImageMetadata(image_index=12, height="2039", size="532179", width="1326", filename="12.jpg"),
ImageMetadata(image_index=13, height="2039", size="686708", width="1327", filename="13.jpg"),
ImageMetadata(image_index=14, height="2039", size="641907", width="1327", filename="14.jpg"),
ImageMetadata(image_index=15, height="2039", size="805388", width="1327", filename="15.jpg"),
ImageMetadata(image_index=16, height="2039", size="668927", width="1326", filename="16.jpg"),
ImageMetadata(image_index=17, height="2039", size="710605", width="1327", filename="17.jpg"),
ImageMetadata(image_index=18, height="2039", size="761398", width="1326", filename="18.jpg"),
ImageMetadata(image_index=19, height="2039", size="743807", width="1327", filename="19.jpg"),
ImageMetadata(image_index=20, height="2039", size="552911", width="1326", filename="20.jpg"),
ImageMetadata(image_index=21, height="2039", size="556827", width="1327", filename="21.jpg"),
ImageMetadata(image_index=22, height="2039", size="675078", width="1326", filename="22.jpg"),
ImageMetadata(
bookmark="Interview",
image_index=23,
@ -516,6 +534,7 @@ md_test: GenericMetadata = GenericMetadata(
size="800965",
width="1338",
type=PageType.Letters,
filename="23.jpg",
),
],
price=None,

View File

@ -22,7 +22,8 @@ from typing import Any
from comicapi import utils
from comicapi.archivers import Archiver
from comicapi.genericmetadata import GenericMetadata
from comicapi.comicarchive import ComicArchive
from comicapi.genericmetadata import GenericMetadata, ImageMetadata, PageType
from comicapi.metadata import Metadata
logger = logging.getLogger(__name__)
@ -107,7 +108,7 @@ class CoMet(Metadata):
if self.has_metadata(archive):
metadata = archive.read_file(self.file) or b""
if self._validate_bytes(metadata):
return self._metadata_from_bytes(metadata)
return self._metadata_from_bytes(metadata, archive)
return GenericMetadata()
def get_metadata_string(self, archive: Archiver) -> str:
@ -140,9 +141,9 @@ class CoMet(Metadata):
parsable_credits.extend(cls._editor_synonyms)
return parsable_credits
def _metadata_from_bytes(self, string: bytes) -> GenericMetadata:
def _metadata_from_bytes(self, string: bytes, archive: Archiver) -> GenericMetadata:
tree = ET.ElementTree(ET.fromstring(string))
return self._convert_xml_to_metadata(tree)
return self._convert_xml_to_metadata(tree, archive)
def _bytes_from_metadata(self, metadata: GenericMetadata) -> bytes:
tree = self._convert_metadata_to_xml(metadata)
@ -197,7 +198,8 @@ class CoMet(Metadata):
date_str += f"-{md.month:02}"
assign("date", date_str)
assign("coverImage", md._cover_image)
page = md.get_cover_page_index_list()[0]
assign("coverImage", md.pages[page]["filename"])
# loop thru credits, and build a list for each role that CoMet supports
for credit in metadata.credits:
@ -228,7 +230,7 @@ class CoMet(Metadata):
tree = ET.ElementTree(root)
return tree
def _convert_xml_to_metadata(self, tree: ET.ElementTree) -> GenericMetadata:
def _convert_xml_to_metadata(self, tree: ET.ElementTree, archive: Archiver) -> GenericMetadata:
root = tree.getroot()
if root.tag != "comet":
@ -262,7 +264,12 @@ class CoMet(Metadata):
_, md.month, md.year = utils.parse_date_str(utils.xlate(get("date")))
md._cover_image = utils.xlate(get("coverImage"))
ca = ComicArchive(archive)
cover_filename = utils.xlate(get("coverImage"))
page_list = ca.get_page_name_list()
if cover_filename in page_list:
cover_index = page_list.index(cover_filename)
md.pages = [ImageMetadata(image_index=cover_index, filename=cover_filename, type=PageType.FrontCover)]
reading_direction = utils.xlate(get("readingDirection"))
if reading_direction is not None and reading_direction == "rtl":

View File

@ -17,7 +17,7 @@ from __future__ import annotations
import logging
import xml.etree.ElementTree as ET
from collections import OrderedDict
from typing import Any, cast
from typing import Any
from comicapi import utils
from comicapi.archivers import Archiver
@ -352,16 +352,14 @@ class ComicRack(Metadata):
# parse page data now
pages_node = root.find("Pages")
if pages_node is not None:
for page in pages_node:
for i, page in enumerate(pages_node):
p: dict[str, Any] = page.attrib
md_page = ImageMetadata()
md_page = ImageMetadata(image_index=int(p.get("Image", i)))
if "Bookmark" in p:
md_page["bookmark"] = p["Bookmark"]
if "DoublePage" in p:
md_page["double_page"] = True if p["DoublePage"].casefold() in ("yes", "true", "1") else False
if "Image" in p:
md_page["image_index"] = int(p["Image"])
if "ImageHeight" in p:
md_page["height"] = p["ImageHeight"]
if "ImageSize" in p:
@ -371,7 +369,7 @@ class ComicRack(Metadata):
if "Type" in p:
md_page["type"] = p["Type"]
md.pages.append(cast(ImageMetadata, md_page))
md.pages.append(md_page)
md.is_empty = False

View File

@ -245,7 +245,7 @@ class CLI:
def create_local_metadata(self, ca: ComicArchive) -> GenericMetadata:
md = GenericMetadata()
md.set_default_page_list(ca.get_number_of_pages())
md.apply_default_page_list(ca.get_page_name_list())
# now, overlay the parsed filename info
if self.config.Runtime_Options__parse_filename:

View File

@ -679,16 +679,17 @@ class TaggerWindow(QtWidgets.QMainWindow):
self.fileSelectionList.add_path_list(self.droppedFiles)
event.accept()
def actual_load_current_archive(self) -> None:
if self.metadata.is_empty and self.comic_archive is not None:
self.metadata = self.comic_archive.metadata_from_filename(
self.config[0].Filename_Parsing__complicated_parser,
self.config[0].Filename_Parsing__remove_c2c,
self.config[0].Filename_Parsing__remove_fcbd,
self.config[0].Filename_Parsing__remove_publisher,
)
if len(self.metadata.pages) == 0 and self.comic_archive is not None:
self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages())
def update_ui_for_archive(self, parse_filename: bool = True) -> None:
if self.comic_archive is not None:
if self.metadata.is_empty and parse_filename:
self.metadata = self.comic_archive.metadata_from_filename(
self.config[0].Filename_Parsing__complicated_parser,
self.config[0].Filename_Parsing__remove_c2c,
self.config[0].Filename_Parsing__remove_fcbd,
self.config[0].Filename_Parsing__remove_publisher,
)
self.metadata.apply_default_page_list(self.comic_archive.get_page_name_list())
self.update_cover_image()
@ -795,15 +796,13 @@ class TaggerWindow(QtWidgets.QMainWindow):
def clear_form(self) -> None:
# get a minty fresh metadata object
self.metadata = GenericMetadata()
if self.comic_archive is not None:
self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages())
self.page_list_editor.set_data(self.comic_archive, self.metadata.pages)
# recursively clear the tab form
self.clear_children(self.tabWidget)
# clear the dirty flag, since there is nothing in there now to lose
self.clear_dirty_flag()
self.update_ui_for_archive(parse_filename=False)
def clear_children(self, widget: QtCore.QObject) -> None:
if isinstance(widget, (QtWidgets.QLineEdit, QtWidgets.QTextEdit)):
@ -1172,7 +1171,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
self.fileSelectionList.update_current_row()
self.metadata = self.comic_archive.read_metadata(self.load_data_style)
self.actual_load_current_archive()
self.update_ui_for_archive()
else:
QtWidgets.QMessageBox.information(self, "Whoops!", "No data to commit!")
@ -2086,7 +2085,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
self.exception(f"Failed to load metadata for {self.comic_archive.path}:\n\n{e}")
self.metadata = GenericMetadata()
self.actual_load_current_archive()
self.update_ui_for_archive()
def file_list_cleared(self) -> None:
self.reset_app()

View File

@ -44,7 +44,7 @@ def test_metadata_read(cbz, md_saved):
def test_save_cr(tmp_comic):
md = tmp_comic.read_metadata("cr")
md.set_default_page_list(tmp_comic.get_number_of_pages())
md.apply_default_page_list(tmp_comic.get_page_name_list())
assert tmp_comic.write_metadata(md, "cr")
@ -53,7 +53,7 @@ def test_save_cr(tmp_comic):
def test_save_cbi(tmp_comic):
md = tmp_comic.read_metadata("cr")
md.set_default_page_list(tmp_comic.get_number_of_pages())
md.apply_default_page_list(tmp_comic.get_page_name_list())
assert tmp_comic.write_metadata(md, "cbi")
@ -70,6 +70,10 @@ def test_save_cr_rar(tmp_path, md_saved):
assert tmp_comic.write_metadata(comicapi.genericmetadata.md_test, "cr")
md = tmp_comic.read_metadata("cr")
# This is a fake CBR we don't need to care about the pages for this test
md.pages = []
md_saved.pages = []
assert md == md_saved

View File

@ -6,11 +6,11 @@ import comicapi.genericmetadata
from testing.comicdata import credits, metadata
def test_set_default_page_list(tmp_path):
def test_apply_default_page_list(tmp_path):
md = comicapi.genericmetadata.GenericMetadata()
md.overlay(comicapi.genericmetadata.md_test)
md.pages = []
md.set_default_page_list(len(comicapi.genericmetadata.md_test.pages))
md.apply_default_page_list(["testing"])
assert isinstance(md.pages[0]["image_index"], int)

View File

@ -87,7 +87,7 @@ def test_delete(
# Currently we set the default page list on load
empty_md = comicapi.genericmetadata.GenericMetadata()
# empty_md.set_default_page_list(tmp_comic.get_number_of_pages())
empty_md.apply_default_page_list(tmp_comic.get_page_name_list())
# Validate that we got an empty metadata back
assert md == empty_md

View File

@ -17,9 +17,24 @@ for x in entry_points(group="comicapi.metadata"):
@pytest.mark.parametrize("metadata", metadata_styles)
def test_metadata(mock_version, tmp_comic, md, metadata):
def test_metadata(mock_version, tmp_comic, md_saved, metadata):
md_style = metadata(mock_version[0])
supported_attributes = md_style.supported_attributes
md_style.set_metadata(comicapi.genericmetadata.md_test, tmp_comic.archiver)
written_metadata = md_style.get_metadata(tmp_comic.archiver)
assert written_metadata.get_clean_metadata(*supported_attributes) == md.get_clean_metadata(*supported_attributes)
md = md_saved.get_clean_metadata(*supported_attributes)
# Hack back in the pages variable because CoMet supports identifying the cover by the filename
if md_style.short_name == "comet":
md.pages = [
comicapi.genericmetadata.ImageMetadata(
image_index=0, filename="!cover.jpg", type=comicapi.genericmetadata.PageType.FrontCover
)
]
written_metadata = written_metadata.get_clean_metadata(*supported_attributes).replace(
pages=written_metadata.pages
)
else:
written_metadata = written_metadata.get_clean_metadata(*supported_attributes)
assert written_metadata == md