diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py index 6a7a398..721b0e3 100644 --- a/comicapi/comicarchive.py +++ b/comicapi/comicarchive.py @@ -368,7 +368,7 @@ class RarArchiver(UnknownArchiver): def __init__(self, path: pathlib.Path | str, rar_exe_path: str = "rar") -> None: super().__init__(path) - self.rar_exe_path = rar_exe_path + self.rar_exe_path = shutil.which(rar_exe_path) or "" # windows only, keeps the cmd.exe from popping up if platform.system() == "Windows": @@ -687,7 +687,7 @@ class ComicArchive: self.page_count: int | None = None self.page_list: list[str] = [] - self.rar_exe_path = rar_exe_path + self.rar_exe_path = shutil.which(rar_exe_path or "rar") or "" self.ci_xml_filename = "ComicInfo.xml" self.comet_default_filename = "CoMet.xml" self.reset_cache() @@ -746,7 +746,12 @@ class ComicArchive: self.read_metadata(style) def rename(self, path: pathlib.Path | str) -> None: - self.path = pathlib.Path(path) + new_path = pathlib.Path(path).absolute() + if new_path == self.path: + return + os.makedirs(new_path.parent, 0o777, True) + shutil.move(path, new_path) + self.path = new_path self.archiver.path = pathlib.Path(path) def sevenzip_test(self) -> bool: @@ -863,7 +868,7 @@ class ComicArchive: def get_page_name(self, index: int) -> str: if index is None: - return None + return "" page_list = self.get_page_name_list() @@ -1148,8 +1153,8 @@ class ComicArchive: for n in self.archiver.get_filename_list(): if os.path.dirname(n) == "" and os.path.splitext(n)[1].casefold() == ".xml": # read in XML file, and validate it + data = "" try: - data = "" d = self.archiver.read_file(n) if d: data = d.decode("utf-8") @@ -1247,10 +1252,10 @@ class ComicArchive: return metadata - def export_as_zip(self, zipfilename: str) -> bool: + def export_as_zip(self, zip_filename: pathlib.Path | str) -> bool: if self.archive_type == self.ArchiveType.Zip: # nothing to do, we're already a zip return True - zip_archiver = ZipArchiver(zipfilename) + zip_archiver = ZipArchiver(zip_filename) return zip_archiver.copy_from_archive(self.archiver) diff --git a/comicapi/comicbookinfo.py b/comicapi/comicbookinfo.py index 9201dfb..88223d6 100644 --- a/comicapi/comicbookinfo.py +++ b/comicapi/comicbookinfo.py @@ -98,13 +98,11 @@ class ComicBookInfo: metadata.critical_rating = utils.xlate(cbi["rating"], True) metadata.credits = cbi["credits"] - metadata.tags = cbi["tags"] + metadata.tags = set(cbi["tags"]) if cbi["tags"] is not None else set() # make sure credits and tags are at least empty lists and not None if metadata.credits is None: metadata.credits = [] - if metadata.tags is None: - metadata.tags = [] # need the language string to be ISO if metadata.language is not None: @@ -133,7 +131,7 @@ class ComicBookInfo: cbi_container = CBIContainer( { - "appID": "ComicTagger/" + "1.0.0", + "appID": "ComicTagger/1.0.0", "lastModified": str(datetime.now()), "ComicBookInfo/1.0": {}, } diff --git a/comicapi/comicinfoxml.py b/comicapi/comicinfoxml.py index bade43a..10a599a 100644 --- a/comicapi/comicinfoxml.py +++ b/comicapi/comicinfoxml.py @@ -54,13 +54,11 @@ class ComicInfoXml: return self.convert_xml_to_metadata(tree) def string_from_metadata(self, metadata: GenericMetadata, xml: bytes = b"") -> str: - tree = self.convert_metadata_to_xml(self, metadata, xml) + tree = self.convert_metadata_to_xml(metadata, xml) tree_str = ET.tostring(tree.getroot(), encoding="utf-8", xml_declaration=True).decode("utf-8") return str(tree_str) - def convert_metadata_to_xml( - self, filename: ComicInfoXml, metadata: GenericMetadata, xml: bytes = b"" - ) -> ElementTree: + def convert_metadata_to_xml(self, metadata: GenericMetadata, xml: bytes = b"") -> ElementTree: # shorthand for the metadata md = metadata @@ -261,6 +259,8 @@ class ComicInfoXml: p: dict[str, Any] = page.attrib if "Image" in p: p["Image"] = int(p["Image"]) + if "DoublePage" in p: + p["DoublePage"] = True if p["DoublePage"].casefold() in ("yes", "true", "1") else False md.pages.append(cast(ImageMetadata, p)) md.is_empty = False @@ -268,7 +268,7 @@ class ComicInfoXml: return md def write_to_external_file(self, filename: str, metadata: GenericMetadata, xml: bytes = b"") -> None: - tree = self.convert_metadata_to_xml(self, metadata, xml) + tree = self.convert_metadata_to_xml(metadata, xml) tree.write(filename, encoding="utf-8", xml_declaration=True) def read_from_external_file(self, filename: str) -> GenericMetadata: diff --git a/comicapi/filenamelexer.py b/comicapi/filenamelexer.py index c8d134a..c35d83f 100644 --- a/comicapi/filenamelexer.py +++ b/comicapi/filenamelexer.py @@ -1,5 +1,5 @@ # Extracted and mutilated from https://github.com/lordwelch/wsfmt -# Which was extracted and mutliated from https://github.com/golang/go/tree/master/src/text/template/parse +# Which was extracted and mutilated from https://github.com/golang/go/tree/master/src/text/template/parse from __future__ import annotations import calendar @@ -138,7 +138,7 @@ class Lexer: # AcceptRun consumes a run of runes from the valid set. def accept_run(self, valid: str) -> None: while self.get() in valid: - pass + continue self.backup() diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py index 393ffb4..c69102e 100644 --- a/comicapi/filenameparser.py +++ b/comicapi/filenameparser.py @@ -114,7 +114,7 @@ class FileNameParser: # remove any "of NN" phrase with spaces (problem: this could break on # some titles) - filename = re.sub(r"of [\d]+", self.repl, filename) + filename = re.sub(r"of \d+", self.repl, filename) # we should now have a cleaned up filename version with all the words in # the same positions as original filename @@ -143,7 +143,7 @@ class FileNameParser: # first look for a word with "#" followed by digits with optional suffix # this is almost certainly the issue number for w in reversed(word_list): - if re.match(r"#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): + if re.match(r"#-?((\d*\.\d+|\d+)(\w*))", w[0]): found = True break @@ -151,7 +151,7 @@ class FileNameParser: # list if not found: w = word_list[-1] - if re.match(r"[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): + if re.match(r"-?((\d*\.\d+|\d+)(\w*))", w[0]): found = True # now try to look for a # followed by any characters @@ -245,7 +245,7 @@ class FileNameParser: if match: year = match.group() # remove non-digits - year = re.sub(r"[^0-9]", "", year) + year = re.sub(r"\D", "", year) return year def get_remainder(self, filename: str, year: str, count: str, volume: str, issue_end: int) -> str: @@ -332,7 +332,7 @@ eof = filenamelexer.Item(filenamelexer.ItemType.EOF, -1, "") # Extracted and mutilated from https://github.com/lordwelch/wsfmt -# Which was extracted and mutliated from https://github.com/golang/go/tree/master/src/text/template/parse +# Which was extracted and mutilated from https://github.com/golang/go/tree/master/src/text/template/parse class Parser: """docstring for FilenameParser""" diff --git a/comicapi/genericmetadata.py b/comicapi/genericmetadata.py index 244ae9a..ccbc1c1 100644 --- a/comicapi/genericmetadata.py +++ b/comicapi/genericmetadata.py @@ -20,8 +20,9 @@ possible, however lossy it might be # limitations under the License. from __future__ import annotations +import copy +import dataclasses import logging -from dataclasses import dataclass, field from typing import Any, TypedDict from comicapi import utils @@ -65,7 +66,7 @@ class CreditMetadata(TypedDict): primary: bool -@dataclass +@dataclasses.dataclass class GenericMetadata: writer_synonyms = ["writer", "plotter", "scripter"] penciller_synonyms = ["artist", "penciller", "penciler", "breakdowns"] @@ -115,9 +116,9 @@ class GenericMetadata: teams: str | None = None locations: str | None = None - credits: list[CreditMetadata] = field(default_factory=list) - tags: list[str] = field(default_factory=list) - pages: list[ImageMetadata] = field(default_factory=list) + credits: list[CreditMetadata] = dataclasses.field(default_factory=list) + tags: set[str] = dataclasses.field(default_factory=set) + pages: list[ImageMetadata] = dataclasses.field(default_factory=list) # Some CoMet-only items price: str | None = None @@ -133,6 +134,14 @@ class GenericMetadata: self.is_empty = False break + def copy(self) -> GenericMetadata: + return copy.deepcopy(self) + + def replace(self, /, **kwargs: Any) -> GenericMetadata: + tmp = self.copy() + tmp.__dict__.update(kwargs) + return tmp + def overlay(self, new_md: GenericMetadata) -> None: """Overlay a metadata object on this one @@ -244,7 +253,7 @@ class GenericMetadata: def add_credit(self, person: str, role: str, primary: bool = False) -> None: - credit: CreditMetadata = {"person": person, "role": role, "primary": primary} + credit = CreditMetadata(person=person, role=role, primary=primary) # look to see if it's not already there... found = False @@ -360,91 +369,91 @@ class GenericMetadata: self.imprint = imprint -md_test = GenericMetadata() - -md_test.is_empty = False -md_test.tag_origin = None -md_test.series = "Cory Doctorow's Futuristic Tales of the Here and Now" -md_test.issue = "1" -md_test.title = "Anda's Game" -md_test.publisher = "IDW Publishing" -md_test.month = 10 -md_test.year = 2007 -md_test.day = 1 -md_test.issue_count = 6 -md_test.volume = 1 -md_test.genre = "Sci-Fi" -md_test.language = "en" -md_test.comments = ( - "For 12-year-old Anda, getting paid real money to kill the characters of players who were cheating" - " in her favorite online computer game was a win-win situation. Until she found out who was paying her," - " and what those characters meant to the livelihood of children around the world." +md_test: GenericMetadata = GenericMetadata( + is_empty=False, + tag_origin=None, + series="Cory Doctorow's Futuristic Tales of the Here and Now", + issue="1", + title="Anda's Game", + publisher="IDW Publishing", + month=10, + year=2007, + day=1, + issue_count=6, + volume=1, + genre="Sci-Fi", + language="en", + comments=( + "For 12-year-old Anda, getting paid real money to kill the characters of players who were cheating" + " in her favorite online computer game was a win-win situation. Until she found out who was paying her," + " and what those characters meant to the livelihood of children around the world." + ), + volume_count=None, + critical_rating=3.0, + country=None, + alternate_series="Tales", + alternate_number="2", + alternate_count=7, + imprint="craphound.com", + notes="Tagged with ComicTagger 1.3.2a5 using info from Comic Vine on 2022-04-16 15:52:26. [Issue ID 140529]", + web_link="https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/", + format="Series", + manga="No", + black_and_white=None, + page_count=24, + maturity_rating="Everyone 10+", + story_arc="Here and Now", + series_group="Futuristic Tales", + scan_info="(CC BY-NC-SA 3.0)", + characters="Anda", + teams="Fahrenheit", + locations="lonely cottage ", + credits=[ + CreditMetadata(primary=False, person="Dara Naraghi", role="Writer"), + CreditMetadata(primary=False, person="Esteve Polls", role="Penciller"), + CreditMetadata(primary=False, person="Esteve Polls", role="Inker"), + CreditMetadata(primary=False, person="Neil Uyetake", role="Letterer"), + CreditMetadata(primary=False, person="Sam Kieth", role="Cover"), + CreditMetadata(primary=False, person="Ted Adams", role="Editor"), + ], + tags=set(), + pages=[ + ImageMetadata(Image=0, ImageHeight="1280", ImageSize="195977", ImageWidth="800", Type=PageType.FrontCover), + ImageMetadata(Image=1, ImageHeight="2039", ImageSize="611993", ImageWidth="1327"), + ImageMetadata(Image=2, ImageHeight="2039", ImageSize="783726", ImageWidth="1327"), + ImageMetadata(Image=3, ImageHeight="2039", ImageSize="679584", ImageWidth="1327"), + ImageMetadata(Image=4, ImageHeight="2039", ImageSize="788179", ImageWidth="1327"), + ImageMetadata(Image=5, ImageHeight="2039", ImageSize="864433", ImageWidth="1327"), + ImageMetadata(Image=6, ImageHeight="2039", ImageSize="765606", ImageWidth="1327"), + ImageMetadata(Image=7, ImageHeight="2039", ImageSize="876427", ImageWidth="1327"), + ImageMetadata(Image=8, ImageHeight="2039", ImageSize="852622", ImageWidth="1327"), + ImageMetadata(Image=9, ImageHeight="2039", ImageSize="800205", ImageWidth="1327"), + ImageMetadata(Image=10, ImageHeight="2039", ImageSize="746243", ImageWidth="1326"), + ImageMetadata(Image=11, ImageHeight="2039", ImageSize="718062", ImageWidth="1327"), + ImageMetadata(Image=12, ImageHeight="2039", ImageSize="532179", ImageWidth="1326"), + ImageMetadata(Image=13, ImageHeight="2039", ImageSize="686708", ImageWidth="1327"), + ImageMetadata(Image=14, ImageHeight="2039", ImageSize="641907", ImageWidth="1327"), + ImageMetadata(Image=15, ImageHeight="2039", ImageSize="805388", ImageWidth="1327"), + ImageMetadata(Image=16, ImageHeight="2039", ImageSize="668927", ImageWidth="1326"), + ImageMetadata(Image=17, ImageHeight="2039", ImageSize="710605", ImageWidth="1327"), + ImageMetadata(Image=18, ImageHeight="2039", ImageSize="761398", ImageWidth="1326"), + ImageMetadata(Image=19, ImageHeight="2039", ImageSize="743807", ImageWidth="1327"), + ImageMetadata(Image=20, ImageHeight="2039", ImageSize="552911", ImageWidth="1326"), + ImageMetadata(Image=21, ImageHeight="2039", ImageSize="556827", ImageWidth="1327"), + ImageMetadata(Image=22, ImageHeight="2039", ImageSize="675078", ImageWidth="1326"), + ImageMetadata( + Bookmark="Interview", + Image=23, + ImageHeight="2032", + ImageSize="800965", + ImageWidth="1338", + Type=PageType.Letters, + ), + ], + price=None, + is_version_of=None, + rights=None, + identifier=None, + last_mark=None, + cover_image=None, ) -md_test.volume_count = None -md_test.critical_rating = 3.0 -md_test.country = None -md_test.alternate_series = "Tales" -md_test.alternate_number = "2" -md_test.alternate_count = 7 -md_test.imprint = "craphound.com" -md_test.notes = "Tagged with ComicTagger 1.3.2a5 using info from Comic Vine on 2022-04-16 15:52:26. [Issue ID 140529]" -md_test.web_link = "https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/" -md_test.format = "Series" -md_test.manga = "No" -md_test.black_and_white = None -md_test.page_count = 24 -md_test.maturity_rating = "Everyone 10+" -md_test.story_arc = "Here and Now" -md_test.series_group = "Futuristic Tales" -md_test.scan_info = "(CC BY-NC-SA 3.0)" -md_test.characters = "Anda" -md_test.teams = "Fahrenheit" -md_test.locations = "lonely cottage " -md_test.credits = [ - CreditMetadata({"primary": False, "person": "Dara Naraghi", "role": "Writer"}), - CreditMetadata({"primary": False, "person": "Esteve Polls", "role": "Penciller"}), - CreditMetadata({"primary": False, "person": "Esteve Polls", "role": "Inker"}), - CreditMetadata({"primary": False, "person": "Neil Uyetake", "role": "Letterer"}), - CreditMetadata({"primary": False, "person": "Sam Kieth", "role": "Cover"}), - CreditMetadata({"primary": False, "person": "Ted Adams", "role": "Editor"}), -] -md_test.tags = [] -md_test.pages = [ - {"Image": 0, "ImageHeight": "1280", "ImageSize": "195977", "ImageWidth": "800", "Type": PageType.FrontCover}, - {"Image": 1, "ImageHeight": "2039", "ImageSize": "611993", "ImageWidth": "1327"}, - {"Image": 2, "ImageHeight": "2039", "ImageSize": "783726", "ImageWidth": "1327"}, - {"Image": 3, "ImageHeight": "2039", "ImageSize": "679584", "ImageWidth": "1327"}, - {"Image": 4, "ImageHeight": "2039", "ImageSize": "788179", "ImageWidth": "1327"}, - {"Image": 5, "ImageHeight": "2039", "ImageSize": "864433", "ImageWidth": "1327"}, - {"Image": 6, "ImageHeight": "2039", "ImageSize": "765606", "ImageWidth": "1327"}, - {"Image": 7, "ImageHeight": "2039", "ImageSize": "876427", "ImageWidth": "1327"}, - {"Image": 8, "ImageHeight": "2039", "ImageSize": "852622", "ImageWidth": "1327"}, - {"Image": 9, "ImageHeight": "2039", "ImageSize": "800205", "ImageWidth": "1327"}, - {"Image": 10, "ImageHeight": "2039", "ImageSize": "746243", "ImageWidth": "1326"}, - {"Image": 11, "ImageHeight": "2039", "ImageSize": "718062", "ImageWidth": "1327"}, - {"Image": 12, "ImageHeight": "2039", "ImageSize": "532179", "ImageWidth": "1326"}, - {"Image": 13, "ImageHeight": "2039", "ImageSize": "686708", "ImageWidth": "1327"}, - {"Image": 14, "ImageHeight": "2039", "ImageSize": "641907", "ImageWidth": "1327"}, - {"Image": 15, "ImageHeight": "2039", "ImageSize": "805388", "ImageWidth": "1327"}, - {"Image": 16, "ImageHeight": "2039", "ImageSize": "668927", "ImageWidth": "1326"}, - {"Image": 17, "ImageHeight": "2039", "ImageSize": "710605", "ImageWidth": "1327"}, - {"Image": 18, "ImageHeight": "2039", "ImageSize": "761398", "ImageWidth": "1326"}, - {"Image": 19, "ImageHeight": "2039", "ImageSize": "743807", "ImageWidth": "1327"}, - {"Image": 20, "ImageHeight": "2039", "ImageSize": "552911", "ImageWidth": "1326"}, - {"Image": 21, "ImageHeight": "2039", "ImageSize": "556827", "ImageWidth": "1327"}, - {"Image": 22, "ImageHeight": "2039", "ImageSize": "675078", "ImageWidth": "1326"}, - { - "Bookmark": "Interview", - "Image": 23, - "ImageHeight": "2032", - "ImageSize": "800965", - "ImageWidth": "1338", - "Type": PageType.Letters, - }, -] -md_test.price = None -md_test.is_version_of = None -md_test.rights = None -md_test.identifier = None -md_test.last_mark = None -md_test.cover_image = None diff --git a/comicapi/issuestring.py b/comicapi/issuestring.py index c468fc8..5911bd9 100644 --- a/comicapi/issuestring.py +++ b/comicapi/issuestring.py @@ -113,6 +113,4 @@ class IssueString: # return the float, with no suffix if len(self.suffix) == 1 and self.suffix.isnumeric(): return (self.num or 0) + unicodedata.numeric(self.suffix) - - return 0.5 return self.num diff --git a/comicapi/utils.py b/comicapi/utils.py index a23bc87..fe674ee 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -26,6 +26,7 @@ from shutil import which # noqa: F401 from typing import Any, Mapping import pycountry +import thefuzz.fuzz logger = logging.getLogger(__name__) @@ -108,6 +109,10 @@ def remove_articles(text: str) -> str: "the", "the", "with", + "ms", + "mrs", + "mr", + "dr", ] new_text = "" for word in text.split(" "): @@ -121,26 +126,48 @@ def remove_articles(text: str) -> str: def sanitize_title(text: str, basic: bool = False) -> str: # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2 - text = unicodedata.normalize("NFKD", text) - # comicvine keeps apostrophes a part of the word - text = text.replace("'", "") - text = text.replace('"', "") - if not basic: - # comicvine ignores punctuation and accents, TODO: only remove punctuation accents and similar - text = re.sub(r"[^A-Za-z0-9]+", " ", text) + text = unicodedata.normalize("NFKD", text).casefold() + if basic: + # comicvine keeps apostrophes a part of the word + text = text.replace("'", "") + text = text.replace('"', "") + else: + # comicvine ignores punctuation and accents + # remove all characters that are not a letter, separator (space) or number + # replace any "dash punctuation" with a space + # makes sure that batman-superman and self-proclaimed stay separate words + text = "".join( + c if not unicodedata.category(c) in ("Pd",) else " " + for c in text + if unicodedata.category(c)[0] in "LZN" or unicodedata.category(c) in ("Pd",) + ) # remove extra space and articles and all lower case - text = remove_articles(text).casefold().strip() + text = remove_articles(text).strip() return text -def unique_file(file_name: str) -> str: +def titles_match(search_title: str, record_title: str, threshold: int = 90) -> int: + sanitized_search = sanitize_title(search_title) + sanitized_record = sanitize_title(record_title) + ratio = thefuzz.fuzz.ratio(sanitized_search, sanitized_record) + logger.debug( + "search title: %s ; record title: %s ; ratio: %d ; match threshold: %d", + search_title, + record_title, + ratio, + threshold, + ) + return ratio >= threshold + + +def unique_file(file_name: pathlib.Path) -> pathlib.Path: + name = file_name.name counter = 1 - file_name_parts = os.path.splitext(file_name) while True: - if not os.path.lexists(file_name): + if not file_name.exists(): return file_name - file_name = file_name_parts[0] + " (" + str(counter) + ")" + file_name_parts[1] + file_name = file_name.with_name(name + " (" + str(counter) + ")") counter += 1 @@ -204,7 +231,7 @@ class ImprintDict(dict): if the key does not exist the key is returned as the publisher unchanged """ - def __init__(self, publisher: str, mapping=(), **kwargs) -> None: + def __init__(self, publisher: str, mapping: tuple | Mapping = (), **kwargs: dict) -> None: super().__init__(mapping, **kwargs) self.publisher = publisher diff --git a/comictaggerlib/applicationlogwindow.py b/comictaggerlib/applicationlogwindow.py index f37fa26..9a3c2e2 100644 --- a/comictaggerlib/applicationlogwindow.py +++ b/comictaggerlib/applicationlogwindow.py @@ -12,18 +12,18 @@ logger = logging.getLogger(__name__) class QTextEditLogger(QtCore.QObject, logging.Handler): qlog = QtCore.pyqtSignal(str) - def __init__(self, formatter: logging.Formatter, level: int): + def __init__(self, formatter: logging.Formatter, level: int) -> None: super().__init__() self.setFormatter(formatter) self.setLevel(level) - def emit(self, record): + def emit(self, record: logging.LogRecord) -> None: msg = self.format(record) self.qlog.emit(msg.strip()) class ApplicationLogWindow(QtWidgets.QDialog): - def __init__(self, log_handler: QTextEditLogger, parent=None): + def __init__(self, log_handler: QTextEditLogger, parent: QtCore.QObject = None) -> None: super().__init__(parent) uic.loadUi(ComicTaggerSettings.get_ui_file("logwindow.ui"), self) @@ -43,7 +43,7 @@ class ApplicationLogWindow(QtWidgets.QDialog): self._button.clicked.connect(self.test) self.textEdit.setTabStopDistance(self.textEdit.tabStopDistance() * 2) - def test(self): + def test(self) -> None: logger.debug("damn, a bug") logger.info("something to remember") logger.warning("that's not right") diff --git a/comictaggerlib/autotagstartwindow.py b/comictaggerlib/autotagstartwindow.py index a48d418..c26e8cd 100644 --- a/comictaggerlib/autotagstartwindow.py +++ b/comictaggerlib/autotagstartwindow.py @@ -17,7 +17,7 @@ from __future__ import annotations import logging -from PyQt5 import QtCore, QtGui, QtWidgets, uic +from PyQt5 import QtCore, QtWidgets, uic from comictaggerlib.settings import ComicTaggerSettings @@ -39,7 +39,7 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.cbxSpecifySearchString.setChecked(False) self.cbxSplitWords.setChecked(False) - self.leNameLengthMatchTolerance.setText(str(self.settings.id_length_delta_thresh)) + self.sbNameMatchSearchThresh.setValue(self.settings.id_series_match_identify_thresh) self.leSearchString.setEnabled(False) self.cbxSaveOnLowConfidence.setChecked(self.settings.save_on_low_confidence) @@ -49,13 +49,12 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.cbxRemoveAfterSuccess.setChecked(self.settings.remove_archive_after_successful_match) self.cbxAutoImprint.setChecked(self.settings.auto_imprint) - nlmt_tip = """ The Name Length Match Tolerance is for eliminating automatic - search matches that are too long compared to your series name search. The higher + nlmt_tip = """The Name Match Ratio Threshold: Auto-Identify is for eliminating automatic + search matches that are too long compared to your series name search. The lower it is, the more likely to have a good match, but each search will take longer and - use more bandwidth. Too low, and only the very closest lexical matches will be - explored.""" + use more bandwidth. Too high, and only the very closest matches will be explored.""" - self.leNameLengthMatchTolerance.setToolTip(nlmt_tip) + self.sbNameMatchSearchThresh.setToolTip(nlmt_tip) ss_tip = """ The series search string specifies the search string to be used for all selected archives. @@ -65,9 +64,6 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.leSearchString.setToolTip(ss_tip) self.cbxSpecifySearchString.setToolTip(ss_tip) - validator = QtGui.QIntValidator(0, 99, self) - self.leNameLengthMatchTolerance.setValidator(validator) - self.cbxSpecifySearchString.stateChanged.connect(self.search_string_toggle) self.auto_save_on_low = False @@ -76,7 +72,7 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.ignore_leading_digits_in_filename = False self.remove_after_success = False self.search_string = "" - self.name_length_match_tolerance = self.settings.id_length_delta_thresh + self.name_length_match_tolerance = self.settings.id_series_match_search_thresh self.split_words = self.cbxSplitWords.isChecked() def search_string_toggle(self) -> None: @@ -91,7 +87,7 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.assume_issue_one = self.cbxAssumeIssueOne.isChecked() self.ignore_leading_digits_in_filename = self.cbxIgnoreLeadingDigitsInFilename.isChecked() self.remove_after_success = self.cbxRemoveAfterSuccess.isChecked() - self.name_length_match_tolerance = int(self.leNameLengthMatchTolerance.text()) + self.name_length_match_tolerance = int(self.leNameMatchThresh.text()) self.split_words = self.cbxSplitWords.isChecked() # persist some settings diff --git a/comictaggerlib/cbltransformer.py b/comictaggerlib/cbltransformer.py index d858e3a..34194c3 100644 --- a/comictaggerlib/cbltransformer.py +++ b/comictaggerlib/cbltransformer.py @@ -32,7 +32,7 @@ class CBLTransformer: # helper funcs def append_to_tags_if_unique(item: str) -> None: if item.casefold() not in (tag.casefold() for tag in self.metadata.tags): - self.metadata.tags.append(item) + self.metadata.tags.add(item) def add_string_list_to_tags(str_list: str | None) -> None: if str_list: diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 84681b0..8c20967 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -20,7 +20,7 @@ import argparse import json import logging import os -import shutil +import pathlib import sys from pprint import pprint @@ -28,7 +28,7 @@ from comicapi import utils from comicapi.comicarchive import ComicArchive, MetaDataStyle from comicapi.genericmetadata import GenericMetadata from comictaggerlib.cbltransformer import CBLTransformer -from comictaggerlib.filerenamer import FileRenamer +from comictaggerlib.filerenamer import FileRenamer, get_rename_dir from comictaggerlib.issueidentifier import IssueIdentifier from comictaggerlib.resulttypes import IssueResult, MultipleMatch, OnlineMatchResults from comictaggerlib.settings import ComicTaggerSettings @@ -514,21 +514,18 @@ def process_file_cli( ) return - folder = os.path.dirname(os.path.abspath(filename)) - if settings.rename_move_dir and len(settings.rename_dir.strip()) > 3: - folder = settings.rename_dir.strip() + folder = get_rename_dir(ca, settings.rename_dir if settings.rename_move_dir else None) - new_abs_path = utils.unique_file(os.path.join(folder, new_name)) + full_path = folder / new_name - if os.path.join(folder, new_name) == os.path.abspath(filename): + if full_path == ca.path: print(msg_hdr + "Filename is already good!", file=sys.stderr) return suffix = "" if not opts.dryrun: # rename the file - os.makedirs(os.path.dirname(new_abs_path), 0o777, True) - shutil.move(filename, new_abs_path) + ca.rename(utils.unique_file(full_path)) else: suffix = " (dry-run, no change)" @@ -539,18 +536,18 @@ def process_file_cli( if batch_mode: msg_hdr = f"{ca.path}: " - if not ca.is_rar(): - logger.error(msg_hdr + "Archive is not a RAR.") + if ca.is_zip(): + logger.error(msg_hdr + "Archive is already a zip file.") return - rar_file = os.path.abspath(os.path.abspath(filename)) - new_file = os.path.splitext(rar_file)[0] + ".cbz" + filename_path = pathlib.Path(filename).absolute() + new_file = filename_path.with_suffix(".cbz") - if opts.abort_on_conflict and os.path.lexists(new_file): - print(msg_hdr + f"{os.path.split(new_file)[1]} already exists in the that folder.") + if opts.abort_on_conflict and new_file.exists(): + print(msg_hdr + f"{new_file.name} already exists in the that folder.") return - new_file = utils.unique_file(os.path.join(new_file)) + new_file = utils.unique_file(new_file) delete_success = False export_success = False @@ -559,16 +556,14 @@ def process_file_cli( export_success = True if opts.delete_after_zip_export: try: - os.unlink(rar_file) - except OSError: - logger.exception(msg_hdr + "Error deleting original RAR after export") - delete_success = False - else: + filename_path.unlink(missing_ok=True) delete_success = True + except OSError: + logger.exception(msg_hdr + "Error deleting original archive after export") + delete_success = False else: # last export failed, so remove the zip, if it exists - if os.path.lexists(new_file): - os.remove(new_file) + new_file.unlink(missing_ok=True) else: msg = msg_hdr + f"Dry-run: Would try to create {os.path.split(new_file)[1]}" if opts.delete_after_zip_export: diff --git a/comictaggerlib/coverimagewidget.py b/comictaggerlib/coverimagewidget.py index 3300ed2..4c06410 100644 --- a/comictaggerlib/coverimagewidget.py +++ b/comictaggerlib/coverimagewidget.py @@ -92,6 +92,7 @@ class CoverImageWidget(QtWidgets.QWidget): ) -> None: super().__init__(parent) + self.cover_fetcher = ImageFetcher() uic.loadUi(ComicTaggerSettings.get_ui_file("coverimagewidget.ui"), self) reduce_widget_font_size(self.label) @@ -195,7 +196,7 @@ class CoverImageWidget(QtWidgets.QWidget): self.update_content() - def primary_url_fetch_complete(self, primary_url: str, thumb_url: str | None) -> None: + def primary_url_fetch_complete(self, primary_url: str, thumb_url: str | None = None) -> None: self.url_list.append(str(primary_url)) self.imageIndex = 0 self.imageCount = len(self.url_list) diff --git a/comictaggerlib/filerenamer.py b/comictaggerlib/filerenamer.py index fd41e38..975aacc 100644 --- a/comictaggerlib/filerenamer.py +++ b/comictaggerlib/filerenamer.py @@ -25,12 +25,22 @@ from typing import Any, cast from pathvalidate import sanitize_filename +from comicapi.comicarchive import ComicArchive from comicapi.genericmetadata import GenericMetadata from comicapi.issuestring import IssueString logger = logging.getLogger(__name__) +def get_rename_dir(ca: ComicArchive, rename_dir: str | pathlib.Path | None) -> pathlib.Path: + folder = ca.path.parent.absolute() + if rename_dir is not None: + if isinstance(rename_dir, str): + rename_dir = rename_dir.strip() + folder = pathlib.Path(rename_dir).absolute() + return folder + + class MetadataFormatter(string.Formatter): def __init__(self, smart_cleanup: bool = False, platform: str = "auto") -> None: super().__init__() @@ -80,17 +90,9 @@ class MetadataFormatter(string.Formatter): field_name = field_name.casefold() # this is some markup, find the object and do the formatting - # handle arg indexing when empty field_names are given. - if field_name == "": - if auto_arg_index is False: - raise ValueError("cannot switch from manual field specification to automatic field numbering") - field_name = str(auto_arg_index) - auto_arg_index += 1 - elif field_name.isdigit(): - if auto_arg_index: - raise ValueError("cannot switch from manual field specification to automatic field numbering") - # disable auto arg incrementing, if it gets used later on, then an exception will be raised - auto_arg_index = False + # handle arg indexing when digit field_names are given. + if field_name.isdigit(): + raise ValueError("cannot use a number as a field name") # given the field_name, find the object it references # and the argument it came from @@ -101,8 +103,8 @@ class MetadataFormatter(string.Formatter): obj = self.convert_field(obj, conversion) # type: ignore # expand the format spec, if needed - format_spec, auto_arg_index = self._vformat( - cast(str, format_spec), args, kwargs, used_args, recursion_depth - 1, auto_arg_index=auto_arg_index + format_spec, _ = self._vformat( + cast(str, format_spec), args, kwargs, used_args, recursion_depth - 1, auto_arg_index=False ) # format the object and append to the result @@ -118,7 +120,7 @@ class MetadataFormatter(string.Formatter): fmt_obj = str(sanitize_filename(fmt_obj, platform=self.platform)) result.append(fmt_obj) - return "".join(result), auto_arg_index + return "".join(result), False class FileRenamer: @@ -168,16 +170,17 @@ class FileRenamer: md_dict["month_name"] = "" md_dict["month_abbr"] = "" - for Component in pathlib.PureWindowsPath(template).parts: + new_basename = "" + for component in pathlib.PureWindowsPath(template).parts: if ( self.platform.casefold() in ["universal", "windows"] or sys.platform.casefold() in ["windows"] ) and self.smart_cleanup: # colons get special treatment - Component = Component.replace(": ", " - ") - Component = Component.replace(":", "-") + component = component.replace(": ", " - ") + component = component.replace(":", "-") new_basename = str( - sanitize_filename(fmt.vformat(Component, args=[], kwargs=Default(md_dict)), platform=self.platform) + sanitize_filename(fmt.vformat(component, args=[], kwargs=Default(md_dict)), platform=self.platform) ).strip() new_name = os.path.join(new_name, new_basename) diff --git a/comictaggerlib/fileselectionlist.py b/comictaggerlib/fileselectionlist.py index aeb6dd4..385ae18 100644 --- a/comictaggerlib/fileselectionlist.py +++ b/comictaggerlib/fileselectionlist.py @@ -17,13 +17,16 @@ from __future__ import annotations import logging import os +import platform from typing import Callable, cast from PyQt5 import QtCore, QtWidgets, uic from comicapi import utils from comicapi.comicarchive import ComicArchive +from comictaggerlib.optionalmsgdialog import OptionalMessageDialog from comictaggerlib.settings import ComicTaggerSettings +from comictaggerlib.settingswindow import linuxRarHelp, macRarHelp, windowsRarHelp from comictaggerlib.ui.qtutils import center_window_on_parent, reduce_widget_font_size logger = logging.getLogger(__name__) @@ -88,6 +91,7 @@ class FileSelectionList(QtWidgets.QWidget): self.addAction(self.separator) self.dirty_flag_verification = dirty_flag_verification + self.rar_ro_shown = False def get_sorting(self) -> tuple[int, int]: col = self.twList.horizontalHeader().sortIndicatorSection() @@ -190,6 +194,7 @@ class FileSelectionList(QtWidgets.QWidget): QtCore.QCoreApplication.processEvents() first_added = None + rar_added = False self.twList.setSortingEnabled(False) for idx, f in enumerate(filelist): QtCore.QCoreApplication.processEvents() @@ -200,8 +205,12 @@ class FileSelectionList(QtWidgets.QWidget): center_window_on_parent(progdialog) QtCore.QCoreApplication.processEvents() row = self.add_path_item(f) - if first_added is None and row is not None: - first_added = row + if row is not None: + ca = self.get_archive_by_row(row) + if ca and ca.is_rar(): + rar_added = True + if first_added is None: + first_added = row progdialog.hide() QtCore.QCoreApplication.processEvents() @@ -216,6 +225,9 @@ class FileSelectionList(QtWidgets.QWidget): else: QtWidgets.QMessageBox.information(self, "File/Folder Open", "No readable comic archives were found.") + if rar_added and not utils.which(self.settings.rar_exe_path or "rar"): + self.rar_ro_message() + self.twList.setSortingEnabled(True) # Adjust column size @@ -229,6 +241,26 @@ class FileSelectionList(QtWidgets.QWidget): if self.twList.columnWidth(FileSelectionList.folderColNum) > 200: self.twList.setColumnWidth(FileSelectionList.folderColNum, 200) + def rar_ro_message(self) -> None: + if not self.rar_ro_shown: + if platform.system() == "Windows": + rar_help = windowsRarHelp + + elif platform.system() == "Darwin": + rar_help = macRarHelp + + else: + rar_help = linuxRarHelp + + OptionalMessageDialog.msg_no_checkbox( + self, + "RAR Files are Read-Only", + "It looks like you have opened a RAR/CBR archive,\n" + "however ComicTagger cannot currently write to them without the rar program and are marked read only!\n\n" + f"{rar_help}", + ) + self.rar_ro_shown = True + def is_list_dupe(self, path: str) -> bool: return self.get_current_list_row(path) >= 0 @@ -344,7 +376,7 @@ class FileSelectionList(QtWidgets.QWidget): try: fi.ca.read_cix() except Exception: - ... + pass fi.ca.has_cbi() def get_selected_archive_list(self) -> list[ComicArchive]: diff --git a/comictaggerlib/imagefetcher.py b/comictaggerlib/imagefetcher.py index f88d66a..a229ac4 100644 --- a/comictaggerlib/imagefetcher.py +++ b/comictaggerlib/imagefetcher.py @@ -38,7 +38,7 @@ logger = logging.getLogger(__name__) class ImageFetcherException(Exception): - pass + ... def fetch_complete(image_data: bytes | QtCore.QByteArray) -> None: diff --git a/comictaggerlib/imagehasher.py b/comictaggerlib/imagehasher.py index 0d21203..f8662ff 100755 --- a/comictaggerlib/imagehasher.py +++ b/comictaggerlib/imagehasher.py @@ -49,7 +49,7 @@ class ImageHasher: def average_hash(self) -> int: try: - image = self.image.resize((self.width, self.height), Image.ANTIALIAS).convert("L") + image = self.image.resize((self.width, self.height), Image.Resampling.LANCZOS).convert("L") except Exception: logger.exception("average_hash error") return 0 diff --git a/comictaggerlib/imagepopup.py b/comictaggerlib/imagepopup.py index ff72c51..1c63cfa 100644 --- a/comictaggerlib/imagepopup.py +++ b/comictaggerlib/imagepopup.py @@ -51,7 +51,7 @@ class ImagePopup(QtWidgets.QDialog): self.clientBgPixmap = bg.scaled( screen_size.width(), screen_size.height(), - QtCore.Qt.AspectRatioMode.KeepAspectRatio, + QtCore.Qt.AspectRatioMode.IgnoreAspectRatio, QtCore.Qt.SmoothTransformation, ) self.setMask(self.clientBgPixmap.mask()) diff --git a/comictaggerlib/issueidentifier.py b/comictaggerlib/issueidentifier.py index 35704a4..b83c0b8 100644 --- a/comictaggerlib/issueidentifier.py +++ b/comictaggerlib/issueidentifier.py @@ -59,11 +59,11 @@ class Score(TypedDict): class IssueIdentifierNetworkError(Exception): - pass + ... class IssueIdentifierCancelled(Exception): - pass + ... class IssueIdentifier: @@ -98,7 +98,7 @@ class IssueIdentifier: # used to eliminate series names that are too long based on our search # string - self.length_delta_thresh = settings.id_length_delta_thresh + self.series_match_thresh = settings.id_series_match_identify_thresh # used to eliminate unlikely publishers self.publisher_filter = [s.strip().casefold() for s in settings.id_publisher_filter.split(",")] @@ -122,8 +122,8 @@ class IssueIdentifier: def set_additional_metadata(self, md: GenericMetadata) -> None: self.additional_metadata = md - def set_name_length_delta_threshold(self, delta: int) -> None: - self.length_delta_thresh = delta + def set_name_series_match_threshold(self, delta: int) -> None: + self.series_match_thresh = delta def set_publisher_filter(self, flt: list[str]) -> None: self.publisher_filter = flt @@ -177,35 +177,29 @@ class IssueIdentifier: def get_search_keys(self) -> SearchKeys: ca = self.comic_archive - search_keys: SearchKeys = { - "series": None, - "issue_number": None, - "month": None, - "year": None, - "issue_count": None, - } if ca is None: return None - + search_keys: SearchKeys if self.only_use_additional_meta_data: - search_keys["series"] = self.additional_metadata.series - search_keys["issue_number"] = self.additional_metadata.issue - search_keys["year"] = self.additional_metadata.year - search_keys["month"] = self.additional_metadata.month - search_keys["issue_count"] = self.additional_metadata.issue_count + search_keys = SearchKeys( + series=self.additional_metadata.series, + issue_number=self.additional_metadata.issue, + year=self.additional_metadata.year, + month=self.additional_metadata.month, + issue_count=self.additional_metadata.issue_count, + ) return search_keys # see if the archive has any useful meta data for searching with - if ca.has_cix(): - try: + try: + if ca.has_cix(): internal_metadata = ca.read_cix() - except Exception as e: - logger.error("Failed to load metadata for %s: %s", ca.path, e) - elif ca.has_cbi(): - internal_metadata = ca.read_cbi() - else: - internal_metadata = ca.read_cbi() + else: + internal_metadata = ca.read_cbi() + except Exception as e: + internal_metadata = GenericMetadata() + logger.error("Failed to load metadata for %s: %s", ca.path, e) # try to get some metadata from filename md_from_filename = ca.metadata_from_filename( @@ -215,45 +209,22 @@ class IssueIdentifier: self.settings.remove_publisher, ) + working_md = md_from_filename.copy() + + working_md.overlay(internal_metadata) + working_md.overlay(self.additional_metadata) + # preference order: # 1. Additional metadata # 1. Internal metadata # 1. Filename metadata - - if self.additional_metadata.series is not None: - search_keys["series"] = self.additional_metadata.series - elif internal_metadata.series is not None: - search_keys["series"] = internal_metadata.series - else: - search_keys["series"] = md_from_filename.series - - if self.additional_metadata.issue is not None: - search_keys["issue_number"] = self.additional_metadata.issue - elif internal_metadata.issue is not None: - search_keys["issue_number"] = internal_metadata.issue - else: - search_keys["issue_number"] = md_from_filename.issue - - if self.additional_metadata.year is not None: - search_keys["year"] = self.additional_metadata.year - elif internal_metadata.year is not None: - search_keys["year"] = internal_metadata.year - else: - search_keys["year"] = md_from_filename.year - - if self.additional_metadata.month is not None: - search_keys["month"] = self.additional_metadata.month - elif internal_metadata.month is not None: - search_keys["month"] = internal_metadata.month - else: - search_keys["month"] = md_from_filename.month - - if self.additional_metadata.issue_count is not None: - search_keys["issue_count"] = self.additional_metadata.issue_count - elif internal_metadata.issue_count is not None: - search_keys["issue_count"] = internal_metadata.issue_count - else: - search_keys["issue_count"] = md_from_filename.issue_count + search_keys = SearchKeys( + series=working_md.series, + issue_number=working_md.issue, + year=working_md.year, + month=working_md.month, + issue_count=working_md.issue_count, + ) return search_keys @@ -294,9 +265,7 @@ class IssueIdentifier: if self.cover_url_callback is not None: self.cover_url_callback(url_image_data) - remote_cover_list = [] - - remote_cover_list.append(Score({"url": primary_img_url, "hash": self.calculate_hash(url_image_data)})) + remote_cover_list = [Score(url=primary_img_url, hash=self.calculate_hash(url_image_data))] if self.cancel: raise IssueIdentifierCancelled @@ -317,7 +286,7 @@ class IssueIdentifier: if self.cover_url_callback is not None: self.cover_url_callback(alt_url_image_data) - remote_cover_list.append(Score({"url": alt_url, "hash": self.calculate_hash(alt_url_image_data)})) + remote_cover_list.append(Score(url=alt_url, hash=self.calculate_hash(alt_url_image_data))) if self.cancel: raise IssueIdentifierCancelled @@ -332,9 +301,7 @@ class IssueIdentifier: for local_cover_hash in local_cover_hash_list: for remote_cover_item in remote_cover_list: score = ImageHasher.hamming_distance(local_cover_hash, remote_cover_item["hash"]) - score_list.append( - Score({"score": score, "url": remote_cover_item["url"], "hash": remote_cover_item["hash"]}) - ) + score_list.append(Score(score=score, url=remote_cover_item["url"], hash=remote_cover_item["hash"])) if use_log: self.log_msg(score, False) @@ -431,15 +398,13 @@ class IssueIdentifier: if int(keys["year"]) < int(item["start_year"]): date_approved = False - # assume that our search name is close to the actual name, say - # within ,e.g. 5 chars - # sanitize both the search string and the result so that - # we are comparing the same type of data - shortened_key = utils.sanitize_title(keys["series"]) - shortened_item_name = utils.sanitize_title(item["name"]) - if len(shortened_item_name) < (len(shortened_key) + self.length_delta_thresh): - length_approved = True - + aliases = [] + if item["aliases"]: + aliases = item["aliases"].split("\n") + for name in [item["name"], *aliases]: + if utils.titles_match(keys["series"], name, self.series_match_thresh): + length_approved = True + break # remove any series from publishers on the filter if item["publisher"] is not None: publisher = item["publisher"] diff --git a/comictaggerlib/issueselectionwindow.py b/comictaggerlib/issueselectionwindow.py index 1ec92ce..2cb3b60 100644 --- a/comictaggerlib/issueselectionwindow.py +++ b/comictaggerlib/issueselectionwindow.py @@ -138,18 +138,18 @@ class IssueSelectionWindow(QtWidgets.QDialog): if len(parts) > 1: item_text = parts[0] + "-" + parts[1] - QTW_item = QtWidgets.QTableWidgetItem(item_text) - QTW_item.setData(QtCore.Qt.ItemDataRole.ToolTipRole, item_text) - QTW_item.setFlags(QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEnabled) - self.twList.setItem(row, 1, QTW_item) + qtw_item = QtWidgets.QTableWidgetItem(item_text) + qtw_item.setData(QtCore.Qt.ItemDataRole.ToolTipRole, item_text) + qtw_item.setFlags(QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEnabled) + self.twList.setItem(row, 1, qtw_item) item_text = record["name"] if item_text is None: item_text = "" - QTW_item = QtWidgets.QTableWidgetItem(item_text) - QTW_item.setData(QtCore.Qt.ItemDataRole.ToolTipRole, item_text) - QTW_item.setFlags(QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEnabled) - self.twList.setItem(row, 2, QTW_item) + qtw_item = QtWidgets.QTableWidgetItem(item_text) + qtw_item.setData(QtCore.Qt.ItemDataRole.ToolTipRole, item_text) + qtw_item.setFlags(QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEnabled) + self.twList.setItem(row, 2, qtw_item) if ( IssueString(record["issue_number"]).as_string().casefold() diff --git a/comictaggerlib/main.py b/comictaggerlib/main.py index 97e0bac..f119782 100755 --- a/comictaggerlib/main.py +++ b/comictaggerlib/main.py @@ -93,7 +93,7 @@ try: except ImportError as e: def show_exception_box(log_msg: str) -> None: - pass + ... logger.error(str(e)) qt_available = False @@ -116,7 +116,7 @@ def update_publishers() -> None: def ctmain() -> None: opts = parse_cmd_line() - SETTINGS = ComicTaggerSettings(opts.config_path) + settings = ComicTaggerSettings(opts.config_path) os.makedirs(ComicTaggerSettings.get_settings_folder() / "logs", exist_ok=True) stream_handler = logging.StreamHandler() @@ -149,18 +149,18 @@ def ctmain() -> None: for pkg in sorted(importlib_metadata.distributions(), key=lambda x: x.name): logger.debug("%s\t%s", pkg.name, pkg.version) - talker_api = ComicTalker(SETTINGS.comic_info_source) + talker_api = ComicTalker(settings.comic_info_source) utils.load_publishers() update_publishers() if not qt_available and not opts.no_gui: opts.no_gui = True - logger.warn("PyQt5 is not available. ComicTagger is limited to command-line mode.") + logger.warning("PyQt5 is not available. ComicTagger is limited to command-line mode.") if opts.no_gui: try: - cli.cli_mode(opts, SETTINGS, talker_api) + cli.cli_mode(opts, settings, talker_api) except Exception: logger.exception("CLI mode failed") else: @@ -196,7 +196,7 @@ def ctmain() -> None: QtWidgets.QApplication.processEvents() try: - tagger_window = TaggerWindow(opts.files, SETTINGS, talker_api, opts=opts) + tagger_window = TaggerWindow(opts.files, settings, talker_api, opts=opts) tagger_window.setWindowIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("app.png"))) tagger_window.show() diff --git a/comictaggerlib/optionalmsgdialog.py b/comictaggerlib/optionalmsgdialog.py index 6ba86b2..23c2de3 100644 --- a/comictaggerlib/optionalmsgdialog.py +++ b/comictaggerlib/optionalmsgdialog.py @@ -112,3 +112,14 @@ class OptionalMessageDialog(QtWidgets.QDialog): d.exec() return d.was_accepted, d.theCheckBox.isChecked() + + @staticmethod + def msg_no_checkbox( + parent: QtWidgets.QWidget, title: str, msg: str, checked: bool = False, check_text: str = "" + ) -> bool: + + d = OptionalMessageDialog(parent, StyleMessage, title, msg, checked=checked, check_text=check_text) + d.theCheckBox.hide() + + d.exec() + return d.theCheckBox.isChecked() diff --git a/comictaggerlib/pagelisteditor.py b/comictaggerlib/pagelisteditor.py index ca93d4f..85530ee 100644 --- a/comictaggerlib/pagelisteditor.py +++ b/comictaggerlib/pagelisteditor.py @@ -120,10 +120,10 @@ class PageListEditor(QtWidgets.QWidget): if show_shortcut: text = text + " (" + shortcut + ")" self.cbPageType.addItem(text, user_data) - actionItem = QtWidgets.QAction(shortcut, self) - actionItem.triggered.connect(lambda: self.select_page_type_item(self.cbPageType.findData(user_data))) - actionItem.setShortcut(shortcut) - self.addAction(actionItem) + action_item = QtWidgets.QAction(shortcut, self) + action_item.triggered.connect(lambda: self.select_page_type_item(self.cbPageType.findData(user_data))) + action_item.setShortcut(shortcut) + self.addAction(action_item) def select_page_type_item(self, idx: int) -> None: if self.cbPageType.isEnabled(): @@ -133,19 +133,19 @@ class PageListEditor(QtWidgets.QWidget): def get_new_indexes(self, movement: int) -> list[tuple[int, int]]: selection = self.listWidget.selectionModel().selectedRows() selection.sort(reverse=movement > 0) - newindexes: list[int] = [] - oldindexes: list[int] = [] + new_indexes: list[int] = [] + old_indexes: list[int] = [] for x in selection: current = x.row() - oldindexes.append(current) + old_indexes.append(current) if 0 <= current + movement <= self.listWidget.count() - 1: - if len(newindexes) < 1 or current + movement != newindexes[-1]: + if len(new_indexes) < 1 or current + movement != new_indexes[-1]: current += movement - newindexes.append(current) - oldindexes.sort() - newindexes.sort() - return list(zip(newindexes, oldindexes)) + new_indexes.append(current) + old_indexes.sort() + new_indexes.sort() + return list(zip(new_indexes, old_indexes)) def set_selection(self, indexes: list[tuple[int, int]]) -> list[tuple[int, int]]: selection_ranges: list[tuple[int, int]] = [] diff --git a/comictaggerlib/renamewindow.py b/comictaggerlib/renamewindow.py index 7f5af28..a34df90 100644 --- a/comictaggerlib/renamewindow.py +++ b/comictaggerlib/renamewindow.py @@ -16,15 +16,13 @@ from __future__ import annotations import logging -import os -import shutil -from typing import TypedDict from PyQt5 import QtCore, QtWidgets, uic from comicapi import utils from comicapi.comicarchive import ComicArchive, MetaDataStyle -from comictaggerlib.filerenamer import FileRenamer +from comicapi.genericmetadata import GenericMetadata +from comictaggerlib.filerenamer import FileRenamer, get_rename_dir from comictaggerlib.settings import ComicTaggerSettings from comictaggerlib.settingswindow import SettingsWindow from comictaggerlib.ui.qtutils import center_window_on_parent @@ -33,11 +31,6 @@ from comictalker.comictalker import ComicTalker logger = logging.getLogger(__name__) -class RenameItem(TypedDict): - archive: ComicArchive - new_name: str - - class RenameWindow(QtWidgets.QDialog): def __init__( self, @@ -64,35 +57,28 @@ class RenameWindow(QtWidgets.QDialog): self.talker_api = talker_api self.comic_archive_list = comic_archive_list self.data_style = data_style - self.rename_list: list[RenameItem] = [] + self.rename_list: list[str] = [] self.btnSettings.clicked.connect(self.modify_settings) self.renamer = FileRenamer(None, platform="universal" if self.settings.rename_strict else "auto") - self.config_renamer() self.do_preview() - def config_renamer(self) -> None: + def config_renamer(self, ca: ComicArchive, md: GenericMetadata | None = None) -> str: self.renamer.set_template(self.settings.rename_template) self.renamer.set_issue_zero_padding(self.settings.rename_issue_number_padding) self.renamer.set_smart_cleanup(self.settings.rename_use_smart_string_cleanup) - def do_preview(self) -> None: - self.twList.setRowCount(0) - - self.twList.setSortingEnabled(False) - - for ca in self.comic_archive_list: - - new_ext = ca.path.suffix # default - if self.settings.rename_extension_based_on_archive: - if ca.is_sevenzip(): - new_ext = ".cb7" - elif ca.is_zip(): - new_ext = ".cbz" - elif ca.is_rar(): - new_ext = ".cbr" + new_ext = ca.path.suffix # default + if self.settings.rename_extension_based_on_archive: + if ca.is_sevenzip(): + new_ext = ".cb7" + elif ca.is_zip(): + new_ext = ".cbz" + elif ca.is_rar(): + new_ext = ".cbr" + if md is None: md = ca.read_metadata(self.data_style) if md.is_empty: md = ca.metadata_from_filename( @@ -101,9 +87,17 @@ class RenameWindow(QtWidgets.QDialog): self.settings.remove_fcbd, self.settings.remove_publisher, ) - self.renamer.set_metadata(md) - self.renamer.move = self.settings.rename_move_dir + self.renamer.set_metadata(md) + self.renamer.move = self.settings.rename_move_dir + return new_ext + def do_preview(self) -> None: + self.twList.setRowCount(0) + + self.twList.setSortingEnabled(False) + + for ca in self.comic_archive_list: + new_ext = self.config_renamer(ca) try: new_name = self.renamer.determine_name(new_ext) except Exception as e: @@ -125,13 +119,13 @@ class RenameWindow(QtWidgets.QDialog): old_name_item = QtWidgets.QTableWidgetItem() new_name_item = QtWidgets.QTableWidgetItem() - item_text = os.path.split(ca.path)[0] + item_text = str(ca.path.parent) folder_item.setFlags(QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEnabled) self.twList.setItem(row, 0, folder_item) folder_item.setText(item_text) folder_item.setData(QtCore.Qt.ItemDataRole.ToolTipRole, item_text) - item_text = os.path.split(ca.path)[1] + item_text = str(ca.path.name) old_name_item.setFlags(QtCore.Qt.ItemFlag.ItemIsSelectable | QtCore.Qt.ItemFlag.ItemIsEnabled) self.twList.setItem(row, 1, old_name_item) old_name_item.setText(item_text) @@ -142,13 +136,7 @@ class RenameWindow(QtWidgets.QDialog): new_name_item.setText(new_name) new_name_item.setData(QtCore.Qt.ItemDataRole.ToolTipRole, new_name) - dict_item = RenameItem( - { - "archive": ca, - "new_name": new_name, - } - ) - self.rename_list.append(dict_item) + self.rename_list.append(new_name) # Adjust column sizes self.twList.setVisible(False) @@ -165,7 +153,6 @@ class RenameWindow(QtWidgets.QDialog): settingswin.show_rename_tab() settingswin.exec() if settingswin.result(): - self.config_renamer() self.do_preview() def accept(self) -> None: @@ -177,34 +164,29 @@ class RenameWindow(QtWidgets.QDialog): center_window_on_parent(prog_dialog) QtCore.QCoreApplication.processEvents() - for idx, item in enumerate(self.rename_list): + for idx, comic in enumerate(zip(self.comic_archive_list, self.rename_list)): QtCore.QCoreApplication.processEvents() if prog_dialog.wasCanceled(): break idx += 1 prog_dialog.setValue(idx) - prog_dialog.setLabelText(item["new_name"]) + prog_dialog.setLabelText(comic[1]) center_window_on_parent(prog_dialog) QtCore.QCoreApplication.processEvents() - folder = os.path.dirname(os.path.abspath(item["archive"].path)) - if self.settings.rename_move_dir and len(self.settings.rename_dir.strip()) > 3: - folder = self.settings.rename_dir.strip() + folder = get_rename_dir(comic[0], self.settings.rename_dir if self.settings.rename_move_dir else None) - new_abs_path = utils.unique_file(os.path.join(folder, item["new_name"])) + full_path = folder / comic[1] - if os.path.join(folder, item["new_name"]) == item["archive"].path: - logger.info(item["new_name"], "Filename is already good!") + if full_path == comic[0].path: + logger.info("%s: Filename is already good!", comic[1]) continue - if not item["archive"].is_writable(check_rar_status=False): + if not comic[0].is_writable(check_rar_status=False): continue - os.makedirs(os.path.dirname(new_abs_path), 0o777, True) - shutil.move(item["archive"].path, new_abs_path) - - item["archive"].rename(new_abs_path) + comic[0].rename(utils.unique_file(full_path)) prog_dialog.hide() QtCore.QCoreApplication.processEvents() diff --git a/comictaggerlib/settings.py b/comictaggerlib/settings.py index 39ed6d7..df4d3a4 100644 --- a/comictaggerlib/settings.py +++ b/comictaggerlib/settings.py @@ -102,7 +102,8 @@ class ComicTaggerSettings: self.last_filelist_sorted_order = 0 # identifier settings - self.id_length_delta_thresh = 5 + self.id_series_match_search_thresh = 90 + self.id_series_match_identify_thresh = 91 self.id_publisher_filter = "Panini Comics, Abril, Planeta DeAgostini, Editorial Televisa, Dino Comics" self.comic_info_source = "comicvine" # Default to CV as should always be present @@ -181,10 +182,12 @@ class ComicTaggerSettings: elif os.path.exists(r"C:\Program Files (x86)\WinRAR\Rar.exe"): self.rar_exe_path = r"C:\Program Files (x86)\WinRAR\Rar.exe" else: + if os.path.exists("/opt/homebrew/bin"): + utils.add_to_path("/opt/homebrew/bin") # see if it's in the path of unix user rarpath = utils.which("rar") if rarpath is not None: - self.rar_exe_path = rarpath + self.rar_exe_path = "rar" if self.rar_exe_path != "": self.save() if self.rar_exe_path != "": @@ -236,8 +239,10 @@ class ComicTaggerSettings: if self.config.has_option("auto", "last_filelist_sorted_order"): self.last_filelist_sorted_order = self.config.getint("auto", "last_filelist_sorted_order") - if self.config.has_option("identifier", "id_length_delta_thresh"): - self.id_length_delta_thresh = self.config.getint("identifier", "id_length_delta_thresh") + if self.config.has_option("identifier", "id_series_match_search_thresh"): + self.id_series_match_search_thresh = self.config.getint("identifier", "id_series_match_search_thresh") + if self.config.has_option("identifier", "id_series_match_identify_thresh"): + self.id_series_match_identify_thresh = self.config.getint("identifier", "id_series_match_identify_thresh") if self.config.has_option("identifier", "id_publisher_filter"): self.id_publisher_filter = self.config.get("identifier", "id_publisher_filter") if self.config.has_option("identifier", "always_use_publisher_filter"): @@ -364,7 +369,8 @@ class ComicTaggerSettings: if not self.config.has_section("identifier"): self.config.add_section("identifier") - self.config.set("identifier", "id_length_delta_thresh", self.id_length_delta_thresh) + self.config.set("identifier", "id_series_match_search_thresh", self.id_series_match_search_thresh) + self.config.set("identifier", "id_series_match_identify_thresh", self.id_series_match_identify_thresh) self.config.set("identifier", "id_publisher_filter", self.id_publisher_filter) self.config.set("identifier", "always_use_publisher_filter", self.always_use_publisher_filter) diff --git a/comictaggerlib/settingswindow.py b/comictaggerlib/settingswindow.py index 43ecfdf..55ff693 100644 --- a/comictaggerlib/settingswindow.py +++ b/comictaggerlib/settingswindow.py @@ -18,6 +18,7 @@ from __future__ import annotations import html import logging import os +import pathlib import platform from PyQt5 import QtCore, QtGui, QtWidgets, uic @@ -33,29 +34,29 @@ from comictalker.comictalker import ComicTalker logger = logging.getLogger(__name__) windowsRarHelp = """ -

To write to CBR/RAR archives, - you will need to have the tools from - - WINRar - installed. (ComicTagger only uses the command-line rar tool, - which is free to use.)

+

To write to CBR/RAR archives, + you will need to have the tools from + + WINRar + installed. (ComicTagger only uses the command-line rar tool.) +

""" linuxRarHelp = """ -

To write to CBR/RAR archives, - you will need to have the shareware rar tool from RARLab installed. - Your package manager should have rar (e.g. "apt-get install rar"). If not, download it - - here, - and install in your path.

- """ +

To write to CBR/RAR archives, + you will need to have the shareware rar tool from RARLab installed. + Your package manager should have rar (e.g. "apt-get install rar"). If not, download it + + here, + and install in your path.

+ """ macRarHelp = """ -

To write to CBR/RAR archives, - you will need the rar tool. The easiest way to get this is - to install - homebrew. -

Once homebrew is installed, run: brew install caskroom/cask/rar +

To write to CBR/RAR archives, + you will need the rar tool. The easiest way to get this is + to install + homebrew. +

Once homebrew is installed, run: brew install caskroom/cask/rar """ @@ -105,7 +106,7 @@ Accepts the following variables: {cover artist} (string) {editor} (string) {tags} (list of str) -{pages} (list of dict({'Image': string(int), 'Type': string, 'Bookmark': string, 'DoublePage': string})) +{pages} (list of dict({'Image': string(int), 'Type': string, 'Bookmark': string, 'DoublePage': boolean})) CoMet-only items: {price} (float) @@ -155,13 +156,16 @@ class SettingsWindow(QtWidgets.QDialog): self.lblDefaultSettings.setText("Revert to default " + self.name.casefold()) self.btnResetSettings.setText("Default " + self.name) - nldt_tip = """The Default Name Length Match Tolerance is for eliminating automatic - search matches that are too long compared to your series name search. The higher + nmit_tip = """The Name Match Ratio Threshold: Auto-Identify is for eliminating automatic + search matches that are too long compared to your series name search. The lower it is, the more likely to have a good match, but each search will take longer and - use more bandwidth. Too low, and only the very closest lexical matches will be - explored.""" + use more bandwidth. Too high, and only the very closest matches will be explored.""" + nmst_tip = """The Name Match Ratio Threshold: Search is for reducing the total + number of results that are returned from a search. The lower it is, the more pages will + be returned (max 5 pages or 500 results)""" - self.leNameLengthDeltaThresh.setToolTip(nldt_tip) + self.sbNameMatchIdentifyThresh.setToolTip(nmit_tip) + self.sbNameMatchSearchThresh.setToolTip(nmst_tip) pbl_tip = """ The Publisher Filter is for eliminating automatic matches to certain publishers @@ -173,22 +177,21 @@ class SettingsWindow(QtWidgets.QDialog): validator = QtGui.QIntValidator(1, 4, self) self.leIssueNumPadding.setValidator(validator) - validator = QtGui.QIntValidator(0, 99, self) - self.leNameLengthDeltaThresh.setValidator(validator) - self.leRenameTemplate.setToolTip(f"
{html.escape(template_tooltip)}
") self.settings_to_form() self.rename_error: Exception | None = None self.rename_test() + self.dir_test() self.btnBrowseRar.clicked.connect(self.select_rar) self.btnClearCache.clicked.connect(self.clear_cache) self.btnResetSettings.clicked.connect(self.reset_settings) self.btnTemplateHelp.clicked.connect(self.show_template_help) - self.leRenameTemplate.textEdited.connect(self.rename__test) + self.leRenameTemplate.textEdited.connect(self._rename_test) self.cbxMoveFiles.clicked.connect(self.rename_test) + self.cbxMoveFiles.clicked.connect(self.dir_test) self.cbxRenameStrict.clicked.connect(self.rename_test) - self.leDirectory.textEdited.connect(self.rename_test) + self.leDirectory.textEdited.connect(self.dir_test) self.cbxComplicatedParser.clicked.connect(self.switch_parser) self.sources: dict = {} @@ -282,9 +285,14 @@ class SettingsWindow(QtWidgets.QDialog): self.cobxInfoSource.setCurrentIndex(self.cobxInfoSource.findData(self.settings.comic_info_source)) def rename_test(self) -> None: - self.rename__test(self.leRenameTemplate.text()) + self._rename_test(self.leRenameTemplate.text()) - def rename__test(self, template: str) -> None: + def dir_test(self) -> None: + self.lblDir.setText( + str(pathlib.Path(self.leDirectory.text().strip()).absolute()) if self.cbxMoveFiles.isChecked() else "" + ) + + def _rename_test(self, template: str) -> None: fr = FileRenamer(md_test, platform="universal" if self.cbxRenameStrict.isChecked() else "auto") fr.move = self.cbxMoveFiles.isChecked() fr.set_template(template) @@ -307,7 +315,8 @@ class SettingsWindow(QtWidgets.QDialog): def settings_to_form(self) -> None: # Copy values from settings to form self.leRarExePath.setText(self.settings.rar_exe_path) - self.leNameLengthDeltaThresh.setText(str(self.settings.id_length_delta_thresh)) + self.sbNameMatchIdentifyThresh.setValue(self.settings.id_series_match_identify_thresh) + self.sbNameMatchSearchThresh.setValue(self.settings.id_series_match_search_thresh) self.tePublisherFilter.setPlainText(self.settings.id_publisher_filter) self.cbxCheckForNewVersion.setChecked(self.settings.check_for_new_version) @@ -364,15 +373,13 @@ class SettingsWindow(QtWidgets.QDialog): if self.settings.rar_exe_path: utils.add_to_path(os.path.dirname(self.settings.rar_exe_path)) - if not str(self.leNameLengthDeltaThresh.text()).isdigit(): - self.leNameLengthDeltaThresh.setText("0") - if not str(self.leIssueNumPadding.text()).isdigit(): self.leIssueNumPadding.setText("0") self.settings.check_for_new_version = self.cbxCheckForNewVersion.isChecked() - self.settings.id_length_delta_thresh = int(self.leNameLengthDeltaThresh.text()) + self.settings.id_series_match_identify_thresh = self.sbNameMatchIdentifyThresh.value() + self.settings.id_series_match_search_thresh = self.sbNameMatchSearchThresh.value() self.settings.id_publisher_filter = str(self.tePublisherFilter.toPlainText()) self.settings.comic_info_source = str(self.cobxInfoSource.itemData(self.cobxInfoSource.currentIndex())) # Also change current talker_api object diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index 53bfb9c..18e0e2e 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -26,7 +26,7 @@ import pprint import re import sys import webbrowser -from typing import Any, Callable, cast +from typing import Any, Callable, Iterable, cast from urllib.parse import urlparse import natsort @@ -449,16 +449,16 @@ Have fun! def repackage_archive(self) -> None: ca_list = self.fileSelectionList.get_selected_archive_list() - rar_count = 0 + non_zip_count = 0 for ca in ca_list: - if ca.is_rar(): - rar_count += 1 + if not ca.is_zip(): + non_zip_count += 1 - if rar_count == 0: + if non_zip_count == 0: QtWidgets.QMessageBox.information( - self, self.tr("Export as Zip Archive"), self.tr("No RAR archives selected!") + self, self.tr("Export as Zip Archive"), self.tr("Only ZIP archives are selected!") ) - logger.warning("Export as Zip Archive. No RAR archives selected") + logger.warning("Export as Zip Archive. Only ZIP archives are selected") return if not self.dirty_flag_verification( @@ -467,12 +467,12 @@ Have fun! ): return - if rar_count != 0: + if non_zip_count != 0: EW = ExportWindow( self, self.settings, ( - f"You have selected {rar_count} archive(s) to export to Zip format. " + f"You have selected {non_zip_count} archive(s) to export to Zip format. " """ New archives will be created in the same folder as the original. Please choose options below, and select OK. @@ -484,7 +484,7 @@ Have fun! if not EW.exec(): return - prog_dialog = QtWidgets.QProgressDialog("", "Cancel", 0, rar_count, self) + prog_dialog = QtWidgets.QProgressDialog("", "Cancel", 0, non_zip_count, self) prog_dialog.setWindowTitle("Exporting as ZIP") prog_dialog.setWindowModality(QtCore.Qt.WindowModality.ApplicationModal) prog_dialog.setMinimumDuration(300) @@ -499,7 +499,7 @@ Have fun! success_count = 0 for ca in ca_list: - if ca.is_rar(): + if not ca.is_zip(): QtCore.QCoreApplication.processEvents() if prog_dialog.wasCanceled(): break @@ -509,30 +509,30 @@ Have fun! center_window_on_parent(prog_dialog) QtCore.QCoreApplication.processEvents() - original_path = os.path.abspath(ca.path) - export_name = os.path.splitext(original_path)[0] + ".cbz" + export_name = ca.path.with_suffix(".cbz") + export = True - if os.path.lexists(export_name): + if export_name.exists(): if EW.fileConflictBehavior == ExportConflictOpts.dontCreate: - export_name = "" + export = False skipped_list.append(ca.path) elif EW.fileConflictBehavior == ExportConflictOpts.createUnique: export_name = utils.unique_file(export_name) - if export_name: + if export: if ca.export_as_zip(export_name): success_count += 1 if EW.addToList: - new_archives_to_add.append(export_name) + new_archives_to_add.append(str(export_name)) if EW.deleteOriginal: archives_to_remove.append(ca) - os.unlink(ca.path) + ca.path.unlink(missing_ok=True) else: # last export failed, so remove the zip, if it exists failed_list.append(ca.path) - if os.path.lexists(export_name): - os.remove(export_name) + if export_name.exists(): + export_name.unlink(missing_ok=True) prog_dialog.hide() QtCore.QCoreApplication.processEvents() @@ -542,13 +542,13 @@ Have fun! summary = f"Successfully created {success_count} Zip archive(s)." if len(skipped_list) > 0: summary += ( - f"\n\nThe following {len(skipped_list)} RAR archive(s) were skipped due to file name conflicts:\n" + f"\n\nThe following {len(skipped_list)} archive(s) were skipped due to file name conflicts:\n" ) for f in skipped_list: summary += f"\t{f}\n" if len(failed_list) > 0: summary += ( - f"\n\nThe following {len(failed_list)} RAR archive(s) failed to export due to read/write errors:\n" + f"\n\nThe following {len(failed_list)} archive(s) failed to export due to read/write errors:\n" ) for f in failed_list: summary += f"\t{f}\n" @@ -947,8 +947,8 @@ Have fun! tmp = self.teTags.toPlainText() if tmp is not None: - def strip_list(i: list[str]) -> list[str]: - return [x.strip() for x in i] + def strip_list(i: Iterable[str]) -> set[str]: + return {x.strip() for x in i} md.tags = strip_list(tmp.split(",")) @@ -1017,7 +1017,7 @@ Have fun! self.query_online(autoselect=True) - def literal_search(self): + def literal_search(self) -> None: self.query_online(autoselect=False, literal=True) def query_online(self, autoselect: bool = False, literal: bool = False) -> None: @@ -1162,7 +1162,6 @@ Have fun! if self.save_data_style == MetaDataStyle.CIX: # loop over credit table, mark selected rows - r = 0 for r in range(self.twCredits.rowCount()): if str(self.twCredits.item(r, 1).text()).casefold() not in cix_credits: self.twCredits.item(r, 1).setBackground(inactive_brush) @@ -1173,7 +1172,6 @@ Have fun! if self.save_data_style == MetaDataStyle.CBI: # loop over credit table, make all active color - r = 0 for r in range(self.twCredits.rowCount()): self.twCredits.item(r, 0).setBackground(active_brush) self.twCredits.item(r, 1).setBackground(active_brush) @@ -1357,16 +1355,11 @@ Have fun! def open_web_link(self) -> None: if self.leWebLink is not None: web_link = self.leWebLink.text().strip() - valid = False try: result = urlparse(web_link) - valid = all([result.scheme in ["http", "https"], result.netloc]) - except ValueError: - pass - - if valid: + all([result.scheme in ["http", "https"], result.netloc]) webbrowser.open_new_tab(web_link) - else: + except ValueError: QtWidgets.QMessageBox.warning(self, self.tr("Web Link"), self.tr("Web Link is invalid.")) def show_settings(self) -> None: @@ -1374,8 +1367,7 @@ Have fun! settingswin = SettingsWindow(self, self.settings, self.talker_api) settingswin.setModal(True) settingswin.exec() - if settingswin.result(): - pass + settingswin.result() def set_app_position(self) -> None: if self.settings.last_main_window_width != 0: @@ -1750,7 +1742,7 @@ Have fun! ii.cover_page_index = md.get_cover_page_index_list()[0] if self.atprogdialog is not None: ii.set_cover_url_callback(self.atprogdialog.set_test_image) - ii.set_name_length_delta_threshold(dlg.name_length_match_tolerance) + ii.set_name_series_match_threshold(dlg.name_length_match_tolerance) matches: list[IssueResult] = ii.search() @@ -1950,7 +1942,7 @@ Have fun! QtWidgets.QMessageBox.information(self, self.tr("Auto-Tag Summary"), self.tr(summary)) logger.info(summary) - def exception(self, message): + def exception(self, message: str) -> None: errorbox = QtWidgets.QMessageBox() errorbox.setText(message) errorbox.exec() @@ -2067,6 +2059,7 @@ Have fun! "File Rename", "If you rename files now, unsaved data in the form will be lost. Are you sure?" ): + # TODO Check talker required dlg = RenameWindow(self, ca_list, self.load_data_style, self.settings, self.talker_api) dlg.setModal(True) if dlg.exec() and self.comic_archive is not None: diff --git a/comictaggerlib/ui/TemplateHelp.ui b/comictaggerlib/ui/TemplateHelp.ui index 8be434b..5ecc5b3 100644 --- a/comictaggerlib/ui/TemplateHelp.ui +++ b/comictaggerlib/ui/TemplateHelp.ui @@ -106,7 +106,7 @@ tr:nth-child(even) { <tr><td>{cover artist}</td><td>(string)</td></tr> <tr><td>{editor}</td><td>(string)</td></tr> <tr><td>{tags}</td><td>list of str</td></tr> - <tr><td>{pages}</td><td>list of dict({'Image': string(int), 'Type': string, 'Bookmark': string, 'DoublePage': string})</td></tr> + <tr><td>{pages}</td><td>list of dict({'Image': string(int), 'Type': string, 'Bookmark': string, 'DoublePage': boolean})</td></tr> <tr><td>{price}</td><td>float</td></tr> <tr><td>{is_version_of}</td><td>string</td></tr> <tr><td>{rights}</td><td>string</td></tr> diff --git a/comictaggerlib/ui/settingswindow.ui b/comictaggerlib/ui/settingswindow.ui index 55aaa80..2753b6f 100644 --- a/comictaggerlib/ui/settingswindow.ui +++ b/comictaggerlib/ui/settingswindow.ui @@ -139,7 +139,7 @@ - <html><head/><body><p>These settings are for the automatic issue identifier which searches online for matches. They will not affect &quot;manual&quot; searching.</p><p>Hover the mouse over an entry field for more info.</p></body></html> + <html><head/><body><p>These settings are for the automatic issue identifier which searches online for matches. </p><p>Hover the mouse over an entry field for more info.</p></body></html> true @@ -164,47 +164,28 @@ - Default Name Length Match Tolerance: + Default Name Match Ratio Threshold: Search: - - - - - 0 - 0 - + + + + Default Name Match Ratio Threshold: Auto-Identify: - - - 50 - 16777215 - - - - + + Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - + - Publisher Filter: + Always use Publisher Filter on "manual" searches: - - - - 0 - 0 - - - - - <html><head/><body><p>Applies the <span style=" font-weight:600;">Publisher Filter</span> on all searches.<br/>The search window has a dynamic toggle to show the unfiltered results.</p></body></html> @@ -214,10 +195,58 @@ - - + + - Always use Publisher Filter on "manual" searches: + Publisher Filter: + + + + + + + + 0 + 0 + + + + + + + + + 60 + 16777215 + + + + % + + + 1 + + + 100 + + + + + + + + 60 + 16777215 + + + + % + + + 1 + + + 100 @@ -591,6 +620,9 @@ By default only removes restricted characters and filenames for the current Oper + + + diff --git a/comictaggerlib/volumeselectionwindow.py b/comictaggerlib/volumeselectionwindow.py index b8c58c2..6eab33c 100644 --- a/comictaggerlib/volumeselectionwindow.py +++ b/comictaggerlib/volumeselectionwindow.py @@ -19,7 +19,7 @@ import itertools import logging from collections import deque -from PyQt5 import QtCore, QtWidgets, uic +from PyQt5 import QtCore, QtGui, QtWidgets, uic from PyQt5.QtCore import pyqtSignal from comicapi import utils @@ -43,7 +43,14 @@ class SearchThread(QtCore.QThread): searchComplete = pyqtSignal() progressUpdate = pyqtSignal(int, int) - def __init__(self, talker_api: ComicTalker, series_name: str, refresh: bool, literal: bool = False) -> None: + def __init__( + self, + talker_api: ComicTalker, + series_name: str, + refresh: bool, + literal: bool = False, + series_match_thresh: int = 90, + ) -> None: QtCore.QThread.__init__(self) self.talker_api = talker_api self.series_name = series_name @@ -52,6 +59,7 @@ class SearchThread(QtCore.QThread): self.ct_error = False self.ct_search_results: list[ComicVolume] = [] self.literal = literal + self.series_match_thresh = series_match_thresh def run(self) -> None: try: @@ -326,7 +334,9 @@ class VolumeSelectionWindow(QtWidgets.QDialog): self.progdialog.canceled.connect(self.search_canceled) self.progdialog.setModal(True) self.progdialog.setMinimumDuration(300) - self.search_thread = SearchThread(self.talker_api, self.series_name, refresh, self.literal) + self.search_thread = SearchThread( + self.talker_api, self.series_name, refresh, self.literal, self.settings.id_series_match_search_thresh + ) self.search_thread.searchComplete.connect(self.search_complete) self.search_thread.progressUpdate.connect(self.search_progress_update) self.search_thread.start() @@ -408,7 +418,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog): deques: list[deque[ComicVolume]] = [deque(), deque(), deque()] - def categorize(result): + def categorize(result: ComicVolume) -> int: # We don't remove anything on this one so that we only get exact matches if utils.sanitize_title(result["name"], True).casefold() == sanitized_no_articles: return 0 @@ -468,15 +478,16 @@ class VolumeSelectionWindow(QtWidgets.QDialog): self.twList.selectRow(0) self.twList.resizeColumnsToContents() - if not self.ct_search_results: - QtCore.QCoreApplication.processEvents() - QtWidgets.QMessageBox.information(self, "Search Result", "No matches found!") - QtCore.QTimer.singleShot(200, self.close_me) + def showEvent(self, event: QtGui.QShowEvent) -> None: + if not self.ct_search_results: + QtCore.QCoreApplication.processEvents() + QtWidgets.QMessageBox.information(self, "Search Result", "No matches found!") + QtCore.QTimer.singleShot(200, self.close_me) - if self.immediate_autoselect and self.ct_search_results: - # defer the immediate autoselect so this dialog has time to pop up - QtCore.QCoreApplication.processEvents() - QtCore.QTimer.singleShot(10, self.do_immediate_autoselect) + elif self.immediate_autoselect: + # defer the immediate autoselect so this dialog has time to pop up + QtCore.QCoreApplication.processEvents() + QtCore.QTimer.singleShot(10, self.do_immediate_autoselect) def do_immediate_autoselect(self) -> None: self.immediate_autoselect = False diff --git a/comictalker/comiccacher.py b/comictalker/comiccacher.py index cfc9d39..79b9d3b 100644 --- a/comictalker/comiccacher.py +++ b/comictalker/comiccacher.py @@ -84,7 +84,8 @@ class ComicCacher: + "image_url TEXT," + "description TEXT," + "timestamp DATE DEFAULT (datetime('now','localtime'))," - + "source_name TEXT NOT NULL)" + + "source_name TEXT NOT NULL," + + "aliases TEXT)" # Newline separated ) cur.execute( @@ -96,6 +97,7 @@ class ComicCacher: + "start_year INT," + "timestamp DATE DEFAULT (datetime('now','localtime')), " + "source_name TEXT NOT NULL," + + "aliases TEXT," # Newline separated + "PRIMARY KEY (id, source_name))" ) @@ -105,6 +107,7 @@ class ComicCacher: + "url_list TEXT," + "timestamp DATE DEFAULT (datetime('now','localtime')), " + "source_name TEXT NOT NULL," + + "aliases TEXT," # Newline separated + "PRIMARY KEY (issue_id, source_name))" ) @@ -121,11 +124,11 @@ class ComicCacher: + "description TEXT," + "timestamp DATE DEFAULT (datetime('now','localtime')), " + "source_name TEXT NOT NULL," + + "aliases TEXT," # Newline separated + "PRIMARY KEY (id, source_name))" ) def add_search_results(self, source_name: str, search_term: str, ct_search_results: list[ComicVolume]) -> None: - con = lite.connect(self.db_file) with con: @@ -133,16 +136,18 @@ class ComicCacher: cur = con.cursor() # remove all previous entries with this search term - cur.execute("DELETE FROM VolumeSearchCache WHERE search_term = ?", [search_term.casefold()]) + cur.execute( + "DELETE FROM VolumeSearchCache WHERE search_term = ? AND source_name = ?", + [search_term.casefold(), source_name], + ) # now add in new results for record in ct_search_results: cur.execute( "INSERT INTO VolumeSearchCache " - + "(source_name, search_term, id, name, start_year, publisher, count_of_issues, image_url, " - + "description) " - + "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)", + + "(source_name, search_term, id, name, start_year, publisher, count_of_issues, image_url, description, aliases) " + + "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", ( source_name, search_term.casefold(), @@ -153,11 +158,11 @@ class ComicCacher: record["count_of_issues"], record["image"], record["description"], + record["aliases"], ), ) def get_search_results(self, source_name: str, search_term: str) -> list[ComicVolume]: - results = [] con = lite.connect(self.db_file) with con: @@ -184,7 +189,7 @@ class ComicCacher: description=record[7], publisher=record[4], image=record[6], - # "source": record[9], # Not needed? + aliases=record[10], ) results.append(result) @@ -202,7 +207,7 @@ class ComicCacher: # remove all previous entries with this search term cur.execute("DELETE FROM AltCovers WHERE issue_id=? AND source_name=?", [issue_id, source_name]) - url_list_str = ", ".join(url_list) + url_list_str = ",".join(url_list) # now add in new record cur.execute( "INSERT INTO AltCovers (source_name, issue_id, url_list) VALUES(?, ?, ?)", @@ -227,16 +232,12 @@ class ComicCacher: return [] url_list_str = row[0] - if len(url_list_str) == 0: + if not url_list_str: return [] - raw_list = url_list_str.split(",") - url_list = [] - for item in raw_list: - url_list.append(str(item).strip()) + url_list = str(url_list_str).split(",") return url_list def add_volume_info(self, source_name: str, volume_record: ComicVolume) -> None: - con = lite.connect(self.db_file) with con: @@ -253,11 +254,11 @@ class ComicCacher: "count_of_issues": volume_record["count_of_issues"], "start_year": volume_record["start_year"], "timestamp": timestamp, + "aliases": volume_record["aliases"], } self.upsert(cur, "volumes", data) def add_volume_issues_info(self, source_name: str, volume_id: int, volume_issues: list[ComicIssue]) -> None: - con = lite.connect(self.db_file) with con: @@ -280,11 +281,11 @@ class ComicCacher: "thumb_url": issue["image_thumb"], "description": issue["description"], "timestamp": timestamp, + "aliases": issue["aliases"], } self.upsert(cur, "issues", data) def get_volume_info(self, volume_id: int, source_name: str) -> ComicVolume | None: - result: ComicVolume | None = None con = lite.connect(self.db_file) @@ -298,7 +299,8 @@ class ComicCacher: # fetch cur.execute( - "SELECT source_name,id,name,publisher,count_of_issues,start_year FROM Volumes WHERE id=? AND source_name=?", + "SELECT source_name,id,name,publisher,count_of_issues,start_year,aliases FROM Volumes" + " WHERE id=? AND source_name=?", [volume_id, source_name], ) @@ -309,18 +311,17 @@ class ComicCacher: # since ID is primary key, there is only one row result = ComicVolume( - # source_name: row[0], id=row[1], name=row[2], count_of_issues=row[4], start_year=row[5], publisher=row[3], + aliases=row[6], ) return result def get_volume_issues_info(self, volume_id: int, source_name: str) -> list[ComicIssue]: - con = lite.connect(self.db_file) with con: cur = con.cursor() @@ -336,8 +337,8 @@ class ComicCacher: cur.execute( ( - "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,super_url,thumb_url,description" - + " FROM Issues WHERE volume_id=? AND source_name=?" + "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,super_url,thumb_url,description,aliases" + " FROM Issues WHERE volume_id=? AND source_name=?" ), [volume_id, source_name], ) @@ -352,9 +353,9 @@ class ComicCacher: site_detail_url=row[4], cover_date=row[5], image=row[6], - image_thumb=row[7], description=row[8], volume={"id": volume_id, "name": row[2]}, + aliases=row[9], ) results.append(record) @@ -362,7 +363,13 @@ class ComicCacher: return results def add_issue_select_details( - self, issue_id: int, image_url: str, thumb_image_url: str, cover_date: str, site_detail_url: str + self, + source_name: str, + issue_id: int, + image_url: str, + thumb_image_url: str, + cover_date: str, + site_detail_url: str, ) -> None: con = lite.connect(self.db_file) @@ -374,6 +381,7 @@ class ComicCacher: data = { "id": issue_id, + "source_name": source_name, "super_url": image_url, "thumb_url": thumb_image_url, "cover_date": cover_date, @@ -390,7 +398,7 @@ class ComicCacher: con.text_factory = str cur.execute( - "SELECT super_url,thumb_url,cover_date,site_detail_url FROM Issues WHERE id=? " + "AND source_name=?", + "SELECT super_url,thumb_url,cover_date,site_detail_url FROM Issues WHERE id=? AND source_name=?", [issue_id, source_name], ) row = cur.fetchone() diff --git a/comictalker/resulttypes.py b/comictalker/resulttypes.py index 967e0e2..cac96d3 100644 --- a/comictalker/resulttypes.py +++ b/comictalker/resulttypes.py @@ -11,6 +11,7 @@ class SelectDetails(TypedDict): class ComicVolume(TypedDict, total=False): + aliases: str # Newline separated count_of_issues: int description: str id: Required[int] @@ -21,6 +22,7 @@ class ComicVolume(TypedDict, total=False): class ComicIssue(TypedDict, total=False): + aliases: str # Newline separated cover_date: str description: str id: int diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index 82406b5..2241fd9 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -99,6 +99,7 @@ class CVPersonCredits(TypedDict): class CVVolumeResults(TypedDict): + aliases: str count_of_issues: int description: str id: int @@ -128,6 +129,7 @@ class CVResult(TypedDict): class CVIssuesResults(TypedDict): + aliases: str cover_date: str description: str id: int @@ -186,7 +188,7 @@ class CVIssueDetailResults(TypedDict): class ComicVineTalker(TalkerBase): - def __init__(self) -> None: + def __init__(self, series_match_thresh: int = 90) -> None: super().__init__() self.source_details = source_details = SourceDetails( name="Comic Vine", @@ -256,11 +258,9 @@ class ComicVineTalker(TalkerBase): ), }, ) - # Identity name for the information source self.source_name = self.source_details.id self.source_name_friendly = self.source_details.name - # Overwrite any source_details.options that have saved settings source_settings = ComicTaggerSettings.get_source_settings( self.source_name, self.source_details.settings_options @@ -268,7 +268,6 @@ class ComicVineTalker(TalkerBase): if not source_settings: # No saved settings, do something? ... - self.wait_for_rate_limit = source_details.settings_options["wait_on_ratelimit"]["value"] self.wait_for_rate_limit_time = source_details.settings_options["ratelimit_waittime"]["value"] @@ -277,6 +276,8 @@ class ComicVineTalker(TalkerBase): self.api_key = source_details.settings_options["api_key"]["value"] self.api_base_url = source_details.settings_options["url_root"]["value"] + self.series_match_thresh = series_match_thresh + # Used for async cover loading etc. if qt_available: self.nam = QtNetwork.QNetworkAccessManager() @@ -387,6 +388,7 @@ class ComicVineTalker(TalkerBase): formatted_results.append( ComicVolume( + aliases=record["aliases"], count_of_issues=record.get("count_of_issues", 0), description=record.get("description", ""), id=record["id"], @@ -412,6 +414,7 @@ class ComicVineTalker(TalkerBase): formatted_results.append( ComicIssue( + aliases=record["aliases"], cover_date=record.get("cover_date", ""), description=record.get("description", ""), id=record["id"], @@ -452,7 +455,7 @@ class ComicVineTalker(TalkerBase): "format": "json", "resources": "volume", "query": search_series_name, - "field_list": "volume,name,id,start_year,publisher,image,description,count_of_issues", + "field_list": "volume,name,id,start_year,publisher,image,description,count_of_issues,aliases", "page": 1, "limit": 100, } @@ -470,10 +473,8 @@ class ComicVineTalker(TalkerBase): # ORed together, and we get thousands of results. Good news is the # results are sorted by relevance, so we can be smart about halting the search. # 1. Don't fetch more than some sane amount of pages. - max_results = 500 - # 2. Halt when not all of our search terms are present in a result - # 3. Halt when the results contain more (plus threshold) words than our search - result_word_count_max = len(search_series_name.split()) + 3 + # 2. Halt when any result on the current page is less than or equal to a set ratio using thefuzz + max_results = 500 # 5 pages total_result_count = min(total_result_count, max_results) @@ -488,23 +489,14 @@ class ComicVineTalker(TalkerBase): callback(current_result_count, total_result_count) # see if we need to keep asking for more pages... - stop_searching = False while current_result_count < total_result_count: if not literal: - # Sanitize the series name for comicvine searching, comicvine search ignore symbols - last_result = utils.sanitize_title(search_results[-1]["name"]) - - # See if the last result's name has all the of the search terms. - # If not, break out of this, loop, we're done. - for term in search_series_name.split(): - if term not in last_result: - stop_searching = True - break - - # Also, stop searching when the word count of last results is too much longer than our search terms list - if len(last_result) > result_word_count_max: - stop_searching = True + # Stop searching once any entry falls below the threshold + stop_searching = any( + not utils.titles_match(search_series_name, volume["name"], self.series_match_thresh) + for volume in cast(list[CVVolumeResults], cv_response["results"]) + ) if stop_searching: break @@ -522,17 +514,6 @@ class ComicVineTalker(TalkerBase): if callback is not None: callback(current_result_count, total_result_count) - # Literal searches simply return the matches no extra processing is doneo - if not literal: - # Remove any search results that don't contain all the search terms (iterate backwards for easy removal) - for record in reversed(search_results): - # Sanitize the series name for comicvine searching, comicvine search ignore symbols - record_name = utils.sanitize_title(record["name"]) - for term in search_series_name.split(): - if term not in record_name: - search_results.remove(record) - break - # Format result to ComicSearchResult formatted_search_results = self.format_search_results(search_results) @@ -573,7 +554,7 @@ class ComicVineTalker(TalkerBase): params = { "api_key": self.api_key, "format": "json", - "field_list": "name,id,start_year,publisher,count_of_issues", + "field_list": "name,id,start_year,publisher,count_of_issues,aliases", } cv_response = self.get_cv_content(volume_url, params) @@ -597,7 +578,7 @@ class ComicVineTalker(TalkerBase): "api_key": self.api_key, "filter": "volume:" + str(series_id), "format": "json", - "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description", + "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases", "offset": 0, } cv_response = self.get_cv_content(self.api_base_url + "/issues/", params) @@ -644,11 +625,11 @@ class ComicVineTalker(TalkerBase): params: dict[str, str | int] = { "api_key": self.api_key, "format": "json", - "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description", + "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases", "filter": flt, } - cv_response = self.get_cv_content(self.api_base_url + "/issues", params) + cv_response = self.get_cv_content(self.api_base_url + "/issues/", params) current_result_count = cv_response["number_of_page_results"] total_result_count = cv_response["number_of_total_results"] @@ -680,7 +661,7 @@ class ComicVineTalker(TalkerBase): f_record = None for record in issues_list_results: - if IssueString(issue_number).as_string() is None: + if not IssueString(issue_number).as_string(): issue_number = "1" if ( IssueString(record["issue_number"]).as_string().casefold() @@ -710,6 +691,7 @@ class ComicVineTalker(TalkerBase): volume_results = self.fetch_partial_volume_data(issue_results["volume"]["id"]) + # Now, map the Comic Vine data to generic metadata md = self.map_cv_data_to_metadata(volume_results, issue_results) md.is_empty = False return md @@ -949,12 +931,12 @@ class ComicVineTalker(TalkerBase): cv_response = self.get_cv_content(issue_url, params) results = cast(CVIssueDetailResults, cv_response["results"]) - details: SelectDetails = { - "image_url": results["image"]["super_url"], - "thumb_image_url": results["image"]["thumb_url"], - "cover_date": results["cover_date"], - "site_detail_url": results["site_detail_url"], - } + details = SelectDetails( + image_url=results["image"]["super_url"], + thumb_image_url=results["image"]["thumb_url"], + cover_date=results["cover_date"], + site_detail_url=results["site_detail_url"], + ) if ( details["image_url"] is not None @@ -981,7 +963,7 @@ class ComicVineTalker(TalkerBase): self, issue_id: int, image_url: str, thumb_url: str, cover_date: str, page_url: str ) -> None: cvc = ComicCacher() - cvc.add_issue_select_details(issue_id, image_url, thumb_url, cover_date, page_url) + cvc.add_issue_select_details(self.source_name, issue_id, image_url, thumb_url, cover_date, page_url) def fetch_alternate_cover_urls(self, issue_id: int, issue_page_url: str) -> list[str]: url_list = self.fetch_cached_alternate_cover_urls(issue_id) diff --git a/requirements.txt b/requirements.txt index f39d387..037b3ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,6 @@ py7zr pycountry requests==2.* text2digits +thefuzz[speedup]>=0.19.0 typing_extensions wordninja diff --git a/testing/comicdata.py b/testing/comicdata.py new file mode 100644 index 0000000..9be6477 --- /dev/null +++ b/testing/comicdata.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +import comicapi.genericmetadata +import comictaggerlib.resulttypes +from comicapi import utils + +search_results = [ + comictaggerlib.resulttypes.CVVolumeResults( + count_of_issues=1, + description="this is a description", + id=1, + image={"super_url": "https://test.org/image/1"}, + name="test", + publisher=comictaggerlib.resulttypes.CVPublisher(name="test"), + start_year="", # This is currently submitted as a string and returned as an int + aliases=None, + ), + comictaggerlib.resulttypes.CVVolumeResults( + count_of_issues=1, + description="this is a description", + id=1, + image={"super_url": "https://test.org/image/2"}, + name="test 2", + publisher=comictaggerlib.resulttypes.CVPublisher(name="test"), + start_year="", # This is currently submitted as a string and returned as an int + aliases=None, + ), +] + +alt_covers = [ + {"issue_id": 1, "url_list": ["https://test.org/image/1"]}, + {"issue_id": 2, "url_list": ["https://test.org/image/2"]}, +] + +select_details = [ + { + "issue_id": 1, + "image_url": "https://test.org/image/1", + "thumb_image_url": "https://test.org/thumb/1", + "cover_date": "1998", + "site_detail_url": "https://test.org/1", + }, + { + "issue_id": 2, + "image_url": "https://test.org/image/2", + "thumb_image_url": "https://test.org/thumb/2", + "cover_date": "1998", + "site_detail_url": "https://test.org/2", + }, +] + +# Used to test GenericMetadata.overlay +metadata = [ + ( + comicapi.genericmetadata.GenericMetadata(series="test", issue="2", title="never"), + comicapi.genericmetadata.md_test.replace(series="test", issue="2", title="never"), + ), + ( + comicapi.genericmetadata.GenericMetadata(series="", issue="2", title="never"), + comicapi.genericmetadata.md_test.replace(series=None, issue="2", title="never"), + ), + ( + comicapi.genericmetadata.GenericMetadata(), + comicapi.genericmetadata.md_test.copy(), + ), +] + +metadata_keys = [ + ( + comicapi.genericmetadata.GenericMetadata(), + { + "issue_count": 6, + "issue_number": "1", + "month": 10, + "series": "Cory Doctorow's Futuristic Tales of the Here and Now", + "year": 2007, + }, + ), + ( + comicapi.genericmetadata.GenericMetadata(series="test"), + { + "issue_count": 6, + "issue_number": "1", + "month": 10, + "series": "test", + "year": 2007, + }, + ), + ( + comicapi.genericmetadata.GenericMetadata(series="test", issue="3"), + { + "issue_count": 6, + "issue_number": "3", + "month": 10, + "series": "test", + "year": 2007, + }, + ), +] + +credits = [ + ("writer", "Dara Naraghi"), + ("writeR", "Dara Naraghi"), +] + +imprints = [ + ("marvel", ("", "Marvel")), + ("marvel comics", ("", "Marvel")), + ("aircel", ("Aircel Comics", "Marvel")), +] + +additional_imprints = [ + ("test", ("Test", "Marvel")), + ("temp", ("Temp", "DC Comics")), +] + +all_imprints = imprints + additional_imprints + +seed_imprints = { + "Marvel": utils.ImprintDict( + "Marvel", + { + "marvel comics": "", + "aircel": "Aircel Comics", + }, + ) +} + +additional_seed_imprints = { + "Marvel": utils.ImprintDict("Marvel", {"test": "Test"}), + "DC Comics": utils.ImprintDict("DC Comics", {"temp": "Temp"}), +} + +all_seed_imprints = { + "Marvel": seed_imprints["Marvel"].copy(), + "DC Comics": additional_seed_imprints["DC Comics"].copy(), +} +all_seed_imprints["Marvel"].update(additional_seed_imprints["Marvel"]) + +conflicting_seed_imprints = {"Marvel": {"test": "Never"}} diff --git a/testing/comicvine.py b/testing/comicvine.py new file mode 100644 index 0000000..c2ca298 --- /dev/null +++ b/testing/comicvine.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +from typing import Any + +import comicapi.genericmetadata +import comictaggerlib.comicvinetalker + + +def filter_field_list(cv_result, kwargs): + if "field_list" in kwargs["params"]: + for key in list(cv_result.keys()): + if key not in kwargs["params"]["field_list"]: + del cv_result[key] + + +cv_issue_result: dict[str, Any] = { + "error": "OK", + "limit": 1, + "offset": 0, + "number_of_page_results": 1, + "number_of_total_results": 1, + "status_code": 1, + "results": { + "aliases": None, + "api_detail_url": "https://comicvine.gamespot.com/api/issue/4000-140529/", + "associated_images": [], + "character_credits": [], + "character_died_in": [], + "concept_credits": [], + "cover_date": "2007-10-01", + "date_added": "2008-10-16 05:25:47", + "date_last_updated": "2010-06-09 18:05:49", + "deck": None, + "description": "For 12-year-old Anda, getting paid real money to kill the characters of players who were cheating in her favorite online computer game was a win-win situation. Until she found out who was paying her, and what those characters meant to the livelihood of children around the world.", + "first_appearance_characters": None, + "first_appearance_concepts": None, + "first_appearance_locations": None, + "first_appearance_objects": None, + "first_appearance_storyarcs": None, + "first_appearance_teams": None, + "has_staff_review": False, + "id": 140529, + "image": { + "icon_url": "https://comicvine.gamespot.com/a/uploads/square_avatar/0/574/585444-109004_20080707014047_large.jpg", + "medium_url": "https://comicvine.gamespot.com/a/uploads/scale_medium/0/574/585444-109004_20080707014047_large.jpg", + "screen_url": "https://comicvine.gamespot.com/a/uploads/screen_medium/0/574/585444-109004_20080707014047_large.jpg", + "screen_large_url": "https://comicvine.gamespot.com/a/uploads/screen_kubrick/0/574/585444-109004_20080707014047_large.jpg", + "small_url": "https://comicvine.gamespot.com/a/uploads/scale_small/0/574/585444-109004_20080707014047_large.jpg", + "super_url": "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg", + "thumb_url": "https://comicvine.gamespot.com/a/uploads/scale_avatar/0/574/585444-109004_20080707014047_large.jpg", + "tiny_url": "https://comicvine.gamespot.com/a/uploads/square_mini/0/574/585444-109004_20080707014047_large.jpg", + "original_url": "https://comicvine.gamespot.com/a/uploads/original/0/574/585444-109004_20080707014047_large.jpg", + "image_tags": "All Images", + }, + "issue_number": "1", + "location_credits": [], + "name": "Anda's Game", + "object_credits": [], + "person_credits": [ + { + "api_detail_url": "https://comicvine.gamespot.com/api/person/4040-56410/", + "id": 56410, + "name": "Dara Naraghi", + "site_detail_url": "https://comicvine.gamespot.com/dara-naraghi/4040-56410/", + "role": "writer", + }, + { + "api_detail_url": "https://comicvine.gamespot.com/api/person/4040-57222/", + "id": 57222, + "name": "Esteve Polls", + "site_detail_url": "https://comicvine.gamespot.com/esteve-polls/4040-57222/", + "role": "artist", + }, + { + "api_detail_url": "https://comicvine.gamespot.com/api/person/4040-48472/", + "id": 48472, + "name": "Neil Uyetake", + "site_detail_url": "https://comicvine.gamespot.com/neil-uyetake/4040-48472/", + "role": "letterer", + }, + { + "api_detail_url": "https://comicvine.gamespot.com/api/person/4040-5329/", + "id": 5329, + "name": "Sam Kieth", + "site_detail_url": "https://comicvine.gamespot.com/sam-kieth/4040-5329/", + "role": "cover", + }, + { + "api_detail_url": "https://comicvine.gamespot.com/api/person/4040-58534/", + "id": 58534, + "name": "Ted Adams", + "site_detail_url": "https://comicvine.gamespot.com/ted-adams/4040-58534/", + "role": "editor", + }, + ], + "site_detail_url": "https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/", + "store_date": None, + "story_arc_credits": [], + "team_credits": [], + "team_disbanded_in": [], + "volume": { + "api_detail_url": "https://comicvine.gamespot.com/api/volume/4050-23437/", + "id": 23437, + "name": "Cory Doctorow's Futuristic Tales of the Here and Now", + "site_detail_url": "https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4050-23437/", + }, + }, + "version": "1.0", +} + +cv_volume_result: dict[str, Any] = { + "error": "OK", + "limit": 1, + "offset": 0, + "number_of_page_results": 1, + "number_of_total_results": 1, + "status_code": 1, + "results": { + "aliases": None, + "api_detail_url": "https://comicvine.gamespot.com/api/volume/4050-23437/", + "count_of_issues": 6, + "date_added": "2008-10-16 05:25:47", + "date_last_updated": "2012-01-18 17:21:57", + "deck": None, + "description": "

Writer and BoingBoing.net co-editor Cory Doctorow has won acclaim for his science-fiction writing as well as his Creative Commons presentation of his material. Now, IDW Publishing is proud to present six standalone stories adapted from Doctorow's work, each featuring cover art by some of comics' top talents.

", + "id": 23437, + "image": { + "icon_url": "https://comicvine.gamespot.com/a/uploads/square_avatar/0/574/585444-109004_20080707014047_large.jpg", + "medium_url": "https://comicvine.gamespot.com/a/uploads/scale_medium/0/574/585444-109004_20080707014047_large.jpg", + "screen_url": "https://comicvine.gamespot.com/a/uploads/screen_medium/0/574/585444-109004_20080707014047_large.jpg", + "screen_large_url": "https://comicvine.gamespot.com/a/uploads/screen_kubrick/0/574/585444-109004_20080707014047_large.jpg", + "small_url": "https://comicvine.gamespot.com/a/uploads/scale_small/0/574/585444-109004_20080707014047_large.jpg", + "super_url": "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg", + "thumb_url": "https://comicvine.gamespot.com/a/uploads/scale_avatar/0/574/585444-109004_20080707014047_large.jpg", + "tiny_url": "https://comicvine.gamespot.com/a/uploads/square_mini/0/574/585444-109004_20080707014047_large.jpg", + "original_url": "https://comicvine.gamespot.com/a/uploads/original/0/574/585444-109004_20080707014047_large.jpg", + "image_tags": "All Images", + }, + "name": "Cory Doctorow's Futuristic Tales of the Here and Now", + "publisher": { + "api_detail_url": "https://comicvine.gamespot.com/api/publisher/4010-1190/", + "id": 1190, + "name": "IDW Publishing", + }, + "site_detail_url": "https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4050-23437/", + "start_year": "2007", + }, + "version": "1.0", +} +cv_not_found = { + "error": "Object Not Found", + "limit": 0, + "offset": 0, + "number_of_page_results": 0, + "number_of_total_results": 0, + "status_code": 101, + "results": [], +} +date = comictaggerlib.comicvinetalker.ComicVineTalker().parse_date_str(cv_issue_result["results"]["cover_date"]) + +cv_md = comicapi.genericmetadata.GenericMetadata( + is_empty=False, + tag_origin=None, + series=cv_issue_result["results"]["volume"]["name"], + issue=cv_issue_result["results"]["issue_number"], + title=cv_issue_result["results"]["name"], + publisher=cv_volume_result["results"]["publisher"]["name"], + month=date[1], + year=date[2], + day=date[0], + issue_count=None, + volume=None, + genre=None, + language=None, + comments=comictaggerlib.comicvinetalker.ComicVineTalker().cleanup_html( + cv_issue_result["results"]["description"], False + ), + volume_count=None, + critical_rating=None, + country=None, + alternate_series=None, + alternate_number=None, + alternate_count=None, + imprint=None, + notes="Tagged with ComicTagger 1.4.4a9.dev20 using info from Comic Vine on 2022-07-11 17:42:41. [Issue ID 140529]", + web_link=cv_issue_result["results"]["site_detail_url"], + format=None, + manga=None, + black_and_white=None, + page_count=None, + maturity_rating=None, + story_arc=None, + series_group=None, + scan_info=None, + characters="", + teams="", + locations="", + credits=[ + comicapi.genericmetadata.CreditMetadata(person=x["name"], role=x["role"].title(), primary=False) + for x in cv_issue_result["results"]["person_credits"] + ], + tags=set(), + pages=[], + price=None, + is_version_of=None, + rights=None, + identifier=None, + last_mark=None, + cover_image=None, +) + + +class MockResponse: + """Mocks the response object from requests""" + + def __init__(self, result: dict[str, Any], content=None) -> None: + self.status_code = 200 + self.result = result + self.content = content + + def json(self) -> dict[str, list]: + return self.result diff --git a/tests/data/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz b/testing/data/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz similarity index 100% rename from tests/data/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz rename to testing/data/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz diff --git a/tests/data/fake_cbr.cbr b/testing/data/fake_cbr.cbr similarity index 100% rename from tests/data/fake_cbr.cbr rename to testing/data/fake_cbr.cbr diff --git a/testing/filenames.py b/testing/filenames.py index 1e4c3ee..57902a6 100644 --- a/testing/filenames.py +++ b/testing/filenames.py @@ -11,6 +11,16 @@ format is """ from __future__ import annotations +import os +import os.path +import pathlib +from contextlib import nullcontext as does_not_raise + +import pytest + +datadir = pathlib.Path(__file__).parent / "data" +cbz_path = datadir / "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz" + fnames = [ ( "batman 3 title (DC).cbz", @@ -731,89 +741,125 @@ rnames = [ False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz", + does_not_raise(), ), ( "{series} #{issue} - {title} ({year})({price})", # price should be none, test no space between ')(' False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz", + does_not_raise(), ), ( "{series} #{issue} - {title} ({year}) ({price})", # price should be none, test double space ') (' False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz", + does_not_raise(), ), ( "{series} #{issue} - {title} ({year})", False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz", + does_not_raise(), ), ( "{series}: {title} #{issue} ({year})", # on windows the ':' is replaced False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game #001 (2007).cbz", + does_not_raise(), ), ( "{series}: {title} #{issue} ({year})", # on linux the ':' is preserved False, "Linux", "Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz", + does_not_raise(), ), ( "{publisher}/ {series} #{issue} - {title} ({year})", # leading whitespace is removed when moving True, "universal", "IDW Publishing/Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz", + does_not_raise(), ), ( "{publisher}/ {series} #{issue} - {title} ({year})", # leading whitespace is removed when only renaming False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz", + does_not_raise(), ), ( r"{publisher}\ {series} #{issue} - {title} ({year})", # backslashes separate directories False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz", + does_not_raise(), ), ( "{series} # {issue} - {title} ({year})", # double spaces are reduced to one False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now # 001 - Anda's Game (2007).cbz", + does_not_raise(), ), ( "{series} # {issue} - {locations} ({year})", False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now # 001 - lonely cottage (2007).cbz", + does_not_raise(), ), ( "{series} #{issue} - {title} - {WriteR}, {EDITOR} ({year})", # fields are case in-sensitive False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game - Dara Naraghi, Ted Adams (2007).cbz", + does_not_raise(), ), ( "{series} v{price} #{issue} ({year})", # Remove previous text if value is "" False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 (2007).cbz", + does_not_raise(), ), ( "{series} {price} #{issue} ({year})", # Ensure that a single space remains False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now #001 (2007).cbz", + does_not_raise(), ), ( "{series} - {title}{price} #{issue} ({year})", # Ensure removal before None values only impacts literal text False, "universal", "Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game #001 (2007).cbz", + does_not_raise(), + ), + ( + "{series} - {title} {test} #{issue} ({year})", # Test non-existent key + False, + "universal", + "Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game {test} #001 (2007).cbz", + does_not_raise(), + ), + ( + "{series} - {title} {1} #{issue} ({year})", # Test numeric key + False, + "universal", + "Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game {test} #001 (2007).cbz", + pytest.raises(ValueError), ), ] + +rfnames = [ + (None, lambda x: x.path.parent.absolute()), + ("", lambda x: pathlib.Path(os.getcwd())), + ("test", lambda x: (pathlib.Path(os.getcwd()) / "test")), + (pathlib.Path(os.getcwd()) / "test", lambda x: pathlib.Path(os.getcwd()) / "test"), +] diff --git a/tests/autoimprint_test.py b/tests/autoimprint_test.py index 6fae3b6..bd76702 100644 --- a/tests/autoimprint_test.py +++ b/tests/autoimprint_test.py @@ -3,59 +3,7 @@ from __future__ import annotations import pytest from comicapi import utils - -imprints = [ - ("marvel", ("", "Marvel")), - ("marvel comics", ("", "Marvel")), - ("aircel", ("Aircel Comics", "Marvel")), -] - -additional_imprints = [ - ("test", ("Test", "Marvel")), - ("temp", ("Temp", "DC Comics")), -] - -all_imprints = imprints + additional_imprints - -seed = { - "Marvel": utils.ImprintDict( - "Marvel", - { - "marvel comics": "", - "aircel": "Aircel Comics", - }, - ) -} - -additional_seed = { - "Marvel": utils.ImprintDict("Marvel", {"test": "Test"}), - "DC Comics": utils.ImprintDict("DC Comics", {"temp": "Temp"}), -} - -all_seed = { - "Marvel": seed["Marvel"].copy(), - "DC Comics": additional_seed["DC Comics"].copy(), -} -all_seed["Marvel"].update(additional_seed["Marvel"]) - -conflicting_seed = {"Marvel": {"test": "Never"}} - - -# manually seeds publishers -@pytest.fixture -def seed_publishers(monkeypatch): - publisher_seed = {} - for publisher, imprint in seed.items(): - publisher_seed[publisher] = imprint - monkeypatch.setattr(utils, "publishers", publisher_seed) - - -@pytest.fixture -def seed_all_publishers(monkeypatch): - publisher_seed = {} - for publisher, imprint in all_seed.items(): - publisher_seed[publisher] = imprint - monkeypatch.setattr(utils, "publishers", publisher_seed) +from testing.comicdata import additional_seed_imprints, all_imprints, conflicting_seed_imprints, imprints, seed_imprints # test that that an empty list returns the input unchanged @@ -73,14 +21,14 @@ def test_get_publisher(publisher: str, expected: tuple[str, str], seed_publisher # tests that update_publishers will initially set values @pytest.mark.parametrize("publisher, expected", imprints) def test_set_publisher(publisher: str, expected: tuple[str, str]): - utils.update_publishers(seed) + utils.update_publishers(seed_imprints) assert expected == utils.get_publisher(publisher) # tests that update_publishers will add to existing values @pytest.mark.parametrize("publisher, expected", all_imprints) def test_update_publisher(publisher: str, expected: tuple[str, str], seed_publishers): - utils.update_publishers(additional_seed) + utils.update_publishers(additional_seed_imprints) assert expected == utils.get_publisher(publisher) @@ -88,6 +36,6 @@ def test_update_publisher(publisher: str, expected: tuple[str, str], seed_publis def test_conflict_publisher(seed_all_publishers): assert ("Test", "Marvel") == utils.get_publisher("test") - utils.update_publishers(conflicting_seed) + utils.update_publishers(conflicting_seed_imprints) assert ("Never", "Marvel") == utils.get_publisher("test") diff --git a/tests/comicarchive_test.py b/tests/comicarchive_test.py index 25618a8..a57f655 100644 --- a/tests/comicarchive_test.py +++ b/tests/comicarchive_test.py @@ -1,20 +1,17 @@ from __future__ import annotations -import pathlib import shutil import pytest import comicapi.comicarchive import comicapi.genericmetadata - -thisdir = pathlib.Path(__file__).parent -cbz_path = thisdir / "data" / "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz" +from testing.filenames import datadir @pytest.mark.xfail(not comicapi.comicarchive.rar_support, reason="rar support") def test_getPageNameList(): - c = comicapi.comicarchive.ComicArchive(thisdir / "data" / "fake_cbr.cbr") + c = comicapi.comicarchive.ComicArchive(datadir / "fake_cbr.cbr") pageNameList = c.get_page_name_list() assert pageNameList == [ @@ -27,67 +24,42 @@ def test_getPageNameList(): ] -def test_set_default_page_list(tmp_path): - md = comicapi.genericmetadata.GenericMetadata() - md.overlay(comicapi.genericmetadata.md_test) - md.pages = [] - md.set_default_page_list(len(comicapi.genericmetadata.md_test.pages)) - - assert isinstance(md.pages[0]["Image"], int) - - -def test_page_type_read(): - c = comicapi.comicarchive.ComicArchive(cbz_path) - md = c.read_cix() +def test_page_type_read(cbz): + md = cbz.read_cix() assert isinstance(md.pages[0]["Type"], str) -def test_metadata_read(): - c = comicapi.comicarchive.ComicArchive( - thisdir / "data" / "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz" - ) - md = c.read_cix() +def test_metadata_read(cbz): + md = cbz.read_cix() assert md == comicapi.genericmetadata.md_test -def test_save_cix(tmp_path): - comic_path = tmp_path / cbz_path.name - shutil.copy(cbz_path, comic_path) +def test_save_cix(tmp_comic): + md = tmp_comic.read_cix() + md.set_default_page_list(tmp_comic.get_number_of_pages()) - c = comicapi.comicarchive.ComicArchive(comic_path) - md = c.read_cix() - md.set_default_page_list(c.get_number_of_pages()) + assert tmp_comic.write_cix(md) - assert c.write_cix(md) - - md = c.read_cix() + md = tmp_comic.read_cix() -def test_page_type_save(tmp_path): - comic_path = tmp_path / cbz_path.name - - shutil.copy(cbz_path, comic_path) - - c = comicapi.comicarchive.ComicArchive(comic_path) - md = c.read_cix() +def test_page_type_save(tmp_comic): + md = tmp_comic.read_cix() t = md.pages[0] t["Type"] = "" - assert c.write_cix(md) + assert tmp_comic.write_cix(md) - md = c.read_cix() + md = tmp_comic.read_cix() -def test_invalid_zip(tmp_path): - comic_path = tmp_path / cbz_path.name +def test_invalid_zip(tmp_comic): + with open(tmp_comic.path, mode="b+r") as f: + f.write(b"PK\000\000") - with open(cbz_path, mode="b+r") as f: - comic_path.write_bytes(b"PK\003\004" + f.read()[4:].replace(b"PK\003\004", b"PK\000\000")) - - c = comicapi.comicarchive.ComicArchive(comic_path) - - assert not c.write_cix(comicapi.genericmetadata.md_test) + result = tmp_comic.write_cix(comicapi.genericmetadata.md_test) + assert not result archivers = [ @@ -102,10 +74,9 @@ archivers = [ @pytest.mark.parametrize("archiver", archivers) -def test_copy_to_archive(archiver, tmp_path): - comic_path = tmp_path / cbz_path.with_suffix("").name +def test_copy_from_archive(archiver, tmp_path, cbz): + comic_path = tmp_path / cbz.path.with_suffix("").name - cbz = comicapi.comicarchive.ComicArchive(cbz_path) archive = archiver(comic_path) assert archive.copy_from_archive(cbz.archiver) @@ -117,12 +88,3 @@ def test_copy_to_archive(archiver, tmp_path): md = comic_archive.read_cix() assert md == comicapi.genericmetadata.md_test - - md = comicapi.genericmetadata.GenericMetadata() - md.overlay(comicapi.genericmetadata.md_test) - md.series = "test" - - assert comic_archive.write_cix(md) - - test_md = comic_archive.read_cix() - assert md == test_md diff --git a/tests/comiccacher_test.py b/tests/comiccacher_test.py new file mode 100644 index 0000000..5ae7cd1 --- /dev/null +++ b/tests/comiccacher_test.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import pytest + +import comictaggerlib.comiccacher +import comictaggerlib.resulttypes +from testing.comicdata import alt_covers, search_results, select_details + + +def test_create_cache(settings): + comictaggerlib.comiccacher.ComicCacher() + assert (settings.get_settings_folder() / "settings").exists() + + +def test_search_results(comic_cache): + comic_cache.add_search_results( + "test", + "test search", + search_results, + ) + assert search_results == comic_cache.get_search_results("test", "test search") + + +@pytest.mark.parametrize("alt_cover", alt_covers) +def test_alt_covers(comic_cache, alt_cover): + comic_cache.add_alt_covers(**alt_cover, source_name="test") + assert alt_cover["url_list"] == comic_cache.get_alt_covers(issue_id=alt_cover["issue_id"], source_name="test") + + +@pytest.mark.parametrize("volume_info", search_results) +def test_volume_info(comic_cache, volume_info): + comic_cache.add_volume_info(cv_volume_record=volume_info, source_name="test") + vi = volume_info.copy() + del vi["description"] + del vi["image"] + assert vi == comic_cache.get_volume_info(volume_id=volume_info["id"], source_name="test") + + +@pytest.mark.parametrize("details", select_details) +def test_issue_select_details(comic_cache, details): + comic_cache.add_issue_select_details(**details, source_name="test") + det = details.copy() + del det["issue_id"] + assert det == comic_cache.get_issue_select_details(details["issue_id"], "test") diff --git a/tests/comicvinetalker_test.py b/tests/comicvinetalker_test.py new file mode 100644 index 0000000..89cabba --- /dev/null +++ b/tests/comicvinetalker_test.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import pytest + +import comicapi.genericmetadata +import comictaggerlib.comicvinetalker +import testing.comicvine +from testing.comicdata import select_details + + +def test_search_for_series(comicvine_api, comic_cache): + ct = comictaggerlib.comicvinetalker.ComicVineTalker() + results = ct.search_for_series("cory doctorows futuristic tales of the here and now") + for r in results: + r["image"] = {"super_url": r["image"]["super_url"]} + r["start_year"] = int(r["start_year"]) + del r["publisher"]["id"] + del r["publisher"]["api_detail_url"] + cache_issues = comic_cache.get_search_results(ct.source_name, "cory doctorows futuristic tales of the here and now") + assert results == cache_issues + + +def test_fetch_volume_data(comicvine_api, comic_cache): + ct = comictaggerlib.comicvinetalker.ComicVineTalker() + result = ct.fetch_volume_data(23437) + result["start_year"] = int(result["start_year"]) + del result["publisher"]["id"] + del result["publisher"]["api_detail_url"] + assert result == comic_cache.get_volume_info(23437, ct.source_name) + + +def test_fetch_issues_by_volume(comicvine_api, comic_cache): + ct = comictaggerlib.comicvinetalker.ComicVineTalker() + results = ct.fetch_issues_by_volume(23437) + cache_issues = comic_cache.get_volume_issues_info(23437, ct.source_name) + for r in results: + r["image"] = {"super_url": r["image"]["super_url"], "thumb_url": r["image"]["thumb_url"]} + del r["volume"] + assert results == cache_issues + + +def test_fetch_issue_data_by_issue_id(comicvine_api, settings, mock_now, mock_version): + ct = comictaggerlib.comicvinetalker.ComicVineTalker() + result = ct.fetch_issue_data_by_issue_id(140529, settings) + assert result == testing.comicvine.cv_md + + +def test_fetch_issues_by_volume_issue_num_and_year(comicvine_api): + ct = comictaggerlib.comicvinetalker.ComicVineTalker() + results = ct.fetch_issues_by_volume_issue_num_and_year([23437], "1", None) + cv_expected = testing.comicvine.cv_issue_result["results"].copy() + testing.comicvine.filter_field_list( + cv_expected, + {"params": {"field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases"}}, + ) + for r, e in zip(results, [cv_expected]): + assert r == e + + +cv_issue = [ + (23437, "", testing.comicvine.cv_md), + (23437, "1", testing.comicvine.cv_md), + (23437, "0", comicapi.genericmetadata.GenericMetadata()), +] + + +@pytest.mark.parametrize("volume_id, issue_number, expected", cv_issue) +def test_fetch_issue_data(comicvine_api, settings, mock_now, mock_version, volume_id, issue_number, expected): + ct = comictaggerlib.comicvinetalker.ComicVineTalker() + results = ct.fetch_issue_data(volume_id, issue_number, settings) + assert results == expected + + +def test_fetch_issue_select_details(comicvine_api, mock_now, mock_version): + ct = comictaggerlib.comicvinetalker.ComicVineTalker() + result = ct.fetch_issue_select_details(140529) + expected = { + "cover_date": testing.comicvine.cv_issue_result["results"]["cover_date"], + "site_detail_url": testing.comicvine.cv_issue_result["results"]["site_detail_url"], + "image_url": testing.comicvine.cv_issue_result["results"]["image"]["super_url"], + "thumb_image_url": testing.comicvine.cv_issue_result["results"]["image"]["thumb_url"], + } + assert result == expected + + +@pytest.mark.parametrize("details", select_details) +def test_issue_select_details(comic_cache, details): + expected = details.copy() + del expected["issue_id"] + + ct = comictaggerlib.comicvinetalker.ComicVineTalker() + ct.cache_issue_select_details( + issue_id=details["issue_id"], + image_url=details["image_url"], + thumb_url=details["thumb_image_url"], + cover_date=details["cover_date"], + page_url=details["site_detail_url"], + ) + result = comic_cache.get_issue_select_details(details["issue_id"], ct.source_name) + + assert result == expected diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..654357a --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +import copy +import datetime +import io +import shutil +import unittest.mock +from typing import Any, Generator + +import pytest +import requests +from PIL import Image + +import comicapi.comicarchive +import comicapi.genericmetadata +import comictaggerlib.comiccacher +import comictaggerlib.comicvinetalker +import comictaggerlib.settings +from comicapi import utils +from testing import comicvine, filenames +from testing.comicdata import all_seed_imprints, seed_imprints + + +@pytest.fixture +def cbz(): + yield comicapi.comicarchive.ComicArchive(filenames.cbz_path) + + +@pytest.fixture +def tmp_comic(tmp_path): + shutil.copy(filenames.cbz_path, tmp_path) + yield comicapi.comicarchive.ComicArchive(tmp_path / filenames.cbz_path.name) + + +@pytest.fixture +def cbz_double_cover(tmp_path, tmp_comic): + + cover = Image.open(io.BytesIO(tmp_comic.get_page(0))) + + other_page = Image.open(io.BytesIO(tmp_comic.get_page(tmp_comic.get_number_of_pages() - 1))) + + double_cover = Image.new("RGB", (cover.width * 2, cover.height)) + double_cover.paste(other_page, (0, 0)) + double_cover.paste(cover, (cover.width, 0)) + + tmp_comic.archiver.write_file("double_cover.jpg", double_cover.tobytes("jpeg", "RGB")) + yield tmp_comic + + +@pytest.fixture(autouse=True) +def no_requests(monkeypatch) -> None: + """Remove requests.sessions.Session.request for all tests.""" + monkeypatch.delattr("requests.sessions.Session.request") + + +@pytest.fixture +def comicvine_api(monkeypatch, cbz, comic_cache) -> unittest.mock.Mock: + # Any arguments may be passed and mock_get() will always return our + # mocked object, which only has the .json() method or None for invalid urls. + + def make_list(cv_result): + cv_list = copy.deepcopy(cv_result) + if isinstance(cv_list["results"], dict): + cv_list["results"] = [cv_list["results"]] + return cv_list + + def mock_get(*args, **kwargs): + + if args: + if args[0].startswith("https://comicvine.gamespot.com/api/volume/4050-23437"): + cv_result = copy.deepcopy(comicvine.cv_volume_result) + comicvine.filter_field_list(cv_result["results"], kwargs) + return comicvine.MockResponse(cv_result) + if args[0].startswith("https://comicvine.gamespot.com/api/issue/4000-140529"): + return comicvine.MockResponse(comicvine.cv_issue_result) + if ( + args[0].startswith("https://comicvine.gamespot.com/api/issues/") + and "params" in kwargs + and "filter" in kwargs["params"] + and "23437" in kwargs["params"]["filter"] + ): + cv_list = make_list(comicvine.cv_issue_result) + for cv in cv_list["results"]: + comicvine.filter_field_list(cv, kwargs) + return comicvine.MockResponse(cv_list) + if ( + args[0].startswith("https://comicvine.gamespot.com/api/search") + and "params" in kwargs + and "resources" in kwargs["params"] + and "volume" == kwargs["params"]["resources"] + ): + cv_list = make_list(comicvine.cv_volume_result) + for cv in cv_list["results"]: + comicvine.filter_field_list(cv, kwargs) + return comicvine.MockResponse(cv_list) + if ( + args[0] + == "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg" + ): + return comicvine.MockResponse({}, cbz.get_page(0)) + if ( + args[0] + == "https://comicvine.gamespot.com/a/uploads/scale_avatar/0/574/585444-109004_20080707014047_large.jpg" + ): + thumb = Image.open(io.BytesIO(cbz.get_page(0))) + thumb.resize((105, 160), Image.Resampling.LANCZOS) + return comicvine.MockResponse({}, thumb.tobytes("jpeg", "RGB")) + return comicvine.MockResponse(comicvine.cv_not_found) + + m_get = unittest.mock.Mock(side_effect=mock_get) + + # apply the monkeypatch for requests.get to mock_get + monkeypatch.setattr(requests, "get", m_get) + return m_get + + +@pytest.fixture +def mock_now(monkeypatch): + class mydatetime: + time = datetime.datetime(2022, 7, 11, 17, 42, 41) + + @classmethod + def now(cls): + return cls.time + + monkeypatch.setattr(comictaggerlib.comicvinetalker, "datetime", mydatetime) + + +@pytest.fixture +def mock_version(monkeypatch): + version = "1.4.4a9.dev20" + version_tuple = (1, 4, 4, "dev20") + + monkeypatch.setattr(comictaggerlib.ctversion, "version", version) + monkeypatch.setattr(comictaggerlib.ctversion, "__version__", version) + monkeypatch.setattr(comictaggerlib.ctversion, "version_tuple", version_tuple) + monkeypatch.setattr(comictaggerlib.ctversion, "__version_tuple__", version_tuple) + + +@pytest.fixture +def md(): + yield comicapi.genericmetadata.md_test.copy() + + +# manually seeds publishers +@pytest.fixture +def seed_publishers(monkeypatch): + publisher_seed = {} + for publisher, imprint in seed_imprints.items(): + publisher_seed[publisher] = imprint + monkeypatch.setattr(utils, "publishers", publisher_seed) + + +@pytest.fixture +def seed_all_publishers(monkeypatch): + publisher_seed = {} + for publisher, imprint in all_seed_imprints.items(): + publisher_seed[publisher] = imprint + monkeypatch.setattr(utils, "publishers", publisher_seed) + + +@pytest.fixture +def settings(tmp_path): + yield comictaggerlib.settings.ComicTaggerSettings(tmp_path / "settings") + + +@pytest.fixture +def comic_cache(settings) -> Generator[comictaggerlib.comiccacher.ComicCacher, Any, None]: + yield comictaggerlib.comiccacher.ComicCacher() diff --git a/tests/genericmetadata_test.py b/tests/genericmetadata_test.py index 16683fa..c10574b 100644 --- a/tests/genericmetadata_test.py +++ b/tests/genericmetadata_test.py @@ -1,37 +1,23 @@ from __future__ import annotations -import dataclasses - import pytest import comicapi.genericmetadata +from testing.comicdata import credits, metadata -@pytest.fixture -def md(): - yield dataclasses.replace(comicapi.genericmetadata.md_test) +def test_set_default_page_list(tmp_path): + md = comicapi.genericmetadata.GenericMetadata() + md.overlay(comicapi.genericmetadata.md_test) + md.pages = [] + md.set_default_page_list(len(comicapi.genericmetadata.md_test.pages)) + + assert isinstance(md.pages[0]["Image"], int) -stuff = [ - ( - {"series": "test", "issue": "2", "title": "never"}, - dataclasses.replace(comicapi.genericmetadata.md_test, series="test", issue="2", title="never"), - ), - ( - {"series": "", "issue": "2", "title": "never"}, - dataclasses.replace(comicapi.genericmetadata.md_test, series=None, issue="2", title="never"), - ), - ( - {}, - dataclasses.replace(comicapi.genericmetadata.md_test), - ), -] - - -@pytest.mark.parametrize("replaced, expected", stuff) +@pytest.mark.parametrize("replaced, expected", metadata) def test_metadata_overlay(md: comicapi.genericmetadata.GenericMetadata, replaced, expected): - md_overlay = comicapi.genericmetadata.GenericMetadata(**replaced) - md.overlay(md_overlay) + md.overlay(replaced) assert md == expected @@ -40,7 +26,7 @@ def test_add_credit(): md = comicapi.genericmetadata.GenericMetadata() md.add_credit(person="test", role="writer", primary=False) - md.credits == [{"person": "test", "role": "writer", "primary": False}] + assert md.credits == [comicapi.genericmetadata.CreditMetadata(person="test", role="writer", primary=False)] def test_add_credit_primary(): @@ -48,13 +34,7 @@ def test_add_credit_primary(): md.add_credit(person="test", role="writer", primary=False) md.add_credit(person="test", role="writer", primary=True) - md.credits == [{"person": "test", "role": "writer", "primary": True}] - - -credits = [ - ("writer", "Dara Naraghi"), - ("writeR", "Dara Naraghi"), -] + assert md.credits == [comicapi.genericmetadata.CreditMetadata(person="test", role="writer", primary=True)] @pytest.mark.parametrize("role, expected", credits) diff --git a/tests/issueidentifier_test.py b/tests/issueidentifier_test.py new file mode 100644 index 0000000..2111839 --- /dev/null +++ b/tests/issueidentifier_test.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import pytest + +import comicapi.comicarchive +import comicapi.issuestring +import comictaggerlib.comicvinetalker +import comictaggerlib.issueidentifier +import testing.comicdata +import testing.comicvine + + +def test_crop(cbz_double_cover, settings, tmp_path): + ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz_double_cover, settings) + cropped = ii.crop_cover(cbz_double_cover.archiver.read_file("double_cover.jpg")) + original_cover = cbz_double_cover.get_page(0) + + original_hash = ii.calculate_hash(original_cover) + cropped_hash = ii.calculate_hash(cropped) + + assert original_hash == cropped_hash + + +@pytest.mark.parametrize("additional_md, expected", testing.comicdata.metadata_keys) +def test_get_search_keys(cbz, settings, additional_md, expected): + ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz, settings) + ii.set_additional_metadata(additional_md) + + assert expected == ii.get_search_keys() + + +def test_get_issue_cover_match_score(cbz, settings, comicvine_api): + ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz, settings) + score = ii.get_issue_cover_match_score( + comictaggerlib.comicvinetalker.ComicVineTalker(), + int( + comicapi.issuestring.IssueString( + cbz.read_metadata(comicapi.comicarchive.MetaDataStyle.CIX).issue + ).as_float() + ), + "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg", + "https://comicvine.gamespot.com/a/uploads/scale_avatar/0/574/585444-109004_20080707014047_large.jpg", + "https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/", + [ii.calculate_hash(cbz.get_page(0))], + ) + expected = { + "hash": 1747255366011518976, + "score": 0, + "url": "https://comicvine.gamespot.com/a/uploads/scale_large/0/574/585444-109004_20080707014047_large.jpg", + } + assert expected == score + + +def test_search(cbz, settings, comicvine_api): + ii = comictaggerlib.issueidentifier.IssueIdentifier(cbz, settings) + results = ii.search() + cv_expected = { + "series": f"{testing.comicvine.cv_volume_result['results']['name']} ({testing.comicvine.cv_volume_result['results']['start_year']})", + "distance": 0, + "issue_number": testing.comicvine.cv_issue_result["results"]["issue_number"], + "cv_issue_count": testing.comicvine.cv_volume_result["results"]["count_of_issues"], + "issue_title": testing.comicvine.cv_issue_result["results"]["name"], + "issue_id": testing.comicvine.cv_issue_result["results"]["id"], + "volume_id": testing.comicvine.cv_volume_result["results"]["id"], + "month": testing.comicvine.date[1], + "year": testing.comicvine.date[2], + "publisher": testing.comicvine.cv_volume_result["results"]["publisher"]["name"], + "image_url": testing.comicvine.cv_issue_result["results"]["image"]["super_url"], + "thumb_url": testing.comicvine.cv_issue_result["results"]["image"]["thumb_url"], + "page_url": testing.comicvine.cv_issue_result["results"]["site_detail_url"], + "description": testing.comicvine.cv_issue_result["results"]["description"], + } + for r, e in zip(results, [cv_expected]): + del r["url_image_hash"] + assert r == e diff --git a/tests/rename_test.py b/tests/rename_test.py index 8f6ae5a..ab71c82 100644 --- a/tests/rename_test.py +++ b/tests/rename_test.py @@ -5,13 +5,19 @@ import pathlib import pytest from comicapi.genericmetadata import md_test -from comictaggerlib.filerenamer import FileRenamer -from testing.filenames import rnames +from comictaggerlib import filerenamer +from testing.filenames import rfnames, rnames -@pytest.mark.parametrize("template, move, platform, expected", rnames) -def test_rename(template, platform, move, expected): - fr = FileRenamer(md_test, platform=platform) +@pytest.mark.parametrize("template, move, platform, expected, exception", rnames) +def test_rename(template, platform, move, expected, exception): + fr = filerenamer.FileRenamer(md_test, platform=platform) fr.move = move fr.set_template(template) - assert str(pathlib.PureWindowsPath(fr.determine_name(".cbz"))) == str(pathlib.PureWindowsPath(expected)) + with exception: + assert str(pathlib.PureWindowsPath(fr.determine_name(".cbz"))) == str(pathlib.PureWindowsPath(expected)) + + +@pytest.mark.parametrize("inp, result", rfnames) +def test_get_rename_dir(inp, result, cbz): + assert result(cbz) == filerenamer.get_rename_dir(cbz, inp)