diff --git a/comictaggerlib/issueidentifier.py b/comictaggerlib/issueidentifier.py index e2f702d..136309f 100644 --- a/comictaggerlib/issueidentifier.py +++ b/comictaggerlib/issueidentifier.py @@ -30,8 +30,8 @@ from comictaggerlib.imagefetcher import ImageFetcher, ImageFetcherException from comictaggerlib.imagehasher import ImageHasher from comictaggerlib.resulttypes import IssueResult from comictaggerlib.settings import ComicTaggerSettings +from comictalker.talker_utils import parse_date_str from comictalker.talkerbase import ComicTalker, TalkerError -from comictalker.utils import parse_date_str logger = logging.getLogger(__name__) diff --git a/comictalker/utils.py b/comictalker/talker_utils.py similarity index 65% rename from comictalker/utils.py rename to comictalker/talker_utils.py index 67cc7ad..be79429 100644 --- a/comictalker/utils.py +++ b/comictalker/talker_utils.py @@ -17,14 +17,69 @@ from __future__ import annotations import logging import re +from datetime import datetime from bs4 import BeautifulSoup from comicapi import utils +from comicapi.genericmetadata import GenericMetadata +from comicapi.issuestring import IssueString +from comictaggerlib import ctversion +from comictalker.talkerbase import ComicIssue logger = logging.getLogger(__name__) +def map_comic_issue_to_metadata( + issue_results: ComicIssue, source: str, remove_html_tables: bool = False, use_year_volume: bool = False +) -> GenericMetadata: + # Now, map the ComicIssue data to generic metadata + metadata = GenericMetadata() + metadata.is_empty = False + + # Is this best way to go about checking? + if issue_results["volume"].get("name"): + metadata.series = utils.xlate(issue_results["volume"]["name"]) + if issue_results.get("issue_number"): + metadata.issue = IssueString(issue_results["issue_number"]).as_string() + if issue_results.get("name"): + metadata.title = utils.xlate(issue_results["name"]) + if issue_results.get("image_url"): + metadata.cover_image = issue_results["image_url"] + + if issue_results["volume"].get("publisher"): + metadata.publisher = utils.xlate(issue_results["volume"]["publisher"]) + metadata.day, metadata.month, metadata.year = utils.parse_date_str(issue_results["cover_date"]) + + metadata.comments = cleanup_html(issue_results["description"], remove_html_tables) + if use_year_volume: + metadata.volume = issue_results["volume"]["start_year"] + + metadata.notes = ( + f"Tagged with ComicTagger {ctversion.version} using info from {source} on" + f" {datetime.now():%Y-%m-%d %H:%M:%S}. [Issue ID {issue_results['id']}]" + ) + metadata.web_link = issue_results["site_detail_url"] + + for person in issue_results["credits"]: + if "role" in person: + roles = person["role"].split(",") + for role in roles: + # can we determine 'primary' from CV?? 
+ metadata.add_credit(person["name"], role.title().strip(), False) + + if issue_results.get("characters"): + metadata.characters = ", ".join(issue_results["characters"]) + if issue_results.get("teams"): + metadata.teams = ", ".join(issue_results["teams"]) + if issue_results.get("locations"): + metadata.locations = ", ".join(issue_results["locations"]) + if issue_results.get("story_arcs"): + metadata.story_arc = ", ".join(issue_results["story_arcs"]) + + return metadata + + def parse_date_str(date_str: str) -> tuple[int | None, int | None, int | None]: day = None month = None diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index 3e4561a..1bbefd7 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -17,16 +17,15 @@ from __future__ import annotations import json import logging -import re import time from datetime import datetime from typing import Any, Callable, cast from urllib.parse import urljoin, urlsplit import requests -from bs4 import BeautifulSoup from typing_extensions import Required, TypedDict +import comictalker.talker_utils as talker_utils from comicapi import utils from comicapi.genericmetadata import GenericMetadata from comicapi.issuestring import IssueString @@ -722,7 +721,12 @@ class ComicVineTalker(ComicTalker): if f_record and f_record["complete"]: # Cache had full record - return self.map_cv_data_to_metadata(f_record) + return talker_utils.map_comic_issue_to_metadata( + f_record, + self.source_name_friendly, + self.settings_options["remove_html_tables"]["value"], + self.settings_options["use_series_start_as_volume"]["value"], + ) if f_record is not None: issue_url = urljoin(self.api_base_url, f"issue/{CVTypeID.Issue}-{f_record['id']}") @@ -742,7 +746,12 @@ class ComicVineTalker(ComicTalker): else: return GenericMetadata() - return self.map_cv_data_to_metadata(formatted_issues_result[0]) + return talker_utils.map_comic_issue_to_metadata( + formatted_issues_result[0], + self.source_name_friendly, + self.settings_options["remove_html_tables"]["value"], + self.settings_options["use_series_start_as_volume"]["value"], + ) def fetch_issue_data_by_issue_id(self, issue_id: int) -> GenericMetadata: # before we search online, look in our cache, since we might already have this info @@ -750,7 +759,12 @@ class ComicVineTalker(ComicTalker): cached_issues_result = cvc.get_issue_info(issue_id, self.source_name) if cached_issues_result and cached_issues_result["complete"]: - return self.map_cv_data_to_metadata(cached_issues_result) + return talker_utils.map_comic_issue_to_metadata( + cached_issues_result, + self.source_name_friendly, + self.settings_options["remove_html_tables"]["value"], + self.settings_options["use_series_start_as_volume"]["value"], + ) issue_url = urljoin(self.api_base_url, f"issue/{CVTypeID.Issue}-{issue_id}") params = {"api_key": self.api_key, "format": "json"} @@ -768,9 +782,14 @@ class ComicVineTalker(ComicTalker): cvc.add_volume_issues_info(self.source_name, formatted_issues_result) # Now, map the ComicIssue data to generic metadata - return self.map_cv_data_to_metadata(formatted_issues_result[0]) + return talker_utils.map_comic_issue_to_metadata( + formatted_issues_result[0], + self.source_name_friendly, + self.settings_options["remove_html_tables"]["value"], + self.settings_options["use_series_start_as_volume"]["value"], + ) - # To support volume only searching. For testing only. + # To support volume only searching. For testing only. 
     # TODO Delete or create ComicIssue to then map
     def map_cv_volume_data_to_metadata(self, volume_results: CVVolumeFullResult) -> GenericMetadata:
         # Now, map the Comic Vine data to generic metadata
@@ -783,7 +802,7 @@ class ComicVineTalker(ComicTalker):
         metadata.publisher = utils.xlate(volume_results["publisher"]["name"])
         metadata.year = utils.xlate(volume_results["start_year"], True)
 
-        metadata.comments = self.cleanup_html(
+        metadata.comments = talker_utils.cleanup_html(
             volume_results["description"], self.settings_options["remove_html_tables"]["value"]
         )
         if self.settings_options["use_series_start_as_volume"]["value"]:
@@ -821,142 +840,6 @@ class ComicVineTalker(ComicTalker):
 
         return metadata
 
-    def map_cv_data_to_metadata(self, issue_results: ComicIssue) -> GenericMetadata:
-        # TODO As this now takes ComicIssue, move to utils so other talkers can use it?
-        # Now, map the Comic Vine data to generic metadata
-        metadata = GenericMetadata()
-        metadata.is_empty = False
-
-        metadata.series = utils.xlate(issue_results["volume"]["name"])
-        metadata.issue = IssueString(issue_results["issue_number"]).as_string()
-        metadata.title = utils.xlate(issue_results["name"])
-        metadata.cover_image = issue_results["image_url"]
-
-        if issue_results["volume"].get("publisher") is not None:
-            metadata.publisher = utils.xlate(issue_results["volume"]["publisher"])
-        metadata.day, metadata.month, metadata.year = utils.parse_date_str(issue_results["cover_date"])
-
-        metadata.comments = self.cleanup_html(
-            issue_results["description"], self.settings_options["remove_html_tables"]["value"]
-        )
-        if self.settings_options["use_series_start_as_volume"]["value"]:
-            metadata.volume = issue_results["volume"]["start_year"]
-
-        metadata.notes = (
-            f"Tagged with ComicTagger {ctversion.version} using info from {self.source_name_friendly} on"
-            f" {datetime.now():%Y-%m-%d %H:%M:%S}. [Issue ID {issue_results['id']}]"
-        )
-        metadata.web_link = issue_results["site_detail_url"]
-
-        for person in issue_results["credits"]:
-            if "role" in person:
-                roles = person["role"].split(",")
-                for role in roles:
-                    # can we determine 'primary' from CV??
-                    metadata.add_credit(person["name"], role.title().strip(), False)
-
-        metadata.characters = ", ".join(issue_results["characters"])
-        metadata.teams = ", ".join(issue_results["teams"])
-        metadata.locations = ", ".join(issue_results["locations"])
-        metadata.story_arc = ", ".join(issue_results["story_arcs"])
-
-        return metadata
-
-    # TODO Move to utils?
-    def cleanup_html(self, string: str, remove_html_tables: bool) -> str:
-        if string is None:
-            return ""
-        # find any tables
-        soup = BeautifulSoup(string, "html.parser")
-        tables = soup.findAll("table")
-
-        # remove all newlines first
-        string = string.replace("\n", "")
-
-        # put in our own
-        string = string.replace("<br>", "\n")
-        string = string.replace("</li>", "\n")
-        string = string.replace("</p>", "\n\n")
-        string = string.replace("<h1>", "*")
-        string = string.replace("</h1>", "*\n")
-        string = string.replace("<h2>", "*")
-        string = string.replace("</h2>", "*\n")
-        string = string.replace("<h3>", "*")
-        string = string.replace("</h3>", "*\n")
-        string = string.replace("<h4>", "*")
-        string = string.replace("</h4>", "*\n")
-        string = string.replace("<h5>", "*")
-        string = string.replace("</h5>", "*\n")
-        string = string.replace("<h6>", "*")
-        string = string.replace("</h6>", "*\n")
-
-        # remove the tables
-        p = re.compile(r"<table[^<]*?>.*?</table>")
-        if remove_html_tables:
-            string = p.sub("", string)
-            string = string.replace("*List of covers and their creators:*", "")
-        else:
-            string = p.sub("{}", string)
-
-        # now strip all other tags
-        p = re.compile(r"<[^<]*?>")
-        newstring = p.sub("", string)
-
-        newstring = newstring.replace("&nbsp;", " ")
-        newstring = newstring.replace("&amp;", "&")
-
-        newstring = newstring.strip()
-
-        if not remove_html_tables:
-            # now rebuild the tables into text from BSoup
-            try:
-                table_strings = []
-                for table in tables:
-                    rows = []
-                    hdrs = []
-                    col_widths = []
-                    for hdr in table.findAll("th"):
-                        item = hdr.string.strip()
-                        hdrs.append(item)
-                        col_widths.append(len(item))
-                    rows.append(hdrs)
-
-                    for row in table.findAll("tr"):
-                        cols = []
-                        col = row.findAll("td")
-                        i = 0
-                        for c in col:
-                            item = c.string.strip()
-                            cols.append(item)
-                            if len(item) > col_widths[i]:
-                                col_widths[i] = len(item)
-                            i += 1
-                        if len(cols) != 0:
-                            rows.append(cols)
-                    # now we have the data, make it into text
-                    fmtstr = ""
-                    for w in col_widths:
-                        fmtstr += f" {{:{w + 1}}}|"
-                    width = sum(col_widths) + len(col_widths) * 2
-                    table_text = ""
-                    counter = 0
-                    for row in rows:
-                        table_text += fmtstr.format(*row) + "\n"
-                        if counter == 0 and len(hdrs) != 0:
-                            table_text += "-" * width + "\n"
-                        counter += 1
-
-                    table_strings.append(table_text)
-
-                newstring = newstring.format(*table_strings)
-            except Exception:
-                # we caught an error rebuilding the table.
-                # just bail and remove the formatting
-                logger.exception("table parse error")
-                newstring.replace("{}", "")
-
-        return newstring
-
     def repair_urls(self, issue_list: list[CVIssueDetailResults]) -> None:
         # make sure there are URLs for the image fields
         for issue in issue_list:
diff --git a/testing/comicvine.py b/testing/comicvine.py
index d6d1c6d..69e9cd5 100644
--- a/testing/comicvine.py
+++ b/testing/comicvine.py
@@ -3,8 +3,8 @@ from __future__ import annotations
 from typing import Any
 
 import comicapi.genericmetadata
-import comictalker.talkers.comicvine
 from comicapi import utils
+from comictalker.talker_utils import cleanup_html
 
 
 def filter_field_list(cv_result, kwargs):
@@ -190,9 +190,7 @@ cv_md = comicapi.genericmetadata.GenericMetadata(
     volume=None,
     genre=None,
     language=None,
-    comments=comictalker.talkers.comicvine.ComicVineTalker().cleanup_html(
-        cv_issue_result["results"]["description"], False
-    ),
+    comments=cleanup_html(cv_issue_result["results"]["description"], False),
     volume_count=None,
     critical_rating=None,
     country=None,
@@ -200,19 +198,19 @@ cv_md = comicapi.genericmetadata.GenericMetadata(
     alternate_number=None,
     alternate_count=None,
     imprint=None,
-    notes="Tagged with ComicTagger 1.4.4a9.dev20 using info from Comic Vine on 2022-07-11 17:42:41. 
[Issue ID 140529]", + notes=None, web_link=cv_issue_result["results"]["site_detail_url"], format=None, manga=None, black_and_white=None, page_count=None, maturity_rating=None, - story_arc="", + story_arc=None, series_group=None, scan_info=None, - characters="", - teams="", - locations="", + characters=None, + teams=None, + locations=None, credits=[ comicapi.genericmetadata.CreditMetadata(person=x["name"], role=x["role"].title(), primary=False) for x in cv_issue_result["results"]["person_credits"] diff --git a/tests/comicvinetalker_test.py b/tests/comicvinetalker_test.py index 5333218..a6bb400 100644 --- a/tests/comicvinetalker_test.py +++ b/tests/comicvinetalker_test.py @@ -45,9 +45,10 @@ def test_fetch_issues_by_volume(comicvine_api, comic_cache): assert results == cache_issues -def test_fetch_issue_data_by_issue_id(comicvine_api, settings, mock_now, mock_version): +def test_fetch_issue_data_by_issue_id(comicvine_api, settings, mock_version): ct = comictalker.talkers.comicvine.ComicVineTalker() result = ct.fetch_comic_data(140529) + result.notes = None assert result == testing.comicvine.cv_md @@ -81,7 +82,8 @@ cv_issue = [ @pytest.mark.parametrize("volume_id, issue_number, expected", cv_issue) -def test_fetch_issue_data(comicvine_api, settings, mock_now, mock_version, volume_id, issue_number, expected): +def test_fetch_issue_data(comicvine_api, settings, mock_version, volume_id, issue_number, expected): ct = comictalker.talkers.comicvine.ComicVineTalker() results = ct.fetch_issue_data(volume_id, issue_number) + results.notes = None assert results == expected diff --git a/tests/conftest.py b/tests/conftest.py index 67bd6c1..f4008e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,6 @@ from __future__ import annotations import copy -import datetime import io import shutil import unittest.mock @@ -117,18 +116,6 @@ def comicvine_api(monkeypatch, cbz, comic_cache) -> comictalker.talkers.comicvin return cv -@pytest.fixture -def mock_now(monkeypatch): - class mydatetime: - time = datetime.datetime(2022, 7, 11, 17, 42, 41) - - @classmethod - def now(cls): - return cls.time - - monkeypatch.setattr(comictalker.talkers.comicvine, "datetime", mydatetime) - - @pytest.fixture def mock_version(monkeypatch): version = "1.4.4a9.dev20"
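

Reviewer note, not part of the patch: the sketch below is a minimal, hypothetical example of how a talker other than ComicVineTalker could reuse the helpers this change relocates to comictalker/talker_utils.py. The build_metadata function, the raw_issue argument, and the settings dict layout are illustrative stand-ins; only map_comic_issue_to_metadata, cleanup_html, and parse_date_str come from the patch itself, with the call mirroring the ComicVineTalker hunks above.

from __future__ import annotations

import comictalker.talker_utils as talker_utils
from comicapi.genericmetadata import GenericMetadata


def build_metadata(raw_issue, source_name: str, settings: dict) -> GenericMetadata:
    """Map a ComicIssue-shaped dict to GenericMetadata via the shared helper."""
    # The shared mapping replaces a per-talker map_cv_data_to_metadata method;
    # the two trailing flags correspond to the talker's settings_options values.
    return talker_utils.map_comic_issue_to_metadata(
        raw_issue,
        source_name,
        settings["remove_html_tables"]["value"],
        settings["use_series_start_as_volume"]["value"],
    )


# The smaller helpers are importable on their own as well, for example:
# day, month, year = talker_utils.parse_date_str("2022-07-11")  # -> (11, 7, 2022)
# plain_text = talker_utils.cleanup_html("<p>Solicit text</p>", remove_html_tables=False)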