Move the comic-issue-to-metadata mapping and HTML-cleanup helpers into talker_utils; update the affected tests.
This commit is contained in:
parent
a724fd8430
commit
67be086638
@ -30,8 +30,8 @@ from comictaggerlib.imagefetcher import ImageFetcher, ImageFetcherException
|
||||
from comictaggerlib.imagehasher import ImageHasher
|
||||
from comictaggerlib.resulttypes import IssueResult
|
||||
from comictaggerlib.settings import ComicTaggerSettings
|
||||
from comictalker.talker_utils import parse_date_str
|
||||
from comictalker.talkerbase import ComicTalker, TalkerError
|
||||
from comictalker.utils import parse_date_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -17,14 +17,69 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from comicapi import utils
|
||||
from comicapi.genericmetadata import GenericMetadata
|
||||
from comicapi.issuestring import IssueString
|
||||
from comictaggerlib import ctversion
|
||||
from comictalker.talkerbase import ComicIssue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def map_comic_issue_to_metadata(
    issue_results: ComicIssue, source: str, remove_html_tables: bool = False, use_year_volume: bool = False
) -> GenericMetadata:
    """Map a talker ``ComicIssue`` record to a ``GenericMetadata`` object.

    Args:
        issue_results: the issue record returned by a talker.
        source: friendly name of the data source, embedded in the notes field.
        remove_html_tables: strip HTML tables from the description instead of
            rendering them as text.
        use_year_volume: use the series start year as the volume number.

    Returns:
        A populated ``GenericMetadata`` (``is_empty`` is always False).
    """
    metadata = GenericMetadata()
    metadata.is_empty = False

    # Guard every key with .get(): talker records may be sparse, and the
    # original mixed guarded and unguarded access (KeyError risk on e.g.
    # a record lacking "cover_date" or "description").
    if issue_results["volume"].get("name"):
        metadata.series = utils.xlate(issue_results["volume"]["name"])
    if issue_results.get("issue_number"):
        metadata.issue = IssueString(issue_results["issue_number"]).as_string()
    if issue_results.get("name"):
        metadata.title = utils.xlate(issue_results["name"])
    if issue_results.get("image_url"):
        metadata.cover_image = issue_results["image_url"]

    if issue_results["volume"].get("publisher"):
        metadata.publisher = utils.xlate(issue_results["volume"]["publisher"])
    metadata.day, metadata.month, metadata.year = utils.parse_date_str(issue_results.get("cover_date"))

    metadata.comments = cleanup_html(issue_results.get("description"), remove_html_tables)
    if use_year_volume:
        metadata.volume = issue_results["volume"]["start_year"]

    # Record provenance so a re-tag can identify where the data came from.
    metadata.notes = (
        f"Tagged with ComicTagger {ctversion.version} using info from {source} on"
        f" {datetime.now():%Y-%m-%d %H:%M:%S}. [Issue ID {issue_results['id']}]"
    )
    metadata.web_link = issue_results.get("site_detail_url")

    for person in issue_results.get("credits", []):
        if "role" in person:
            # One credit entry may hold several comma-separated roles.
            for role in person["role"].split(","):
                # can we determine 'primary' from CV??
                metadata.add_credit(person["name"], role.title().strip(), False)

    if issue_results.get("characters"):
        metadata.characters = ", ".join(issue_results["characters"])
    if issue_results.get("teams"):
        metadata.teams = ", ".join(issue_results["teams"])
    if issue_results.get("locations"):
        metadata.locations = ", ".join(issue_results["locations"])
    if issue_results.get("story_arcs"):
        metadata.story_arc = ", ".join(issue_results["story_arcs"])

    return metadata
|
||||
|
||||
|
||||
def parse_date_str(date_str: str) -> tuple[int | None, int | None, int | None]:
|
||||
day = None
|
||||
month = None
|
@ -17,16 +17,15 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any, Callable, cast
|
||||
from urllib.parse import urljoin, urlsplit
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from typing_extensions import Required, TypedDict
|
||||
|
||||
import comictalker.talker_utils as talker_utils
|
||||
from comicapi import utils
|
||||
from comicapi.genericmetadata import GenericMetadata
|
||||
from comicapi.issuestring import IssueString
|
||||
@ -722,7 +721,12 @@ class ComicVineTalker(ComicTalker):
|
||||
|
||||
if f_record and f_record["complete"]:
|
||||
# Cache had full record
|
||||
return self.map_cv_data_to_metadata(f_record)
|
||||
return talker_utils.map_comic_issue_to_metadata(
|
||||
f_record,
|
||||
self.source_name_friendly,
|
||||
self.settings_options["remove_html_tables"]["value"],
|
||||
self.settings_options["use_series_start_as_volume"]["value"],
|
||||
)
|
||||
|
||||
if f_record is not None:
|
||||
issue_url = urljoin(self.api_base_url, f"issue/{CVTypeID.Issue}-{f_record['id']}")
|
||||
@ -742,7 +746,12 @@ class ComicVineTalker(ComicTalker):
|
||||
else:
|
||||
return GenericMetadata()
|
||||
|
||||
return self.map_cv_data_to_metadata(formatted_issues_result[0])
|
||||
return talker_utils.map_comic_issue_to_metadata(
|
||||
formatted_issues_result[0],
|
||||
self.source_name_friendly,
|
||||
self.settings_options["remove_html_tables"]["value"],
|
||||
self.settings_options["use_series_start_as_volume"]["value"],
|
||||
)
|
||||
|
||||
def fetch_issue_data_by_issue_id(self, issue_id: int) -> GenericMetadata:
|
||||
# before we search online, look in our cache, since we might already have this info
|
||||
@ -750,7 +759,12 @@ class ComicVineTalker(ComicTalker):
|
||||
cached_issues_result = cvc.get_issue_info(issue_id, self.source_name)
|
||||
|
||||
if cached_issues_result and cached_issues_result["complete"]:
|
||||
return self.map_cv_data_to_metadata(cached_issues_result)
|
||||
return talker_utils.map_comic_issue_to_metadata(
|
||||
cached_issues_result,
|
||||
self.source_name_friendly,
|
||||
self.settings_options["remove_html_tables"]["value"],
|
||||
self.settings_options["use_series_start_as_volume"]["value"],
|
||||
)
|
||||
|
||||
issue_url = urljoin(self.api_base_url, f"issue/{CVTypeID.Issue}-{issue_id}")
|
||||
params = {"api_key": self.api_key, "format": "json"}
|
||||
@ -768,9 +782,14 @@ class ComicVineTalker(ComicTalker):
|
||||
cvc.add_volume_issues_info(self.source_name, formatted_issues_result)
|
||||
|
||||
# Now, map the ComicIssue data to generic metadata
|
||||
return self.map_cv_data_to_metadata(formatted_issues_result[0])
|
||||
return talker_utils.map_comic_issue_to_metadata(
|
||||
formatted_issues_result[0],
|
||||
self.source_name_friendly,
|
||||
self.settings_options["remove_html_tables"]["value"],
|
||||
self.settings_options["use_series_start_as_volume"]["value"],
|
||||
)
|
||||
|
||||
# To support volume only searching. For testing only.
|
||||
# To support volume only searching. For testing only. # TODO Delete or create ComicIssue to then map
|
||||
def map_cv_volume_data_to_metadata(self, volume_results: CVVolumeFullResult) -> GenericMetadata:
|
||||
|
||||
# Now, map the Comic Vine data to generic metadata
|
||||
@ -783,7 +802,7 @@ class ComicVineTalker(ComicTalker):
|
||||
metadata.publisher = utils.xlate(volume_results["publisher"]["name"])
|
||||
metadata.year = utils.xlate(volume_results["start_year"], True)
|
||||
|
||||
metadata.comments = self.cleanup_html(
|
||||
metadata.comments = talker_utils.cleanup_html(
|
||||
volume_results["description"], self.settings_options["remove_html_tables"]["value"]
|
||||
)
|
||||
if self.settings_options["use_series_start_as_volume"]["value"]:
|
||||
@ -821,142 +840,6 @@ class ComicVineTalker(ComicTalker):
|
||||
|
||||
return metadata
|
||||
|
||||
def map_cv_data_to_metadata(self, issue_results: ComicIssue) -> GenericMetadata:
    # TODO As this now takes ComicIssue, move to utils so other talkers can use it?
    """Translate a Comic Vine issue record into a ``GenericMetadata`` object."""
    metadata = GenericMetadata()
    metadata.is_empty = False

    volume = issue_results["volume"]
    metadata.series = utils.xlate(volume["name"])
    metadata.issue = IssueString(issue_results["issue_number"]).as_string()
    metadata.title = utils.xlate(issue_results["name"])
    metadata.cover_image = issue_results["image_url"]

    publisher = volume.get("publisher")
    if publisher is not None:
        metadata.publisher = utils.xlate(publisher)
    metadata.day, metadata.month, metadata.year = utils.parse_date_str(issue_results["cover_date"])

    remove_tables = self.settings_options["remove_html_tables"]["value"]
    metadata.comments = self.cleanup_html(issue_results["description"], remove_tables)
    if self.settings_options["use_series_start_as_volume"]["value"]:
        metadata.volume = volume["start_year"]

    # Provenance note so the tag records its origin and time.
    metadata.notes = (
        f"Tagged with ComicTagger {ctversion.version} using info from {self.source_name_friendly} on"
        f" {datetime.now():%Y-%m-%d %H:%M:%S}. [Issue ID {issue_results['id']}]"
    )
    metadata.web_link = issue_results["site_detail_url"]

    for person in issue_results["credits"]:
        if "role" not in person:
            continue
        # A single credit entry may list several comma-separated roles.
        for role in person["role"].split(","):
            # can we determine 'primary' from CV??
            metadata.add_credit(person["name"], role.title().strip(), False)

    metadata.characters = ", ".join(issue_results["characters"])
    metadata.teams = ", ".join(issue_results["teams"])
    metadata.locations = ", ".join(issue_results["locations"])
    metadata.story_arc = ", ".join(issue_results["story_arcs"])

    return metadata
|
||||
|
||||
# TODO Move to utils?
|
||||
def cleanup_html(self, string: str, remove_html_tables: bool) -> str:
    """Convert an HTML description blob into readable plain text.

    Args:
        string: raw HTML (may be ``None``).
        remove_html_tables: drop tables entirely instead of rendering them
            as aligned text columns.

    Returns:
        Plain-text rendition of the description; "" for ``None`` input.
    """
    if string is None:
        return ""
    # find any tables before we mangle the markup
    soup = BeautifulSoup(string, "html.parser")
    tables = soup.findAll("table")

    # remove all newlines first
    string = string.replace("\n", "")

    # put in our own
    string = string.replace("<br>", "\n")
    string = string.replace("</li>", "\n")
    string = string.replace("</p>", "\n\n")
    # headings become *text*\n (was six copy-pasted replace pairs)
    for level in range(1, 7):
        string = string.replace(f"<h{level}>", "*")
        string = string.replace(f"</h{level}>", "*\n")

    # remove the tables (or mark their position with {} placeholders
    # so they can be re-inserted as text below)
    p = re.compile(r"<table[^<]*?>.*?</table>")
    if remove_html_tables:
        string = p.sub("", string)
        string = string.replace("*List of covers and their creators:*", "")
    else:
        string = p.sub("{}", string)

    # now strip all other tags
    p = re.compile(r"<[^<]*?>")
    newstring = p.sub("", string)

    # decode the common HTML entities left in the text
    # (the scraped copy of this code had these rendered into no-ops)
    newstring = newstring.replace("&nbsp;", " ")
    newstring = newstring.replace("&amp;", "&")

    newstring = newstring.strip()

    if not remove_html_tables:
        # now rebuild the tables into text from BSoup
        try:
            table_strings = []
            for table in tables:
                rows = []
                hdrs = []
                col_widths = []
                for hdr in table.findAll("th"):
                    item = hdr.string.strip()
                    hdrs.append(item)
                    col_widths.append(len(item))
                rows.append(hdrs)

                for row in table.findAll("tr"):
                    cols = []
                    col = row.findAll("td")
                    i = 0
                    for c in col:
                        item = c.string.strip()
                        cols.append(item)
                        # track widest cell per column for alignment
                        if len(item) > col_widths[i]:
                            col_widths[i] = len(item)
                        i += 1
                    if len(cols) != 0:
                        rows.append(cols)
                # now we have the data, make it into text
                fmtstr = ""
                for w in col_widths:
                    fmtstr += f" {{:{w + 1}}}|"
                width = sum(col_widths) + len(col_widths) * 2
                table_text = ""
                counter = 0
                for row in rows:
                    table_text += fmtstr.format(*row) + "\n"
                    if counter == 0 and len(hdrs) != 0:
                        # underline the header row
                        table_text += "-" * width + "\n"
                    counter += 1

                table_strings.append(table_text)

            newstring = newstring.format(*table_strings)
        except Exception:
            # we caught an error rebuilding the table.
            # just bail and remove the formatting
            logger.exception("table parse error")
            # BUG FIX: str.replace returns a new string; the original
            # discarded the result, leaving stray "{}" in the output.
            newstring = newstring.replace("{}", "")

    return newstring
|
||||
|
||||
def repair_urls(self, issue_list: list[CVIssueDetailResults]) -> None:
|
||||
# make sure there are URLs for the image fields
|
||||
for issue in issue_list:
|
||||
|
@ -3,8 +3,8 @@ from __future__ import annotations
|
||||
from typing import Any
|
||||
|
||||
import comicapi.genericmetadata
|
||||
import comictalker.talkers.comicvine
|
||||
from comicapi import utils
|
||||
from comictalker.talker_utils import cleanup_html
|
||||
|
||||
|
||||
def filter_field_list(cv_result, kwargs):
|
||||
@ -190,9 +190,7 @@ cv_md = comicapi.genericmetadata.GenericMetadata(
|
||||
volume=None,
|
||||
genre=None,
|
||||
language=None,
|
||||
comments=comictalker.talkers.comicvine.ComicVineTalker().cleanup_html(
|
||||
cv_issue_result["results"]["description"], False
|
||||
),
|
||||
comments=cleanup_html(cv_issue_result["results"]["description"], False),
|
||||
volume_count=None,
|
||||
critical_rating=None,
|
||||
country=None,
|
||||
@ -200,19 +198,19 @@ cv_md = comicapi.genericmetadata.GenericMetadata(
|
||||
alternate_number=None,
|
||||
alternate_count=None,
|
||||
imprint=None,
|
||||
notes="Tagged with ComicTagger 1.4.4a9.dev20 using info from Comic Vine on 2022-07-11 17:42:41. [Issue ID 140529]",
|
||||
notes=None,
|
||||
web_link=cv_issue_result["results"]["site_detail_url"],
|
||||
format=None,
|
||||
manga=None,
|
||||
black_and_white=None,
|
||||
page_count=None,
|
||||
maturity_rating=None,
|
||||
story_arc="",
|
||||
story_arc=None,
|
||||
series_group=None,
|
||||
scan_info=None,
|
||||
characters="",
|
||||
teams="",
|
||||
locations="",
|
||||
characters=None,
|
||||
teams=None,
|
||||
locations=None,
|
||||
credits=[
|
||||
comicapi.genericmetadata.CreditMetadata(person=x["name"], role=x["role"].title(), primary=False)
|
||||
for x in cv_issue_result["results"]["person_credits"]
|
||||
|
@ -45,9 +45,10 @@ def test_fetch_issues_by_volume(comicvine_api, comic_cache):
|
||||
assert results == cache_issues
|
||||
|
||||
|
||||
def test_fetch_issue_data_by_issue_id(comicvine_api, settings, mock_now, mock_version):
|
||||
def test_fetch_issue_data_by_issue_id(comicvine_api, settings, mock_version):
    """Fetching by issue id should yield the canned ComicVine metadata."""
    talker = comictalker.talkers.comicvine.ComicVineTalker()
    md = talker.fetch_comic_data(140529)
    # notes embed a timestamp; blank them before comparing
    md.notes = None
    assert md == testing.comicvine.cv_md
|
||||
|
||||
|
||||
@ -81,7 +82,8 @@ cv_issue = [
|
||||
|
||||
|
||||
@pytest.mark.parametrize("volume_id, issue_number, expected", cv_issue)
def test_fetch_issue_data(comicvine_api, settings, mock_version, volume_id, issue_number, expected):
    """Each (volume, issue) pair should resolve to its expected metadata."""
    talker = comictalker.talkers.comicvine.ComicVineTalker()
    md = talker.fetch_issue_data(volume_id, issue_number)
    # notes embed a timestamp; blank them before comparing
    md.notes = None
    assert md == expected
|
||||
|
@ -1,7 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import datetime
|
||||
import io
|
||||
import shutil
|
||||
import unittest.mock
|
||||
@ -117,18 +116,6 @@ def comicvine_api(monkeypatch, cbz, comic_cache) -> comictalker.talkers.comicvin
|
||||
return cv
|
||||
|
||||
|
||||
@pytest.fixture
def mock_now(monkeypatch):
    """Freeze ``datetime.now()`` in the comicvine talker module so the
    provenance notes produced during a test are deterministic."""

    class _FrozenDateTime:
        _fixed = datetime.datetime(2022, 7, 11, 17, 42, 41)

        @classmethod
        def now(cls):
            return cls._fixed

    monkeypatch.setattr(comictalker.talkers.comicvine, "datetime", _FrozenDateTime)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_version(monkeypatch):
|
||||
version = "1.4.4a9.dev20"
|
||||
|
Loading…
Reference in New Issue
Block a user