Switch to API data for alt images, remove unneeded functions, and remove async as a new approach is needed. See comments about fetch_partial_volume_data.
This commit is contained in:
parent cab69a32be
commit 4514ae80d0
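The heart of the change: alternate cover URLs now come from the associated_images field of the ComicVine issue payload, are flattened to a comma separated string stored on the cached issue record (alt_images_url), and are split back into a list on read. A minimal sketch of that round trip, assuming a record shaped like the ComicVine payload used in the diff below (the helper names are illustrative, not part of the codebase):

# Illustrative helpers; only the key names come from the diff below.
def pack_alt_image_urls(record: dict) -> str:
    # Pull each alternate cover's original_url from the API response and
    # flatten to a comma separated string for the cache's TEXT column.
    urls = [alt["original_url"] for alt in record.get("associated_images", [])]
    return ",".join(urls)

def unpack_alt_image_urls(alt_images_url: str) -> list[str]:
    # Split on read; the "if x" filter stops an empty string becoming [''].
    return [x for x in alt_images_url.split(",") if x]

record = {"associated_images": [{"original_url": "https://example.com/a.jpg"}]}
packed = pack_alt_image_urls(record)
assert unpack_alt_image_urls(packed) == ["https://example.com/a.jpg"]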
@@ -204,7 +204,7 @@ class CoverImageWidget(QtWidgets.QWidget):
         self.imageCount = len(self.url_list)
         self.update_content()

         # defer the alt cover search
         # TODO No need to search for alt covers as they are now in ComicIssue data
         if self.talker_api.talker.source_details.static_options.has_alt_covers:
             QtCore.QTimer.singleShot(1, self.start_alt_cover_search)
@@ -216,7 +216,7 @@ class CoverImageWidget(QtWidgets.QWidget):

         # page URL should already be cached, so no need to defer
         ComicTalker.alt_url_list_fetch_complete = self.sig.emit_list
-        self.talker_api.talker.async_fetch_alternate_cover_urls(utils.xlate(self.issue_id))
+        self.talker_api.talker.fetch_alternate_cover_urls(utils.xlate(self.issue_id))

     def alt_cover_url_list_fetch_complete(self, url_list: list[str]) -> None:
         if url_list:
@@ -96,16 +96,6 @@ class ComicCacher:
                 + "PRIMARY KEY (id, source_name))"
             )

-            cur.execute(
-                "CREATE TABLE AltCovers("
-                + "issue_id INT NOT NULL,"
-                + "url_list TEXT,"
-                + "timestamp DATE DEFAULT (datetime('now','localtime')), "
-                + "source_name TEXT NOT NULL,"
-                + "aliases TEXT,"  # Newline separated
-                + "PRIMARY KEY (issue_id, source_name))"
-            )
-
             cur.execute(
                 "CREATE TABLE Issues("
                 + "id INT NOT NULL,"
@@ -120,6 +110,7 @@ class ComicCacher:
                 + "timestamp DATE DEFAULT (datetime('now','localtime')), "
                 + "source_name TEXT NOT NULL,"
                 + "aliases TEXT,"  # Newline separated
+                + "alt_images_url TEXT,"  # Comma separated URLs
                 + "PRIMARY KEY (id, source_name))"
             )
@@ -186,47 +177,6 @@ class ComicCacher:

         return results

-    def add_alt_covers(self, source_name: str, issue_id: int, url_list: list[str]) -> None:
-
-        con = lite.connect(self.db_file)
-
-        with con:
-            con.text_factory = str
-            cur = con.cursor()
-
-            # remove all previous entries with this search term
-            cur.execute("DELETE FROM AltCovers WHERE issue_id=? AND source_name=?", [issue_id, source_name])
-
-            url_list_str = ",".join(url_list)
-            # now add in new record
-            cur.execute(
-                "INSERT INTO AltCovers (source_name, issue_id, url_list) VALUES(?, ?, ?)",
-                (source_name, issue_id, url_list_str),
-            )
-
-    def get_alt_covers(self, source_name: str, issue_id: int) -> list[str]:
-
-        con = lite.connect(self.db_file)
-        with con:
-            cur = con.cursor()
-            con.text_factory = str
-
-            # purge stale issue info - probably issue data won't change
-            # much....
-            a_month_ago = datetime.datetime.today() - datetime.timedelta(days=30)
-            cur.execute("DELETE FROM AltCovers WHERE timestamp < ?", [str(a_month_ago)])
-
-            cur.execute("SELECT url_list FROM AltCovers WHERE issue_id=? AND source_name=?", [issue_id, source_name])
-            row = cur.fetchone()
-            if row is None:
-                return []
-
-            url_list_str = row[0]
-            if not url_list_str:
-                return []
-            url_list = str(url_list_str).split(",")
-            return url_list
-
     def add_volume_info(self, source_name: str, volume_record: ComicVolume) -> None:
         con = lite.connect(self.db_file)
@@ -272,6 +222,7 @@ class ComicCacher:
                 "description": issue["description"],
                 "timestamp": timestamp,
                 "aliases": issue["aliases"],
+                "alt_images_url": issue["alt_images_url"],
             }
             self.upsert(cur, "issues", data)
@@ -331,7 +282,7 @@ class ComicCacher:

         cur.execute(
             (
-                "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,image_url,thumb_url,description,aliases"
+                "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,image_url,thumb_url,description,aliases,alt_images_url"
                 " FROM Issues WHERE volume_id=? AND source_name=?"
             ),
             [volume_id, source_name],
@@ -350,6 +301,7 @@ class ComicCacher:
                 description=row[8],
                 volume=volume,
                 aliases=row[9],
+                alt_images_url=row[10],
             )

             results.append(record)
@@ -369,7 +321,7 @@ class ComicCacher:

         cur.execute(
             (
-                "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,image_url,thumb_url,description,aliases,volume_id"
+                "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,image_url,thumb_url,description,aliases,volume_id,alt_images_url"
                 " FROM Issues WHERE id=? AND source_name=?"
             ),
             [issue_id, source_name],
@@ -391,9 +343,11 @@ class ComicCacher:
                 site_detail_url=row[4],
                 cover_date=row[5],
                 image_url=row[6],
                 image_thumb_url=row[7],
                 description=row[8],
                 volume=volume,
                 aliases=row[9],
+                alt_images_url=row[11],
             )

             return record
@@ -25,3 +25,4 @@ class ComicIssue(TypedDict, total=False):
     name: Required[str]
     site_detail_url: str
     volume: ComicVolume
+    alt_images_url: str  # Comma separated URLs
@@ -196,7 +196,7 @@ class TalkerBase:
         """This function is expected to handle a few possibilities:
         1. Only series_id. Retrieve the SERIES/VOLUME information only.
         2. series_id and issue_number. Retrieve the ISSUE information.
-        3. Only issue_id. Used solely by the CLI to retrieve the ISSUE information."""
+        3. Only issue_id. Retrieve the ISSUE information."""
         raise NotImplementedError

     def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]:
@@ -206,6 +206,3 @@ class TalkerBase:
         self, volume_id_list: list[int], issue_number: str, year: str | int | None
     ) -> list[ComicIssue]:
         raise NotImplementedError
-
-    def async_fetch_alternate_cover_urls(self, issue_id: int) -> None:
-        raise NotImplementedError
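The docstring above describes a three-way dispatch. A hypothetical sketch of how a talker subclass might honour it; only the three branch conditions come from the docstring, and fetch_issue_data_by_series is an invented placeholder name (fetch_issue_data_by_issue_id and fetch_volume_data do appear in the ComicVine diff below):

    def fetch_issue_data(self, series_id: int | None, issue_number: str, issue_id: int | None) -> GenericMetadata:
        if issue_id is not None:
            # Case 3: issue_id alone identifies the issue
            return self.fetch_issue_data_by_issue_id(issue_id)
        if series_id is not None and issue_number:
            # Case 2: series plus issue number -> issue information
            return self.fetch_issue_data_by_series(series_id, issue_number)
        if series_id is not None:
            # Case 1: series_id alone -> series/volume information only
            return self.fetch_volume_data(series_id)
        raise ValueError("series_id, series_id+issue_number, or issue_id required")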
@@ -46,13 +46,6 @@ from comictalker.talkerbase import (

 logger = logging.getLogger(__name__)

-try:
-    from PyQt5 import QtCore, QtNetwork
-
-    qt_available = True
-except ImportError:
-    qt_available = False
-

 class CVTypeID:
     Volume = "4050"
@@ -72,6 +65,13 @@ class CVImage(TypedDict, total=False):
     image_tags: str


+class CVAltImages(TypedDict):
+    original_url: str
+    id: int
+    caption: str
+    image_tags: str
+
+
 class CVPublisher(TypedDict, total=False):
     api_detail_url: str
     id: int
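CVAltImages mirrors one entry of the issue payload's associated_images array. An illustrative example of such an entry; the values are invented, only the keys come from the TypedDict above, and only original_url is kept when building alt_images_url:

from typing import TypedDict

class CVAltImages(TypedDict):
    original_url: str
    id: int
    caption: str
    image_tags: str

# Invented sample values; the key set matches the TypedDict.
alt: CVAltImages = {
    "original_url": "https://comicvine.gamespot.com/a/uploads/original/example.jpg",
    "id": 12345,
    "caption": "Variant cover",
    "image_tags": "All Images",
}
urls = [alt["original_url"]]  # only original_url feeds alt_images_url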
@@ -140,6 +140,7 @@ class CVIssuesResults(TypedDict):
     name: str
     site_detail_url: str
     volume: CVVolume
+    associated_images: list[CVAltImages]


 class CVVolumeFullResult(TypedDict):
@@ -295,10 +296,6 @@ class ComicVineTalker(TalkerBase):

         self.series_match_thresh = series_match_thresh

-        # Used for async cover loading etc.
-        if qt_available:
-            self.nam = QtNetwork.QNetworkAccessManager()
-
     def check_api_key(self, key: str, url: str) -> bool:
         if not url:
             url = self.api_base_url
@@ -430,6 +427,15 @@ class ComicVineTalker(TalkerBase):
         image_url = record["image"].get("super_url", "")
         image_thumb_url = record["image"].get("thumb_url", "")

+        alt_images_list = []
+        for alt in record["associated_images"]:
+            # Convert to comma separated
+            for k, v in alt.items():
+                if k == "original_url":
+                    alt_images_list.append(v)
+
+        alt_images_url = ",".join(alt_images_list)
+
         formatted_results.append(
             ComicIssue(
                 aliases=record["aliases"],
@@ -442,6 +448,7 @@ class ComicVineTalker(TalkerBase):
                 name=record["name"],
                 site_detail_url=record.get("site_detail_url", ""),
                 volume=cast(ComicVolume, record["volume"]),
+                alt_images_url=alt_images_url,
             )
         )
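The key-scanning inner loop added above only ever keeps original_url, so it reads as the comprehension below. This is an equivalent simplification for clarity, not what the diff itself does; the sample record is invented:

record = {"associated_images": [{"original_url": "https://example.com/alt1.jpg"},
                                {"original_url": "https://example.com/alt2.jpg"}]}
# Keep each entry's original_url, skipping entries without one.
alt_images_url = ",".join(alt["original_url"] for alt in record["associated_images"] if "original_url" in alt)
# -> "https://example.com/alt1.jpg,https://example.com/alt2.jpg"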
@@ -544,6 +551,7 @@ class ComicVineTalker(TalkerBase):
     # To support volume only searching
     def fetch_volume_data(self, series_id: int) -> GenericMetadata:
         # TODO New cache table or expand current? Makes sense to cache as multiple chapters will want the same data
+        # How to format people in cache? characters and locations can be csv.

         volume_url = urljoin(self.api_base_url, f"volume/{CVTypeID.Volume}-{series_id}")
@@ -573,7 +581,8 @@ class ComicVineTalker(TalkerBase):
         return comic_data

     def fetch_partial_volume_data(self, series_id: int) -> ComicVolume:
-
+        # This is still required over fetch_volume_data because this can use the cache whereas fetch_volume_data always
+        # requires an API call
         # before we search online, look in our cache, since we might already have this info
         cvc = ComicCacher()
         cached_volume_result = cvc.get_volume_info(series_id, self.source_name)
@@ -598,31 +607,6 @@ class ComicVineTalker(TalkerBase):

         return formatted_volume_results[0]

-    def fetch_partial_issue_data(self, issue_id: int) -> ComicIssue:
-        # before we search online, look in our cache, since we might already have this info
-        cvc = ComicCacher()
-        cached_issue_result = cvc.get_issue_info(issue_id, self.source_name)
-
-        if cached_issue_result is not None:
-            return cached_issue_result
-
-        params = {
-            "api_key": self.api_key,
-            "filter": f"id:{issue_id}",
-            "format": "json",
-            "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases",
-            "offset": 0,
-        }
-        cv_response = self.get_cv_content(urljoin(self.api_base_url, "issues/"), params)
-
-        issue_result = cast(CVIssuesResults, cv_response["results"])
-        formatted_issue_results = self.format_issue_results([issue_result])
-
-        if formatted_issue_results:
-            cvc.add_volume_issues_info(self.source_name, formatted_issue_results)
-
-        return formatted_issue_results[0]
-
     def fetch_issues_by_volume(self, series_id: int) -> list[ComicIssue]:
         # before we search online, look in our cache, since we might already have this info
         cvc = ComicCacher()
@@ -635,7 +619,7 @@ class ComicVineTalker(TalkerBase):
             "api_key": self.api_key,
             "filter": f"volume:{series_id}",
             "format": "json",
-            "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases",
+            "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases,associated_images",
             "offset": 0,
         }
         cv_response = self.get_cv_content(urljoin(self.api_base_url, "issues/"), params)
@@ -682,7 +666,7 @@ class ComicVineTalker(TalkerBase):
         params: dict[str, str | int] = {
             "api_key": self.api_key,
             "format": "json",
-            "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases",
+            "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases,associated_images",
             "filter": flt,
         }

@@ -739,14 +723,14 @@ class ComicVineTalker(TalkerBase):
         return self.map_cv_data_to_metadata(volume_results, issue_results)

     def fetch_issue_data_by_issue_id(self, issue_id: int) -> GenericMetadata:
-
+        # Cache will always be missing data (person, character, location) so a call to the API is required
         issue_url = urljoin(self.api_base_url, f"issue/{CVTypeID.Issue}-{issue_id}")
         params = {"api_key": self.api_key, "format": "json"}
         cv_response = self.get_cv_content(issue_url, params)

         issue_results = cast(CVIssueDetailResults, cv_response["results"])

-        volume_results = self.fetch_partial_volume_data(issue_results["volume"]["id"])
+        volume_results = self.fetch_partial_volume_data(int(issue_results["volume"]["id"]))

         # Now, map the Comic Vine data to generic metadata
         md = self.map_cv_data_to_metadata(volume_results, issue_results)
@@ -963,105 +947,35 @@ class ComicVineTalker(TalkerBase):
         return newstring

     def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]:
-        url_list = self.fetch_cached_alternate_cover_urls(issue_id)
-        if url_list:
-            return url_list
-
-        issue_info = self.fetch_partial_issue_data(issue_id)
-        issue_url = issue_info["site_detail_url"]
-        # scrape the CV issue page URL to get the alternate cover URLs
-        content = requests.get(issue_url, headers={"user-agent": "comictagger/" + ctversion.version}).text
-        alt_cover_url_list = self.parse_out_alt_cover_urls(content)
-
-        # cache this alt cover URL list
-        self.cache_alternate_cover_urls(issue_id, alt_cover_url_list)
-
-        return alt_cover_url_list
-
-    def parse_out_alt_cover_urls(self, page_html: str) -> list[str]:
-        soup = BeautifulSoup(page_html, "html.parser")
-
-        alt_cover_url_list = []
-
-        # Using knowledge of the layout of the Comic Vine issue page here:
-        # look for the divs that are in the classes 'imgboxart' and 'issue-cover'
-        div_list = soup.find_all("div")
-        covers_found = 0
-        for d in div_list:
-            if "class" in d.attrs:
-                c = d["class"]
-                if "imgboxart" in c and "issue-cover" in c:
-                    if d.img["src"].startswith("http"):
-                        covers_found += 1
-                        if covers_found != 1:
-                            alt_cover_url_list.append(d.img["src"])
-                    elif d.img["data-src"].startswith("http"):
-                        covers_found += 1
-                        if covers_found != 1:
-                            alt_cover_url_list.append(d.img["data-src"])
-
-        return alt_cover_url_list
-
-    def fetch_cached_alternate_cover_urls(self, issue_id: int) -> list[str]:
-
         # before we search online, look in our cache, since we might already have this info
+        # Should always be in cache
         cvc = ComicCacher()
-        url_list = cvc.get_alt_covers(self.source_name, issue_id)
+        issue = cvc.get_issue_info(issue_id, self.source_name)

-        return url_list
+        if issue:
+            url_list = [x for x in issue["alt_images_url"].split(",") if x]  # Prevent an empty string producing ['']
+        else:
+            # On the off chance it is not in cache
+            params = {
+                "api_key": self.api_key,
+                "filter": f"id:{issue_id}",
+                "format": "json",
+                "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases,associated_images",
+                "offset": 0,
+            }
+            cv_response = self.get_cv_content(urljoin(self.api_base_url, "issues/"), params)

-    def cache_alternate_cover_urls(self, issue_id: int, url_list: list[str]) -> None:
-        cvc = ComicCacher()
-        cvc.add_alt_covers(self.source_name, issue_id, url_list)
+            issue_result = cast(CVIssuesResults, cv_response["results"])

-    def async_fetch_issue_cover_url_complete(self, reply: QtNetwork.QNetworkReply) -> None:
-        # read in the response
-        data = reply.readAll()
+            # Format to expected output
+            formatted_volume_issues_result = self.format_issue_results(issue_result)

-        try:
-            cv_response = cast(CVResult, json.loads(bytes(data)))
-        except Exception:
-            logger.exception("Comic Vine query failed to get JSON data\n%s", str(data))
-            return
+            cvc.add_volume_issues_info(self.source_name, formatted_volume_issues_result)

-        if cv_response["status_code"] != 1:
-            logger.error("Comic Vine query failed with error: [%s]. ", cv_response["error"])
-            return
+            url_list = [x for x in issue["alt_images_url"].split(",") if x]

-        result = cast(CVIssuesResults, cv_response["results"])
-
-        image_url = result["image"]["super_url"]
-        thumb_url = result["image"]["thumb_url"]
-
-        ComicTalker.url_fetch_complete(image_url, thumb_url)
-
-    def async_fetch_alternate_cover_urls(self, issue_id: int) -> None:
-        # bypass async for now
-        url_list = self.fetch_alternate_cover_urls(issue_id)
-        ComicTalker.alt_url_list_fetch_complete(url_list)
-        return
-
-        # This async version requires the issue page url to be provided!
-        self.issue_id = issue_id
-        url_list = self.fetch_cached_alternate_cover_urls(issue_id)
-        if url_list:
-            ComicTalker.alt_url_list_fetch_complete(url_list)
-
-        issue_info = self.fetch_partial_issue_data(issue_id)
-        issue_url = issue_info["site_detail_url"]
-
-        self.nam.finished.connect(self.async_fetch_alternate_cover_urls_complete)
-        self.nam.get(QtNetwork.QNetworkRequest(QtCore.QUrl(str(issue_url))))
-
-    def async_fetch_alternate_cover_urls_complete(self, reply: QtNetwork.QNetworkReply) -> None:
-        # read in the response
-        html = str(reply.readAll())
-        alt_cover_url_list = self.parse_out_alt_cover_urls(html)
-
-        # cache this alt cover URL list
-        self.cache_alternate_cover_urls(cast(int, self.issue_id), alt_cover_url_list)
-
-        ComicTalker.alt_url_list_fetch_complete(alt_cover_url_list)
+        # CLI expects a list returned
+        return url_list

     def repair_urls(self, issue_list: list[CVIssuesResults] | list[CVIssueDetailResults]) -> None:
         # make sure there are URLs for the image fields
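One thing worth flagging in the new fetch_alternate_cover_urls: on the cache-miss path the final split still reads issue["alt_images_url"], but issue is the falsy cache result that sent execution into the else branch. The freshly formatted API result looks like the intended source. A hedged sketch of that tail as it appears intended; this correction is an assumption, not part of the commit:

            # Assumed fix: read from the freshly formatted issue rather than
            # the empty cache result `issue` that triggered the else branch.
            issue = formatted_volume_issues_result[0]
            url_list = [x for x in issue["alt_images_url"].split(",") if x]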
@@ -3,7 +3,7 @@ from __future__ import annotations

 import pytest

 import comictalker.comiccacher
-from testing.comicdata import alt_covers, search_results
+from testing.comicdata import search_results


 def test_create_cache(settings):
@@ -20,12 +20,6 @@ def test_search_results(comic_cache):
     assert search_results == comic_cache.get_search_results("test", "test search")


-@pytest.mark.parametrize("alt_cover", alt_covers)
-def test_alt_covers(comic_cache, alt_cover):
-    comic_cache.add_alt_covers(**alt_cover, source_name="test")
-    assert alt_cover["url_list"] == comic_cache.get_alt_covers(issue_id=alt_cover["issue_id"], source_name="test")
-
-
 @pytest.mark.parametrize("volume_info", search_results)
 def test_volume_info(comic_cache, volume_info):
     comic_cache.add_volume_info(volume_record=volume_info, source_name="test")
@@ -54,6 +54,7 @@ def test_fetch_issues_by_volume_issue_num_and_year(comicvine_api):
     for r, e in zip(results, [cv_expected]):
         del r["image_thumb_url"]
         del r["image_url"]
+        del r["alt_images_url"]
         assert r == e