Work around having to pass the issue page URL in for scraping alt covers from CV: use the cache to look up the issue page URL when a scrape is needed.

This commit is contained in:
Mizaki 2022-10-24 16:30:58 +01:00
parent d23258f359
commit c5ad75370f
11 changed files with 102 additions and 32 deletions

View File

@ -179,9 +179,7 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
if prev is not None and prev.row() == curr.row():
return None
self.altCoverWidget.set_issue_details(
self.current_match()["issue_id"], self.current_match()["page_url"], self.current_match()["image_url"]
)
self.altCoverWidget.set_issue_details(self.current_match()["issue_id"], self.current_match()["image_url"])
if self.current_match()["description"] is None:
self.teDescription.setText("")
else:

View File

@ -177,12 +177,11 @@ class CoverImageWidget(QtWidgets.QWidget):
self.imageCount = 1
self.update_content()
def set_issue_details(self, issue_id: int, issue_url: str, image_url: str) -> None:
def set_issue_details(self, issue_id: int, image_url: str) -> None:
if self.mode == CoverImageWidget.AltCoverMode:
self.reset_widget()
self.update_content()
self.issue_id = issue_id
self.issue_url = issue_url
ComicTalker.url_fetch_complete = self.sig.emit_url
ComicTalker.url_fetch_complete(image_url, None)
@ -211,13 +210,13 @@ class CoverImageWidget(QtWidgets.QWidget):
def start_alt_cover_search(self) -> None:
if self.issue_url is not None and self.issue_id is not None:
if self.issue_id is not None:
# now we need to get the list of alt cover URLs
self.label.setText("Searching for alt. covers...")
# page URL should already be cached, so no need to defer
ComicTalker.alt_url_list_fetch_complete = self.sig.emit_list
self.talker_api.async_fetch_alternate_cover_urls(utils.xlate(self.issue_id), self.issue_url)
self.talker_api.async_fetch_alternate_cover_urls(utils.xlate(self.issue_id))
def alt_cover_url_list_fetch_complete(self, url_list: list[str]) -> None:
if url_list:

View File

@ -271,7 +271,7 @@ class IssueIdentifier:
raise IssueIdentifierCancelled
if use_remote_alternates:
alt_img_url_list = self.talker_api.fetch_alternate_cover_urls(issue_id, page_url)
alt_img_url_list = self.talker_api.fetch_alternate_cover_urls(issue_id)
for alt_url in alt_img_url_list:
try:
alt_url_image_data = ImageFetcher().fetch(alt_url, blocking=True)

View File

@ -180,7 +180,7 @@ class IssueSelectionWindow(QtWidgets.QDialog):
for record in self.issue_list:
if record["id"] == self.issue_id:
self.issue_number = record["issue_number"]
self.coverWidget.set_issue_details(self.issue_id, record["site_detail_url"], record["image_url"])
self.coverWidget.set_issue_details(self.issue_id, record["image_url"])
if record["description"] is None:
self.teDescription.setText("")
else:

View File

@ -149,9 +149,7 @@ class MatchSelectionWindow(QtWidgets.QDialog):
if prev is not None and prev.row() == curr.row():
return
self.altCoverWidget.set_issue_details(
self.current_match()["issue_id"], self.current_match()["page_url"], self.current_match()["image_url"]
)
self.altCoverWidget.set_issue_details(self.current_match()["issue_id"], self.current_match()["image_url"])
if self.current_match()["description"] is None:
self.teDescription.setText("")
else:

View File

@ -248,7 +248,7 @@ class ComicCacher:
}
self.upsert(cur, "volumes", data)
def add_volume_issues_info(self, source_name: str, volume_id: int, volume_issues: list[ComicIssue]) -> None:
def add_volume_issues_info(self, source_name: str, volume_issues: list[ComicIssue]) -> None:
con = lite.connect(self.db_file)
with con:
@ -261,7 +261,7 @@ class ComicCacher:
for issue in volume_issues:
data = {
"id": issue["id"],
"volume_id": volume_id,
"volume_id": issue["volume"]["id"],
"source_name": source_name,
"name": issue["name"],
"issue_number": issue["issue_number"],
@ -275,7 +275,7 @@ class ComicCacher:
}
self.upsert(cur, "issues", data)
def get_volume_info(self, volume_id: int, source_name: str) -> ComicVolume | None:
def get_volume_info(self, volume_id: int, source_name: str, purge: bool = True) -> ComicVolume | None:
result: ComicVolume | None = None
con = lite.connect(self.db_file)
@ -283,9 +283,10 @@ class ComicCacher:
cur = con.cursor()
con.text_factory = str
# purge stale volume info
a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7)
cur.execute("DELETE FROM Volumes WHERE timestamp < ?", [str(a_week_ago)])
if purge:
# purge stale volume info
a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7)
cur.execute("DELETE FROM Volumes WHERE timestamp < ?", [str(a_week_ago)])
# fetch
cur.execute(
@ -314,7 +315,7 @@ class ComicCacher:
def get_volume_issues_info(self, volume_id: int, source_name: str) -> list[ComicIssue]:
# get_volume_info should only fail if someone is doing something weird
volume = self.get_volume_info(volume_id, source_name) or ComicVolume(id=volume_id, name="")
volume = self.get_volume_info(volume_id, source_name, False) or ComicVolume(id=volume_id, name="")
con = lite.connect(self.db_file)
with con:
cur = con.cursor()
@ -355,6 +356,48 @@ class ComicCacher:
return results
def get_issue_info(self, issue_id: int, source_name: str) -> ComicIssue | None:
    """Return the cached issue record for *issue_id* from *source_name*.

    Stale rows (older than one week) are purged before the lookup, so a
    cache hit is guaranteed to be reasonably fresh.  Returns ``None`` when
    the issue is not in the cache — note the annotation: callers (e.g.
    ``fetch_partial_issue_data``) rely on the ``is not None`` check.
    """
    con = lite.connect(self.db_file)

    # Connection as context manager: commits the DELETE on success,
    # rolls back on exception.
    with con:
        cur = con.cursor()
        con.text_factory = str

        # purge stale issue info - probably issue data won't change
        # much....
        a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7)
        cur.execute("DELETE FROM Issues WHERE timestamp < ?", [str(a_week_ago)])

        cur.execute(
            (
                "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,image_url,thumb_url,description,aliases,volume_id"
                " FROM Issues WHERE id=? AND source_name=?"
            ),
            [issue_id, source_name],
        )

        row = cur.fetchone()

        record = None

        if row:
            # Resolve the parent volume from the cache as well; pass
            # purge=False so the volume lookup does not delete rows the
            # issue rows still reference.
            # get_volume_info should only fail if someone is doing something weird
            volume = self.get_volume_info(row[10], source_name, False) or ComicVolume(id=row[10], name="")

            # now process the results
            # NOTE(review): row[7] (thumb_url) is selected but never used —
            # presumably ComicIssue has no thumb field; confirm.
            record = ComicIssue(
                id=row[1],
                name=row[2],
                issue_number=row[3],
                site_detail_url=row[4],
                cover_date=row[5],
                image_url=row[6],
                description=row[8],
                volume=volume,
                aliases=row[9],
            )

        return record
def upsert(self, cur: lite.Cursor, tablename: str, data: dict[str, Any]) -> None:
"""This does an insert if the given PK doesn't exist, and an
update if it does

View File

@ -123,9 +123,9 @@ class ComicTalker:
)
# For IssueIdentifier
def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]:
def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]:
try:
alt_covers = self.talker.fetch_alternate_cover_urls(issue_id, issue_url)
alt_covers = self.talker.fetch_alternate_cover_urls(issue_id)
return alt_covers
except NotImplementedError:
logger.warning(f"{self.talker.source_details.name} has not implemented: 'fetch_alternate_cover_urls'")
@ -152,14 +152,17 @@ class ComicTalker:
"The source has not implemented: 'fetch_issues_by_volume_issue_num_and_year'",
)
def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None:
def async_fetch_alternate_cover_urls(
self,
issue_id: int,
) -> None:
try:
# TODO: Figure out async
url_list = self.fetch_alternate_cover_urls(issue_id, issue_url)
url_list = self.fetch_alternate_cover_urls(issue_id)
ComicTalker.alt_url_list_fetch_complete(url_list)
logger.info("Should be downloading alt image list: %s", url_list)
return
self.talker.async_fetch_alternate_cover_urls(issue_id, issue_url)
self.talker.async_fetch_alternate_cover_urls(issue_id)
except NotImplementedError:
logger.warning(f"{self.talker.source_details.name} has not implemented: 'async_fetch_alternate_cover_urls'")

View File

@ -195,7 +195,7 @@ class TalkerBase:
def fetch_comic_data(self, series_id: int, issue_number: str = "") -> GenericMetadata:
raise NotImplementedError
def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]:
def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]:
raise NotImplementedError
def fetch_issues_by_volume_issue_num_and_year(
@ -203,5 +203,5 @@ class TalkerBase:
) -> list[ComicIssue]:
raise NotImplementedError
def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None:
def async_fetch_alternate_cover_urls(self, issue_id: int) -> None:
raise NotImplementedError

View File

@ -598,6 +598,31 @@ class ComicVineTalker(TalkerBase):
return formatted_volume_results[0]
def fetch_partial_issue_data(self, issue_id: int) -> ComicIssue:
    """Return issue details for *issue_id*, cache-first.

    Checks the local ComicCacher before hitting the Comic Vine API; a
    network fetch is cached for subsequent calls.  Used to recover the
    issue's ``site_detail_url`` so alt-cover scraping no longer needs the
    URL passed in by callers.
    """
    # before we search online, look in our cache, since we might already have this info
    cvc = ComicCacher()
    cached_issue_result = cvc.get_issue_info(issue_id, self.source_name)

    if cached_issue_result is not None:
        return cached_issue_result

    # Cache miss: fetch a partial record (field_list keeps the response
    # small) from the CV issues endpoint, filtered to this single id.
    params = {
        "api_key": self.api_key,
        "filter": f"id:{issue_id}",
        "format": "json",
        "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases",
        "offset": 0,
    }
    cv_response = self.get_cv_content(urljoin(self.api_base_url, "issues/"), params)

    issue_result = cast(CVIssuesResults, cv_response["results"])

    formatted_issue_results = self.format_issue_results([issue_result])

    if formatted_issue_results:
        # Store in the cache so the next lookup short-circuits above.
        cvc.add_volume_issues_info(self.source_name, formatted_issue_results)

    # NOTE(review): if formatted_issue_results is empty this raises
    # IndexError rather than returning a sentinel — confirm the API can
    # never return zero results for a valid issue id.
    return formatted_issue_results[0]
def fetch_issues_by_volume(self, series_id: int) -> list[ComicIssue]:
# before we search online, look in our cache, since we might already have this info
cvc = ComicCacher()
@ -638,7 +663,7 @@ class ComicVineTalker(TalkerBase):
# Format to expected output
formatted_volume_issues_result = self.format_issue_results(volume_issues_result)
cvc.add_volume_issues_info(self.source_name, series_id, formatted_volume_issues_result)
cvc.add_volume_issues_info(self.source_name, formatted_volume_issues_result)
return formatted_volume_issues_result
@ -937,11 +962,13 @@ class ComicVineTalker(TalkerBase):
return newstring
def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]:
def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]:
url_list = self.fetch_cached_alternate_cover_urls(issue_id)
if url_list:
return url_list
issue_info = self.fetch_partial_issue_data(issue_id)
issue_url = issue_info["site_detail_url"]
# scrape the CV issue page URL to get the alternate cover URLs
content = requests.get(issue_url, headers={"user-agent": "comictagger/" + ctversion.version}).text
alt_cover_url_list = self.parse_out_alt_cover_urls(content)
@ -1008,9 +1035,9 @@ class ComicVineTalker(TalkerBase):
ComicTalker.url_fetch_complete(image_url, thumb_url)
def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None:
def async_fetch_alternate_cover_urls(self, issue_id: int) -> None:
# bypass async for now
url_list = self.fetch_alternate_cover_urls(issue_id, issue_url)
url_list = self.fetch_alternate_cover_urls(issue_id)
ComicTalker.alt_url_list_fetch_complete(url_list)
return
@ -1020,6 +1047,9 @@ class ComicVineTalker(TalkerBase):
if url_list:
ComicTalker.alt_url_list_fetch_complete(url_list)
issue_info = self.fetch_partial_issue_data(issue_id)
issue_url = issue_info["site_detail_url"]
self.nam.finished.connect(self.async_fetch_alternate_cover_urls_complete)
self.nam.get(QtNetwork.QNetworkRequest(QtCore.QUrl(str(issue_url))))

View File

@ -3,7 +3,7 @@ from __future__ import annotations
import pytest
import comictalker.comiccacher
from testing.comicdata import alt_covers, search_results, select_details
from testing.comicdata import alt_covers, search_results
def test_create_cache(settings):

View File

@ -5,7 +5,6 @@ import pytest
import comicapi.genericmetadata
import comictalker.talkers.comicvine
import testing.comicvine
from testing.comicdata import select_details
def test_search_for_series(comicvine_api, comic_cache):