Work around having to scrape alt covers from CV. Use cache to get issue page url for scrape.
This commit is contained in:
parent
d23258f359
commit
c5ad75370f
@ -179,9 +179,7 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
|
||||
if prev is not None and prev.row() == curr.row():
|
||||
return None
|
||||
|
||||
self.altCoverWidget.set_issue_details(
|
||||
self.current_match()["issue_id"], self.current_match()["page_url"], self.current_match()["image_url"]
|
||||
)
|
||||
self.altCoverWidget.set_issue_details(self.current_match()["issue_id"], self.current_match()["image_url"])
|
||||
if self.current_match()["description"] is None:
|
||||
self.teDescription.setText("")
|
||||
else:
|
||||
|
@ -177,12 +177,11 @@ class CoverImageWidget(QtWidgets.QWidget):
|
||||
self.imageCount = 1
|
||||
self.update_content()
|
||||
|
||||
def set_issue_details(self, issue_id: int, issue_url: str, image_url: str) -> None:
|
||||
def set_issue_details(self, issue_id: int, image_url: str) -> None:
|
||||
if self.mode == CoverImageWidget.AltCoverMode:
|
||||
self.reset_widget()
|
||||
self.update_content()
|
||||
self.issue_id = issue_id
|
||||
self.issue_url = issue_url
|
||||
|
||||
ComicTalker.url_fetch_complete = self.sig.emit_url
|
||||
ComicTalker.url_fetch_complete(image_url, None)
|
||||
@ -211,13 +210,13 @@ class CoverImageWidget(QtWidgets.QWidget):
|
||||
|
||||
def start_alt_cover_search(self) -> None:
|
||||
|
||||
if self.issue_url is not None and self.issue_id is not None:
|
||||
if self.issue_id is not None:
|
||||
# now we need to get the list of alt cover URLs
|
||||
self.label.setText("Searching for alt. covers...")
|
||||
|
||||
# page URL should already be cached, so no need to defer
|
||||
ComicTalker.alt_url_list_fetch_complete = self.sig.emit_list
|
||||
self.talker_api.async_fetch_alternate_cover_urls(utils.xlate(self.issue_id), self.issue_url)
|
||||
self.talker_api.async_fetch_alternate_cover_urls(utils.xlate(self.issue_id))
|
||||
|
||||
def alt_cover_url_list_fetch_complete(self, url_list: list[str]) -> None:
|
||||
if url_list:
|
||||
|
@ -271,7 +271,7 @@ class IssueIdentifier:
|
||||
raise IssueIdentifierCancelled
|
||||
|
||||
if use_remote_alternates:
|
||||
alt_img_url_list = self.talker_api.fetch_alternate_cover_urls(issue_id, page_url)
|
||||
alt_img_url_list = self.talker_api.fetch_alternate_cover_urls(issue_id)
|
||||
for alt_url in alt_img_url_list:
|
||||
try:
|
||||
alt_url_image_data = ImageFetcher().fetch(alt_url, blocking=True)
|
||||
|
@ -180,7 +180,7 @@ class IssueSelectionWindow(QtWidgets.QDialog):
|
||||
for record in self.issue_list:
|
||||
if record["id"] == self.issue_id:
|
||||
self.issue_number = record["issue_number"]
|
||||
self.coverWidget.set_issue_details(self.issue_id, record["site_detail_url"], record["image_url"])
|
||||
self.coverWidget.set_issue_details(self.issue_id, record["image_url"])
|
||||
if record["description"] is None:
|
||||
self.teDescription.setText("")
|
||||
else:
|
||||
|
@ -149,9 +149,7 @@ class MatchSelectionWindow(QtWidgets.QDialog):
|
||||
if prev is not None and prev.row() == curr.row():
|
||||
return
|
||||
|
||||
self.altCoverWidget.set_issue_details(
|
||||
self.current_match()["issue_id"], self.current_match()["page_url"], self.current_match()["image_url"]
|
||||
)
|
||||
self.altCoverWidget.set_issue_details(self.current_match()["issue_id"], self.current_match()["image_url"])
|
||||
if self.current_match()["description"] is None:
|
||||
self.teDescription.setText("")
|
||||
else:
|
||||
|
@ -248,7 +248,7 @@ class ComicCacher:
|
||||
}
|
||||
self.upsert(cur, "volumes", data)
|
||||
|
||||
def add_volume_issues_info(self, source_name: str, volume_id: int, volume_issues: list[ComicIssue]) -> None:
|
||||
def add_volume_issues_info(self, source_name: str, volume_issues: list[ComicIssue]) -> None:
|
||||
con = lite.connect(self.db_file)
|
||||
|
||||
with con:
|
||||
@ -261,7 +261,7 @@ class ComicCacher:
|
||||
for issue in volume_issues:
|
||||
data = {
|
||||
"id": issue["id"],
|
||||
"volume_id": volume_id,
|
||||
"volume_id": issue["volume"]["id"],
|
||||
"source_name": source_name,
|
||||
"name": issue["name"],
|
||||
"issue_number": issue["issue_number"],
|
||||
@ -275,7 +275,7 @@ class ComicCacher:
|
||||
}
|
||||
self.upsert(cur, "issues", data)
|
||||
|
||||
def get_volume_info(self, volume_id: int, source_name: str) -> ComicVolume | None:
|
||||
def get_volume_info(self, volume_id: int, source_name: str, purge: bool = True) -> ComicVolume | None:
|
||||
result: ComicVolume | None = None
|
||||
|
||||
con = lite.connect(self.db_file)
|
||||
@ -283,9 +283,10 @@ class ComicCacher:
|
||||
cur = con.cursor()
|
||||
con.text_factory = str
|
||||
|
||||
# purge stale volume info
|
||||
a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7)
|
||||
cur.execute("DELETE FROM Volumes WHERE timestamp < ?", [str(a_week_ago)])
|
||||
if purge:
|
||||
# purge stale volume info
|
||||
a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7)
|
||||
cur.execute("DELETE FROM Volumes WHERE timestamp < ?", [str(a_week_ago)])
|
||||
|
||||
# fetch
|
||||
cur.execute(
|
||||
@ -314,7 +315,7 @@ class ComicCacher:
|
||||
|
||||
def get_volume_issues_info(self, volume_id: int, source_name: str) -> list[ComicIssue]:
|
||||
# get_volume_info should only fail if someone is doing something weird
|
||||
volume = self.get_volume_info(volume_id, source_name) or ComicVolume(id=volume_id, name="")
|
||||
volume = self.get_volume_info(volume_id, source_name, False) or ComicVolume(id=volume_id, name="")
|
||||
con = lite.connect(self.db_file)
|
||||
with con:
|
||||
cur = con.cursor()
|
||||
@ -355,6 +356,48 @@ class ComicCacher:
|
||||
|
||||
return results
|
||||
|
||||
def get_issue_info(self, issue_id: int, source_name: str) -> ComicIssue | None:
    """Return the cached record for a single issue, or None on a cache miss.

    Rows in ``Issues`` whose timestamp is more than seven days old are
    deleted before the lookup, so a stale entry is reported as a miss.

    Args:
        issue_id: Value matched against the ``id`` column of ``Issues``.
        source_name: Value matched against the ``source_name`` column.

    Returns:
        A ``ComicIssue`` built from the matching row, or ``None`` when no
        row matches.
    """
    con = lite.connect(self.db_file)
    try:
        # `with con` only commits (or rolls back) the transaction; the
        # connection itself has to be closed explicitly.
        with con:
            cur = con.cursor()
            con.text_factory = str

            # purge stale issue info - probably issue data won't change
            # much....
            a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7)
            cur.execute("DELETE FROM Issues WHERE timestamp < ?", [str(a_week_ago)])

            cur.execute(
                (
                    "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,image_url,thumb_url,description,aliases,volume_id"
                    " FROM Issues WHERE id=? AND source_name=?"
                ),
                [issue_id, source_name],
            )
            row = cur.fetchone()
    finally:
        con.close()

    if not row:
        return None

    # The volume lookup opens its own connection, so it runs after this
    # one has been committed and closed.
    # get_volume_info should only fail if someone is doing something weird
    volume = self.get_volume_info(row[10], source_name, False) or ComicVolume(id=row[10], name="")

    # now process the results
    return ComicIssue(
        id=row[1],
        name=row[2],
        issue_number=row[3],
        site_detail_url=row[4],
        cover_date=row[5],
        image_url=row[6],
        description=row[8],
        volume=volume,
        aliases=row[9],
    )
|
||||
|
||||
def upsert(self, cur: lite.Cursor, tablename: str, data: dict[str, Any]) -> None:
|
||||
"""This does an insert if the given PK doesn't exist, and an
|
||||
update it if does
|
||||
|
@ -123,9 +123,9 @@ class ComicTalker:
|
||||
)
|
||||
|
||||
# For issueidentifier
|
||||
def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]:
|
||||
def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]:
|
||||
try:
|
||||
alt_covers = self.talker.fetch_alternate_cover_urls(issue_id, issue_url)
|
||||
alt_covers = self.talker.fetch_alternate_cover_urls(issue_id)
|
||||
return alt_covers
|
||||
except NotImplementedError:
|
||||
logger.warning(f"{self.talker.source_details.name} has not implemented: 'fetch_alternate_cover_urls'")
|
||||
@ -152,14 +152,17 @@ class ComicTalker:
|
||||
"The source has not implemented: 'fetch_issues_by_volume_issue_num_and_year'",
|
||||
)
|
||||
|
||||
def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None:
|
||||
def async_fetch_alternate_cover_urls(
|
||||
self,
|
||||
issue_id: int,
|
||||
) -> None:
|
||||
try:
|
||||
# TODO: Figure out async
|
||||
url_list = self.fetch_alternate_cover_urls(issue_id, issue_url)
|
||||
url_list = self.fetch_alternate_cover_urls(issue_id)
|
||||
ComicTalker.alt_url_list_fetch_complete(url_list)
|
||||
logger.info("Should be downloading alt image list: %s", url_list)
|
||||
return
|
||||
|
||||
self.talker.async_fetch_alternate_cover_urls(issue_id, issue_url)
|
||||
self.talker.async_fetch_alternate_cover_urls(issue_id)
|
||||
except NotImplementedError:
|
||||
logger.warning(f"{self.talker.source_details.name} has not implemented: 'async_fetch_alternate_cover_urls'")
|
||||
|
@ -195,7 +195,7 @@ class TalkerBase:
|
||||
def fetch_comic_data(self, series_id: int, issue_number: str = "") -> GenericMetadata:
|
||||
raise NotImplementedError
|
||||
|
||||
def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]:
|
||||
def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]:
|
||||
raise NotImplementedError
|
||||
|
||||
def fetch_issues_by_volume_issue_num_and_year(
|
||||
@ -203,5 +203,5 @@ class TalkerBase:
|
||||
) -> list[ComicIssue]:
|
||||
raise NotImplementedError
|
||||
|
||||
def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None:
|
||||
def async_fetch_alternate_cover_urls(self, issue_id: int) -> None:
|
||||
raise NotImplementedError
|
||||
|
@ -598,6 +598,31 @@ class ComicVineTalker(TalkerBase):
|
||||
|
||||
return formatted_volume_results[0]
|
||||
|
||||
def fetch_partial_issue_data(self, issue_id: int) -> ComicIssue:
    """Return issue data for ``issue_id``, preferring the local cache.

    On a cache miss the ``issues/`` endpoint is queried with an ``id``
    filter, the response is passed through ``format_issue_results``, any
    formatted results are written to the cache, and the first one is
    returned.

    Args:
        issue_id: Identifier used for the cache lookup and the API filter.

    Returns:
        The cached issue if present, otherwise the first formatted result.
    """
    # Check the cache first; we may already have this issue.
    cache = ComicCacher()
    cached_issue = cache.get_issue_info(issue_id, self.source_name)
    if cached_issue is not None:
        return cached_issue

    query = {
        "api_key": self.api_key,
        "filter": f"id:{issue_id}",
        "format": "json",
        "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases",
        "offset": 0,
    }
    endpoint = urljoin(self.api_base_url, "issues/")
    response = self.get_cv_content(endpoint, query)

    # NOTE(review): the "results" payload is wrapped in a one-element list
    # before formatting; confirm that is the shape format_issue_results expects.
    raw_issue = cast(CVIssuesResults, response["results"])
    formatted = self.format_issue_results([raw_issue])

    if formatted:
        cache.add_volume_issues_info(self.source_name, formatted)

    # NOTE(review): this index is not covered by the emptiness check above,
    # so an empty formatted list raises IndexError here.
    return formatted[0]
|
||||
|
||||
def fetch_issues_by_volume(self, series_id: int) -> list[ComicIssue]:
|
||||
# before we search online, look in our cache, since we might already have this info
|
||||
cvc = ComicCacher()
|
||||
@ -638,7 +663,7 @@ class ComicVineTalker(TalkerBase):
|
||||
# Format to expected output
|
||||
formatted_volume_issues_result = self.format_issue_results(volume_issues_result)
|
||||
|
||||
cvc.add_volume_issues_info(self.source_name, series_id, formatted_volume_issues_result)
|
||||
cvc.add_volume_issues_info(self.source_name, formatted_volume_issues_result)
|
||||
|
||||
return formatted_volume_issues_result
|
||||
|
||||
@ -937,11 +962,13 @@ class ComicVineTalker(TalkerBase):
|
||||
|
||||
return newstring
|
||||
|
||||
def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]:
|
||||
def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]:
|
||||
url_list = self.fetch_cached_alternate_cover_urls(issue_id)
|
||||
if url_list:
|
||||
return url_list
|
||||
|
||||
issue_info = self.fetch_partial_issue_data(issue_id)
|
||||
issue_url = issue_info["site_detail_url"]
|
||||
# scrape the CV issue page URL to get the alternate cover URLs
|
||||
content = requests.get(issue_url, headers={"user-agent": "comictagger/" + ctversion.version}).text
|
||||
alt_cover_url_list = self.parse_out_alt_cover_urls(content)
|
||||
@ -1008,9 +1035,9 @@ class ComicVineTalker(TalkerBase):
|
||||
|
||||
ComicTalker.url_fetch_complete(image_url, thumb_url)
|
||||
|
||||
def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None:
|
||||
def async_fetch_alternate_cover_urls(self, issue_id: int) -> None:
|
||||
# bypass async for now
|
||||
url_list = self.fetch_alternate_cover_urls(issue_id, issue_url)
|
||||
url_list = self.fetch_alternate_cover_urls(issue_id)
|
||||
ComicTalker.alt_url_list_fetch_complete(url_list)
|
||||
return
|
||||
|
||||
@ -1020,6 +1047,9 @@ class ComicVineTalker(TalkerBase):
|
||||
if url_list:
|
||||
ComicTalker.alt_url_list_fetch_complete(url_list)
|
||||
|
||||
issue_info = self.fetch_partial_issue_data(issue_id)
|
||||
issue_url = issue_info["site_detail_url"]
|
||||
|
||||
self.nam.finished.connect(self.async_fetch_alternate_cover_urls_complete)
|
||||
self.nam.get(QtNetwork.QNetworkRequest(QtCore.QUrl(str(issue_url))))
|
||||
|
||||
|
@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import pytest
|
||||
|
||||
import comictalker.comiccacher
|
||||
from testing.comicdata import alt_covers, search_results, select_details
|
||||
from testing.comicdata import alt_covers, search_results
|
||||
|
||||
|
||||
def test_create_cache(settings):
|
||||
|
@ -5,7 +5,6 @@ import pytest
|
||||
import comicapi.genericmetadata
|
||||
import comictalker.talkers.comicvine
|
||||
import testing.comicvine
|
||||
from testing.comicdata import select_details
|
||||
|
||||
|
||||
def test_search_for_series(comicvine_api, comic_cache):
|
||||
|
Loading…
Reference in New Issue
Block a user