From c5ad75370f3164b20d5f66a271a0c8c02e561cd7 Mon Sep 17 00:00:00 2001 From: Mizaki Date: Mon, 24 Oct 2022 16:30:58 +0100 Subject: [PATCH] Work around having to scrape alt covers from CV. Use cache to get issue page url for scrape. --- comictaggerlib/autotagmatchwindow.py | 4 +- comictaggerlib/coverimagewidget.py | 7 ++-- comictaggerlib/issueidentifier.py | 2 +- comictaggerlib/issueselectionwindow.py | 2 +- comictaggerlib/matchselectionwindow.py | 4 +- comictalker/comiccacher.py | 57 ++++++++++++++++++++++---- comictalker/comictalker.py | 13 +++--- comictalker/talkerbase.py | 4 +- comictalker/talkers/comicvine.py | 38 +++++++++++++++-- tests/comiccacher_test.py | 2 +- tests/comicvinetalker_test.py | 1 - 11 files changed, 102 insertions(+), 32 deletions(-) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 9f37a98..e2245b4 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -179,9 +179,7 @@ class AutoTagMatchWindow(QtWidgets.QDialog): if prev is not None and prev.row() == curr.row(): return None - self.altCoverWidget.set_issue_details( - self.current_match()["issue_id"], self.current_match()["page_url"], self.current_match()["image_url"] - ) + self.altCoverWidget.set_issue_details(self.current_match()["issue_id"], self.current_match()["image_url"]) if self.current_match()["description"] is None: self.teDescription.setText("") else: diff --git a/comictaggerlib/coverimagewidget.py b/comictaggerlib/coverimagewidget.py index dded97c..9984050 100644 --- a/comictaggerlib/coverimagewidget.py +++ b/comictaggerlib/coverimagewidget.py @@ -177,12 +177,11 @@ class CoverImageWidget(QtWidgets.QWidget): self.imageCount = 1 self.update_content() - def set_issue_details(self, issue_id: int, issue_url: str, image_url: str) -> None: + def set_issue_details(self, issue_id: int, image_url: str) -> None: if self.mode == CoverImageWidget.AltCoverMode: self.reset_widget() self.update_content() self.issue_id = issue_id - self.issue_url = issue_url ComicTalker.url_fetch_complete = self.sig.emit_url ComicTalker.url_fetch_complete(image_url, None) @@ -211,13 +210,13 @@ class CoverImageWidget(QtWidgets.QWidget): def start_alt_cover_search(self) -> None: - if self.issue_url is not None and self.issue_id is not None: + if self.issue_id is not None: # now we need to get the list of alt cover URLs self.label.setText("Searching for alt. covers...") # page URL should already be cached, so no need to defer ComicTalker.alt_url_list_fetch_complete = self.sig.emit_list - self.talker_api.async_fetch_alternate_cover_urls(utils.xlate(self.issue_id), self.issue_url) + self.talker_api.async_fetch_alternate_cover_urls(utils.xlate(self.issue_id)) def alt_cover_url_list_fetch_complete(self, url_list: list[str]) -> None: if url_list: diff --git a/comictaggerlib/issueidentifier.py b/comictaggerlib/issueidentifier.py index 38f618d..cd97268 100644 --- a/comictaggerlib/issueidentifier.py +++ b/comictaggerlib/issueidentifier.py @@ -271,7 +271,7 @@ class IssueIdentifier: raise IssueIdentifierCancelled if use_remote_alternates: - alt_img_url_list = self.talker_api.fetch_alternate_cover_urls(issue_id, page_url) + alt_img_url_list = self.talker_api.fetch_alternate_cover_urls(issue_id) for alt_url in alt_img_url_list: try: alt_url_image_data = ImageFetcher().fetch(alt_url, blocking=True) diff --git a/comictaggerlib/issueselectionwindow.py b/comictaggerlib/issueselectionwindow.py index 0f98b29..eae02db 100644 --- a/comictaggerlib/issueselectionwindow.py +++ b/comictaggerlib/issueselectionwindow.py @@ -180,7 +180,7 @@ class IssueSelectionWindow(QtWidgets.QDialog): for record in self.issue_list: if record["id"] == self.issue_id: self.issue_number = record["issue_number"] - self.coverWidget.set_issue_details(self.issue_id, record["site_detail_url"], record["image_url"]) + self.coverWidget.set_issue_details(self.issue_id, record["image_url"]) if record["description"] is None: self.teDescription.setText("") else: diff --git a/comictaggerlib/matchselectionwindow.py b/comictaggerlib/matchselectionwindow.py index 11f30af..a46a477 100644 --- a/comictaggerlib/matchselectionwindow.py +++ b/comictaggerlib/matchselectionwindow.py @@ -149,9 +149,7 @@ class MatchSelectionWindow(QtWidgets.QDialog): if prev is not None and prev.row() == curr.row(): return - self.altCoverWidget.set_issue_details( - self.current_match()["issue_id"], self.current_match()["page_url"], self.current_match()["image_url"] - ) + self.altCoverWidget.set_issue_details(self.current_match()["issue_id"], self.current_match()["image_url"]) if self.current_match()["description"] is None: self.teDescription.setText("") else: diff --git a/comictalker/comiccacher.py b/comictalker/comiccacher.py index 9a486d2..bfa6bae 100644 --- a/comictalker/comiccacher.py +++ b/comictalker/comiccacher.py @@ -248,7 +248,7 @@ class ComicCacher: } self.upsert(cur, "volumes", data) - def add_volume_issues_info(self, source_name: str, volume_id: int, volume_issues: list[ComicIssue]) -> None: + def add_volume_issues_info(self, source_name: str, volume_issues: list[ComicIssue]) -> None: con = lite.connect(self.db_file) with con: @@ -261,7 +261,7 @@ class ComicCacher: for issue in volume_issues: data = { "id": issue["id"], - "volume_id": volume_id, + "volume_id": issue["volume"]["id"], "source_name": source_name, "name": issue["name"], "issue_number": issue["issue_number"], @@ -275,7 +275,7 @@ class ComicCacher: } self.upsert(cur, "issues", data) - def get_volume_info(self, volume_id: int, source_name: str) -> ComicVolume | None: + def get_volume_info(self, volume_id: int, source_name: str, purge: bool = True) -> ComicVolume | None: result: ComicVolume | None = None con = lite.connect(self.db_file) @@ -283,9 +283,10 @@ class ComicCacher: cur = con.cursor() con.text_factory = str - # purge stale volume info - a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7) - cur.execute("DELETE FROM Volumes WHERE timestamp < ?", [str(a_week_ago)]) + if purge: + # purge stale volume info + a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7) + cur.execute("DELETE FROM Volumes WHERE timestamp < ?", [str(a_week_ago)]) # fetch cur.execute( @@ -314,7 +315,7 @@ class ComicCacher: def get_volume_issues_info(self, volume_id: int, source_name: str) -> list[ComicIssue]: # get_volume_info should only fail if someone is doing something weird - volume = self.get_volume_info(volume_id, source_name) or ComicVolume(id=volume_id, name="") + volume = self.get_volume_info(volume_id, source_name, False) or ComicVolume(id=volume_id, name="") con = lite.connect(self.db_file) with con: cur = con.cursor() @@ -355,6 +356,48 @@ class ComicCacher: return results + def get_issue_info(self, issue_id: int, source_name: str) -> ComicIssue: + con = lite.connect(self.db_file) + with con: + cur = con.cursor() + con.text_factory = str + + # purge stale issue info - probably issue data won't change + # much.... + a_week_ago = datetime.datetime.today() - datetime.timedelta(days=7) + cur.execute("DELETE FROM Issues WHERE timestamp < ?", [str(a_week_ago)]) + + cur.execute( + ( + "SELECT source_name,id,name,issue_number,site_detail_url,cover_date,image_url,thumb_url,description,aliases,volume_id" + " FROM Issues WHERE id=? AND source_name=?" + ), + [issue_id, source_name], + ) + row = cur.fetchone() + + record = None + + if row: + # get_volume_info should only fail if someone is doing something weird + volume = self.get_volume_info(row[10], source_name, False) or ComicVolume(id=row[10], name="") + + # now process the results + + record = ComicIssue( + id=row[1], + name=row[2], + issue_number=row[3], + site_detail_url=row[4], + cover_date=row[5], + image_url=row[6], + description=row[8], + volume=volume, + aliases=row[9], + ) + + return record + def upsert(self, cur: lite.Cursor, tablename: str, data: dict[str, Any]) -> None: """This does an insert if the given PK doesn't exist, and an update it if does diff --git a/comictalker/comictalker.py b/comictalker/comictalker.py index f4c13ad..e488845 100644 --- a/comictalker/comictalker.py +++ b/comictalker/comictalker.py @@ -123,9 +123,9 @@ class ComicTalker: ) # For issueidentifer - def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]: + def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]: try: - alt_covers = self.talker.fetch_alternate_cover_urls(issue_id, issue_url) + alt_covers = self.talker.fetch_alternate_cover_urls(issue_id) return alt_covers except NotImplementedError: logger.warning(f"{self.talker.source_details.name} has not implemented: 'fetch_alternate_cover_urls'") @@ -152,14 +152,17 @@ class ComicTalker: "The source has not implemented: 'fetch_issues_by_volume_issue_num_and_year'", ) - def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None: + def async_fetch_alternate_cover_urls( + self, + issue_id: int, + ) -> None: try: # TODO: Figure out async - url_list = self.fetch_alternate_cover_urls(issue_id, issue_url) + url_list = self.fetch_alternate_cover_urls(issue_id) ComicTalker.alt_url_list_fetch_complete(url_list) logger.info("Should be downloading alt image list: %s", url_list) return - self.talker.async_fetch_alternate_cover_urls(issue_id, issue_url) + self.talker.async_fetch_alternate_cover_urls(issue_id) except NotImplementedError: logger.warning(f"{self.talker.source_details.name} has not implemented: 'async_fetch_alternate_cover_urls'") diff --git a/comictalker/talkerbase.py b/comictalker/talkerbase.py index d18a035..a69f149 100644 --- a/comictalker/talkerbase.py +++ b/comictalker/talkerbase.py @@ -195,7 +195,7 @@ class TalkerBase: def fetch_comic_data(self, series_id: int, issue_number: str = "") -> GenericMetadata: raise NotImplementedError - def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]: + def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]: raise NotImplementedError def fetch_issues_by_volume_issue_num_and_year( @@ -203,5 +203,5 @@ class TalkerBase: ) -> list[ComicIssue]: raise NotImplementedError - def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None: + def async_fetch_alternate_cover_urls(self, issue_id: int) -> None: raise NotImplementedError diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index 404e8a1..8f701d2 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -598,6 +598,31 @@ class ComicVineTalker(TalkerBase): return formatted_volume_results[0] + def fetch_partial_issue_data(self, issue_id: int) -> ComicIssue: + # before we search online, look in our cache, since we might already have this info + cvc = ComicCacher() + cached_issue_result = cvc.get_issue_info(issue_id, self.source_name) + + if cached_issue_result is not None: + return cached_issue_result + + params = { + "api_key": self.api_key, + "filter": f"id:{issue_id}", + "format": "json", + "field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases", + "offset": 0, + } + cv_response = self.get_cv_content(urljoin(self.api_base_url, "issues/"), params) + + issue_result = cast(CVIssuesResults, cv_response["results"]) + formatted_issue_results = self.format_issue_results([issue_result]) + + if formatted_issue_results: + cvc.add_volume_issues_info(self.source_name, formatted_issue_results) + + return formatted_issue_results[0] + def fetch_issues_by_volume(self, series_id: int) -> list[ComicIssue]: # before we search online, look in our cache, since we might already have this info cvc = ComicCacher() @@ -638,7 +663,7 @@ class ComicVineTalker(TalkerBase): # Format to expected output formatted_volume_issues_result = self.format_issue_results(volume_issues_result) - cvc.add_volume_issues_info(self.source_name, series_id, formatted_volume_issues_result) + cvc.add_volume_issues_info(self.source_name, formatted_volume_issues_result) return formatted_volume_issues_result @@ -937,11 +962,13 @@ class ComicVineTalker(TalkerBase): return newstring - def fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> list[str]: + def fetch_alternate_cover_urls(self, issue_id: int) -> list[str]: url_list = self.fetch_cached_alternate_cover_urls(issue_id) if url_list: return url_list + issue_info = self.fetch_partial_issue_data(issue_id) + issue_url = issue_info["site_detail_url"] # scrape the CV issue page URL to get the alternate cover URLs content = requests.get(issue_url, headers={"user-agent": "comictagger/" + ctversion.version}).text alt_cover_url_list = self.parse_out_alt_cover_urls(content) @@ -1008,9 +1035,9 @@ class ComicVineTalker(TalkerBase): ComicTalker.url_fetch_complete(image_url, thumb_url) - def async_fetch_alternate_cover_urls(self, issue_id: int, issue_url: str) -> None: + def async_fetch_alternate_cover_urls(self, issue_id: int) -> None: # bypass async for now - url_list = self.fetch_alternate_cover_urls(issue_id, issue_url) + url_list = self.fetch_alternate_cover_urls(issue_id) ComicTalker.alt_url_list_fetch_complete(url_list) return @@ -1020,6 +1047,9 @@ class ComicVineTalker(TalkerBase): if url_list: ComicTalker.alt_url_list_fetch_complete(url_list) + issue_info = self.fetch_partial_issue_data(issue_id) + issue_url = issue_info["site_detail_url"] + self.nam.finished.connect(self.async_fetch_alternate_cover_urls_complete) self.nam.get(QtNetwork.QNetworkRequest(QtCore.QUrl(str(issue_url)))) diff --git a/tests/comiccacher_test.py b/tests/comiccacher_test.py index e3cb272..8d25d2e 100644 --- a/tests/comiccacher_test.py +++ b/tests/comiccacher_test.py @@ -3,7 +3,7 @@ from __future__ import annotations import pytest import comictalker.comiccacher -from testing.comicdata import alt_covers, search_results, select_details +from testing.comicdata import alt_covers, search_results def test_create_cache(settings): diff --git a/tests/comicvinetalker_test.py b/tests/comicvinetalker_test.py index 9488ab9..160d55b 100644 --- a/tests/comicvinetalker_test.py +++ b/tests/comicvinetalker_test.py @@ -5,7 +5,6 @@ import pytest import comicapi.genericmetadata import comictalker.talkers.comicvine import testing.comicvine -from testing.comicdata import select_details def test_search_for_series(comicvine_api, comic_cache):