From 39a4a37d7c6b6b7601dbc0d7bcd5aaa4dc09a938 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Thu, 19 Sep 2024 19:03:30 -0700 Subject: [PATCH 1/2] Add tests --- comictalker/talkers/comicvine.py | 9 +++++---- testing/comicvine.py | 2 +- tests/comicvinetalker_test.py | 19 +++++++++++++++---- tests/conftest.py | 19 +++++++++++++------ tests/integration_test.py | 2 ++ 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index 30b2250..abdafb8 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -393,11 +393,12 @@ class ComicVineTalker(ComicTalker): ), ) issue_found = True - else: + break + if not issues: needed_volumes.add(int(series_id)) # we got no results from cache, we definitely need to check online # If we didn't find the issue and we don't have all the issues we don't know if the issue exists, we have to check - if not issue_found and cvseries.get("count_of_issues") == len(issues): + if (not issue_found) and cvseries.get("count_of_issues") != len(issues): needed_volumes.add(int(series_id)) logger.debug("Found %d issues cached need %d issues", len(cached_results), len(needed_volumes)) @@ -405,9 +406,9 @@ class ComicVineTalker(ComicTalker): return cached_results series_filter = "" - for vid in series_id_list: + for vid in needed_volumes: series_filter += str(vid) + "|" - flt = f"volume:{series_filter},issue_number:{issue_number}" # CV uses volume to mean series + flt = f"volume:{series_filter[:-1]},issue_number:{issue_number}" # CV uses volume to mean series int_year = utils.xlate_int(year) if int_year is not None: diff --git a/testing/comicvine.py b/testing/comicvine.py index 54b2fa5..908420f 100644 --- a/testing/comicvine.py +++ b/testing/comicvine.py @@ -118,7 +118,7 @@ cv_volume_result: dict[str, Any] = { "results": { "aliases": None, "api_detail_url": "https://comicvine.gamespot.com/api/volume/4050-23437/", - "count_of_issues": 6, + "count_of_issues": 1, "date_added": "2008-10-16 05:25:47", "date_last_updated": "2012-01-18 17:21:57", "deck": None, diff --git a/tests/comicvinetalker_test.py b/tests/comicvinetalker_test.py index 8129c2a..aec57d4 100644 --- a/tests/comicvinetalker_test.py +++ b/tests/comicvinetalker_test.py @@ -46,13 +46,24 @@ def test_fetch_issue_data_by_issue_id(comicvine_api): assert result == testing.comicvine.cv_md -def test_fetch_issues_in_series_issue_num_and_year(comicvine_api): +def test_fetch_issues_in_series_issue_num_and_year(comicvine_api, cv_requests_get): results = comicvine_api.fetch_issues_by_series_issue_num_and_year([23437], "1", None) cv_expected = testing.comicvine.comic_issue_result.copy() - for r, e in zip(results, [cv_expected]): - assert r.series == e.series - assert r == e + assert results[0].series == cv_expected.series + assert results[0] == cv_expected + assert cv_requests_get.call_count == 2 + + results = comicvine_api.fetch_issues_by_series_issue_num_and_year([23437], "1", None) + + assert results[0].series == cv_expected.series + assert results[0] == cv_expected + assert cv_requests_get.call_count == 2 # verify caching works + + results = comicvine_api.fetch_issues_by_series_issue_num_and_year([23437], "2", None) + + assert not results + assert cv_requests_get.call_count == 2 # verify negative caching works cv_issue = [ diff --git a/tests/conftest.py b/tests/conftest.py index a1107bc..6ecba16 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -70,7 +70,7 @@ def no_requests(monkeypatch) -> None: @pytest.fixture -def comicvine_api(monkeypatch, cbz, comic_cache, mock_version, config) -> comictalker.talkers.comicvine.ComicVineTalker: +def cv_requests_get(monkeypatch, cbz, comic_cache) -> unittest.mock.Mock: # Any arguments may be passed and mock_get() will always return our # mocked object, which only has the .json() method or None for invalid urls. @@ -88,16 +88,18 @@ def comicvine_api(monkeypatch, cbz, comic_cache, mock_version, config) -> comict return comicvine.MockResponse(cv_result) if args[0].startswith("https://comicvine.gamespot.com/api/issue/4000-140529"): return comicvine.MockResponse(comicvine.cv_issue_result) + flt = kwargs.get("params", {}).get("filter", "").split(",") if ( args[0].startswith("https://comicvine.gamespot.com/api/issues/") and "params" in kwargs and "filter" in kwargs["params"] - and "23437" in kwargs["params"]["filter"] + and "volume:23437" in flt ): - cv_list = make_list(comicvine.cv_issue_result) - for cv in cv_list["results"]: - comicvine.filter_field_list(cv, kwargs) - return comicvine.MockResponse(cv_list) + if "issue_number" not in kwargs["params"]["filter"] or ("issue_number:1" in flt): + cv_list = make_list(comicvine.cv_issue_result) + for cv in cv_list["results"]: + comicvine.filter_field_list(cv, kwargs) + return comicvine.MockResponse(cv_list) if ( args[0].startswith("https://comicvine.gamespot.com/api/search") and "params" in kwargs @@ -126,6 +128,11 @@ def comicvine_api(monkeypatch, cbz, comic_cache, mock_version, config) -> comict # apply the monkeypatch for requests.get to mock_get monkeypatch.setattr(requests, "get", m_get) + return m_get + + +@pytest.fixture +def comicvine_api(monkeypatch, cv_requests_get, mock_version, config) -> comictalker.talkers.comicvine.ComicVineTalker: monkeypatch.setattr(comictalker.talkers.comicvine, "custom_limiter", Limiter(RequestRate(100, 1))) monkeypatch.setattr(comictalker.talkers.comicvine, "default_limiter", Limiter(RequestRate(100, 1))) diff --git a/tests/integration_test.py b/tests/integration_test.py index 05b9323..decabb3 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -52,6 +52,8 @@ def test_save( # This is inserted here because otherwise several other tests # unrelated to comicvine need to be re-worked + # the comicvine response is mocked to 1 for caching tests and adding the remaining 5 issues is more work + md_saved.issue_count = 1 md_saved.credits.insert( 1, comicapi.genericmetadata.Credit( From 89dfec2363fc00acd9a5de7730cf31e37776842e Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Thu, 19 Sep 2024 19:13:03 -0700 Subject: [PATCH 2/2] ComicVine improvements Add more logging Add a 10 second timeout to all requests Log unhandled exceptions --- comictalker/talkers/comicvine.py | 52 ++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index abdafb8..d7fbe80 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -22,6 +22,7 @@ import json import logging import pathlib import time +from collections import defaultdict from typing import Any, Callable, Generic, TypeVar, cast from urllib.parse import urljoin @@ -185,6 +186,10 @@ class ComicVineTalker(ComicTalker): self.default_api_url = self.api_url = f"{self.website}/api/" self.default_api_key = self.api_key = "27431e6787042105bd3e47e169a624521f89f3a4" self.use_series_start_as_volume: bool = False + self.total_requests_made: dict[str, int] = defaultdict(int) + + def _log_total_requests(self) -> None: + logger.debug("Total requests made to cv: %s", dict(self.total_requests_made)) def register_settings(self, parser: settngs.Manager) -> None: parser.add_setting( @@ -226,24 +231,28 @@ class ComicVineTalker(ComicTalker): if not url: url = self.default_api_url try: - test_url = urljoin(url, "issue/1/") + test_url = urljoin(url, "team/1/") + self.total_requests_made[test_url] += 1 cv_response: CVResult = requests.get( # type: ignore[type-arg] test_url, headers={"user-agent": "comictagger/" + self.version}, params={ "api_key": settings[f"{self.id}_key"] or self.default_api_key, "format": "json", - "field_list": "name", }, + timeout=10, ).json() # Bogus request, but if the key is wrong, you get error 100: "Invalid API Key" if cv_response["status_code"] != 100: + self._log_total_requests() return "The API key is valid", True else: + self._log_total_requests() return "The API key is INVALID!", False except Exception: + self._log_total_requests() return "Failed to connect to the URL!", False def search_for_series( @@ -270,7 +279,9 @@ class ComicVineTalker(ComicTalker): cached_search_results = cvc.get_search_results(self.id, series_name) if len(cached_search_results) > 0: + logger.debug("Search for %s cached: True", repr(series_name)) return self._format_search_results([json.loads(x[0].data) for x in cached_search_results]) + logger.debug("Search for %s cached: False", repr(series_name)) params = { # CV uses volume to mean series "api_key": self.api_key, @@ -459,6 +470,7 @@ class ComicVineTalker(ComicTalker): return formatted_filtered_issues_result def fetch_comics(self, *, issue_ids: list[str]) -> list[GenericMetadata]: + logger.debug("Fetching comic IDs: %s", issue_ids) # before we search online, look in our cache, since we might already have this info cvc = ComicCacher(self.cache_folder, self.version) cached_results: list[GenericMetadata] = [] @@ -476,8 +488,10 @@ class ComicVineTalker(ComicTalker): else: needed_issues.append(int(issue_id)) # CV uses integers for it's IDs + logger.debug("Found %d issues cached need %d issues", len(cached_results), len(needed_issues)) if not needed_issues: return cached_results + issue_filter = "" for iid in needed_issues: issue_filter += str(iid) + "|" @@ -592,8 +606,8 @@ class ComicVineTalker(ComicTalker): if self.api_key == self.default_api_key: ratelimit_key = "cv" with self.limiter.ratelimit(ratelimit_key, delay=True): - cv_response: CVResult[T] = self._get_url_content(url, params) + cv_response: CVResult[T] = self._get_url_content(url, params) if cv_response["status_code"] != 1: logger.debug( f"{self.name} query failed with error #{cv_response['status_code']}: [{cv_response['error']}]." @@ -608,7 +622,10 @@ class ComicVineTalker(ComicTalker): for tries in range(1, 5): try: - resp = requests.get(url, params=params, headers={"user-agent": "comictagger/" + self.version}) + self.total_requests_made[url.removeprefix(self.api_url)] += 1 + resp = requests.get( + url, params=params, headers={"user-agent": "comictagger/" + self.version}, timeout=10 + ) if resp.status_code == 200: return resp.json() if resp.status_code == 500: @@ -618,6 +635,7 @@ class ComicVineTalker(ComicTalker): if resp.status_code in (requests.status_codes.codes.TOO_MANY_REQUESTS, TWITTER_TOO_MANY_REQUESTS): logger.info(f"{self.name} rate limit encountered. Waiting for 10 seconds\n") + self._log_total_requests() time.sleep(10) limit_counter += 1 if limit_counter > 3: @@ -640,8 +658,10 @@ class ComicVineTalker(ComicTalker): except json.JSONDecodeError as e: logger.debug(f"JSON decode error: {e}") raise TalkerDataError(self.name, 2, "ComicVine did not provide json") - - raise TalkerNetworkError(self.name, 5) + except Exception as e: + raise TalkerNetworkError(self.name, 5, str(e)) + else: + raise TalkerNetworkError(self.name, 5, "Unknown error occurred") def _format_search_results(self, search_results: list[CVSeries]) -> list[ComicSeries]: formatted_results = [] @@ -680,17 +700,20 @@ class ComicVineTalker(ComicTalker): ) def _fetch_issues_in_series(self, series_id: str) -> list[tuple[GenericMetadata, bool]]: + logger.debug("Fetching all issues in series: %s", series_id) # before we search online, look in our cache, since we might already have this info cvc = ComicCacher(self.cache_folder, self.version) - cached_series_issues_result = cvc.get_series_issues_info(series_id, self.id) + cached_results = cvc.get_series_issues_info(series_id, self.id) series = self._fetch_series_data(int(series_id))[0] - if len(cached_series_issues_result) == series.count_of_issues: - return [ - (self._map_comic_issue_to_metadata(json.loads(x[0].data), series), x[1]) - for x in cached_series_issues_result - ] + logger.debug( + "Found %d issues cached need %d issues", + len(cached_results), + cast(int, series.count_of_issues) - len(cached_results), + ) + if len(cached_results) == series.count_of_issues: + return [(self._map_comic_issue_to_metadata(json.loads(x[0].data), series), x[1]) for x in cached_results] params = { # CV uses volume to mean series "api_key": self.api_key, @@ -734,10 +757,12 @@ class ComicVineTalker(ComicTalker): return [(x, False) for x in formatted_series_issues_result] def _fetch_series_data(self, series_id: int) -> tuple[ComicSeries, bool]: + logger.debug("Fetching series info: %s", series_id) # before we search online, look in our cache, since we might already have this info cvc = ComicCacher(self.cache_folder, self.version) cached_series = cvc.get_series_info(str(series_id), self.id) + logger.debug("Series cached: %s", bool(cached_series)) if cached_series is not None: return (self._format_series(json.loads(cached_series[0].data)), cached_series[1]) @@ -759,6 +784,7 @@ class ComicVineTalker(ComicTalker): return self._format_series(series_results), True def _fetch_issue_data(self, series_id: int, issue_number: str) -> GenericMetadata: + logger.debug("Fetching issue by series ID: %s and issue number: %s", series_id, issue_number) issues_list_results = self._fetch_issues_in_series(str(series_id)) # Loop through issue list to find the required issue info @@ -779,10 +805,12 @@ class ComicVineTalker(ComicTalker): return GenericMetadata() def _fetch_issue_data_by_issue_id(self, issue_id: str) -> GenericMetadata: + logger.debug("Fetching issue by issue ID: %s", issue_id) # before we search online, look in our cache, since we might already have this info cvc = ComicCacher(self.cache_folder, self.version) cached_issue = cvc.get_issue_info(issue_id, self.id) + logger.debug("Issue cached: %s", bool(cached_issue and cached_issue[1])) if cached_issue and cached_issue[1]: return self._map_comic_issue_to_metadata( json.loads(cached_issue[0].data), self._fetch_series_data(int(cached_issue[0].series_id))[0]