From 39a4a37d7c6b6b7601dbc0d7bcd5aaa4dc09a938 Mon Sep 17 00:00:00 2001
From: Timmy Welch <timmy@narnian.us>
Date: Thu, 19 Sep 2024 19:03:30 -0700
Subject: [PATCH 1/2] Add tests

---
 comictalker/talkers/comicvine.py |  9 +++++----
 testing/comicvine.py             |  2 +-
 tests/comicvinetalker_test.py    | 19 +++++++++++++++----
 tests/conftest.py                | 19 +++++++++++++------
 tests/integration_test.py        |  2 ++
 5 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py
index 30b2250..abdafb8 100644
--- a/comictalker/talkers/comicvine.py
+++ b/comictalker/talkers/comicvine.py
@@ -393,11 +393,12 @@ class ComicVineTalker(ComicTalker):
                         ),
                     )
                     issue_found = True
-            else:
+                    break
+            if not issues:
                 needed_volumes.add(int(series_id))  # we got no results from cache, we definitely need to check online
 
             # If we didn't find the issue and we don't have all the issues we don't know if the issue exists, we have to check
-            if not issue_found and cvseries.get("count_of_issues") == len(issues):
+            if (not issue_found) and cvseries.get("count_of_issues") != len(issues):
                 needed_volumes.add(int(series_id))
 
         logger.debug("Found %d issues cached need %d issues", len(cached_results), len(needed_volumes))
@@ -405,9 +406,9 @@ class ComicVineTalker(ComicTalker):
             return cached_results
 
         series_filter = ""
-        for vid in series_id_list:
+        for vid in needed_volumes:
             series_filter += str(vid) + "|"
-        flt = f"volume:{series_filter},issue_number:{issue_number}"  # CV uses volume to mean series
+        flt = f"volume:{series_filter[:-1]},issue_number:{issue_number}"  # CV uses volume to mean series
 
         int_year = utils.xlate_int(year)
         if int_year is not None:
diff --git a/testing/comicvine.py b/testing/comicvine.py
index 54b2fa5..908420f 100644
--- a/testing/comicvine.py
+++ b/testing/comicvine.py
@@ -118,7 +118,7 @@ cv_volume_result: dict[str, Any] = {
     "results": {
         "aliases": None,
         "api_detail_url": "https://comicvine.gamespot.com/api/volume/4050-23437/",
-        "count_of_issues": 6,
+        "count_of_issues": 1,
         "date_added": "2008-10-16 05:25:47",
         "date_last_updated": "2012-01-18 17:21:57",
         "deck": None,
diff --git a/tests/comicvinetalker_test.py b/tests/comicvinetalker_test.py
index 8129c2a..aec57d4 100644
--- a/tests/comicvinetalker_test.py
+++ b/tests/comicvinetalker_test.py
@@ -46,13 +46,24 @@ def test_fetch_issue_data_by_issue_id(comicvine_api):
     assert result == testing.comicvine.cv_md
 
 
-def test_fetch_issues_in_series_issue_num_and_year(comicvine_api):
+def test_fetch_issues_in_series_issue_num_and_year(comicvine_api, cv_requests_get):
     results = comicvine_api.fetch_issues_by_series_issue_num_and_year([23437], "1", None)
     cv_expected = testing.comicvine.comic_issue_result.copy()
 
-    for r, e in zip(results, [cv_expected]):
-        assert r.series == e.series
-        assert r == e
+    assert results[0].series == cv_expected.series
+    assert results[0] == cv_expected
+    assert cv_requests_get.call_count == 2
+
+    results = comicvine_api.fetch_issues_by_series_issue_num_and_year([23437], "1", None)
+
+    assert results[0].series == cv_expected.series
+    assert results[0] == cv_expected
+    assert cv_requests_get.call_count == 2  # verify caching works
+
+    results = comicvine_api.fetch_issues_by_series_issue_num_and_year([23437], "2", None)
+
+    assert not results
+    assert cv_requests_get.call_count == 2  # verify negative caching works
 
 
 cv_issue = [
diff --git a/tests/conftest.py b/tests/conftest.py
index a1107bc..6ecba16 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -70,7 +70,7 @@ def no_requests(monkeypatch) -> None:
 
 
 @pytest.fixture
-def comicvine_api(monkeypatch, cbz, comic_cache, mock_version, config) -> comictalker.talkers.comicvine.ComicVineTalker:
+def cv_requests_get(monkeypatch, cbz, comic_cache) -> unittest.mock.Mock:
     # Any arguments may be passed and mock_get() will always return our
     # mocked object, which only has the .json() method or None for invalid urls.
 
@@ -88,16 +88,18 @@ def comicvine_api(monkeypatch, cbz, comic_cache, mock_version, config) -> comict
                 return comicvine.MockResponse(cv_result)
             if args[0].startswith("https://comicvine.gamespot.com/api/issue/4000-140529"):
                 return comicvine.MockResponse(comicvine.cv_issue_result)
+            flt = kwargs.get("params", {}).get("filter", "").split(",")
             if (
                 args[0].startswith("https://comicvine.gamespot.com/api/issues/")
                 and "params" in kwargs
                 and "filter" in kwargs["params"]
-                and "23437" in kwargs["params"]["filter"]
+                and "volume:23437" in flt
             ):
-                cv_list = make_list(comicvine.cv_issue_result)
-                for cv in cv_list["results"]:
-                    comicvine.filter_field_list(cv, kwargs)
-                return comicvine.MockResponse(cv_list)
+                if "issue_number" not in kwargs["params"]["filter"] or ("issue_number:1" in flt):
+                    cv_list = make_list(comicvine.cv_issue_result)
+                    for cv in cv_list["results"]:
+                        comicvine.filter_field_list(cv, kwargs)
+                    return comicvine.MockResponse(cv_list)
             if (
                 args[0].startswith("https://comicvine.gamespot.com/api/search")
                 and "params" in kwargs
@@ -126,6 +128,11 @@ def comicvine_api(monkeypatch, cbz, comic_cache, mock_version, config) -> comict
 
     # apply the monkeypatch for requests.get to mock_get
     monkeypatch.setattr(requests, "get", m_get)
+    return m_get
+
+
+@pytest.fixture
+def comicvine_api(monkeypatch, cv_requests_get, mock_version, config) -> comictalker.talkers.comicvine.ComicVineTalker:
     monkeypatch.setattr(comictalker.talkers.comicvine, "custom_limiter", Limiter(RequestRate(100, 1)))
     monkeypatch.setattr(comictalker.talkers.comicvine, "default_limiter", Limiter(RequestRate(100, 1)))
 
diff --git a/tests/integration_test.py b/tests/integration_test.py
index 05b9323..decabb3 100644
--- a/tests/integration_test.py
+++ b/tests/integration_test.py
@@ -52,6 +52,8 @@ def test_save(
 
     # This is inserted here because otherwise several other tests
     # unrelated to comicvine need to be re-worked
+    # the comicvine response is mocked to 1 for caching tests and adding the remaining 5 issues is more work
+    md_saved.issue_count = 1
     md_saved.credits.insert(
         1,
         comicapi.genericmetadata.Credit(

From 89dfec2363fc00acd9a5de7730cf31e37776842e Mon Sep 17 00:00:00 2001
From: Timmy Welch <timmy@narnian.us>
Date: Thu, 19 Sep 2024 19:13:03 -0700
Subject: [PATCH 2/2] ComicVine improvements

Add more logging
Add a 10 second timeout to all requests
Log unhandled exceptions
---
 comictalker/talkers/comicvine.py | 52 ++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 12 deletions(-)

diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py
index abdafb8..d7fbe80 100644
--- a/comictalker/talkers/comicvine.py
+++ b/comictalker/talkers/comicvine.py
@@ -22,6 +22,7 @@ import json
 import logging
 import pathlib
 import time
+from collections import defaultdict
 from typing import Any, Callable, Generic, TypeVar, cast
 from urllib.parse import urljoin
 
@@ -185,6 +186,10 @@ class ComicVineTalker(ComicTalker):
         self.default_api_url = self.api_url = f"{self.website}/api/"
         self.default_api_key = self.api_key = "27431e6787042105bd3e47e169a624521f89f3a4"
         self.use_series_start_as_volume: bool = False
+        self.total_requests_made: dict[str, int] = defaultdict(int)
+
+    def _log_total_requests(self) -> None:
+        logger.debug("Total requests made to cv: %s", dict(self.total_requests_made))
 
     def register_settings(self, parser: settngs.Manager) -> None:
         parser.add_setting(
@@ -226,24 +231,28 @@ class ComicVineTalker(ComicTalker):
         if not url:
             url = self.default_api_url
         try:
-            test_url = urljoin(url, "issue/1/")
+            test_url = urljoin(url, "team/1/")
 
+            self.total_requests_made[test_url] += 1
             cv_response: CVResult = requests.get(  # type: ignore[type-arg]
                 test_url,
                 headers={"user-agent": "comictagger/" + self.version},
                 params={
                     "api_key": settings[f"{self.id}_key"] or self.default_api_key,
                     "format": "json",
-                    "field_list": "name",
                 },
+                timeout=10,
             ).json()
 
             # Bogus request, but if the key is wrong, you get error 100: "Invalid API Key"
             if cv_response["status_code"] != 100:
+                self._log_total_requests()
                 return "The API key is valid", True
             else:
+                self._log_total_requests()
                 return "The API key is INVALID!", False
         except Exception:
+            self._log_total_requests()
             return "Failed to connect to the URL!", False
 
     def search_for_series(
@@ -270,7 +279,9 @@ class ComicVineTalker(ComicTalker):
             cached_search_results = cvc.get_search_results(self.id, series_name)
 
             if len(cached_search_results) > 0:
+                logger.debug("Search for %s cached: True", repr(series_name))
                 return self._format_search_results([json.loads(x[0].data) for x in cached_search_results])
+        logger.debug("Search for %s cached: False", repr(series_name))
 
         params = {  # CV uses volume to mean series
             "api_key": self.api_key,
@@ -459,6 +470,7 @@ class ComicVineTalker(ComicTalker):
         return formatted_filtered_issues_result
 
     def fetch_comics(self, *, issue_ids: list[str]) -> list[GenericMetadata]:
+        logger.debug("Fetching comic IDs: %s", issue_ids)
         # before we search online, look in our cache, since we might already have this info
         cvc = ComicCacher(self.cache_folder, self.version)
         cached_results: list[GenericMetadata] = []
@@ -476,8 +488,10 @@ class ComicVineTalker(ComicTalker):
             else:
                 needed_issues.append(int(issue_id))  # CV uses integers for it's IDs
 
+        logger.debug("Found %d issues cached need %d issues", len(cached_results), len(needed_issues))
         if not needed_issues:
             return cached_results
+
         issue_filter = ""
         for iid in needed_issues:
             issue_filter += str(iid) + "|"
@@ -592,8 +606,8 @@ class ComicVineTalker(ComicTalker):
         if self.api_key == self.default_api_key:
             ratelimit_key = "cv"
         with self.limiter.ratelimit(ratelimit_key, delay=True):
-            cv_response: CVResult[T] = self._get_url_content(url, params)
 
+            cv_response: CVResult[T] = self._get_url_content(url, params)
             if cv_response["status_code"] != 1:
                 logger.debug(
                     f"{self.name} query failed with error #{cv_response['status_code']}:  [{cv_response['error']}]."
@@ -608,7 +622,10 @@ class ComicVineTalker(ComicTalker):
 
         for tries in range(1, 5):
             try:
-                resp = requests.get(url, params=params, headers={"user-agent": "comictagger/" + self.version})
+                self.total_requests_made[url.removeprefix(self.api_url)] += 1
+                resp = requests.get(
+                    url, params=params, headers={"user-agent": "comictagger/" + self.version}, timeout=10
+                )
                 if resp.status_code == 200:
                     return resp.json()
                 if resp.status_code == 500:
@@ -618,6 +635,7 @@ class ComicVineTalker(ComicTalker):
 
                 if resp.status_code in (requests.status_codes.codes.TOO_MANY_REQUESTS, TWITTER_TOO_MANY_REQUESTS):
                     logger.info(f"{self.name} rate limit encountered. Waiting for 10 seconds\n")
+                    self._log_total_requests()
                     time.sleep(10)
                     limit_counter += 1
                     if limit_counter > 3:
@@ -640,8 +658,10 @@ class ComicVineTalker(ComicTalker):
             except json.JSONDecodeError as e:
                 logger.debug(f"JSON decode error: {e}")
                 raise TalkerDataError(self.name, 2, "ComicVine did not provide json")
-
-        raise TalkerNetworkError(self.name, 5)
+            except Exception as e:
+                raise TalkerNetworkError(self.name, 5, str(e))
+        else:
+            raise TalkerNetworkError(self.name, 5, "Unknown error occurred")
 
     def _format_search_results(self, search_results: list[CVSeries]) -> list[ComicSeries]:
         formatted_results = []
@@ -680,17 +700,20 @@ class ComicVineTalker(ComicTalker):
         )
 
     def _fetch_issues_in_series(self, series_id: str) -> list[tuple[GenericMetadata, bool]]:
+        logger.debug("Fetching all issues in series: %s", series_id)
         # before we search online, look in our cache, since we might already have this info
         cvc = ComicCacher(self.cache_folder, self.version)
-        cached_series_issues_result = cvc.get_series_issues_info(series_id, self.id)
+        cached_results = cvc.get_series_issues_info(series_id, self.id)
 
         series = self._fetch_series_data(int(series_id))[0]
 
-        if len(cached_series_issues_result) == series.count_of_issues:
-            return [
-                (self._map_comic_issue_to_metadata(json.loads(x[0].data), series), x[1])
-                for x in cached_series_issues_result
-            ]
+        logger.debug(
+            "Found %d issues cached need %d issues",
+            len(cached_results),
+            cast(int, series.count_of_issues) - len(cached_results),
+        )
+        if len(cached_results) == series.count_of_issues:
+            return [(self._map_comic_issue_to_metadata(json.loads(x[0].data), series), x[1]) for x in cached_results]
 
         params = {  # CV uses volume to mean series
             "api_key": self.api_key,
@@ -734,10 +757,12 @@ class ComicVineTalker(ComicTalker):
         return [(x, False) for x in formatted_series_issues_result]
 
     def _fetch_series_data(self, series_id: int) -> tuple[ComicSeries, bool]:
+        logger.debug("Fetching series info: %s", series_id)
         # before we search online, look in our cache, since we might already have this info
         cvc = ComicCacher(self.cache_folder, self.version)
         cached_series = cvc.get_series_info(str(series_id), self.id)
 
+        logger.debug("Series cached: %s", bool(cached_series))
         if cached_series is not None:
             return (self._format_series(json.loads(cached_series[0].data)), cached_series[1])
 
@@ -759,6 +784,7 @@ class ComicVineTalker(ComicTalker):
         return self._format_series(series_results), True
 
     def _fetch_issue_data(self, series_id: int, issue_number: str) -> GenericMetadata:
+        logger.debug("Fetching issue by series ID: %s and issue number: %s", series_id, issue_number)
         issues_list_results = self._fetch_issues_in_series(str(series_id))
 
         # Loop through issue list to find the required issue info
@@ -779,10 +805,12 @@ class ComicVineTalker(ComicTalker):
         return GenericMetadata()
 
     def _fetch_issue_data_by_issue_id(self, issue_id: str) -> GenericMetadata:
+        logger.debug("Fetching issue by issue ID: %s", issue_id)
         # before we search online, look in our cache, since we might already have this info
         cvc = ComicCacher(self.cache_folder, self.version)
         cached_issue = cvc.get_issue_info(issue_id, self.id)
 
+        logger.debug("Issue cached: %s", bool(cached_issue and cached_issue[1]))
         if cached_issue and cached_issue[1]:
             return self._map_comic_issue_to_metadata(
                 json.loads(cached_issue[0].data), self._fetch_series_data(int(cached_issue[0].series_id))[0]