From 7a91acb60ca1ea0e18c1f3157d876c8c6ef3733d Mon Sep 17 00:00:00 2001 From: Mizaki Date: Tue, 20 Jun 2023 22:28:29 +0100 Subject: [PATCH 1/5] Add pyrate-limiter and apply CV suggested rate limit --- comictalker/talkers/comicvine.py | 31 +++++++++++++++++++------------ setup.cfg | 1 + 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index 337828c..d53e290 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -26,6 +26,7 @@ from urllib.parse import urljoin import requests import settngs +from pyrate_limiter import Limiter, RequestRate from typing_extensions import Required, TypedDict import comictalker.talker_utils as talker_utils @@ -152,6 +153,10 @@ class CVResult(TypedDict, Generic[T]): CV_STATUS_RATELIMIT = 107 +# https://comicvine.gamespot.com/forums/api-developers-2334/api-rate-limiting-1746419/ +# "Space out your requests so AT LEAST one second passes between each and you can make requests all day." +limiter = Limiter(RequestRate(1, 2)) + class ComicVineTalker(ComicTalker): name: str = "Comic Vine" @@ -428,27 +433,29 @@ class ComicVineTalker(ComicTalker): return formatted_filtered_issues_result + @limiter.ratelimit("cv", delay=True) def _get_cv_content(self, url: str, params: dict[str, Any]) -> CVResult: """ - Get the content from the CV server. If we're in "wait mode" and status code is a rate limit error - sleep for a bit and retry. + Get the content from the CV server. We should never hit a rate limit but will cover it anyway. """ - total_time_waited = 0 - limit_wait_time = 1 counter = 0 - wait_times = [1, 2, 3, 4] while True: cv_response: CVResult = self._get_url_content(url, params) - if self.wait_on_ratelimit and cv_response["status_code"] == CV_STATUS_RATELIMIT: - logger.info(f"Rate limit encountered. 
Waiting for {limit_wait_time} minutes\n") - time.sleep(limit_wait_time * 60) - total_time_waited += limit_wait_time - limit_wait_time = wait_times[counter] + + if cv_response["status_code"] == CV_STATUS_RATELIMIT: + logger.info("Rate limit encountered. Waiting for 10 seconds\n") + time.sleep(10) if counter < 3: counter += 1 - # don't wait much more than 20 minutes - if total_time_waited < self.wait_on_ratelimit_time: continue + # Tried 3 times, inform user to check CV website. + logger.error("Rate limit error. Exceeded 3 retires.") + raise TalkerNetworkError( + self.name, + 3, + "Rate Limit Error: Check your current API usage limit at https://comicvine.gamespot.com/api/", + ) + if cv_response["status_code"] != 1: logger.debug( f"{self.name} query failed with error #{cv_response['status_code']}: [{cv_response['error']}]." diff --git a/setup.cfg b/setup.cfg index 4fdc2b6..8f51483 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,6 +40,7 @@ install_requires = pathvalidate pillow>=9.1.0,<10 pycountry + pyrate-limiter rapidfuzz>=2.12.0 requests==2.* settngs==0.7.1 From af5a0e50e001ccf130acc4a7fe06a3baa0782574 Mon Sep 17 00:00:00 2001 From: Mizaki Date: Wed, 21 Jun 2023 22:32:06 +0100 Subject: [PATCH 2/5] Remove wait on CV rate limit in autotag --- comictaggerlib/autotagstartwindow.py | 4 - comictaggerlib/ctsettings/file.py | 8 - .../ctsettings/settngs_namespace.py | 1 - comictaggerlib/ui/autotagstartwindow.ui | 185 +++++++++--------- 4 files changed, 89 insertions(+), 109 deletions(-) diff --git a/comictaggerlib/autotagstartwindow.py b/comictaggerlib/autotagstartwindow.py index 94380e8..1f8f138 100644 --- a/comictaggerlib/autotagstartwindow.py +++ b/comictaggerlib/autotagstartwindow.py @@ -48,7 +48,6 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.cbxAssumeIssueOne.setChecked(self.config.autotag_assume_1_if_no_issue_num) self.cbxIgnoreLeadingDigitsInFilename.setChecked(self.config.autotag_ignore_leading_numbers_in_filename) 
self.cbxRemoveAfterSuccess.setChecked(self.config.autotag_remove_archive_after_successful_match) - self.cbxWaitForRateLimit.setChecked(self.config.autotag_wait_and_retry_on_rate_limit) self.cbxAutoImprint.setChecked(self.config.identifier_auto_imprint) nlmt_tip = """The Name Match Ratio Threshold: Auto-Identify is for eliminating automatic @@ -73,7 +72,6 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.assume_issue_one = False self.ignore_leading_digits_in_filename = False self.remove_after_success = False - self.wait_and_retry_on_rate_limit = False self.search_string = "" self.name_length_match_tolerance = self.config.identifier_series_match_search_thresh self.split_words = self.cbxSplitWords.isChecked() @@ -91,7 +89,6 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.ignore_leading_digits_in_filename = self.cbxIgnoreLeadingDigitsInFilename.isChecked() self.remove_after_success = self.cbxRemoveAfterSuccess.isChecked() self.name_length_match_tolerance = self.sbNameMatchSearchThresh.value() - self.wait_and_retry_on_rate_limit = self.cbxWaitForRateLimit.isChecked() self.split_words = self.cbxSplitWords.isChecked() # persist some settings @@ -100,7 +97,6 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.config.autotag_assume_1_if_no_issue_num = self.assume_issue_one self.config.autotag_ignore_leading_numbers_in_filename = self.ignore_leading_digits_in_filename self.config.autotag_remove_archive_after_successful_match = self.remove_after_success - self.config.autotag_wait_and_retry_on_rate_limit = self.wait_and_retry_on_rate_limit if self.cbxSpecifySearchString.isChecked(): self.search_string = self.leSearchString.text() diff --git a/comictaggerlib/ctsettings/file.py b/comictaggerlib/ctsettings/file.py index fb3e0d4..8283a5a 100644 --- a/comictaggerlib/ctsettings/file.py +++ b/comictaggerlib/ctsettings/file.py @@ -207,14 +207,6 @@ def autotag(parser: settngs.Manager) -> None: help="When searching ignore leading numbers in the filename", ) 
parser.add_setting("remove_archive_after_successful_match", default=False, cmdline=False) - parser.add_setting( - "-w", - "--wait-on-rate-limit", - dest="wait_and_retry_on_rate_limit", - action=argparse.BooleanOptionalAction, - default=True, - help="When encountering a Comic Vine rate limit\nerror, wait and retry query.\n\n", - ) def validate_file_settings(config: settngs.Config[ct_ns]) -> settngs.Config[ct_ns]: diff --git a/comictaggerlib/ctsettings/settngs_namespace.py b/comictaggerlib/ctsettings/settngs_namespace.py index 453d56a..eaa2e0e 100644 --- a/comictaggerlib/ctsettings/settngs_namespace.py +++ b/comictaggerlib/ctsettings/settngs_namespace.py @@ -102,4 +102,3 @@ class settngs_namespace(settngs.TypedNS): autotag_assume_1_if_no_issue_num: bool autotag_ignore_leading_numbers_in_filename: bool autotag_remove_archive_after_successful_match: bool - autotag_wait_and_retry_on_rate_limit: bool diff --git a/comictaggerlib/ui/autotagstartwindow.ui b/comictaggerlib/ui/autotagstartwindow.ui index dcf341d..a0bb361 100644 --- a/comictaggerlib/ui/autotagstartwindow.ui +++ b/comictaggerlib/ui/autotagstartwindow.ui @@ -10,7 +10,7 @@ 0 0 519 - 440 + 448 @@ -26,84 +26,19 @@ false - - - - - 0 - 0 - + + + + Qt::Horizontal - - - - - true + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok - - - - Checks the publisher against a list of imprints. 
- - - Auto Imprint - - - - - - - - 0 - 0 - - - - Specify series search string for all selected archives: - - - - - - - - 0 - 0 - - - - Ignore leading (sequence) numbers in filename - - - - - - - - 0 - 0 - - - - Save on low confidence match - - - - - - - 0 - 0 - - - - - @@ -116,7 +51,7 @@ - + @@ -129,19 +64,6 @@ - - - - - 0 - 0 - - - - Don't use publication year in identification process - - - @@ -155,7 +77,7 @@ - + Removes existing metadata before applying retrieved metadata @@ -165,7 +87,66 @@ + + + + + 0 + 0 + + + + + + + + + 0 + 0 + + + + Don't use publication year in identification process + + + + + + Checks the publisher against a list of imprints. + + + Auto Imprint + + + + + + + + 0 + 0 + + + + Specify series search string for all selected archives: + + + + + + + + 0 + 0 + + + + Save on low confidence match + + + + @@ -179,13 +160,19 @@ - + + + + 0 + 0 + + - Wait and retry when Comic Vine rate limit is exceeded (experimental) + Ignore leading (sequence) numbers in filename - + @@ -225,13 +212,19 @@ - - - - Qt::Horizontal + + + + + 0 + 0 + - - QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + true From 861584df3a72f77564f48f13c94ca701b7058e09 Mon Sep 17 00:00:00 2001 From: Mizaki Date: Thu, 22 Jun 2023 23:50:32 +0100 Subject: [PATCH 3/5] Move rate limit check from defunc API status code 107 to HTTP code 429. Set a limit of 10 request every 10 seconds except for the default API key which is 1,2 (to be finisalised). Remove wait on rate limit option. 
--- comictalker/talkers/comicvine.py | 78 +++++++++++++------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index d53e290..2e5c65b 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -151,13 +151,6 @@ class CVResult(TypedDict, Generic[T]): version: str -CV_STATUS_RATELIMIT = 107 - -# https://comicvine.gamespot.com/forums/api-developers-2334/api-rate-limiting-1746419/ -# "Space out your requests so AT LEAST one second passes between each and you can make requests all day." -limiter = Limiter(RequestRate(1, 2)) - - class ComicVineTalker(ComicTalker): name: str = "Comic Vine" id: str = "comicvine" @@ -165,6 +158,10 @@ class ComicVineTalker(ComicTalker): logo_url: str = f"{website}/a/bundles/comicvinesite/images/logo.png" attribution: str = f"Metadata provided by {name}" + # https://comicvine.gamespot.com/forums/api-developers-2334/api-rate-limiting-1746419/ + # "Space out your requests so AT LEAST one second passes between each and you can make requests all day." 
+ limiter = Limiter(RequestRate(10, 10)) + def __init__(self, version: str, cache_folder: pathlib.Path): super().__init__(version, cache_folder) # Default settings @@ -172,10 +169,6 @@ class ComicVineTalker(ComicTalker): self.default_api_key = self.api_key = "27431e6787042105bd3e47e169a624521f89f3a4" self.remove_html_tables: bool = False self.use_series_start_as_volume: bool = False - self.wait_on_ratelimit: bool = False - - # NOTE: This was hardcoded before which is why it isn't in settings - self.wait_on_ratelimit_time: int = 20 def register_settings(self, parser: settngs.Manager) -> None: parser.add_setting( @@ -185,13 +178,6 @@ class ComicVineTalker(ComicTalker): display_name="Use series start as volume", help="Use the series start year as the volume number", ) - parser.add_setting( - "--cv-wait-on-ratelimit", - default=False, - action=argparse.BooleanOptionalAction, - display_name="Wait on ratelimit", - help="Wait when the rate limit is hit", - ) parser.add_setting( "--cv-remove-html-tables", default=False, @@ -217,8 +203,12 @@ class ComicVineTalker(ComicTalker): settings = super().parse_settings(settings) self.use_series_start_as_volume = settings["cv_use_series_start_as_volume"] - self.wait_on_ratelimit = settings["cv_wait_on_ratelimit"] self.remove_html_tables = settings["cv_remove_html_tables"] + + # Set a different limit if using the default API key + if self.api_key == self.default_api_key: + ComicVineTalker.limiter = Limiter(RequestRate(1, 2)) + return settings def check_api_key(self, url: str, key: str) -> tuple[str, bool]: @@ -436,41 +426,24 @@ class ComicVineTalker(ComicTalker): @limiter.ratelimit("cv", delay=True) def _get_cv_content(self, url: str, params: dict[str, Any]) -> CVResult: """ - Get the content from the CV server. We should never hit a rate limit but will cover it anyway. + Get the content from the CV server. 
""" - counter = 0 - while True: - cv_response: CVResult = self._get_url_content(url, params) - if cv_response["status_code"] == CV_STATUS_RATELIMIT: - logger.info("Rate limit encountered. Waiting for 10 seconds\n") - time.sleep(10) - if counter < 3: - counter += 1 - continue - # Tried 3 times, inform user to check CV website. - logger.error("Rate limit error. Exceeded 3 retires.") - raise TalkerNetworkError( - self.name, - 3, - "Rate Limit Error: Check your current API usage limit at https://comicvine.gamespot.com/api/", - ) + cv_response: CVResult = self._get_url_content(url, params) - if cv_response["status_code"] != 1: - logger.debug( - f"{self.name} query failed with error #{cv_response['status_code']}: [{cv_response['error']}]." - ) - raise TalkerNetworkError(self.name, 0, f"{cv_response['status_code']}: {cv_response['error']}") + if cv_response["status_code"] != 1: + logger.debug( + f"{self.name} query failed with error #{cv_response['status_code']}: [{cv_response['error']}]." + ) + raise TalkerNetworkError(self.name, 0, f"{cv_response['status_code']}: {cv_response['error']}") - # it's all good - break return cv_response def _get_url_content(self, url: str, params: dict[str, Any]) -> Any: - # connect to server: # if there is a 500 error, try a few more times before giving up - # any other error, just bail - for tries in range(3): + limit_counter = 0 + tries = 0 + while tries < 4: try: resp = requests.get(url, params=params, headers={"user-agent": "comictagger/" + self.version}) if resp.status_code == 200: @@ -479,6 +452,19 @@ class ComicVineTalker(ComicTalker): logger.debug(f"Try #{tries + 1}: ") time.sleep(1) logger.debug(str(resp.status_code)) + tries += 1 + if resp.status_code == 429: + logger.info("Rate limit encountered. Waiting for 10 seconds\n") + time.sleep(10) + limit_counter += 1 + if limit_counter > 3: + # Tried 3 times, inform user to check CV website. + logger.error("Rate limit error. 
Exceeded 3 retires.") + raise TalkerNetworkError( + self.name, + 3, + "Rate Limit Error: Check your current API usage limit at https://comicvine.gamespot.com/api/", + ) else: break From fba5518d067d46dda8d456bc154573c05f5b07ce Mon Sep 17 00:00:00 2001 From: Mizaki Date: Fri, 23 Jun 2023 21:25:02 +0100 Subject: [PATCH 4/5] Create two module limiters and assign class limiter var depending. Add to welcome message limits of default CV API key. --- comictaggerlib/taggerwindow.py | 4 ++++ comictalker/talkers/comicvine.py | 36 ++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index 3e3b701..7dcbf15 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -257,6 +257,10 @@ class TaggerWindow(QtWidgets.QMainWindow): Also, be aware that writing tags to comic archives will change their file hashes, which has implications with respect to other software packages. It's best to use ComicTagger on local copies of your comics.

+ COMIC VINE NOTE: Using the default API key will severely limit search and tagging + times. A personal API key will allow for a 5 times increase in online search speed. See the + Wiki page + for more information.

Have fun! """, ) diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index 2e5c65b..83dfdc9 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -151,6 +151,12 @@ class CVResult(TypedDict, Generic[T]): version: str +# https://comicvine.gamespot.com/forums/api-developers-2334/api-rate-limiting-1746419/ +# "Space out your requests so AT LEAST one second passes between each and you can make requests all day." +custom_limiter = Limiter(RequestRate(10, 10)) +default_limiter = Limiter(RequestRate(1, 5)) + + class ComicVineTalker(ComicTalker): name: str = "Comic Vine" id: str = "comicvine" @@ -158,12 +164,9 @@ class ComicVineTalker(ComicTalker): logo_url: str = f"{website}/a/bundles/comicvinesite/images/logo.png" attribution: str = f"Metadata provided by {name}" - # https://comicvine.gamespot.com/forums/api-developers-2334/api-rate-limiting-1746419/ - # "Space out your requests so AT LEAST one second passes between each and you can make requests all day." - limiter = Limiter(RequestRate(10, 10)) - def __init__(self, version: str, cache_folder: pathlib.Path): super().__init__(version, cache_folder) + self.limiter = default_limiter # Default settings self.default_api_url = self.api_url = f"{self.website}/api/" self.default_api_key = self.api_key = "27431e6787042105bd3e47e169a624521f89f3a4" @@ -207,7 +210,9 @@ class ComicVineTalker(ComicTalker): # Set a different limit if using the default API key if self.api_key == self.default_api_key: - ComicVineTalker.limiter = Limiter(RequestRate(1, 2)) + self.limiter = default_limiter + else: + self.limiter = custom_limiter return settings @@ -423,21 +428,20 @@ class ComicVineTalker(ComicTalker): return formatted_filtered_issues_result - @limiter.ratelimit("cv", delay=True) def _get_cv_content(self, url: str, params: dict[str, Any]) -> CVResult: """ Get the content from the CV server. 
""" + with self.limiter.ratelimit("cv", delay=True): + cv_response: CVResult = self._get_url_content(url, params) - cv_response: CVResult = self._get_url_content(url, params) + if cv_response["status_code"] != 1: + logger.debug( + f"{self.name} query failed with error #{cv_response['status_code']}: [{cv_response['error']}]." + ) + raise TalkerNetworkError(self.name, 0, f"{cv_response['status_code']}: {cv_response['error']}") - if cv_response["status_code"] != 1: - logger.debug( - f"{self.name} query failed with error #{cv_response['status_code']}: [{cv_response['error']}]." - ) - raise TalkerNetworkError(self.name, 0, f"{cv_response['status_code']}: {cv_response['error']}") - - return cv_response + return cv_response def _get_url_content(self, url: str, params: dict[str, Any]) -> Any: # if there is a 500 error, try a few more times before giving up @@ -454,12 +458,12 @@ class ComicVineTalker(ComicTalker): logger.debug(str(resp.status_code)) tries += 1 if resp.status_code == 429: - logger.info("Rate limit encountered. Waiting for 10 seconds\n") + logger.info(f"{self.name} rate limit encountered. Waiting for 10 seconds\n") time.sleep(10) limit_counter += 1 if limit_counter > 3: # Tried 3 times, inform user to check CV website. - logger.error("Rate limit error. Exceeded 3 retires.") + logger.error(f"{self.name} rate limit error. 
Exceeded 3 retires.") raise TalkerNetworkError( self.name, 3, From 37cc66cbae6191a41adfbab0a0c23a573bd48370 Mon Sep 17 00:00:00 2001 From: Mizaki Date: Tue, 27 Jun 2023 17:48:38 +0100 Subject: [PATCH 5/5] Use requests.status_codes.codes.TOO_MANY_REQUESTS --- comictalker/talkers/comicvine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index 83dfdc9..f5b3a13 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -457,7 +457,7 @@ class ComicVineTalker(ComicTalker): time.sleep(1) logger.debug(str(resp.status_code)) tries += 1 - if resp.status_code == 429: + if resp.status_code == requests.status_codes.codes.TOO_MANY_REQUESTS: logger.info(f"{self.name} rate limit encountered. Waiting for 10 seconds\n") time.sleep(10) limit_counter += 1