From 944c0b9b2ead19019165896e2312d7bd99f5ee1e Mon Sep 17 00:00:00 2001 From: lordwelch Date: Tue, 10 Sep 2019 14:52:59 -0700 Subject: [PATCH] Move to python requests module requests is much simpler and fixes all ssl errors. Comic Vine now requires a unique useragent string --- comictaggerlib/comicvinetalker.py | 184 +++++++++++++++--------------- comictaggerlib/imagefetcher.py | 11 +- comictaggerlib/issueidentifier.py | 3 - comictaggerlib/versionchecker.py | 37 +++--- 4 files changed, 114 insertions(+), 121 deletions(-) diff --git a/comictaggerlib/comicvinetalker.py b/comictaggerlib/comicvinetalker.py index cc0d2bc..5a0ca85 100644 --- a/comictaggerlib/comicvinetalker.py +++ b/comictaggerlib/comicvinetalker.py @@ -15,8 +15,7 @@ # limitations under the License. import json -import urllib.request, urllib.error, urllib.parse -import urllib.request, urllib.parse, urllib.error +import requests import re import time import datetime @@ -104,9 +103,6 @@ class ComicVineTalker(QObject): self.log_func = None - # always use a tls context for urlopen - self.ssl = ssl.SSLContext(ssl.PROTOCOL_TLSv1) - def setLogFunc(self, log_func): self.log_func = log_func @@ -134,13 +130,10 @@ class ComicVineTalker(QObject): def testKey(self, key): try: - test_url = self.api_base_url + "/issue/1/?api_key=" + \ - key + "&format=json&field_list=name" - resp = urllib.request.urlopen(test_url, context=self.ssl) - content = resp.read() - - cv_response = json.loads(content.decode('utf-8')) - + test_url = self.api_base_url + "/issue/1/?api_key=" + key + "&format=json&field_list=name" + + cv_response = requests.get(test_url, headers={'user-agent': 'comictagger/' + ctversion.version}).json() + # Bogus request, but if the key is wrong, you get error 100: "Invalid # API Key" return cv_response['status_code'] != 100 @@ -152,14 +145,13 @@ class ComicVineTalker(QObject): sleep for a bit and retry. """ - def getCVContent(self, url): + def getCVContent(self, url, params): total_time_waited = 0 limit_wait_time = 1 counter = 0 wait_times = [1, 2, 3, 4] while True: - content = self.getUrlContent(url) - cv_response = json.loads(content.decode('utf-8')) + cv_response = self.getUrlContent(url, params) if self.wait_for_rate_limit and cv_response[ 'status_code'] == ComicVineTalkerException.RateLimit: self.writeLog( @@ -184,25 +176,24 @@ class ComicVineTalker(QObject): break return cv_response - def getUrlContent(self, url): + def getUrlContent(self, url, params): # connect to server: # if there is a 500 error, try a few more times before giving up # any other error, just bail #print("---", url) for tries in range(3): try: - resp = urllib.request.urlopen(url, context=self.ssl) - return resp.read() - except urllib.error.HTTPError as e: - if e.getcode() == 500: + resp = requests.get(url, params=params, headers={'user-agent': 'comictagger/' + ctversion.version}) + if resp.status_code == 200: + return resp.json() + if resp.status_code == 500: self.writeLog("Try #{0}: ".format(tries + 1)) time.sleep(1) - self.writeLog(str(e) + "\n") - - if e.getcode() != 500: + self.writeLog(str(resp.status_code) + "\n") + else: break - except Exception as e: + except requests.exceptions.RequestException as e: self.writeLog(str(e) + "\n") raise ComicVineTalkerException( ComicVineTalkerException.Network, "Network Error!") @@ -226,17 +217,16 @@ class ComicVineTalker(QObject): original_series_name = series_name - # Split and rejoin to remove extra internal spaces - query_word_list = series_name.split() - query_string = " ".join( query_word_list ).strip() - #print ("Query string = ", query_string) + params = { + 'api_key': self.api_key, + 'format': 'json', + 'resources': 'volume', + 'query': series_name, + 'field_list': 'name,id,start_year,publisher,image,description,count_of_issues', + 'page': 1 + } - query_string = urllib.parse.quote_plus(query_string.encode("utf-8")) - - search_url = self.api_base_url + "/search/?api_key=" + self.api_key + "&format=json&resources=volume&query=" + \ - query_string + \ - "&field_list=name,id,start_year,publisher,image,description,count_of_issues" - cv_response = self.getCVContent(search_url + "&page=1") + cv_response = self.getCVContent(self.api_base_url + "/search", params) search_results = list() @@ -249,15 +239,15 @@ class ComicVineTalker(QObject): # 8 Dec 2018 - Comic Vine changed query results again. Terms are now # ORed together, and we get thousands of results. Good news is the # results are sorted by relevance, so we can be smart about halting - # the search. + # the search. # 1. Don't fetch more than some sane amount of pages. - max_results = 500 + max_results = 500 # 2. Halt when not all of our search terms are present in a result # 3. Halt when the results contain more (plus threshold) words than # our search - result_word_count_max = len(query_word_list) + 3 + result_word_count_max = len(series_name.split()) + 3 - total_result_count = min(total_result_count, max_results) + total_result_count = min(total_result_count, max_results) if callback is None: self.writeLog( @@ -278,15 +268,14 @@ class ComicVineTalker(QObject): # See if the last result's name has all the of the search terms. # if not, break out of this, loop, we're done. - #print("Searching for {} in '{}'".format(query_word_list, last_result)) - for term in query_word_list: + for term in series_name.split(): if term not in last_result.lower(): #print("Term '{}' not in last result. Halting search result fetching".format(term)) stop_searching = True break # Also, stop searching when the word count of last results is too much longer - # than our search terms list + # than our search terms list if len(utils.removearticles(last_result).split()) > result_word_count_max: #print("Last result '{}' is too long. Halting search result fetching".format(last_result)) stop_searching = True @@ -301,7 +290,8 @@ class ComicVineTalker(QObject): total_result_count)) page += 1 - cv_response = self.getCVContent(search_url + "&page=" + str(page)) + params['page'] = page + cv_response = self.getCVContent(self.api_base_url + "/search", params) search_results.extend(cv_response['results']) current_result_count += cv_response['number_of_page_results'] @@ -313,7 +303,7 @@ class ComicVineTalker(QObject): # (iterate backwards for easy removal) for i in range(len(search_results) - 1, -1, -1): record = search_results[i] - for term in query_word_list: + for term in series_name.split(): if term not in record['name'].lower(): del search_results[i] break @@ -339,11 +329,14 @@ class ComicVineTalker(QObject): if cached_volume_result is not None: return cached_volume_result - volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + \ - str(series_id) + "/?api_key=" + self.api_key + \ - "&field_list=name,id,start_year,publisher,count_of_issues&format=json" + volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + str(series_id) - cv_response = self.getCVContent(volume_url) + params = { + 'api_key': self.api_key, + 'format': 'json', + 'field_list': 'name,id,start_year,publisher,count_of_issues' + } + cv_response = self.getCVContent(volume_url, params) volume_results = cv_response['results'] @@ -361,11 +354,13 @@ class ComicVineTalker(QObject): if cached_volume_issues_result is not None: return cached_volume_issues_result - #--------------------------------- - issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + "&filter=volume:" + \ - str(series_id) + \ - "&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json" - cv_response = self.getCVContent(issues_url) + params = { + 'api_key': self.api_key, + 'filter': 'volume:' + str(series_id), + 'format': 'json', + 'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description' + } + cv_response = self.getCVContent(self.api_base_url + "/issues/", params) #------------------------------------ @@ -385,9 +380,8 @@ class ComicVineTalker(QObject): page += 1 offset += cv_response['number_of_page_results'] - # print issues_url+ "&offset="+str(offset) - cv_response = self.getCVContent( - issues_url + "&offset=" + str(offset)) + params['offset'] = offset + cv_response = self.getCVContent(self.api_base_url + "/issues/", params) volume_issues_result.extend(cv_response['results']) current_result_count += cv_response['number_of_page_results'] @@ -398,26 +392,24 @@ class ComicVineTalker(QObject): return volume_issues_result - def fetchIssuesByVolumeIssueNumAndYear( - self, volume_id_list, issue_number, year): - volume_filter = "volume:" + def fetchIssuesByVolumeIssueNumAndYear(self, volume_id_list, issue_number, year): + volume_filter = "" for vid in volume_id_list: volume_filter += str(vid) + "|" + filter = "volume:{},issue_number:{}".format(volume_filter, issue_number) - year_filter = "" - if year is not None and str(year).isdigit(): - year_filter = ",cover_date:{0}-1-1|{1}-1-1".format( - year, int(year) + 1) + intYear = utils.xlate(year, True) + if intYear is not None: + filter += ",cover_date:{}-1-1|{}-1-1".format(intYear, intYear + 1) - issue_number = urllib.parse.quote_plus(str(issue_number).encode("utf-8")) + params = { + 'api_key': self.api_key, + 'format': 'json', + 'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description', + 'filter': filter + } - filter = "&filter=" + volume_filter + \ - year_filter + ",issue_number:" + issue_number - - issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + filter + \ - "&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json" - - cv_response = self.getCVContent(issues_url) + cv_response = self.getCVContent(self.api_base_url + "/issues", params) #------------------------------------ @@ -437,9 +429,8 @@ class ComicVineTalker(QObject): page += 1 offset += cv_response['number_of_page_results'] - # print issues_url+ "&offset="+str(offset) - cv_response = self.getCVContent( - issues_url + "&offset=" + str(offset)) + params['offset'] = offset + cv_response = self.getCVContent(self.api_base_url + "/issues/", params) filtered_issues_result.extend(cv_response['results']) current_result_count += cv_response['number_of_page_results'] @@ -463,11 +454,12 @@ class ComicVineTalker(QObject): break if (found): - issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \ - str(record['id']) + "/?api_key=" + \ - self.api_key + "&format=json" - - cv_response = self.getCVContent(issue_url) + issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(record['id']) + params = { + 'api_key': self.api_key, + 'format': 'json' + } + cv_response = self.getCVContent(issue_url, params) issue_results = cv_response['results'] else: @@ -479,9 +471,12 @@ class ComicVineTalker(QObject): def fetchIssueDataByIssueID(self, issue_id, settings): - issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \ - str(issue_id) + "/?api_key=" + self.api_key + "&format=json" - cv_response = self.getCVContent(issue_url) + issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id) + params = { + 'api_key': self.api_key, + 'format': 'json' + } + cv_response = self.getCVContent(issue_url, params) issue_results = cv_response['results'] @@ -672,9 +667,15 @@ class ComicVineTalker(QObject): if cached_details['image_url'] is not None: return cached_details - issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \ - str(issue_id) + "/?api_key=" + self.api_key + \ - "&format=json&field_list=image,cover_date,site_detail_url" + issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id) + + params = { + 'api_key': self.api_key, + 'format': 'json', + 'field_list': 'image,cover_date,site_detail_url' + } + + cv_response = self.getCVContent(issue_url, params) details = dict() details['image_url'] = None @@ -682,8 +683,6 @@ class ComicVineTalker(QObject): details['cover_date'] = None details['site_detail_url'] = None - cv_response = self.getCVContent(issue_url) - details['image_url'] = cv_response['results']['image']['super_url'] details['thumb_image_url'] = cv_response[ 'results']['image']['thumb_url'] @@ -718,8 +717,7 @@ class ComicVineTalker(QObject): return url_list # scrape the CV issue page URL to get the alternate cover URLs - resp = urllib.request.urlopen(issue_page_url, context=self.ssl) - content = resp.read() + content = requests.get(issue_page_url, headers={'user-agent': 'comictagger/' + ctversion.version}).text alt_cover_url_list = self.parseOutAltCoverUrls(content) # cache this alt cover URL list @@ -729,9 +727,9 @@ class ComicVineTalker(QObject): def parseOutAltCoverUrls(self, page_html): soup = BeautifulSoup(page_html, "html.parser") - + alt_cover_url_list = [] - + # Using knowledge of the layout of the Comic Vine issue page here: # look for the divs that are in the classes 'imgboxart' and # 'issue-cover' @@ -740,15 +738,15 @@ class ComicVineTalker(QObject): for d in div_list: if 'class' in d.attrs: c = d['class'] - if ('imgboxart' in c and + if ('imgboxart' in c and 'issue-cover' in c and d.img['src'].startswith("http") ): - + covers_found += 1 if covers_found != 1: alt_cover_url_list.append(d.img['src']) - + return alt_cover_url_list def fetchCachedAlternateCoverURLs(self, issue_id): diff --git a/comictaggerlib/imagefetcher.py b/comictaggerlib/imagefetcher.py index 48baa5b..5dbd215 100644 --- a/comictaggerlib/imagefetcher.py +++ b/comictaggerlib/imagefetcher.py @@ -19,9 +19,7 @@ import os import datetime import shutil import tempfile -import urllib.request, urllib.parse, urllib.error -import ssl -#import urllib2 +import requests try: from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest @@ -46,6 +44,7 @@ except ImportError: pass from .settings import ComicTaggerSettings +from . import ctversion class ImageFetcherException(Exception): @@ -66,9 +65,6 @@ class ImageFetcher(QObject): if not os.path.exists(self.db_file): self.create_image_db() - # always use a tls context for urlopen - self.ssl = ssl.SSLContext(ssl.PROTOCOL_TLSv1) - def clearCache(self): os.unlink(self.db_file) if os.path.isdir(self.cache_folder): @@ -90,7 +86,8 @@ class ImageFetcher(QObject): if blocking: if image_data is None: try: - image_data = urllib.request.urlopen(url, context=self.ssl).read() + print(url) + image_data = requests.get(url, headers={'user-agent': 'comictagger/' + ctversion.version}).content except Exception as e: print(e) raise ImageFetcherException("Network Error!") diff --git a/comictaggerlib/issueidentifier.py b/comictaggerlib/issueidentifier.py index 47b51af..292e84c 100644 --- a/comictaggerlib/issueidentifier.py +++ b/comictaggerlib/issueidentifier.py @@ -16,9 +16,6 @@ import sys import io -#import math -#import urllib2 -#import urllib try: from PIL import Image diff --git a/comictaggerlib/versionchecker.py b/comictaggerlib/versionchecker.py index 32b3684..e1fbad5 100644 --- a/comictaggerlib/versionchecker.py +++ b/comictaggerlib/versionchecker.py @@ -16,9 +16,9 @@ import sys import platform -import urllib.request, urllib.error, urllib.parse +import requests +import urllib.parse #import os -#import urllib try: from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkReply @@ -47,28 +47,30 @@ class VersionChecker(QObject): base_url = "http://comictagger1.appspot.com/latest" args = "" - + params = dict() if use_stats: + params = { + 'uuid': uuid, + 'version': ctversion.version + } if platform.system() == "Windows": - plat = "win" + params['platform'] = "win" elif platform.system() == "Linux": - plat = "lin" + params['platform'] = "lin" elif platform.system() == "Darwin": - plat = "mac" + params['platform'] = "mac" else: - plat = "other" - args = "?uuid={0}&platform={1}&version={2}".format( - uuid, plat, ctversion.version) - if not getattr(sys, 'frozen', None): - args += "&src=T" + params['platform'] = "other" - return base_url + args + if not getattr(sys, 'frozen', None): + params['src'] = 'T' + + return (base_url, params) def getLatestVersion(self, uuid, use_stats=True): - try: - resp = urllib.request.urlopen(self.getRequestUrl(uuid, use_stats)) - new_version = resp.read() + url, params = self.getRequestUrl(uuid, use_stats) + new_version = requests.get(url, params=params).text except Exception as e: return None @@ -79,12 +81,11 @@ class VersionChecker(QObject): versionRequestComplete = pyqtSignal(str) def asyncGetLatestVersion(self, uuid, use_stats): - - url = self.getRequestUrl(uuid, use_stats) + url, params = self.getRequestUrl(uuid, use_stats) self.nam = QNetworkAccessManager() self.nam.finished.connect(self.asyncGetLatestVersionComplete) - self.nam.get(QNetworkRequest(QUrl(str(url)))) + self.nam.get(QNetworkRequest(QUrl(str(url + '?' + urllib.parse.urlencode(params))))) def asyncGetLatestVersionComplete(self, reply): if (reply.error() != QNetworkReply.NoError):