Move to python requests module

Add requests to requirements.txt
Requests is much simpler and fixes all SSL errors.
Comic Vine now requires a unique user-agent string.
This commit is contained in:
lordwelch 2020-02-12 23:30:04 -08:00
parent af4b3af14e
commit 11bf5a9709
5 changed files with 115 additions and 121 deletions

View File

@ -15,8 +15,7 @@
# limitations under the License.
import json
import urllib.request, urllib.error, urllib.parse
import urllib.request, urllib.parse, urllib.error
import requests
import re
import time
import datetime
@ -104,9 +103,6 @@ class ComicVineTalker(QObject):
self.log_func = None
# always use a tls context for urlopen
self.ssl = ssl.SSLContext(ssl.PROTOCOL_TLS)
def setLogFunc(self, log_func):
self.log_func = log_func
@ -134,13 +130,10 @@ class ComicVineTalker(QObject):
def testKey(self, key):
try:
test_url = self.api_base_url + "/issue/1/?api_key=" + \
key + "&format=json&field_list=name"
resp = urllib.request.urlopen(test_url, context=self.ssl)
content = resp.read()
cv_response = json.loads(content.decode('utf-8'))
test_url = self.api_base_url + "/issue/1/?api_key=" + key + "&format=json&field_list=name"
cv_response = requests.get(test_url, headers={'user-agent': 'comictagger/' + ctversion.version}).json()
# Bogus request, but if the key is wrong, you get error 100: "Invalid
# API Key"
return cv_response['status_code'] != 100
@ -152,14 +145,13 @@ class ComicVineTalker(QObject):
sleep for a bit and retry.
"""
def getCVContent(self, url):
def getCVContent(self, url, params):
total_time_waited = 0
limit_wait_time = 1
counter = 0
wait_times = [1, 2, 3, 4]
while True:
content = self.getUrlContent(url)
cv_response = json.loads(content.decode('utf-8'))
cv_response = self.getUrlContent(url, params)
if self.wait_for_rate_limit and cv_response[
'status_code'] == ComicVineTalkerException.RateLimit:
self.writeLog(
@ -184,25 +176,24 @@ class ComicVineTalker(QObject):
break
return cv_response
def getUrlContent(self, url):
def getUrlContent(self, url, params):
# connect to server:
# if there is a 500 error, try a few more times before giving up
# any other error, just bail
#print("---", url)
for tries in range(3):
try:
resp = urllib.request.urlopen(url, context=self.ssl)
return resp.read()
except urllib.error.HTTPError as e:
if e.getcode() == 500:
resp = requests.get(url, params=params, headers={'user-agent': 'comictagger/' + ctversion.version})
if resp.status_code == 200:
return resp.json()
if resp.status_code == 500:
self.writeLog("Try #{0}: ".format(tries + 1))
time.sleep(1)
self.writeLog(str(e) + "\n")
if e.getcode() != 500:
self.writeLog(str(resp.status_code) + "\n")
else:
break
except Exception as e:
except requests.exceptions.RequestException as e:
self.writeLog(str(e) + "\n")
raise ComicVineTalkerException(
ComicVineTalkerException.Network, "Network Error!")
@ -226,17 +217,16 @@ class ComicVineTalker(QObject):
original_series_name = series_name
# Split and rejoin to remove extra internal spaces
query_word_list = series_name.split()
query_string = " ".join( query_word_list ).strip()
#print ("Query string = ", query_string)
params = {
'api_key': self.api_key,
'format': 'json',
'resources': 'volume',
'query': series_name,
'field_list': 'name,id,start_year,publisher,image,description,count_of_issues',
'page': 1
}
query_string = urllib.parse.quote_plus(query_string.encode("utf-8"))
search_url = self.api_base_url + "/search/?api_key=" + self.api_key + "&format=json&resources=volume&query=" + \
query_string + \
"&field_list=name,id,start_year,publisher,image,description,count_of_issues&limit=100"
cv_response = self.getCVContent(search_url + "&page=1")
cv_response = self.getCVContent(self.api_base_url + "/search", params)
search_results = list()
@ -249,15 +239,15 @@ class ComicVineTalker(QObject):
# 8 Dec 2018 - Comic Vine changed query results again. Terms are now
# ORed together, and we get thousands of results. Good news is the
# results are sorted by relevance, so we can be smart about halting
# the search.
# the search.
# 1. Don't fetch more than some sane amount of pages.
max_results = 500
max_results = 500
# 2. Halt when not all of our search terms are present in a result
# 3. Halt when the results contain more (plus threshold) words than
# our search
result_word_count_max = len(query_word_list) + 3
result_word_count_max = len(series_name.split()) + 3
total_result_count = min(total_result_count, max_results)
total_result_count = min(total_result_count, max_results)
if callback is None:
self.writeLog(
@ -278,15 +268,14 @@ class ComicVineTalker(QObject):
# See if the last result's name has all the of the search terms.
# if not, break out of this, loop, we're done.
#print("Searching for {} in '{}'".format(query_word_list, last_result))
for term in query_word_list:
for term in series_name.split():
if term not in last_result.lower():
#print("Term '{}' not in last result. Halting search result fetching".format(term))
stop_searching = True
break
# Also, stop searching when the word count of last results is too much longer
# than our search terms list
# than our search terms list
if len(utils.removearticles(last_result).split()) > result_word_count_max:
#print("Last result '{}' is too long. Halting search result fetching".format(last_result))
stop_searching = True
@ -301,7 +290,8 @@ class ComicVineTalker(QObject):
total_result_count))
page += 1
cv_response = self.getCVContent(search_url + "&page=" + str(page))
params['page'] = page
cv_response = self.getCVContent(self.api_base_url + "/search", params)
search_results.extend(cv_response['results'])
current_result_count += cv_response['number_of_page_results']
@ -313,7 +303,7 @@ class ComicVineTalker(QObject):
# (iterate backwards for easy removal)
for i in range(len(search_results) - 1, -1, -1):
record = search_results[i]
for term in query_word_list:
for term in series_name.split():
if term not in record['name'].lower():
del search_results[i]
break
@ -339,11 +329,14 @@ class ComicVineTalker(QObject):
if cached_volume_result is not None:
return cached_volume_result
volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + \
str(series_id) + "/?api_key=" + self.api_key + \
"&field_list=name,id,start_year,publisher,count_of_issues&format=json"
volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + str(series_id)
cv_response = self.getCVContent(volume_url)
params = {
'api_key': self.api_key,
'format': 'json',
'field_list': 'name,id,start_year,publisher,count_of_issues'
}
cv_response = self.getCVContent(volume_url, params)
volume_results = cv_response['results']
@ -361,11 +354,13 @@ class ComicVineTalker(QObject):
if cached_volume_issues_result is not None:
return cached_volume_issues_result
#---------------------------------
issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + "&filter=volume:" + \
str(series_id) + \
"&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
cv_response = self.getCVContent(issues_url)
params = {
'api_key': self.api_key,
'filter': 'volume:' + str(series_id),
'format': 'json',
'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description'
}
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
#------------------------------------
@ -385,9 +380,8 @@ class ComicVineTalker(QObject):
page += 1
offset += cv_response['number_of_page_results']
# print issues_url+ "&offset="+str(offset)
cv_response = self.getCVContent(
issues_url + "&offset=" + str(offset))
params['offset'] = offset
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
volume_issues_result.extend(cv_response['results'])
current_result_count += cv_response['number_of_page_results']
@ -398,26 +392,24 @@ class ComicVineTalker(QObject):
return volume_issues_result
def fetchIssuesByVolumeIssueNumAndYear(
self, volume_id_list, issue_number, year):
volume_filter = "volume:"
def fetchIssuesByVolumeIssueNumAndYear(self, volume_id_list, issue_number, year):
volume_filter = ""
for vid in volume_id_list:
volume_filter += str(vid) + "|"
filter = "volume:{},issue_number:{}".format(volume_filter, issue_number)
year_filter = ""
if year is not None and str(year).isdigit():
year_filter = ",cover_date:{0}-1-1|{1}-1-1".format(
year, int(year) + 1)
intYear = utils.xlate(year, True)
if intYear is not None:
filter += ",cover_date:{}-1-1|{}-1-1".format(intYear, intYear + 1)
issue_number = urllib.parse.quote_plus(str(issue_number).encode("utf-8"))
params = {
'api_key': self.api_key,
'format': 'json',
'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description',
'filter': filter
}
filter = "&filter=" + volume_filter + \
year_filter + ",issue_number:" + issue_number
issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + filter + \
"&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
cv_response = self.getCVContent(issues_url)
cv_response = self.getCVContent(self.api_base_url + "/issues", params)
#------------------------------------
@ -437,9 +429,8 @@ class ComicVineTalker(QObject):
page += 1
offset += cv_response['number_of_page_results']
# print issues_url+ "&offset="+str(offset)
cv_response = self.getCVContent(
issues_url + "&offset=" + str(offset))
params['offset'] = offset
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
filtered_issues_result.extend(cv_response['results'])
current_result_count += cv_response['number_of_page_results']
@ -463,11 +454,12 @@ class ComicVineTalker(QObject):
break
if (found):
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
str(record['id']) + "/?api_key=" + \
self.api_key + "&format=json"
cv_response = self.getCVContent(issue_url)
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(record['id'])
params = {
'api_key': self.api_key,
'format': 'json'
}
cv_response = self.getCVContent(issue_url, params)
issue_results = cv_response['results']
else:
@ -479,9 +471,12 @@ class ComicVineTalker(QObject):
def fetchIssueDataByIssueID(self, issue_id, settings):
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
str(issue_id) + "/?api_key=" + self.api_key + "&format=json"
cv_response = self.getCVContent(issue_url)
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id)
params = {
'api_key': self.api_key,
'format': 'json'
}
cv_response = self.getCVContent(issue_url, params)
issue_results = cv_response['results']
@ -670,9 +665,15 @@ class ComicVineTalker(QObject):
if cached_details['image_url'] is not None:
return cached_details
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
str(issue_id) + "/?api_key=" + self.api_key + \
"&format=json&field_list=image,cover_date,site_detail_url"
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id)
params = {
'api_key': self.api_key,
'format': 'json',
'field_list': 'image,cover_date,site_detail_url'
}
cv_response = self.getCVContent(issue_url, params)
details = dict()
details['image_url'] = None
@ -680,8 +681,6 @@ class ComicVineTalker(QObject):
details['cover_date'] = None
details['site_detail_url'] = None
cv_response = self.getCVContent(issue_url)
details['image_url'] = cv_response['results']['image']['super_url']
details['thumb_image_url'] = cv_response[
'results']['image']['thumb_url']
@ -716,8 +715,7 @@ class ComicVineTalker(QObject):
return url_list
# scrape the CV issue page URL to get the alternate cover URLs
resp = urllib.request.urlopen(issue_page_url, context=self.ssl)
content = resp.read()
content = requests.get(issue_page_url, headers={'user-agent': 'comictagger/' + ctversion.version}).text
alt_cover_url_list = self.parseOutAltCoverUrls(content)
# cache this alt cover URL list
@ -727,9 +725,9 @@ class ComicVineTalker(QObject):
def parseOutAltCoverUrls(self, page_html):
soup = BeautifulSoup(page_html, "html.parser")
alt_cover_url_list = []
# Using knowledge of the layout of the Comic Vine issue page here:
# look for the divs that are in the classes 'imgboxart' and
# 'issue-cover'
@ -738,15 +736,15 @@ class ComicVineTalker(QObject):
for d in div_list:
if 'class' in d.attrs:
c = d['class']
if ('imgboxart' in c and
if ('imgboxart' in c and
'issue-cover' in c and
d.img['src'].startswith("http")
):
covers_found += 1
if covers_found != 1:
alt_cover_url_list.append(d.img['src'])
return alt_cover_url_list
def fetchCachedAlternateCoverURLs(self, issue_id):

View File

@ -19,9 +19,7 @@ import os
import datetime
import shutil
import tempfile
import urllib.request, urllib.parse, urllib.error
import ssl
#import urllib2
import requests
try:
from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest
@ -46,6 +44,7 @@ except ImportError:
pass
from .settings import ComicTaggerSettings
from . import ctversion
class ImageFetcherException(Exception):
@ -66,9 +65,6 @@ class ImageFetcher(QObject):
if not os.path.exists(self.db_file):
self.create_image_db()
# always use a tls context for urlopen
self.ssl = ssl.SSLContext(ssl.PROTOCOL_TLS)
def clearCache(self):
os.unlink(self.db_file)
if os.path.isdir(self.cache_folder):
@ -90,7 +86,8 @@ class ImageFetcher(QObject):
if blocking:
if image_data is None:
try:
image_data = urllib.request.urlopen(url, context=self.ssl).read()
print(url)
image_data = requests.get(url, headers={'user-agent': 'comictagger/' + ctversion.version}).content
except Exception as e:
print(e)
raise ImageFetcherException("Network Error!")

View File

@ -16,9 +16,6 @@
import sys
import io
#import math
#import urllib2
#import urllib
try:
from PIL import Image

View File

@ -16,9 +16,9 @@
import sys
import platform
import urllib.request, urllib.error, urllib.parse
import requests
import urllib.parse
#import os
#import urllib
try:
from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkReply
@ -47,28 +47,30 @@ class VersionChecker(QObject):
base_url = "http://comictagger1.appspot.com/latest"
args = ""
params = dict()
if use_stats:
params = {
'uuid': uuid,
'version': ctversion.version
}
if platform.system() == "Windows":
plat = "win"
params['platform'] = "win"
elif platform.system() == "Linux":
plat = "lin"
params['platform'] = "lin"
elif platform.system() == "Darwin":
plat = "mac"
params['platform'] = "mac"
else:
plat = "other"
args = "?uuid={0}&platform={1}&version={2}".format(
uuid, plat, ctversion.version)
if not getattr(sys, 'frozen', None):
args += "&src=T"
params['platform'] = "other"
return base_url + args
if not getattr(sys, 'frozen', None):
params['src'] = 'T'
return (base_url, params)
def getLatestVersion(self, uuid, use_stats=True):
try:
resp = urllib.request.urlopen(self.getRequestUrl(uuid, use_stats))
new_version = resp.read()
url, params = self.getRequestUrl(uuid, use_stats)
new_version = requests.get(url, params=params).text
except Exception as e:
return None
@ -79,12 +81,11 @@ class VersionChecker(QObject):
versionRequestComplete = pyqtSignal(str)
def asyncGetLatestVersion(self, uuid, use_stats):
url = self.getRequestUrl(uuid, use_stats)
url, params = self.getRequestUrl(uuid, use_stats)
self.nam = QNetworkAccessManager()
self.nam.finished.connect(self.asyncGetLatestVersionComplete)
self.nam.get(QNetworkRequest(QUrl(str(url))))
self.nam.get(QNetworkRequest(QUrl(str(url + '?' + urllib.parse.urlencode(params)))))
def asyncGetLatestVersionComplete(self, reply):
if (reply.error() != QNetworkReply.NoError):

View File

@ -1,4 +1,5 @@
configparser
requests
beautifulsoup4 >= 4.1
natsort==3.5.2
PyPDF2==1.24