Move to python requests module
Add requests to requirements.txt Requests is much simpler and fixes all ssl errors. Comic Vine now requires a unique useragent string
This commit is contained in:
parent
af4b3af14e
commit
11bf5a9709
@ -15,8 +15,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
import json
|
||||
import urllib.request, urllib.error, urllib.parse
|
||||
import urllib.request, urllib.parse, urllib.error
|
||||
import requests
|
||||
import re
|
||||
import time
|
||||
import datetime
|
||||
@ -104,9 +103,6 @@ class ComicVineTalker(QObject):
|
||||
|
||||
self.log_func = None
|
||||
|
||||
# always use a tls context for urlopen
|
||||
self.ssl = ssl.SSLContext(ssl.PROTOCOL_TLS)
|
||||
|
||||
def setLogFunc(self, log_func):
|
||||
self.log_func = log_func
|
||||
|
||||
@ -134,13 +130,10 @@ class ComicVineTalker(QObject):
|
||||
def testKey(self, key):
|
||||
|
||||
try:
|
||||
test_url = self.api_base_url + "/issue/1/?api_key=" + \
|
||||
key + "&format=json&field_list=name"
|
||||
resp = urllib.request.urlopen(test_url, context=self.ssl)
|
||||
content = resp.read()
|
||||
|
||||
cv_response = json.loads(content.decode('utf-8'))
|
||||
|
||||
test_url = self.api_base_url + "/issue/1/?api_key=" + key + "&format=json&field_list=name"
|
||||
|
||||
cv_response = requests.get(test_url, headers={'user-agent': 'comictagger/' + ctversion.version}).json()
|
||||
|
||||
# Bogus request, but if the key is wrong, you get error 100: "Invalid
|
||||
# API Key"
|
||||
return cv_response['status_code'] != 100
|
||||
@ -152,14 +145,13 @@ class ComicVineTalker(QObject):
|
||||
sleep for a bit and retry.
|
||||
"""
|
||||
|
||||
def getCVContent(self, url):
|
||||
def getCVContent(self, url, params):
|
||||
total_time_waited = 0
|
||||
limit_wait_time = 1
|
||||
counter = 0
|
||||
wait_times = [1, 2, 3, 4]
|
||||
while True:
|
||||
content = self.getUrlContent(url)
|
||||
cv_response = json.loads(content.decode('utf-8'))
|
||||
cv_response = self.getUrlContent(url, params)
|
||||
if self.wait_for_rate_limit and cv_response[
|
||||
'status_code'] == ComicVineTalkerException.RateLimit:
|
||||
self.writeLog(
|
||||
@ -184,25 +176,24 @@ class ComicVineTalker(QObject):
|
||||
break
|
||||
return cv_response
|
||||
|
||||
def getUrlContent(self, url):
|
||||
def getUrlContent(self, url, params):
|
||||
# connect to server:
|
||||
# if there is a 500 error, try a few more times before giving up
|
||||
# any other error, just bail
|
||||
#print("---", url)
|
||||
for tries in range(3):
|
||||
try:
|
||||
resp = urllib.request.urlopen(url, context=self.ssl)
|
||||
return resp.read()
|
||||
except urllib.error.HTTPError as e:
|
||||
if e.getcode() == 500:
|
||||
resp = requests.get(url, params=params, headers={'user-agent': 'comictagger/' + ctversion.version})
|
||||
if resp.status_code == 200:
|
||||
return resp.json()
|
||||
if resp.status_code == 500:
|
||||
self.writeLog("Try #{0}: ".format(tries + 1))
|
||||
time.sleep(1)
|
||||
self.writeLog(str(e) + "\n")
|
||||
|
||||
if e.getcode() != 500:
|
||||
self.writeLog(str(resp.status_code) + "\n")
|
||||
else:
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
except requests.exceptions.RequestException as e:
|
||||
self.writeLog(str(e) + "\n")
|
||||
raise ComicVineTalkerException(
|
||||
ComicVineTalkerException.Network, "Network Error!")
|
||||
@ -226,17 +217,16 @@ class ComicVineTalker(QObject):
|
||||
|
||||
original_series_name = series_name
|
||||
|
||||
# Split and rejoin to remove extra internal spaces
|
||||
query_word_list = series_name.split()
|
||||
query_string = " ".join( query_word_list ).strip()
|
||||
#print ("Query string = ", query_string)
|
||||
params = {
|
||||
'api_key': self.api_key,
|
||||
'format': 'json',
|
||||
'resources': 'volume',
|
||||
'query': series_name,
|
||||
'field_list': 'name,id,start_year,publisher,image,description,count_of_issues',
|
||||
'page': 1
|
||||
}
|
||||
|
||||
query_string = urllib.parse.quote_plus(query_string.encode("utf-8"))
|
||||
|
||||
search_url = self.api_base_url + "/search/?api_key=" + self.api_key + "&format=json&resources=volume&query=" + \
|
||||
query_string + \
|
||||
"&field_list=name,id,start_year,publisher,image,description,count_of_issues&limit=100"
|
||||
cv_response = self.getCVContent(search_url + "&page=1")
|
||||
cv_response = self.getCVContent(self.api_base_url + "/search", params)
|
||||
|
||||
search_results = list()
|
||||
|
||||
@ -249,15 +239,15 @@ class ComicVineTalker(QObject):
|
||||
# 8 Dec 2018 - Comic Vine changed query results again. Terms are now
|
||||
# ORed together, and we get thousands of results. Good news is the
|
||||
# results are sorted by relevance, so we can be smart about halting
|
||||
# the search.
|
||||
# the search.
|
||||
# 1. Don't fetch more than some sane amount of pages.
|
||||
max_results = 500
|
||||
max_results = 500
|
||||
# 2. Halt when not all of our search terms are present in a result
|
||||
# 3. Halt when the results contain more (plus threshold) words than
|
||||
# our search
|
||||
result_word_count_max = len(query_word_list) + 3
|
||||
result_word_count_max = len(series_name.split()) + 3
|
||||
|
||||
total_result_count = min(total_result_count, max_results)
|
||||
total_result_count = min(total_result_count, max_results)
|
||||
|
||||
if callback is None:
|
||||
self.writeLog(
|
||||
@ -278,15 +268,14 @@ class ComicVineTalker(QObject):
|
||||
|
||||
# See if the last result's name has all the of the search terms.
|
||||
# if not, break out of this, loop, we're done.
|
||||
#print("Searching for {} in '{}'".format(query_word_list, last_result))
|
||||
for term in query_word_list:
|
||||
for term in series_name.split():
|
||||
if term not in last_result.lower():
|
||||
#print("Term '{}' not in last result. Halting search result fetching".format(term))
|
||||
stop_searching = True
|
||||
break
|
||||
|
||||
# Also, stop searching when the word count of last results is too much longer
|
||||
# than our search terms list
|
||||
# than our search terms list
|
||||
if len(utils.removearticles(last_result).split()) > result_word_count_max:
|
||||
#print("Last result '{}' is too long. Halting search result fetching".format(last_result))
|
||||
stop_searching = True
|
||||
@ -301,7 +290,8 @@ class ComicVineTalker(QObject):
|
||||
total_result_count))
|
||||
page += 1
|
||||
|
||||
cv_response = self.getCVContent(search_url + "&page=" + str(page))
|
||||
params['page'] = page
|
||||
cv_response = self.getCVContent(self.api_base_url + "/search", params)
|
||||
|
||||
search_results.extend(cv_response['results'])
|
||||
current_result_count += cv_response['number_of_page_results']
|
||||
@ -313,7 +303,7 @@ class ComicVineTalker(QObject):
|
||||
# (iterate backwards for easy removal)
|
||||
for i in range(len(search_results) - 1, -1, -1):
|
||||
record = search_results[i]
|
||||
for term in query_word_list:
|
||||
for term in series_name.split():
|
||||
if term not in record['name'].lower():
|
||||
del search_results[i]
|
||||
break
|
||||
@ -339,11 +329,14 @@ class ComicVineTalker(QObject):
|
||||
if cached_volume_result is not None:
|
||||
return cached_volume_result
|
||||
|
||||
volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + \
|
||||
str(series_id) + "/?api_key=" + self.api_key + \
|
||||
"&field_list=name,id,start_year,publisher,count_of_issues&format=json"
|
||||
volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + str(series_id)
|
||||
|
||||
cv_response = self.getCVContent(volume_url)
|
||||
params = {
|
||||
'api_key': self.api_key,
|
||||
'format': 'json',
|
||||
'field_list': 'name,id,start_year,publisher,count_of_issues'
|
||||
}
|
||||
cv_response = self.getCVContent(volume_url, params)
|
||||
|
||||
volume_results = cv_response['results']
|
||||
|
||||
@ -361,11 +354,13 @@ class ComicVineTalker(QObject):
|
||||
if cached_volume_issues_result is not None:
|
||||
return cached_volume_issues_result
|
||||
|
||||
#---------------------------------
|
||||
issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + "&filter=volume:" + \
|
||||
str(series_id) + \
|
||||
"&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
|
||||
cv_response = self.getCVContent(issues_url)
|
||||
params = {
|
||||
'api_key': self.api_key,
|
||||
'filter': 'volume:' + str(series_id),
|
||||
'format': 'json',
|
||||
'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description'
|
||||
}
|
||||
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
|
||||
|
||||
#------------------------------------
|
||||
|
||||
@ -385,9 +380,8 @@ class ComicVineTalker(QObject):
|
||||
page += 1
|
||||
offset += cv_response['number_of_page_results']
|
||||
|
||||
# print issues_url+ "&offset="+str(offset)
|
||||
cv_response = self.getCVContent(
|
||||
issues_url + "&offset=" + str(offset))
|
||||
params['offset'] = offset
|
||||
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
|
||||
|
||||
volume_issues_result.extend(cv_response['results'])
|
||||
current_result_count += cv_response['number_of_page_results']
|
||||
@ -398,26 +392,24 @@ class ComicVineTalker(QObject):
|
||||
|
||||
return volume_issues_result
|
||||
|
||||
def fetchIssuesByVolumeIssueNumAndYear(
|
||||
self, volume_id_list, issue_number, year):
|
||||
volume_filter = "volume:"
|
||||
def fetchIssuesByVolumeIssueNumAndYear(self, volume_id_list, issue_number, year):
|
||||
volume_filter = ""
|
||||
for vid in volume_id_list:
|
||||
volume_filter += str(vid) + "|"
|
||||
filter = "volume:{},issue_number:{}".format(volume_filter, issue_number)
|
||||
|
||||
year_filter = ""
|
||||
if year is not None and str(year).isdigit():
|
||||
year_filter = ",cover_date:{0}-1-1|{1}-1-1".format(
|
||||
year, int(year) + 1)
|
||||
intYear = utils.xlate(year, True)
|
||||
if intYear is not None:
|
||||
filter += ",cover_date:{}-1-1|{}-1-1".format(intYear, intYear + 1)
|
||||
|
||||
issue_number = urllib.parse.quote_plus(str(issue_number).encode("utf-8"))
|
||||
params = {
|
||||
'api_key': self.api_key,
|
||||
'format': 'json',
|
||||
'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description',
|
||||
'filter': filter
|
||||
}
|
||||
|
||||
filter = "&filter=" + volume_filter + \
|
||||
year_filter + ",issue_number:" + issue_number
|
||||
|
||||
issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + filter + \
|
||||
"&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
|
||||
|
||||
cv_response = self.getCVContent(issues_url)
|
||||
cv_response = self.getCVContent(self.api_base_url + "/issues", params)
|
||||
|
||||
#------------------------------------
|
||||
|
||||
@ -437,9 +429,8 @@ class ComicVineTalker(QObject):
|
||||
page += 1
|
||||
offset += cv_response['number_of_page_results']
|
||||
|
||||
# print issues_url+ "&offset="+str(offset)
|
||||
cv_response = self.getCVContent(
|
||||
issues_url + "&offset=" + str(offset))
|
||||
params['offset'] = offset
|
||||
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
|
||||
|
||||
filtered_issues_result.extend(cv_response['results'])
|
||||
current_result_count += cv_response['number_of_page_results']
|
||||
@ -463,11 +454,12 @@ class ComicVineTalker(QObject):
|
||||
break
|
||||
|
||||
if (found):
|
||||
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
|
||||
str(record['id']) + "/?api_key=" + \
|
||||
self.api_key + "&format=json"
|
||||
|
||||
cv_response = self.getCVContent(issue_url)
|
||||
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(record['id'])
|
||||
params = {
|
||||
'api_key': self.api_key,
|
||||
'format': 'json'
|
||||
}
|
||||
cv_response = self.getCVContent(issue_url, params)
|
||||
issue_results = cv_response['results']
|
||||
|
||||
else:
|
||||
@ -479,9 +471,12 @@ class ComicVineTalker(QObject):
|
||||
|
||||
def fetchIssueDataByIssueID(self, issue_id, settings):
|
||||
|
||||
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
|
||||
str(issue_id) + "/?api_key=" + self.api_key + "&format=json"
|
||||
cv_response = self.getCVContent(issue_url)
|
||||
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id)
|
||||
params = {
|
||||
'api_key': self.api_key,
|
||||
'format': 'json'
|
||||
}
|
||||
cv_response = self.getCVContent(issue_url, params)
|
||||
|
||||
issue_results = cv_response['results']
|
||||
|
||||
@ -670,9 +665,15 @@ class ComicVineTalker(QObject):
|
||||
if cached_details['image_url'] is not None:
|
||||
return cached_details
|
||||
|
||||
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
|
||||
str(issue_id) + "/?api_key=" + self.api_key + \
|
||||
"&format=json&field_list=image,cover_date,site_detail_url"
|
||||
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id)
|
||||
|
||||
params = {
|
||||
'api_key': self.api_key,
|
||||
'format': 'json',
|
||||
'field_list': 'image,cover_date,site_detail_url'
|
||||
}
|
||||
|
||||
cv_response = self.getCVContent(issue_url, params)
|
||||
|
||||
details = dict()
|
||||
details['image_url'] = None
|
||||
@ -680,8 +681,6 @@ class ComicVineTalker(QObject):
|
||||
details['cover_date'] = None
|
||||
details['site_detail_url'] = None
|
||||
|
||||
cv_response = self.getCVContent(issue_url)
|
||||
|
||||
details['image_url'] = cv_response['results']['image']['super_url']
|
||||
details['thumb_image_url'] = cv_response[
|
||||
'results']['image']['thumb_url']
|
||||
@ -716,8 +715,7 @@ class ComicVineTalker(QObject):
|
||||
return url_list
|
||||
|
||||
# scrape the CV issue page URL to get the alternate cover URLs
|
||||
resp = urllib.request.urlopen(issue_page_url, context=self.ssl)
|
||||
content = resp.read()
|
||||
content = requests.get(issue_page_url, headers={'user-agent': 'comictagger/' + ctversion.version}).text
|
||||
alt_cover_url_list = self.parseOutAltCoverUrls(content)
|
||||
|
||||
# cache this alt cover URL list
|
||||
@ -727,9 +725,9 @@ class ComicVineTalker(QObject):
|
||||
|
||||
def parseOutAltCoverUrls(self, page_html):
|
||||
soup = BeautifulSoup(page_html, "html.parser")
|
||||
|
||||
|
||||
alt_cover_url_list = []
|
||||
|
||||
|
||||
# Using knowledge of the layout of the Comic Vine issue page here:
|
||||
# look for the divs that are in the classes 'imgboxart' and
|
||||
# 'issue-cover'
|
||||
@ -738,15 +736,15 @@ class ComicVineTalker(QObject):
|
||||
for d in div_list:
|
||||
if 'class' in d.attrs:
|
||||
c = d['class']
|
||||
if ('imgboxart' in c and
|
||||
if ('imgboxart' in c and
|
||||
'issue-cover' in c and
|
||||
d.img['src'].startswith("http")
|
||||
):
|
||||
|
||||
|
||||
covers_found += 1
|
||||
if covers_found != 1:
|
||||
alt_cover_url_list.append(d.img['src'])
|
||||
|
||||
|
||||
return alt_cover_url_list
|
||||
|
||||
def fetchCachedAlternateCoverURLs(self, issue_id):
|
||||
|
@ -19,9 +19,7 @@ import os
|
||||
import datetime
|
||||
import shutil
|
||||
import tempfile
|
||||
import urllib.request, urllib.parse, urllib.error
|
||||
import ssl
|
||||
#import urllib2
|
||||
import requests
|
||||
|
||||
try:
|
||||
from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest
|
||||
@ -46,6 +44,7 @@ except ImportError:
|
||||
pass
|
||||
|
||||
from .settings import ComicTaggerSettings
|
||||
from . import ctversion
|
||||
|
||||
|
||||
class ImageFetcherException(Exception):
|
||||
@ -66,9 +65,6 @@ class ImageFetcher(QObject):
|
||||
if not os.path.exists(self.db_file):
|
||||
self.create_image_db()
|
||||
|
||||
# always use a tls context for urlopen
|
||||
self.ssl = ssl.SSLContext(ssl.PROTOCOL_TLS)
|
||||
|
||||
def clearCache(self):
|
||||
os.unlink(self.db_file)
|
||||
if os.path.isdir(self.cache_folder):
|
||||
@ -90,7 +86,8 @@ class ImageFetcher(QObject):
|
||||
if blocking:
|
||||
if image_data is None:
|
||||
try:
|
||||
image_data = urllib.request.urlopen(url, context=self.ssl).read()
|
||||
print(url)
|
||||
image_data = requests.get(url, headers={'user-agent': 'comictagger/' + ctversion.version}).content
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise ImageFetcherException("Network Error!")
|
||||
|
@ -16,9 +16,6 @@
|
||||
|
||||
import sys
|
||||
import io
|
||||
#import math
|
||||
#import urllib2
|
||||
#import urllib
|
||||
|
||||
try:
|
||||
from PIL import Image
|
||||
|
@ -16,9 +16,9 @@
|
||||
|
||||
import sys
|
||||
import platform
|
||||
import urllib.request, urllib.error, urllib.parse
|
||||
import requests
|
||||
import urllib.parse
|
||||
#import os
|
||||
#import urllib
|
||||
|
||||
try:
|
||||
from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkReply
|
||||
@ -47,28 +47,30 @@ class VersionChecker(QObject):
|
||||
|
||||
base_url = "http://comictagger1.appspot.com/latest"
|
||||
args = ""
|
||||
|
||||
params = dict()
|
||||
if use_stats:
|
||||
params = {
|
||||
'uuid': uuid,
|
||||
'version': ctversion.version
|
||||
}
|
||||
if platform.system() == "Windows":
|
||||
plat = "win"
|
||||
params['platform'] = "win"
|
||||
elif platform.system() == "Linux":
|
||||
plat = "lin"
|
||||
params['platform'] = "lin"
|
||||
elif platform.system() == "Darwin":
|
||||
plat = "mac"
|
||||
params['platform'] = "mac"
|
||||
else:
|
||||
plat = "other"
|
||||
args = "?uuid={0}&platform={1}&version={2}".format(
|
||||
uuid, plat, ctversion.version)
|
||||
if not getattr(sys, 'frozen', None):
|
||||
args += "&src=T"
|
||||
params['platform'] = "other"
|
||||
|
||||
return base_url + args
|
||||
if not getattr(sys, 'frozen', None):
|
||||
params['src'] = 'T'
|
||||
|
||||
return (base_url, params)
|
||||
|
||||
def getLatestVersion(self, uuid, use_stats=True):
|
||||
|
||||
try:
|
||||
resp = urllib.request.urlopen(self.getRequestUrl(uuid, use_stats))
|
||||
new_version = resp.read()
|
||||
url, params = self.getRequestUrl(uuid, use_stats)
|
||||
new_version = requests.get(url, params=params).text
|
||||
except Exception as e:
|
||||
return None
|
||||
|
||||
@ -79,12 +81,11 @@ class VersionChecker(QObject):
|
||||
versionRequestComplete = pyqtSignal(str)
|
||||
|
||||
def asyncGetLatestVersion(self, uuid, use_stats):
|
||||
|
||||
url = self.getRequestUrl(uuid, use_stats)
|
||||
url, params = self.getRequestUrl(uuid, use_stats)
|
||||
|
||||
self.nam = QNetworkAccessManager()
|
||||
self.nam.finished.connect(self.asyncGetLatestVersionComplete)
|
||||
self.nam.get(QNetworkRequest(QUrl(str(url))))
|
||||
self.nam.get(QNetworkRequest(QUrl(str(url + '?' + urllib.parse.urlencode(params)))))
|
||||
|
||||
def asyncGetLatestVersionComplete(self, reply):
|
||||
if (reply.error() != QNetworkReply.NoError):
|
||||
|
@ -1,4 +1,5 @@
|
||||
configparser
|
||||
requests
|
||||
beautifulsoup4 >= 4.1
|
||||
natsort==3.5.2
|
||||
PyPDF2==1.24
|
||||
|
Loading…
Reference in New Issue
Block a user