Merge pull request #189 from lordwelch/seriesSearch

Series search
This commit is contained in:
Timmy Welch 2021-09-21 19:59:26 -07:00 committed by GitHub
commit 03a8d906ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 292 additions and 295 deletions

View File

@ -24,39 +24,33 @@ from . import utils
class ComicBookInfo:
def metadataFromString(self, string):
class Default(dict):
def __missing__(self, key):
return None
cbi_container = json.loads(str(string, 'utf-8'))
metadata = GenericMetadata()
cbi = cbi_container['ComicBookInfo/1.0']
cbi = Default(cbi_container['ComicBookInfo/1.0'])
# helper func
# If item is not in CBI, return None
def xlate(cbi_entry):
if cbi_entry in cbi:
return cbi[cbi_entry]
else:
return None
metadata.series = utils.xlate(cbi['series'])
metadata.title = utils.xlate(cbi['title'])
metadata.issue = utils.xlate(cbi['issue'])
metadata.publisher = utils.xlate(cbi['publisher'])
metadata.month = utils.xlate(cbi['publicationMonth'], True)
metadata.year = utils.xlate(cbi['publicationYear'], True)
metadata.issueCount = utils.xlate(cbi['numberOfIssues'], True)
metadata.comments = utils.xlate(cbi['comments'])
metadata.genre = utils.xlate(cbi['genre'])
metadata.volume = utils.xlate(cbi['volume'], True)
metadata.volumeCount = utils.xlate(cbi['numberOfVolumes'], True)
metadata.language = utils.xlate(cbi['language'])
metadata.country = utils.xlate(cbi['country'])
metadata.criticalRating = utils.xlate(cbi['rating'])
metadata.series = xlate('series')
metadata.title = xlate('title')
metadata.issue = xlate('issue')
metadata.publisher = xlate('publisher')
metadata.month = xlate('publicationMonth')
metadata.year = xlate('publicationYear')
metadata.issueCount = xlate('numberOfIssues')
metadata.comments = xlate('comments')
metadata.credits = xlate('credits')
metadata.genre = xlate('genre')
metadata.volume = xlate('volume')
metadata.volumeCount = xlate('numberOfVolumes')
metadata.language = xlate('language')
metadata.country = xlate('country')
metadata.criticalRating = xlate('rating')
metadata.tags = xlate('tags')
metadata.credits = cbi['credits']
metadata.tags = cbi['tags']
# make sure credits and tags are at least empty lists and not None
if metadata.credits is None:
@ -103,33 +97,23 @@ class ComicBookInfo:
# helper func
def assign(cbi_entry, md_entry):
if md_entry is not None:
if md_entry is not None or isinstance(md_entry, str) and md_entry != "":
cbi[cbi_entry] = md_entry
# helper func
def toInt(s):
i = None
if type(s) in [str, str, int]:
try:
i = int(s)
except ValueError:
pass
return i
assign('series', metadata.series)
assign('title', metadata.title)
assign('issue', metadata.issue)
assign('publisher', metadata.publisher)
assign('publicationMonth', toInt(metadata.month))
assign('publicationYear', toInt(metadata.year))
assign('numberOfIssues', toInt(metadata.issueCount))
assign('comments', metadata.comments)
assign('genre', metadata.genre)
assign('volume', toInt(metadata.volume))
assign('numberOfVolumes', toInt(metadata.volumeCount))
assign('language', utils.getLanguageFromISO(metadata.language))
assign('country', metadata.country)
assign('rating', metadata.criticalRating)
assign('series', utils.xlate(metadata.series))
assign('title', utils.xlate(metadata.title))
assign('issue', utils.xlate(metadata.issue))
assign('publisher', utils.xlate(metadata.publisher))
assign('publicationMonth', utils.xlate(metadata.month, True))
assign('publicationYear', utils.xlate(metadata.year, True))
assign('numberOfIssues', utils.xlate(metadata.issueCount, True))
assign('comments', utils.xlate(metadata.comments))
assign('genre', utils.xlate(metadata.genre))
assign('volume', utils.xlate(metadata.volume, True))
assign('numberOfVolumes', utils.xlate(metadata.volumeCount, True))
assign('language', utils.xlate(utils.getLanguageFromISO(metadata.language)))
assign('country', utils.xlate(metadata.country))
assign('rating', utils.xlate(metadata.criticalRating))
assign('credits', metadata.credits)
assign('tags', metadata.tags)

View File

@ -20,6 +20,7 @@ import xml.etree.ElementTree as ET
#import zipfile
from .genericmetadata import GenericMetadata
from .issuestring import IssueString
from . import utils
@ -206,48 +207,44 @@ class ComicInfoXml:
raise 1
return None
metadata = GenericMetadata()
md = metadata
# Helper function
def xlate(tag):
node = root.find(tag)
if node is not None:
return node.text
else:
def get(name):
tag = root.find(name)
if tag is None:
return None
return tag.text
md.series = xlate('Series')
md.title = xlate('Title')
md.issue = xlate('Number')
md.issueCount = xlate('Count')
md.volume = xlate('Volume')
md.alternateSeries = xlate('AlternateSeries')
md.alternateNumber = xlate('AlternateNumber')
md.alternateCount = xlate('AlternateCount')
md.comments = xlate('Summary')
md.notes = xlate('Notes')
md.year = xlate('Year')
md.month = xlate('Month')
md.day = xlate('Day')
md.publisher = xlate('Publisher')
md.imprint = xlate('Imprint')
md.genre = xlate('Genre')
md.webLink = xlate('Web')
md.language = xlate('LanguageISO')
md.format = xlate('Format')
md.manga = xlate('Manga')
md.characters = xlate('Characters')
md.teams = xlate('Teams')
md.locations = xlate('Locations')
md.pageCount = xlate('PageCount')
md.scanInfo = xlate('ScanInformation')
md.storyArc = xlate('StoryArc')
md.seriesGroup = xlate('SeriesGroup')
md.maturityRating = xlate('AgeRating')
md = GenericMetadata()
tmp = xlate('BlackAndWhite')
md.blackAndWhite = False
md.series = utils.xlate(get('Series'))
md.title = utils.xlate(get('Title'))
md.issue = IssueString(utils.xlate(get('Number'))).asString()
md.issueCount = utils.xlate(get('Count'), True)
md.volume = utils.xlate(get('Volume'), True)
md.alternateSeries = utils.xlate(get('AlternateSeries'))
md.alternateNumber = IssueString(utils.xlate(get('AlternateNumber'))).asString()
md.alternateCount = utils.xlate(get('AlternateCount'), True)
md.comments = utils.xlate(get('Summary'))
md.notes = utils.xlate(get('Notes'))
md.year = utils.xlate(get('Year'), True)
md.month = utils.xlate(get('Month'), True)
md.day = utils.xlate(get('Day'), True)
md.publisher = utils.xlate(get('Publisher'))
md.imprint = utils.xlate(get('Imprint'))
md.genre = utils.xlate(get('Genre'))
md.webLink = utils.xlate(get('Web'))
md.language = utils.xlate(get('LanguageISO'))
md.format = utils.xlate(get('Format'))
md.manga = utils.xlate(get('Manga'))
md.characters = utils.xlate(get('Characters'))
md.teams = utils.xlate(get('Teams'))
md.locations = utils.xlate(get('Locations'))
md.pageCount = utils.xlate(get('PageCount'), True)
md.scanInfo = utils.xlate(get('ScanInformation'))
md.storyArc = utils.xlate(get('StoryArc'))
md.seriesGroup = utils.xlate(get('SeriesGroup'))
md.maturityRating = utils.xlate(get('AgeRating'))
tmp = utils.xlate(get('BlackAndWhite'))
if tmp is not None and tmp.lower() in ["yes", "true", "1"]:
md.blackAndWhite = True
# Now extract the credit info
@ -261,23 +258,23 @@ class ComicInfoXml:
):
if n.text is not None:
for name in n.text.split(','):
metadata.addCredit(name.strip(), n.tag)
md.addCredit(name.strip(), n.tag)
if n.tag == 'CoverArtist':
if n.text is not None:
for name in n.text.split(','):
metadata.addCredit(name.strip(), "Cover")
md.addCredit(name.strip(), "Cover")
# parse page data now
pages_node = root.find("Pages")
if pages_node is not None:
for page in pages_node:
metadata.pages.append(page.attrib)
md.pages.append(page.attrib)
# print page.attrib
metadata.isEmpty = False
md.isEmpty = False
return metadata
return md
def writeToExternalFile(self, filename, metadata):

View File

@ -121,6 +121,23 @@ def which(program):
return None
def xlate(data, isInt=False):
    """Normalize a raw metadata value to a string or an integer.

    Args:
        data: The value to normalize. ``None`` and the empty string are
            treated as "no value".
        isInt: When true, extract the ASCII digits from ``str(data)`` and
            return them as an ``int``; otherwise return ``str(data)``.

    Returns:
        ``None`` when ``data`` is ``None``/``""`` or, in integer mode, when
        it contains no ASCII digits. In integer mode a value whose digits
        are exactly ``"0"`` is returned as the *string* ``"0"``; every other
        digit sequence is returned as an ``int``. In string mode the result
        is ``str(data)``.
    """
    if data is None or data == "":
        return None
    if not isInt:
        return str(data)

    # Drop every character that is not an ASCII digit. This also discards
    # signs and decimal points, so "-5" becomes 5 and "1.5" becomes 15.
    digits = "".join(ch for ch in str(data) if ch in "0123456789")
    if digits == "0":
        # NOTE(review): a lone zero is deliberately kept as the string "0"
        # rather than int 0 to preserve existing behavior -- confirm that
        # callers rely on this before changing it.
        return "0"
    # Compare by equality: identity checks ("is") against a string literal
    # depend on interpreter interning and are not guaranteed to work.
    if digits == "":
        return None
    return int(digits)
def removearticles(text):
text = text.lower()
articles = ['and', 'a', '&', 'issue', 'the']
@ -131,16 +148,6 @@ def removearticles(text):
newText = newText[:-1]
# now get rid of some other junk
newText = newText.replace(":", "")
newText = newText.replace(",", "")
newText = newText.replace("-", " ")
# since the CV API changed, searches for series names with periods
# now explicitly require the period to be in the search key,
# so the line below is removed (for now)
#newText = newText.replace(".", "")
return newText

View File

@ -15,13 +15,13 @@
# limitations under the License.
import json
import urllib.request, urllib.error, urllib.parse
import urllib.request, urllib.parse, urllib.error
import requests
import re
import time
import datetime
import sys
import ssl
import unicodedata
#from pprint import pprint
#import math
@ -104,9 +104,6 @@ class ComicVineTalker(QObject):
self.log_func = None
# always use a tls context for urlopen
self.ssl = ssl.SSLContext(ssl.PROTOCOL_TLS)
def setLogFunc(self, log_func):
self.log_func = log_func
@ -124,23 +121,20 @@ class ComicVineTalker(QObject):
year = None
if date_str is not None:
parts = date_str.split('-')
year = parts[0]
year = utils.xlate(parts[0], True)
if len(parts) > 1:
month = parts[1]
month = utils.xlate(parts[1], True)
if len(parts) > 2:
day = parts[2]
day = utils.xlate(parts[2], True)
return day, month, year
def testKey(self, key):
try:
test_url = self.api_base_url + "/issue/1/?api_key=" + \
key + "&format=json&field_list=name"
resp = urllib.request.urlopen(test_url, context=self.ssl)
content = resp.read()
cv_response = json.loads(content.decode('utf-8'))
test_url = self.api_base_url + "/issue/1/?api_key=" + key + "&format=json&field_list=name"
cv_response = requests.get(test_url, headers={'user-agent': 'comictagger/' + ctversion.version}).json()
# Bogus request, but if the key is wrong, you get error 100: "Invalid
# API Key"
return cv_response['status_code'] != 100
@ -152,14 +146,13 @@ class ComicVineTalker(QObject):
sleep for a bit and retry.
"""
def getCVContent(self, url):
def getCVContent(self, url, params):
total_time_waited = 0
limit_wait_time = 1
counter = 0
wait_times = [1, 2, 3, 4]
while True:
content = self.getUrlContent(url)
cv_response = json.loads(content.decode('utf-8'))
cv_response = self.getUrlContent(url, params)
if self.wait_for_rate_limit and cv_response[
'status_code'] == ComicVineTalkerException.RateLimit:
self.writeLog(
@ -184,25 +177,24 @@ class ComicVineTalker(QObject):
break
return cv_response
def getUrlContent(self, url):
def getUrlContent(self, url, params):
# connect to server:
# if there is a 500 error, try a few more times before giving up
# any other error, just bail
#print("---", url)
for tries in range(3):
try:
resp = urllib.request.urlopen(url, context=self.ssl)
return resp.read()
except urllib.error.HTTPError as e:
if e.getcode() == 500:
resp = requests.get(url, params=params, headers={'user-agent': 'comictagger/' + ctversion.version})
if resp.status_code == 200:
return resp.json()
if resp.status_code == 500:
self.writeLog("Try #{0}: ".format(tries + 1))
time.sleep(1)
self.writeLog(str(e) + "\n")
if e.getcode() != 500:
self.writeLog(str(resp.status_code) + "\n")
else:
break
except Exception as e:
except requests.exceptions.RequestException as e:
self.writeLog(str(e) + "\n")
raise ComicVineTalkerException(
ComicVineTalkerException.Network, "Network Error!")
@ -212,8 +204,13 @@ class ComicVineTalker(QObject):
def searchForSeries(self, series_name, callback=None, refresh_cache=False):
# remove cruft from the search string
series_name = utils.removearticles(series_name).lower().strip()
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 12 not 1/2
search_series_name = unicodedata.normalize('NFKD', series_name).encode('ascii', 'ignore').decode('ascii')
# comicvine ignores punctuation and accents
search_series_name = re.sub(r'[^A-Za-z0-9]+',' ', search_series_name)
# remove extra space and articles and all lower case
search_series_name = utils.removearticles(search_series_name).lower().strip()
# before we search online, look in our cache, since we might have
# done this same search recently
@ -224,19 +221,16 @@ class ComicVineTalker(QObject):
if len(cached_search_results) > 0:
return cached_search_results
original_series_name = series_name
params = {
'api_key': self.api_key,
'format': 'json',
'resources': 'volume',
'query': search_series_name,
'field_list': 'volume,name,id,start_year,publisher,image,description,count_of_issues',
'page': 1
}
# Split and rejoin to remove extra internal spaces
query_word_list = series_name.split()
query_string = " ".join( query_word_list ).strip()
#print ("Query string = ", query_string)
query_string = urllib.parse.quote_plus(query_string.encode("utf-8"))
search_url = self.api_base_url + "/search/?api_key=" + self.api_key + "&format=json&resources=volume&query=" + \
query_string + \
"&field_list=name,id,start_year,publisher,image,description,count_of_issues&limit=100"
cv_response = self.getCVContent(search_url + "&page=1")
cv_response = self.getCVContent(self.api_base_url + "/search", params)
search_results = list()
@ -249,15 +243,15 @@ class ComicVineTalker(QObject):
# 8 Dec 2018 - Comic Vine changed query results again. Terms are now
# ORed together, and we get thousands of results. Good news is the
# results are sorted by relevance, so we can be smart about halting
# the search.
# the search.
# 1. Don't fetch more than some sane amount of pages.
max_results = 500
max_results = 500
# 2. Halt when not all of our search terms are present in a result
# 3. Halt when the results contain more (plus threshold) words than
# our search
result_word_count_max = len(query_word_list) + 3
result_word_count_max = len(search_series_name.split()) + 3
total_result_count = min(total_result_count, max_results)
total_result_count = min(total_result_count, max_results)
if callback is None:
self.writeLog(
@ -276,18 +270,24 @@ class ComicVineTalker(QObject):
last_result = search_results[-1]['name']
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 12 not 1/2
last_result = unicodedata.normalize('NFKD', last_result).encode('ascii', 'ignore').decode('ascii')
# comicvine ignores punctuation and accents
last_result = re.sub(r'[^A-Za-z0-9]+',' ', last_result)
# remove extra space and articles and all lower case
last_result = utils.removearticles(last_result).lower().strip()
# See if the last result's name has all of the search terms.
# if not, break out of this loop, we're done.
#print("Searching for {} in '{}'".format(query_word_list, last_result))
for term in query_word_list:
for term in search_series_name.split():
if term not in last_result.lower():
#print("Term '{}' not in last result. Halting search result fetching".format(term))
stop_searching = True
break
# Also, stop searching when the word count of last results is too much longer
# than our search terms list
if len(utils.removearticles(last_result).split()) > result_word_count_max:
# than our search terms list
if len(last_result) > result_word_count_max:
#print("Last result '{}' is too long. Halting search result fetching".format(last_result))
stop_searching = True
@ -301,7 +301,8 @@ class ComicVineTalker(QObject):
total_result_count))
page += 1
cv_response = self.getCVContent(search_url + "&page=" + str(page))
params['page'] = page
cv_response = self.getCVContent(self.api_base_url + "/search", params)
search_results.extend(cv_response['results'])
current_result_count += cv_response['number_of_page_results']
@ -313,8 +314,15 @@ class ComicVineTalker(QObject):
# (iterate backwards for easy removal)
for i in range(len(search_results) - 1, -1, -1):
record = search_results[i]
for term in query_word_list:
if term not in record['name'].lower():
for term in search_series_name.split():
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 12 not 1/2
recordName = unicodedata.normalize('NFKD', record['name']).encode('ascii', 'ignore').decode('ascii')
# comicvine ignores punctuation and accents
recordName = re.sub(r'[^A-Za-z0-9]+',' ', recordName)
# remove extra space and articles and all lower case
recordName = utils.removearticles(recordName).lower().strip()
if term not in recordName:
del search_results[i]
break
@ -325,7 +333,7 @@ class ComicVineTalker(QObject):
#print(u"{0}: {1} ({2})".format(search_results['results'][0]['id'], search_results['results'][0]['name'] , search_results['results'][0]['start_year']))
# cache these search results
cvc.add_search_results(original_series_name, search_results)
cvc.add_search_results(series_name, search_results)
return search_results
@ -339,11 +347,14 @@ class ComicVineTalker(QObject):
if cached_volume_result is not None:
return cached_volume_result
volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + \
str(series_id) + "/?api_key=" + self.api_key + \
"&field_list=name,id,start_year,publisher,count_of_issues&format=json"
volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + str(series_id)
cv_response = self.getCVContent(volume_url)
params = {
'api_key': self.api_key,
'format': 'json',
'field_list': 'name,id,start_year,publisher,count_of_issues'
}
cv_response = self.getCVContent(volume_url, params)
volume_results = cv_response['results']
@ -361,11 +372,13 @@ class ComicVineTalker(QObject):
if cached_volume_issues_result is not None:
return cached_volume_issues_result
#---------------------------------
issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + "&filter=volume:" + \
str(series_id) + \
"&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
cv_response = self.getCVContent(issues_url)
params = {
'api_key': self.api_key,
'filter': 'volume:' + str(series_id),
'format': 'json',
'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description'
}
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
#------------------------------------
@ -385,9 +398,8 @@ class ComicVineTalker(QObject):
page += 1
offset += cv_response['number_of_page_results']
# print issues_url+ "&offset="+str(offset)
cv_response = self.getCVContent(
issues_url + "&offset=" + str(offset))
params['offset'] = offset
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
volume_issues_result.extend(cv_response['results'])
current_result_count += cv_response['number_of_page_results']
@ -398,26 +410,24 @@ class ComicVineTalker(QObject):
return volume_issues_result
def fetchIssuesByVolumeIssueNumAndYear(
self, volume_id_list, issue_number, year):
volume_filter = "volume:"
def fetchIssuesByVolumeIssueNumAndYear(self, volume_id_list, issue_number, year):
volume_filter = ""
for vid in volume_id_list:
volume_filter += str(vid) + "|"
filter = "volume:{},issue_number:{}".format(volume_filter, issue_number)
year_filter = ""
if year is not None and str(year).isdigit():
year_filter = ",cover_date:{0}-1-1|{1}-1-1".format(
year, int(year) + 1)
intYear = utils.xlate(year, True)
if intYear is not None:
filter += ",cover_date:{}-1-1|{}-1-1".format(intYear, intYear + 1)
issue_number = urllib.parse.quote_plus(str(issue_number).encode("utf-8"))
params = {
'api_key': self.api_key,
'format': 'json',
'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description',
'filter': filter
}
filter = "&filter=" + volume_filter + \
year_filter + ",issue_number:" + issue_number
issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + filter + \
"&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
cv_response = self.getCVContent(issues_url)
cv_response = self.getCVContent(self.api_base_url + "/issues", params)
#------------------------------------
@ -437,9 +447,8 @@ class ComicVineTalker(QObject):
page += 1
offset += cv_response['number_of_page_results']
# print issues_url+ "&offset="+str(offset)
cv_response = self.getCVContent(
issues_url + "&offset=" + str(offset))
params['offset'] = offset
cv_response = self.getCVContent(self.api_base_url + "/issues/", params)
filtered_issues_result.extend(cv_response['results'])
current_result_count += cv_response['number_of_page_results']
@ -463,11 +472,12 @@ class ComicVineTalker(QObject):
break
if (found):
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
str(record['id']) + "/?api_key=" + \
self.api_key + "&format=json"
cv_response = self.getCVContent(issue_url)
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(record['id'])
params = {
'api_key': self.api_key,
'format': 'json'
}
cv_response = self.getCVContent(issue_url, params)
issue_results = cv_response['results']
else:
@ -479,9 +489,12 @@ class ComicVineTalker(QObject):
def fetchIssueDataByIssueID(self, issue_id, settings):
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
str(issue_id) + "/?api_key=" + self.api_key + "&format=json"
cv_response = self.getCVContent(issue_url)
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id)
params = {
'api_key': self.api_key,
'format': 'json'
}
cv_response = self.getCVContent(issue_url, params)
issue_results = cv_response['results']
@ -497,15 +510,13 @@ class ComicVineTalker(QObject):
# Now, map the Comic Vine data to generic metadata
metadata = GenericMetadata()
metadata.series = issue_results['volume']['name']
metadata.series = utils.xlate(issue_results['volume']['name'])
metadata.issue = IssueString(issue_results['issue_number']).asString()
metadata.title = utils.xlate(issue_results['name'])
num_s = IssueString(issue_results['issue_number']).asString()
metadata.issue = num_s
metadata.title = issue_results['name']
metadata.publisher = volume_results['publisher']['name']
metadata.day, metadata.month, metadata.year = self.parseDateStr(
issue_results['cover_date'])
if volume_results['publisher'] is not None:
metadata.publisher = utils.xlate(volume_results['publisher']['name'])
metadata.day, metadata.month, metadata.year = self.parseDateStr(issue_results['cover_date'])
#metadata.issueCount = volume_results['count_of_issues']
metadata.comments = self.cleanup_html(
@ -672,9 +683,15 @@ class ComicVineTalker(QObject):
if cached_details['image_url'] is not None:
return cached_details
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
str(issue_id) + "/?api_key=" + self.api_key + \
"&format=json&field_list=image,cover_date,site_detail_url"
issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id)
params = {
'api_key': self.api_key,
'format': 'json',
'field_list': 'image,cover_date,site_detail_url'
}
cv_response = self.getCVContent(issue_url, params)
details = dict()
details['image_url'] = None
@ -682,8 +699,6 @@ class ComicVineTalker(QObject):
details['cover_date'] = None
details['site_detail_url'] = None
cv_response = self.getCVContent(issue_url)
details['image_url'] = cv_response['results']['image']['super_url']
details['thumb_image_url'] = cv_response[
'results']['image']['thumb_url']
@ -718,8 +733,7 @@ class ComicVineTalker(QObject):
return url_list
# scrape the CV issue page URL to get the alternate cover URLs
resp = urllib.request.urlopen(issue_page_url, context=self.ssl)
content = resp.read()
content = requests.get(issue_page_url, headers={'user-agent': 'comictagger/' + ctversion.version}).text
alt_cover_url_list = self.parseOutAltCoverUrls(content)
# cache this alt cover URL list
@ -729,9 +743,9 @@ class ComicVineTalker(QObject):
def parseOutAltCoverUrls(self, page_html):
soup = BeautifulSoup(page_html, "html.parser")
alt_cover_url_list = []
# Using knowledge of the layout of the Comic Vine issue page here:
# look for the divs that are in the classes 'imgboxart' and
# 'issue-cover'
@ -740,15 +754,15 @@ class ComicVineTalker(QObject):
for d in div_list:
if 'class' in d.attrs:
c = d['class']
if ('imgboxart' in c and
if ('imgboxart' in c and
'issue-cover' in c and
d.img['src'].startswith("http")
):
covers_found += 1
if covers_found != 1:
alt_cover_url_list.append(d.img['src'])
return alt_cover_url_list
def fetchCachedAlternateCoverURLs(self, issue_id):

View File

@ -19,9 +19,7 @@ import os
import datetime
import shutil
import tempfile
import urllib.request, urllib.parse, urllib.error
import ssl
#import urllib2
import requests
try:
from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest
@ -46,6 +44,7 @@ except ImportError:
pass
from .settings import ComicTaggerSettings
from . import ctversion
class ImageFetcherException(Exception):
@ -66,9 +65,6 @@ class ImageFetcher(QObject):
if not os.path.exists(self.db_file):
self.create_image_db()
# always use a tls context for urlopen
self.ssl = ssl.SSLContext(ssl.PROTOCOL_TLS)
def clearCache(self):
os.unlink(self.db_file)
if os.path.isdir(self.cache_folder):
@ -90,7 +86,8 @@ class ImageFetcher(QObject):
if blocking:
if image_data is None:
try:
image_data = urllib.request.urlopen(url, context=self.ssl).read()
print(url)
image_data = requests.get(url, headers={'user-agent': 'comictagger/' + ctversion.version}).content
except Exception as e:
print(e)
raise ImageFetcherException("Network Error!")

View File

@ -16,9 +16,6 @@
import sys
import io
#import math
#import urllib2
#import urllib
try:
from PIL import Image

View File

@ -51,6 +51,7 @@ from .cbltransformer import CBLTransformer
from .renamewindow import RenameWindow
from .exportwindow import ExportWindow, ExportConflictOpts
from .issueidentifier import IssueIdentifier
from .issuestring import IssueString
from .autotagstartwindow import AutoTagStartWindow
from .autotagprogresswindow import AutoTagProgressWindow
from .autotagmatchwindow import AutoTagMatchWindow
@ -761,14 +762,12 @@ class TaggerWindow(QtWidgets.QMainWindow):
for child in widget.children():
self.clearChildren(child)
# Copy all of the metadata object into the form.
# Merging of metadata should be done via the overlay function
def metadataToForm(self):
# copy the the metadata object into to the form
# helper func
def assignText(field, value):
if value is not None:
field.setText(str(value))
md = self.metadata
assignText(self.leSeries, md.series)
@ -810,23 +809,33 @@ class TaggerWindow(QtWidgets.QMainWindow):
self.cbMaturityRating.setEditText(md.maturityRating)
else:
self.cbMaturityRating.setCurrentIndex(i)
else:
self.cbMaturityRating.setCurrentIndex(0)
if md.language is not None:
i = self.cbLanguage.findData(md.language)
self.cbLanguage.setCurrentIndex(i)
else:
self.cbLanguage.setCurrentIndex(0)
if md.country is not None:
i = self.cbCountry.findText(md.country)
self.cbCountry.setCurrentIndex(i)
else:
self.cbCountry.setCurrentIndex(0)
if md.manga is not None:
i = self.cbManga.findData(md.manga)
self.cbManga.setCurrentIndex(i)
else:
self.cbManga.setCurrentIndex(0)
if md.blackAndWhite is not None and md.blackAndWhite:
if md.blackAndWhite:
self.cbBW.setChecked(True)
else:
self.cbBW.setChecked(False)
assignText(self.teTags, utils.listToString(md.tags))
self.teTags.setText(utils.listToString(md.tags))
# !!! Should we clear the credits table or just avoid duplicates?
while self.twCredits.rowCount() > 0:
@ -885,58 +894,47 @@ class TaggerWindow(QtWidgets.QMainWindow):
return False
def formToMetadata(self):
# helper func
def xlate(data, type_str):
s = "{0}".format(data).strip()
if s == "":
return None
elif type_str == "str":
return s
else:
return int(s)
# copy the data from the form into the metadata
md = self.metadata
md.series = xlate(self.leSeries.text(), "str")
md.issue = xlate(self.leIssueNum.text(), "str")
md.issueCount = xlate(self.leIssueCount.text(), "int")
md.volume = xlate(self.leVolumeNum.text(), "int")
md.volumeCount = xlate(self.leVolumeCount.text(), "int")
md.title = xlate(self.leTitle.text(), "str")
md.publisher = xlate(self.lePublisher.text(), "str")
md.month = xlate(self.lePubMonth.text(), "int")
md.year = xlate(self.lePubYear.text(), "int")
md.day = xlate(self.lePubDay.text(), "int")
md.genre = xlate(self.leGenre.text(), "str")
md.imprint = xlate(self.leImprint.text(), "str")
md.comments = xlate(self.teComments.toPlainText(), "str")
md.notes = xlate(self.teNotes.toPlainText(), "str")
md.criticalRating = xlate(self.leCriticalRating.text(), "int")
md.maturityRating = xlate(self.cbMaturityRating.currentText(), "str")
md = GenericMetadata()
md.isEmpty = False
md.alternateNumber = IssueString(self.leAltIssueNum.text()).asString()
md.issue = IssueString(self.leIssueNum.text()).asString()
md.issueCount = utils.xlate(self.leIssueCount.text(), True)
md.volume = utils.xlate(self.leVolumeNum.text(), True)
md.volumeCount = utils.xlate(self.leVolumeCount.text(), True)
md.month = utils.xlate(self.lePubMonth.text(), True)
md.year = utils.xlate(self.lePubYear.text(), True)
md.day = utils.xlate(self.lePubDay.text(), True)
md.criticalRating = utils.xlate(self.leCriticalRating.text(), True)
md.alternateCount = utils.xlate(self.leAltIssueCount.text(), True)
md.storyArc = xlate(self.leStoryArc.text(), "str")
md.scanInfo = xlate(self.leScanInfo.text(), "str")
md.seriesGroup = xlate(self.leSeriesGroup.text(), "str")
md.alternateSeries = xlate(self.leAltSeries.text(), "str")
md.alternateNumber = xlate(self.leAltIssueNum.text(), "int")
md.alternateCount = xlate(self.leAltIssueCount.text(), "int")
md.webLink = xlate(self.leWebLink.text(), "str")
md.characters = xlate(self.teCharacters.toPlainText(), "str")
md.teams = xlate(self.teTeams.toPlainText(), "str")
md.locations = xlate(self.teLocations.toPlainText(), "str")
md.series = self.leSeries.text()
md.title = self.leTitle.text()
md.publisher = self.lePublisher.text()
md.genre = self.leGenre.text()
md.imprint = self.leImprint.text()
md.comments = self.teComments.toPlainText()
md.notes = self.teNotes.toPlainText()
md.maturityRating = self.cbMaturityRating.currentText()
md.format = xlate(self.cbFormat.currentText(), "str")
md.country = xlate(self.cbCountry.currentText(), "str")
md.storyArc = self.leStoryArc.text()
md.scanInfo = self.leScanInfo.text()
md.seriesGroup = self.leSeriesGroup.text()
md.alternateSeries = self.leAltSeries.text()
md.webLink = self.leWebLink.text()
md.characters = self.teCharacters.toPlainText()
md.teams = self.teTeams.toPlainText()
md.locations = self.teLocations.toPlainText()
langiso = self.cbLanguage.itemData(self.cbLanguage.currentIndex())
md.language = xlate(langiso, "str")
md.format = self.cbFormat.currentText()
md.country = self.cbCountry.currentText()
manga_code = self.cbManga.itemData(self.cbManga.currentIndex())
md.manga = xlate(manga_code, "str")
md.language = utils.xlate(self.cbLanguage.itemData(self.cbLanguage.currentIndex()))
md.manga = utils.xlate(self.cbManga.itemData(self.cbManga.currentIndex()))
# Make a list from the comma-delimited tags string
tmp = xlate(self.teTags.toPlainText(), "str")
tmp = self.teTags.toPlainText()
if tmp is not None:
def striplist(l):
return([x.strip() for x in l])
@ -960,6 +958,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
row += 1
md.pages = self.pageListEditor.getPageList()
self.metadata = md
def useFilename(self):
if self.comic_archive is not None:

View File

@ -16,9 +16,9 @@
import sys
import platform
import urllib.request, urllib.error, urllib.parse
import requests
import urllib.parse
#import os
#import urllib
try:
from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkReply
@ -47,28 +47,30 @@ class VersionChecker(QObject):
base_url = "http://comictagger1.appspot.com/latest"
args = ""
params = dict()
if use_stats:
params = {
'uuid': uuid,
'version': ctversion.version
}
if platform.system() == "Windows":
plat = "win"
params['platform'] = "win"
elif platform.system() == "Linux":
plat = "lin"
params['platform'] = "lin"
elif platform.system() == "Darwin":
plat = "mac"
params['platform'] = "mac"
else:
plat = "other"
args = "?uuid={0}&platform={1}&version={2}".format(
uuid, plat, ctversion.version)
if not getattr(sys, 'frozen', None):
args += "&src=T"
params['platform'] = "other"
return base_url + args
if not getattr(sys, 'frozen', None):
params['src'] = 'T'
return (base_url, params)
def getLatestVersion(self, uuid, use_stats=True):
try:
resp = urllib.request.urlopen(self.getRequestUrl(uuid, use_stats))
new_version = resp.read()
url, params = self.getRequestUrl(uuid, use_stats)
new_version = requests.get(url, params=params).text
except Exception as e:
return None
@ -79,12 +81,11 @@ class VersionChecker(QObject):
versionRequestComplete = pyqtSignal(str)
def asyncGetLatestVersion(self, uuid, use_stats):
url = self.getRequestUrl(uuid, use_stats)
url, params = self.getRequestUrl(uuid, use_stats)
self.nam = QNetworkAccessManager()
self.nam.finished.connect(self.asyncGetLatestVersionComplete)
self.nam.get(QNetworkRequest(QUrl(str(url))))
self.nam.get(QNetworkRequest(QUrl(str(url + '?' + urllib.parse.urlencode(params)))))
def asyncGetLatestVersionComplete(self, reply):
if (reply.error() != QNetworkReply.NoError):

View File

@ -1,4 +1,5 @@
configparser
requests
beautifulsoup4 >= 4.1
natsort==3.5.2
PyPDF2==1.24