"""A class to automatically identify a comic archive"""

# Copyright 2012-2014 Anthony Beville
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import io

try:
    from PIL import Image
    pil_available = True
except ImportError:
    pil_available = False

from .genericmetadata import GenericMetadata
from .comicvinetalker import ComicVineTalker, ComicVineTalkerException
from .imagehasher import ImageHasher
from .imagefetcher import ImageFetcher, ImageFetcherException
from .issuestring import IssueString
from . import utils

#from settings import ComicTaggerSettings
#from comicvinecacher import ComicVineCacher


class IssueIdentifierNetworkError(Exception):
    pass


class IssueIdentifierCancelled(Exception):
    pass


class IssueIdentifier:

    ResultNoMatches = 0
    ResultFoundMatchButBadCoverScore = 1
    ResultFoundMatchButNotFirstPage = 2
    ResultMultipleMatchesWithBadImageScores = 3
    ResultOneGoodMatch = 4
    ResultMultipleGoodMatches = 5

    def __init__(self, comic_archive, settings):
        self.comic_archive = comic_archive
        self.image_hasher = 1

        self.onlyUseAdditionalMetaData = False

        # a decent hamming score, good enough to call it a match
        self.min_score_thresh = 16

        # for alternate covers, be more stringent, since we're a bit more
        # scattershot in comparisons
        self.min_alternate_score_thresh = 12

        # the min distance a hamming score must be to separate itself from
        # the closest neighbor
        self.min_score_distance = 4

        # a very strong hamming score, almost certainly the same image
        self.strong_score_thresh = 8

        # used to eliminate series names that are too long based on our
        # search string
        self.length_delta_thresh = settings.id_length_delta_thresh

        # used to eliminate unlikely publishers
        self.publisher_filter = [
            s.strip().lower() for s in settings.id_publisher_filter.split(',')]

        self.additional_metadata = GenericMetadata()
        self.output_function = IssueIdentifier.defaultWriteOutput
        self.callback = None
        self.coverUrlCallback = None
        self.search_result = self.ResultNoMatches
        self.cover_page_index = 0
        self.cancel = False
        self.waitAndRetryOnRateLimit = False
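
    # For reference: the "scores" used throughout are Hamming distances
    # between perceptual hashes (64-bit for the default 8x8 average hash),
    # i.e. the number of differing bits: 0 means the hashes are identical,
    # and lower means more similar. A minimal sketch, assuming plain
    # integer hashes as produced by ImageHasher:
    #
    #     def hamming(hash_a, hash_b):
    #         return bin(hash_a ^ hash_b).count("1")
    #
    #     hamming(0b1010, 0b1001)  # -> 2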

    def setScoreMinThreshold(self, thresh):
        self.min_score_thresh = thresh

    def setScoreMinDistance(self, distance):
        self.min_score_distance = distance

    def setAdditionalMetadata(self, md):
        self.additional_metadata = md

    def setNameLengthDeltaThreshold(self, delta):
        self.length_delta_thresh = delta

    def setPublisherFilter(self, filter_list):
        self.publisher_filter = filter_list

    def setHasherAlgorithm(self, algo):
        self.image_hasher = algo

    def setOutputFunction(self, func):
        self.output_function = func

    def calculateHash(self, image_data):
        # the hasher selector may arrive as an int or a string (the default
        # is the int 1), so compare as strings to accept both
        if str(self.image_hasher) == '3':
            return ImageHasher(data=image_data).dct_average_hash()
        elif str(self.image_hasher) == '2':
            return ImageHasher(data=image_data).average_hash2()
        else:
            return ImageHasher(data=image_data).average_hash()

    def getAspectRatio(self, image_data):
        try:
            # image_data is raw bytes, so wrap it in BytesIO
            # (StringIO would fail under Python 3)
            im = Image.open(io.BytesIO(image_data))
            w, h = im.size
            return float(h) / float(w)
        except Exception:
            # fall back to a typical comic cover aspect ratio
            return 1.5

    def cropCover(self, image_data):
        im = Image.open(io.BytesIO(image_data))
        w, h = im.size

        try:
            # keep the right half of a (presumed) two-page spread
            cropped_im = im.crop((int(w / 2), 0, w, h))
        except Exception as e:
            print("cropCover() error:", e)
            return None

        output = io.BytesIO()
        cropped_im.save(output, format="PNG")
        cropped_image_data = output.getvalue()
        output.close()

        return cropped_image_data
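
    # a quick sanity check of the crop above, with hypothetical dimensions:
    # for a 2000x1500 wraparound scan, im.crop((1000, 0, 2000, 1500)) keeps
    # the right half, which is the front cover on a two-page spread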

    def setProgressCallback(self, cb_func):
        self.callback = cb_func

    def setCoverURLCallback(self, cb_func):
        self.coverUrlCallback = cb_func
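
    # note: the progress callback is invoked as cb_func(current, total)
    # (see the calls in search() below), and the cover URL callback receives
    # the raw image bytes of each cover as it is fetched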

    def getSearchKeys(self):

        ca = self.comic_archive
        search_keys = dict()
        search_keys['series'] = None
        search_keys['issue_number'] = None
        search_keys['month'] = None
        search_keys['year'] = None
        search_keys['issue_count'] = None

        if ca is None:
            return

        if self.onlyUseAdditionalMetaData:
            search_keys['series'] = self.additional_metadata.series
            search_keys['issue_number'] = self.additional_metadata.issue
            search_keys['year'] = self.additional_metadata.year
            search_keys['month'] = self.additional_metadata.month
            search_keys['issue_count'] = self.additional_metadata.issueCount
            return search_keys

        # see if the archive has any useful metadata for searching with
        if ca.hasCIX():
            internal_metadata = ca.readCIX()
        elif ca.hasCBI():
            internal_metadata = ca.readCBI()
        else:
            # no tags at all; readCBI() returns an empty GenericMetadata
            internal_metadata = ca.readCBI()

        # try to get some metadata from the filename
        md_from_filename = ca.metadataFromFilename()

        # preference order:
        # 1. Additional metadata
        # 2. Internal metadata
        # 3. Filename metadata
        if self.additional_metadata.series is not None:
            search_keys['series'] = self.additional_metadata.series
        elif internal_metadata.series is not None:
            search_keys['series'] = internal_metadata.series
        else:
            search_keys['series'] = md_from_filename.series

        if self.additional_metadata.issue is not None:
            search_keys['issue_number'] = self.additional_metadata.issue
        elif internal_metadata.issue is not None:
            search_keys['issue_number'] = internal_metadata.issue
        else:
            search_keys['issue_number'] = md_from_filename.issue

        if self.additional_metadata.year is not None:
            search_keys['year'] = self.additional_metadata.year
        elif internal_metadata.year is not None:
            search_keys['year'] = internal_metadata.year
        else:
            search_keys['year'] = md_from_filename.year

        if self.additional_metadata.month is not None:
            search_keys['month'] = self.additional_metadata.month
        elif internal_metadata.month is not None:
            search_keys['month'] = internal_metadata.month
        else:
            search_keys['month'] = md_from_filename.month

        if self.additional_metadata.issueCount is not None:
            search_keys['issue_count'] = self.additional_metadata.issueCount
        elif internal_metadata.issueCount is not None:
            search_keys['issue_count'] = internal_metadata.issueCount
        else:
            search_keys['issue_count'] = md_from_filename.issueCount

        return search_keys
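
    # a hypothetical return value, for a file named
    # "Fantastic Four 12 (1998).cbz" with no internal tags:
    #
    #     {'series': 'Fantastic Four', 'issue_number': '12',
    #      'month': None, 'year': '1998', 'issue_count': None}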

    @staticmethod
    def defaultWriteOutput(text):
        sys.stdout.write(text)
        sys.stdout.flush()

    def log_msg(self, msg, newline=True):
        if newline:
            msg += "\n"
        self.output_function(msg)

    def getIssueCoverMatchScore(
            self,
            comicVine,
            issue_id,
            primary_img_url,
            primary_thumb_url,
            page_url,
            localCoverHashList,
            useRemoteAlternates=False,
            useLog=True):
        # localCoverHashList is a list of pre-calculated hashes
        # useRemoteAlternates indicates whether to also fetch and compare
        # alternate covers from Comic Vine

        try:
            url_image_data = ImageFetcher().fetch(
                primary_thumb_url, blocking=True)
        except ImageFetcherException:
            self.log_msg(
                "Network issue while fetching cover image from Comic Vine. Aborting...")
            raise IssueIdentifierNetworkError

        if self.cancel:
            raise IssueIdentifierCancelled

        # alert the GUI, if needed
        if self.coverUrlCallback is not None:
            self.coverUrlCallback(url_image_data)

        remote_cover_list = []
        item = dict()
        item['url'] = primary_img_url
        item['hash'] = self.calculateHash(url_image_data)
        remote_cover_list.append(item)

        if self.cancel:
            raise IssueIdentifierCancelled

        if useRemoteAlternates:
            alt_img_url_list = comicVine.fetchAlternateCoverURLs(
                issue_id, page_url)
            for alt_url in alt_img_url_list:
                try:
                    alt_url_image_data = ImageFetcher().fetch(
                        alt_url, blocking=True)
                except ImageFetcherException:
                    self.log_msg(
                        "Network issue while fetching alt. cover image from Comic Vine. Aborting...")
                    raise IssueIdentifierNetworkError

                if self.cancel:
                    raise IssueIdentifierCancelled

                # alert the GUI, if needed
                if self.coverUrlCallback is not None:
                    self.coverUrlCallback(alt_url_image_data)

                item = dict()
                item['url'] = alt_url
                item['hash'] = self.calculateHash(alt_url_image_data)
                remote_cover_list.append(item)

                if self.cancel:
                    raise IssueIdentifierCancelled

        if useLog and useRemoteAlternates:
            self.log_msg(
                "[{0} alt. covers]".format(len(remote_cover_list) - 1), False)
        if useLog:
            self.log_msg("[ ", False)

        score_list = []
        done = False
        for local_cover_hash in localCoverHashList:
            for remote_cover_item in remote_cover_list:
                score = ImageHasher.hamming_distance(
                    local_cover_hash, remote_cover_item['hash'])
                score_item = dict()
                score_item['score'] = score
                score_item['url'] = remote_cover_item['url']
                score_item['hash'] = remote_cover_item['hash']
                score_list.append(score_item)
                if useLog:
                    self.log_msg("{0} ".format(score), False)

                if score <= self.strong_score_thresh:
                    # such a good score that we can quit now, since we
                    # surely have a winner
                    done = True
                    break
            if done:
                break

        if useLog:
            self.log_msg(" ]", False)

        best_score_item = min(score_list, key=lambda x: x['score'])

        return best_score_item
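
    # the returned dict has the shape {'score': <hamming distance>,
    # 'url': <cover image URL>, 'hash': <remote hash>}; e.g. a score of 3
    # would be well under min_score_thresh (16) and would also have
    # triggered the strong_score_thresh (8) early exit above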

    # def validate(self, issue_id):
    #     # create hash list
    #     score = self.getIssueMatchScore(issue_id, hash_list, useRemoteAlternates=True)
    #     if score < 20:
    #         return True
    #     else:
    #         return False

    def search(self):

        ca = self.comic_archive
        self.match_list = []
        self.cancel = False
        self.search_result = self.ResultNoMatches

        if not pil_available:
            self.log_msg(
                "Python Imaging Library (PIL) is not available and is needed for issue identification.")
            return self.match_list

        if not ca.seemsToBeAComicArchive():
            self.log_msg(
                "Sorry, but " + str(ca.path) + " is not a comic archive!")
            return self.match_list

        cover_image_data = ca.getPage(self.cover_page_index)
        cover_hash = self.calculateHash(cover_image_data)

        # check the aspect ratio:
        # if the cover is wider than it is high, it's probably a two-page
        # spread; if so, crop it and calculate a second hash
        narrow_cover_hash = None
        aspect_ratio = self.getAspectRatio(cover_image_data)
        if aspect_ratio < 1.0:
            right_side_image_data = self.cropCover(cover_image_data)
            if right_side_image_data is not None:
                narrow_cover_hash = self.calculateHash(right_side_image_data)

        #self.log_msg("Cover hash = {0:016x}".format(cover_hash))

        keys = self.getSearchKeys()
        # normalize the issue number
        keys['issue_number'] = IssueString(keys['issue_number']).asString()
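
        # a hedged example of what normalization does (assuming IssueString's
        # usual behavior): "0001" comes back as "1", so comparisons against
        # Comic Vine issue numbers aren't thrown off by zero padding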

        # we need, at minimum, a series name and an issue number
        if keys['series'] is None or keys['issue_number'] is None:
            self.log_msg("Not enough info for a search!")
            return []

        self.log_msg("Going to search for:")
        self.log_msg("\tSeries: " + keys['series'])
        self.log_msg("\tIssue: " + keys['issue_number'])
        if keys['issue_count'] is not None:
            self.log_msg("\tCount: " + str(keys['issue_count']))
        if keys['year'] is not None:
            self.log_msg("\tYear: " + str(keys['year']))
        if keys['month'] is not None:
            self.log_msg("\tMonth: " + str(keys['month']))

        #self.log_msg("Publisher Filter: " + str(self.publisher_filter))
        comicVine = ComicVineTalker()
        comicVine.wait_for_rate_limit = self.waitAndRetryOnRateLimit

        comicVine.setLogFunc(self.output_function)

        # self.log_msg("Searching for " + keys['series'] + "...")
        self.log_msg("Searching for {0} #{1} ...".format(
            keys['series'], keys['issue_number']))
        try:
            cv_search_results = comicVine.searchForSeries(keys['series'])
        except ComicVineTalkerException:
            self.log_msg(
                "Network issue while searching for series. Aborting...")
            return []

        #self.log_msg("Found " + str(len(cv_search_results)) + " initial results")
        if self.cancel:
            return []

        if cv_search_results is None:
            return []
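
        # note: each result dict from searchForSeries() is used below via
        # its 'id', 'name', 'start_year', and 'publisher' fields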

        series_second_round_list = []

        #self.log_msg("Removing results with too-long names, banned publishers, or future start dates")
        for item in cv_search_results:
            length_approved = False
            publisher_approved = True
            date_approved = True

            # remove any series that starts after the issue year
            if keys['year'] is not None and str(
                    keys['year']).isdigit() and item['start_year'] is not None and str(
                    item['start_year']).isdigit():
                if int(keys['year']) < int(item['start_year']):
                    date_approved = False

            # assume that our search name is close to the actual name, say
            # within, e.g., 5 chars
            # sanitize both the search string and the result so that
            # we are comparing the same type of data
            shortened_key = utils.sanitize_title(keys['series'])
            shortened_item_name = utils.sanitize_title(item['name'])
            if len(shortened_item_name) < (
                    len(shortened_key) + self.length_delta_thresh):
                length_approved = True

            # remove any series from publishers on the filter
            if item['publisher'] is not None:
                publisher = item['publisher']['name']
                if publisher is not None and publisher.lower(
                ) in self.publisher_filter:
                    publisher_approved = False

            if length_approved and publisher_approved and date_approved:
                series_second_round_list.append(item)
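
        # filter sketch with hypothetical numbers: searching for "batman"
        # (6 sanitized chars) with length_delta_thresh = 5 keeps only series
        # whose sanitized names are shorter than 11 characters, so
        # "batmanandtheoutsiders" (21 chars) is dropped here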

        self.log_msg(
            "Searching in " + str(len(series_second_round_list)) + " series")

        if self.callback is not None:
            self.callback(0, len(series_second_round_list))

        # now sort the list by name length
        series_second_round_list.sort(
            key=lambda x: len(x['name']), reverse=False)

        # build a list of volume IDs
        volume_id_list = list()
        for series in series_second_round_list:
            volume_id_list.append(series['id'])

        try:
            issue_list = comicVine.fetchIssuesByVolumeIssueNumAndYear(
                volume_id_list,
                keys['issue_number'],
                keys['year'])
        except ComicVineTalkerException:
            self.log_msg(
                "Network issue while searching for series details. Aborting...")
            return []

        if issue_list is None:
            return []

        shortlist = list()
        # now re-associate the issues and volumes
        for issue in issue_list:
            for series in series_second_round_list:
                if series['id'] == issue['volume']['id']:
                    shortlist.append((series, issue))
                    break

        if keys['year'] is None:
            self.log_msg("Found {0} series that have an issue #{1}".format(
                len(shortlist), keys['issue_number']))
        else:
            self.log_msg(
                "Found {0} series that have an issue #{1} from {2}".format(
                    len(shortlist),
                    keys['issue_number'],
                    keys['year']))

        # now we have a shortlist of volumes with the desired issue number;
        # do a first round of cover matching
        counter = len(shortlist)
        for series, issue in shortlist:
            if self.callback is not None:
                self.callback(counter, len(shortlist) * 3)
            counter += 1

            self.log_msg("Examining covers for ID: {0} {1} ({2}) ...".format(
                series['id'],
                series['name'],
                series['start_year']), newline=False)

            # parse out the cover date
            day, month, year = comicVine.parseDateStr(issue['cover_date'])

            # now check the cover match against the primary image
            hash_list = [cover_hash]
            if narrow_cover_hash is not None:
                hash_list.append(narrow_cover_hash)

            try:
                image_url = issue['image']['super_url']
                thumb_url = issue['image']['thumb_url']
                page_url = issue['site_detail_url']

                score_item = self.getIssueCoverMatchScore(
                    comicVine,
                    issue['id'],
                    image_url,
                    thumb_url,
                    page_url,
                    hash_list,
                    useRemoteAlternates=False)
            except Exception:
                self.match_list = []
                return self.match_list

            match = dict()
            match['series'] = "{0} ({1})".format(
                series['name'], series['start_year'])
            match['distance'] = score_item['score']
            match['issue_number'] = keys['issue_number']
            match['cv_issue_count'] = series['count_of_issues']
            match['url_image_hash'] = score_item['hash']
            match['issue_title'] = issue['name']
            match['issue_id'] = issue['id']
            match['volume_id'] = series['id']
            match['month'] = month
            match['year'] = year
            match['publisher'] = None
            if series['publisher'] is not None:
                match['publisher'] = series['publisher']['name']
            match['image_url'] = image_url
            match['thumb_url'] = thumb_url
            match['page_url'] = page_url
            match['description'] = issue['description']

            self.match_list.append(match)

            self.log_msg(" --> {0}".format(match['distance']), newline=False)
            self.log_msg("")
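
        # a hypothetical match record, for illustration (all values invented):
        #
        #     {'series': 'Fantastic Four (1998)', 'distance': 5,
        #      'issue_number': '1', 'cv_issue_count': 70, 'issue_id': 12345,
        #      'volume_id': 678, 'month': 1, 'year': 1998,
        #      'publisher': 'Marvel', 'image_url': '...', ...}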

        if len(self.match_list) == 0:
            self.log_msg(":-( no matches!")
            self.search_result = self.ResultNoMatches
            return self.match_list

        # sort the list by image match scores
        self.match_list.sort(key=lambda k: k['distance'])

        scores = []
        for i in self.match_list:
            scores.append(i['distance'])

        self.log_msg("Compared to covers in {0} issue(s):".format(
            len(self.match_list)), newline=False)
        self.log_msg(str(scores))

        def print_match(item):
            self.log_msg("-----> {0} #{1} {2} ({3}/{4}) -- score: {5}".format(
                item['series'],
                item['issue_number'],
                item['issue_title'],
                item['month'],
                item['year'],
                item['distance']))

        best_score = self.match_list[0]['distance']

        if best_score >= self.min_score_thresh:
            # we have one or more low-confidence matches (all bad cover
            # scores); look at a few more pages in the archive, and also
            # alternate covers online
            self.log_msg(
                "Very weak scores for the cover. Analyzing alternate pages and covers...")
            hash_list = [cover_hash]
            if narrow_cover_hash is not None:
                hash_list.append(narrow_cover_hash)
            for i in range(1, min(3, ca.getNumberOfPages())):
                image_data = ca.getPage(i)
                page_hash = self.calculateHash(image_data)
                hash_list.append(page_hash)

            second_match_list = []
            counter = 2 * len(self.match_list)
            for m in self.match_list:
                if self.callback is not None:
                    self.callback(counter, len(self.match_list) * 3)
                counter += 1
                self.log_msg(
                    "Examining alternate covers for ID: {0} {1} ...".format(
                        m['volume_id'],
                        m['series']),
                    newline=False)
                try:
                    score_item = self.getIssueCoverMatchScore(
                        comicVine,
                        m['issue_id'],
                        m['image_url'],
                        m['thumb_url'],
                        m['page_url'],
                        hash_list,
                        useRemoteAlternates=True)
                except Exception:
                    self.match_list = []
                    return self.match_list
                self.log_msg("---> {0}".format(score_item['score']))
                self.log_msg("")

                if score_item['score'] < self.min_alternate_score_thresh:
                    second_match_list.append(m)
                    m['distance'] = score_item['score']

            if len(second_match_list) == 0:
                if len(self.match_list) == 1:
                    self.log_msg("No matching pages in the issue.")
                    self.log_msg(
                        "--------------------------------------------------------------------------")
                    print_match(self.match_list[0])
                    self.log_msg(
                        "--------------------------------------------------------------------------")
                    self.search_result = self.ResultFoundMatchButBadCoverScore
                else:
                    self.log_msg(
                        "--------------------------------------------------------------------------")
                    self.log_msg(
                        "Multiple bad cover matches! Need to use other info...")
                    self.log_msg(
                        "--------------------------------------------------------------------------")
                    self.search_result = self.ResultMultipleMatchesWithBadImageScores
                return self.match_list
            else:
                # we did good, found something!
                self.log_msg("Success in secondary/alternate cover matching!")

                self.match_list = second_match_list
                # sort the new list by image match scores
                self.match_list.sort(key=lambda k: k['distance'])
                best_score = self.match_list[0]['distance']
                self.log_msg(
                    "[Second round cover matching: best score = {0}]".format(best_score))
                # now drop down into the rest of the processing

        if self.callback is not None:
            self.callback(99, 100)

        # now pare down the list: remove any item more than the specified
        # distance from the top score
        for item in reversed(self.match_list):
            if item['distance'] > best_score + self.min_score_distance:
                self.match_list.remove(item)
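
        # pare-down arithmetic, with hypothetical scores: given
        # min_score_distance = 4 and best_score = 6, items scoring up to 10
        # survive, and an 11 is removed as too far behind the leader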

        # one more test, for the case of choosing between a limited series'
        # first issue and a trade with the same cover:
        # if we were given an issue count > 1 and a volume from CV has
        # count == 1, remove it from the match list
        if len(self.match_list) >= 2 and keys[
                'issue_count'] is not None and keys['issue_count'] != 1:
            new_list = list()
            for match in self.match_list:
                if match['cv_issue_count'] != 1:
                    new_list.append(match)
                else:
                    self.log_msg(
                        "Removing volume {0} [{1}] from consideration (only 1 issue)".format(
                            match['series'],
                            match['volume_id']))

            if len(new_list) > 0:
                self.match_list = new_list

        if len(self.match_list) == 1:
            self.log_msg(
                "--------------------------------------------------------------------------")
            print_match(self.match_list[0])
            self.log_msg(
                "--------------------------------------------------------------------------")
            self.search_result = self.ResultOneGoodMatch

        elif len(self.match_list) == 0:
            self.log_msg(
                "--------------------------------------------------------------------------")
            self.log_msg("No matches found :(")
            self.log_msg(
                "--------------------------------------------------------------------------")
            self.search_result = self.ResultNoMatches

        else:
            # we've got multiple good matches:
            self.log_msg("More than one likely candidate.")
            self.search_result = self.ResultMultipleGoodMatches
            self.log_msg(
                "--------------------------------------------------------------------------")
            for item in self.match_list:
                print_match(item)
            self.log_msg(
                "--------------------------------------------------------------------------")

        return self.match_list
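
# A minimal usage sketch (hedged: the module paths, settings object, and
# ComicArchive constructor arguments are assumptions based on how the rest
# of this package wires things up):
#
#     from comictaggerlib.comicarchive import ComicArchive
#     from comictaggerlib.settings import ComicTaggerSettings
#
#     settings = ComicTaggerSettings()
#     ca = ComicArchive("Some Series 012 (1998).cbz", settings.rar_exe_path)
#     ii = IssueIdentifier(ca, settings)
#     matches = ii.search()
#     for m in matches:
#         print(m['series'], m['issue_number'], "score:", m['distance'])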