2c3a2566cc
I could not find a good reason for ComicIssue to exist other than that it had more attributes than GenericMetadata, so it has been replaced.

New attributes for GenericMetadata:
  series_id: a string uniquely identifying the series to tag_origin
  series_aliases: alternate series names that are not the canonical name
  title_aliases: alternate issue titles that are not the canonical name
  alternate_images: a list of URLs to alternate cover images

Updated attributes for GenericMetadata:
  genre -> genres: str -> list[str]
  comments -> description: str -> str
  story_arc -> story_arcs: str -> list[str]
  series_group -> series_groups: str -> list[str]
  character -> characters: str -> list[str]
  team -> teams: str -> list[str]
  location -> locations: str -> list[str]
  tag_origin -> tag_origin: str -> TagOrigin (tuple[str, str])

ComicSeries has been relocated to the ComicAPI package; it currently has no usage within ComicAPI itself.

CreditMetadata has been renamed to Credit and replaces Credit from ComicTalker.

fetch_series has been added to ComicTalker. It is currently only used in the GUI, when a series is selected and does not already contain the needed fields; this function should always be cached.

A new split function has been added to ComicAPI, and all uses of split on single characters have been updated to use it.

cleanup_html and the corresponding setting are now only used in ComicTagger proper; for display we want the HTML directly from the upstream. When applying the metadata we then strip the description of any HTML.

A new conversion has been added to the MetadataFormatter:
  j: joins any list into a string with ', '. Note that this is a valid operation on strings as well; it will insert ', ' between every character (see the sketch below).

parse_settings now assigns the given ComicTaggerPaths object to the result, ensuring that the correct path is always used.
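As an illustration of the j conversion, here is a minimal sketch built on string.Formatter. JoinFormatter is a hypothetical stand-in for demonstration, not the actual MetadataFormatter implementation:

    from string import Formatter

    class JoinFormatter(Formatter):
        # Minimal sketch of the documented "j" conversion: join list items
        # (or, since strings are iterable, the characters of a str) with ', '.
        def convert_field(self, value, conversion):
            if conversion == "j":
                return ", ".join(str(v) for v in value)
            return super().convert_field(value, conversion)

    fmt = JoinFormatter()
    print(fmt.format("{genres!j}", genres=["Action", "Adventure"]))  # Action, Adventure
    print(fmt.format("{title!j}", title="abc"))                      # a, b, c
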
"""A class to automatically identify a comic archive"""
|
|
#
|
|
# Copyright 2012-2014 ComicTagger Authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
from __future__ import annotations
|
|
|
|
import io
|
|
import logging
|
|
import sys
|
|
from typing import Any, Callable
|
|
|
|
from typing_extensions import NotRequired, TypedDict
|
|
|
|
from comicapi import utils
|
|
from comicapi.comicarchive import ComicArchive
|
|
from comicapi.genericmetadata import GenericMetadata
|
|
from comicapi.issuestring import IssueString
|
|
from comictaggerlib.ctsettings import ct_ns
|
|
from comictaggerlib.imagefetcher import ImageFetcher, ImageFetcherException
|
|
from comictaggerlib.imagehasher import ImageHasher
|
|
from comictaggerlib.resulttypes import IssueResult
|
|
from comictalker.comictalker import ComicTalker, TalkerError
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
try:
|
|
from PIL import Image, ImageChops
|
|
|
|
pil_available = True
|
|
except ImportError:
|
|
pil_available = False
|
|
|
|
|
|
class SearchKeys(TypedDict):
|
|
series: str | None
|
|
issue_number: str | None
|
|
month: int | None
|
|
year: int | None
|
|
issue_count: int | None
|
|
|
|
|
|
class Score(TypedDict):
|
|
score: NotRequired[int]
|
|
url: str
|
|
hash: int
|
|
|
|
|
|
class IssueIdentifierNetworkError(Exception):
|
|
...
|
|
|
|
|
|
class IssueIdentifierCancelled(Exception):
|
|
...
|
|
|
|
|
|
class IssueIdentifier:
|
|
result_no_matches = 0
|
|
result_found_match_but_bad_cover_score = 1
|
|
result_found_match_but_not_first_page = 2
|
|
result_multiple_matches_with_bad_image_scores = 3
|
|
result_one_good_match = 4
|
|
result_multiple_good_matches = 5
|
|
|
|
def __init__(self, comic_archive: ComicArchive, config: ct_ns, talker: ComicTalker) -> None:
|
|
self.config = config
|
|
self.talker = talker
|
|
self.comic_archive: ComicArchive = comic_archive
|
|
self.image_hasher = 1
|
|
|
|
self.only_use_additional_meta_data = False
|
|
|
|
# a decent hamming score, good enough to call it a match
|
|
self.min_score_thresh: int = 16
|
|
|
|
# for alternate covers, be more stringent, since we're a bit more
|
|
# scattershot in comparisons
|
|
self.min_alternate_score_thresh = 12
|
|
|
|
# the min distance a hamming score must be to separate itself from
|
|
# closest neighbor
|
|
self.min_score_distance = 4
|
|
|
|
# a very strong hamming score, almost certainly the same image
|
|
self.strong_score_thresh = 8
|
|
|
|
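        # Illustrative note, assuming ImageHasher's usual 64-bit hashes: a
        # hamming score counts the differing bits between two hashes, so it
        # runs from 0 (identical) to 64 (every bit differs). The thresholds
        # above therefore accept roughly a quarter of the bits differing (16)
        # as a plausible match and an eighth (8) as near-certain.
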
        # used to eliminate series names that are too long based on our search
        # string
        self.series_match_thresh = config.identifier_series_match_identify_thresh

        # used to eliminate unlikely publishers
        self.publisher_filter = [s.strip().casefold() for s in config.identifier_publisher_filter]

        self.additional_metadata = GenericMetadata()
        self.output_function: Callable[[str], None] = IssueIdentifier.default_write_output
        self.callback: Callable[[int, int], None] | None = None
        self.cover_url_callback: Callable[[bytes], None] | None = None
        self.search_result = self.result_no_matches
        self.cover_page_index = 0
        self.cancel = False

        self.match_list: list[IssueResult] = []

    def set_score_min_threshold(self, thresh: int) -> None:
        self.min_score_thresh = thresh

    def set_score_min_distance(self, distance: int) -> None:
        self.min_score_distance = distance

    def set_additional_metadata(self, md: GenericMetadata) -> None:
        self.additional_metadata = md

    def set_name_series_match_threshold(self, delta: int) -> None:
        self.series_match_thresh = delta

    def set_publisher_filter(self, flt: list[str]) -> None:
        self.publisher_filter = flt

    def set_hasher_algorithm(self, algo: int) -> None:
        self.image_hasher = algo

    def set_output_function(self, func: Callable[[str], None]) -> None:
        self.output_function = func

    def calculate_hash(self, image_data: bytes) -> int:
        if self.image_hasher == 3:
            return ImageHasher(data=image_data).p_hash()
        if self.image_hasher == 2:
            return -1  # ImageHasher(data=image_data).average_hash2()

        return ImageHasher(data=image_data).average_hash()

    def get_aspect_ratio(self, image_data: bytes) -> float:
        try:
            im = Image.open(io.BytesIO(image_data))
            w, h = im.size
            return float(h) / float(w)
        except Exception:
            return 1.5

    def crop_cover(self, image_data: bytes) -> bytes:
        im = Image.open(io.BytesIO(image_data))
        w, h = im.size

        try:
            cropped_im = im.crop((int(w / 2), 0, w, h))
        except Exception:
            logger.exception("cropCover() error")
            return b""

        output = io.BytesIO()
        cropped_im.convert("RGB").save(output, format="PNG")
        cropped_image_data = output.getvalue()
        output.close()

        return cropped_image_data

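    # A note on the border-crop trick below, based on PIL's documented
    # semantics: ImageChops.difference() against a solid black image leaves
    # border pixels at or near zero, and ImageChops.add(diff, diff, 2.0, -100)
    # computes (diff + diff) / 2 - 100, i.e. diff - 100 clamped at zero, so
    # anything within 100 of black is flattened and getbbox() frames only the
    # actual cover content.
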
    # Adapted from https://stackoverflow.com/a/10616717/20629671
    def crop_border(self, image_data: bytes, ratio: int) -> bytes | None:
        im = Image.open(io.BytesIO(image_data))

        # RGBA doesn't work????
        tmp = im.convert("RGB")

        bg = Image.new("RGB", tmp.size, "black")
        diff = ImageChops.difference(tmp, bg)
        diff = ImageChops.add(diff, diff, 2.0, -100)

        bbox = diff.getbbox()

        width_percent = 0
        height_percent = 0

        # If bbox is None that should mean it's solid black
        if bbox:
            width = bbox[2] - bbox[0]
            height = bbox[3] - bbox[1]

            # Convert to percent
            width_percent = int(100 - ((width / im.width) * 100))
            height_percent = int(100 - ((height / im.height) * 100))
            logger.debug(
                "Width: %s Height: %s, ratio: %s %s ratio met: %s",
                im.width,
                im.height,
                width_percent,
                height_percent,
                width_percent > ratio or height_percent > ratio,
            )

        # If there is a difference return the image otherwise return None
        if width_percent > ratio or height_percent > ratio:
            output = io.BytesIO()
            im.crop(bbox).save(output, format="PNG")
            cropped_image_data = output.getvalue()
            output.close()
            return cropped_image_data
        return None

    def set_progress_callback(self, cb_func: Callable[[int, int], None]) -> None:
        self.callback = cb_func

    def set_cover_url_callback(self, cb_func: Callable[[bytes], None]) -> None:
        self.cover_url_callback = cb_func

    def get_search_keys(self) -> SearchKeys:
        ca = self.comic_archive

        search_keys: SearchKeys
        if self.only_use_additional_meta_data:
            search_keys = SearchKeys(
                series=self.additional_metadata.series,
                issue_number=self.additional_metadata.issue,
                year=self.additional_metadata.year,
                month=self.additional_metadata.month,
                issue_count=self.additional_metadata.issue_count,
            )
            return search_keys

        # see if the archive has any useful metadata for searching with
        try:
            if ca.has_cix():
                internal_metadata = ca.read_cix()
            else:
                internal_metadata = ca.read_cbi()
        except Exception as e:
            internal_metadata = GenericMetadata()
            logger.error("Failed to load metadata for %s: %s", ca.path, e)

        # try to get some metadata from the filename
        md_from_filename = ca.metadata_from_filename(
            self.config.filename_complicated_parser,
            self.config.filename_remove_c2c,
            self.config.filename_remove_fcbd,
            self.config.filename_remove_publisher,
        )

        working_md = md_from_filename.copy()

        working_md.overlay(internal_metadata)
        working_md.overlay(self.additional_metadata)

        # preference order (highest priority wins):
        # 1. additional metadata
        # 2. internal metadata
        # 3. filename metadata
        search_keys = SearchKeys(
            series=working_md.series,
            issue_number=working_md.issue,
            year=working_md.year,
            month=working_md.month,
            issue_count=working_md.issue_count,
        )

        return search_keys

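    # Illustrative sketch of the precedence above, with hypothetical values:
    # if the filename parses as series="Batman", year=1990 and the archive's
    # internal metadata carries year=1991, working_md ends up with
    # series="Batman", year=1991, and anything set in additional_metadata
    # would override both.
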
    @staticmethod
    def default_write_output(text: str) -> None:
        sys.stdout.write(text)
        sys.stdout.flush()

    def log_msg(self, msg: Any, newline: bool = True) -> None:
        msg = str(msg)
        if newline:
            msg += "\n"
        self.output_function(msg)

    def get_issue_cover_match_score(
        self,
        primary_img_url: str,
        alt_urls: list[str],
        local_cover_hash_list: list[int],
        use_remote_alternates: bool = False,
        use_log: bool = True,
    ) -> Score:
        # local_cover_hash_list is a list of pre-calculated hashes.
        # use_remote_alternates - indicates to use alternate covers from CV

        # If there is no URL return 0
        if not primary_img_url:
            return Score(score=0, url="", hash=0)

        try:
            url_image_data = ImageFetcher(self.config.runtime_config.user_cache_dir).fetch(
                primary_img_url, blocking=True
            )
        except ImageFetcherException as e:
            self.log_msg(f"Network issue while fetching cover image from {self.talker.name}. Aborting...")
            raise IssueIdentifierNetworkError from e

        if self.cancel:
            raise IssueIdentifierCancelled

        # alert the GUI, if needed
        if self.cover_url_callback is not None:
            self.cover_url_callback(url_image_data)

        remote_cover_list = [Score(url=primary_img_url, hash=self.calculate_hash(url_image_data))]

        if self.cancel:
            raise IssueIdentifierCancelled

        if use_remote_alternates:
            for alt_url in alt_urls:
                try:
                    alt_url_image_data = ImageFetcher(self.config.runtime_config.user_cache_dir).fetch(
                        alt_url, blocking=True
                    )
                except ImageFetcherException as e:
                    self.log_msg(f"Network issue while fetching alt. cover image from {self.talker.name}. Aborting...")
                    raise IssueIdentifierNetworkError from e

                if self.cancel:
                    raise IssueIdentifierCancelled

                # alert the GUI, if needed
                if self.cover_url_callback is not None:
                    self.cover_url_callback(alt_url_image_data)

                remote_cover_list.append(Score(url=alt_url, hash=self.calculate_hash(alt_url_image_data)))

        if self.cancel:
            raise IssueIdentifierCancelled

        if use_log and use_remote_alternates:
            self.log_msg(f"[{len(remote_cover_list) - 1} alt. covers]", False)
        if use_log:
            self.log_msg("[ ", False)

        score_list = []
        done = False
        for local_cover_hash in local_cover_hash_list:
            for remote_cover_item in remote_cover_list:
                score = ImageHasher.hamming_distance(local_cover_hash, remote_cover_item["hash"])
                score_list.append(Score(score=score, url=remote_cover_item["url"], hash=remote_cover_item["hash"]))
                if use_log:
                    self.log_msg(score, False)

                if score <= self.strong_score_thresh:
                    # such a good score, we can quit now, since for sure we have a winner
                    done = True
                    break
            if done:
                break

        if use_log:
            self.log_msg(" ]", False)

        best_score_item = min(score_list, key=lambda x: x["score"])

        return best_score_item

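    # search() drives the full identification pipeline: build search keys,
    # query the talker for candidate series, filter them by title, publisher,
    # and date, fetch the matching issues, score their covers against the
    # archive's cover (falling back to alternate covers and extra pages when
    # the first round scores badly), then pare the list down by score distance.
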
    def search(self) -> list[IssueResult]:
        ca = self.comic_archive
        self.match_list = []
        self.cancel = False
        self.search_result = self.result_no_matches

        if not pil_available:
            self.log_msg("Python Imaging Library (PIL) is not available and is needed for issue identification.")
            return self.match_list

        if not ca.seems_to_be_a_comic_archive():
            self.log_msg(f"Sorry, but {ca.path} is not a comic archive!")
            return self.match_list

        cover_image_data = ca.get_page(self.cover_page_index)
        cover_hash = self.calculate_hash(cover_image_data)

        # check the aspect ratio
        # if it's wider than it is high, it's probably a two-page spread
        # if so, crop it and calculate a second hash
        narrow_cover_hash = None
        aspect_ratio = self.get_aspect_ratio(cover_image_data)
        if aspect_ratio < 1.0:
            right_side_image_data = self.crop_cover(cover_image_data)
            if right_side_image_data:  # crop_cover() returns b"" on failure
                narrow_cover_hash = self.calculate_hash(right_side_image_data)

        keys = self.get_search_keys()
        # normalize the issue number, None will return as ""
        keys["issue_number"] = IssueString(keys["issue_number"]).as_string()
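        # (IssueString produces a canonical form for oddball issue numbers so
        # they compare consistently against the talker's data; a None issue
        # number comes back as "".)
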
        # we need, at minimum, a series and issue number
        if not (keys["series"] and keys["issue_number"]):
            self.log_msg("Not enough info for a search!")
            return []

        self.log_msg(f"Using {self.talker.name} to search for:")
        self.log_msg("\tSeries: " + keys["series"])
        self.log_msg("\tIssue: " + keys["issue_number"])
        if keys["issue_count"] is not None:
            self.log_msg("\tCount: " + str(keys["issue_count"]))
        if keys["year"] is not None:
            self.log_msg("\tYear: " + str(keys["year"]))
        if keys["month"] is not None:
            self.log_msg("\tMonth: " + str(keys["month"]))

        self.log_msg(f"Searching for {keys['series']} #{keys['issue_number']} ...")
        try:
            ct_search_results = self.talker.search_for_series(keys["series"])
        except TalkerError as e:
            self.log_msg(f"Error searching for series.\n{e}")
            return []

        if self.cancel:
            return []

        if ct_search_results is None:
            return []

        series_second_round_list = []

        for item in ct_search_results:
            length_approved = False
            publisher_approved = True
            date_approved = True

            # remove any series that starts after the issue year
            if keys["year"] is not None and item.start_year is not None:
                if keys["year"] < item.start_year:
                    date_approved = False

            for name in [item.name, *item.aliases]:
                if utils.titles_match(keys["series"], name, self.series_match_thresh):
                    length_approved = True
                    break
            # remove any series from publishers on the filter
            if item.publisher is not None and item.publisher.casefold() in self.publisher_filter:
                publisher_approved = False

            if length_approved and publisher_approved and date_approved:
                series_second_round_list.append(item)

        self.log_msg("Searching in " + str(len(series_second_round_list)) + " series")

        if self.callback is not None:
            self.callback(0, len(series_second_round_list))

        # now sort the list by name length
        series_second_round_list.sort(key=lambda x: len(x.name), reverse=False)

        series_by_id = {series.id: series for series in series_second_round_list}

        issue_list = None
        try:
            if len(series_by_id) > 0:
                issue_list = self.talker.fetch_issues_by_series_issue_num_and_year(
                    list(series_by_id.keys()), keys["issue_number"], keys["year"]
                )
        except TalkerError as e:
            self.log_msg(f"Error while searching for series details. Aborting...\n{e}")
            return []

        if issue_list is None:
            return []

        shortlist = []
        # now re-associate the issues and series
        # is this really needed?
        for issue in issue_list:
            if issue.series_id in series_by_id:
                shortlist.append((series_by_id[issue.series_id], issue))

        if keys["year"] is None:
            self.log_msg(f"Found {len(shortlist)} series that have an issue #{keys['issue_number']}")
        else:
            self.log_msg(
                f"Found {len(shortlist)} series that have an issue #{keys['issue_number']} from {keys['year']}"
            )

        # now we have a shortlist of series with the desired issue number
        # Do first round of cover matching
        counter = len(shortlist)
        for series, issue in shortlist:
            if self.callback is not None:
                self.callback(counter, len(shortlist) * 3)
            counter += 1

            self.log_msg(
                f"Examining covers for ID: {series.id} {series.name} ({series.start_year}) ...",
                newline=False,
            )

            # Now check the cover match against the primary image
            hash_list = [cover_hash]
            if narrow_cover_hash is not None:
                hash_list.append(narrow_cover_hash)

            cropped_border = self.crop_border(cover_image_data, self.config.identifier_border_crop_percent)
            if cropped_border is not None:
                hash_list.append(self.calculate_hash(cropped_border))
                logger.info("Adding cropped cover to the hash list")

            try:
                image_url = issue.cover_image or ""
                alt_urls = issue.alternate_images

                score_item = self.get_issue_cover_match_score(
                    image_url, alt_urls, hash_list, use_remote_alternates=False
                )
            except Exception:
                logger.exception("Scoring series failed")
                self.match_list = []
                return self.match_list

            match: IssueResult = {
                "series": f"{series.name} ({series.start_year})",
                "distance": score_item["score"],
                "issue_number": keys["issue_number"],
                "cv_issue_count": series.count_of_issues,
                "url_image_hash": score_item["hash"],
                "issue_title": issue.title or "",
                "issue_id": issue.issue_id or "",
                "series_id": series.id,
                "month": issue.month,
                "year": issue.year,
                "publisher": None,
                "image_url": image_url,
                "alt_image_urls": alt_urls,
                "description": issue.description or "",
            }
            if series.publisher is not None:
                match["publisher"] = series.publisher

            self.match_list.append(match)

            self.log_msg(f" --> {match['distance']}", newline=False)

            self.log_msg("")

        if len(self.match_list) == 0:
            self.log_msg(":-( no matches!")
            self.search_result = self.result_no_matches
            return self.match_list

        # sort list by image match scores
        self.match_list.sort(key=lambda k: k["distance"])

        lst = []
        for i in self.match_list:
            lst.append(i["distance"])

        self.log_msg(f"Compared to covers in {len(self.match_list)} issue(s):", newline=False)
        self.log_msg(str(lst))

        def print_match(item: IssueResult) -> None:
            self.log_msg(
                "-----> {} #{} {} ({}/{}) -- score: {}".format(
                    item["series"],
                    item["issue_number"],
                    item["issue_title"],
                    item["month"],
                    item["year"],
                    item["distance"],
                )
            )

        best_score: int = self.match_list[0]["distance"]

        if best_score >= self.min_score_thresh:
            # we have 1 or more low-confidence matches (all bad cover scores)
            # look at a few more pages in the archive, and also alternate covers online
            self.log_msg("Very weak scores for the cover. Analyzing alternate pages and covers...")
            hash_list = [cover_hash]
            if narrow_cover_hash is not None:
                hash_list.append(narrow_cover_hash)
            for page_index in range(1, min(3, ca.get_number_of_pages())):
                image_data = ca.get_page(page_index)
                page_hash = self.calculate_hash(image_data)
                hash_list.append(page_hash)

            second_match_list = []
            counter = 2 * len(self.match_list)
            for m in self.match_list:
                if self.callback is not None:
                    self.callback(counter, len(self.match_list) * 3)
                counter += 1
                self.log_msg(f"Examining alternate covers for ID: {m['series_id']} {m['series']} ...", newline=False)
                try:
                    score_item = self.get_issue_cover_match_score(
                        m["image_url"],
                        m["alt_image_urls"],
                        hash_list,
                        use_remote_alternates=True,
                    )
                except Exception:
                    logger.exception("failed examining alt covers")
                    self.match_list = []
                    return self.match_list
                self.log_msg(f"--->{score_item['score']}")
                self.log_msg("")

                if score_item["score"] < self.min_alternate_score_thresh:
                    second_match_list.append(m)
                    m["distance"] = score_item["score"]

            if len(second_match_list) == 0:
                if len(self.match_list) == 1:
                    self.log_msg("No matching pages in the issue.")
                    self.log_msg("--------------------------------------------------------------------------")
                    print_match(self.match_list[0])
                    self.log_msg("--------------------------------------------------------------------------")
                    self.search_result = self.result_found_match_but_bad_cover_score
                else:
                    self.log_msg("--------------------------------------------------------------------------")
                    self.log_msg("Multiple bad cover matches! Need to use other info...")
                    self.log_msg("--------------------------------------------------------------------------")
                    self.search_result = self.result_multiple_matches_with_bad_image_scores
                return self.match_list

            # We did good, found something!
            self.log_msg("Success in secondary/alternate cover matching!")

            self.match_list = second_match_list
            # sort new list by image match scores
            self.match_list.sort(key=lambda k: k["distance"])
            best_score = self.match_list[0]["distance"]
            self.log_msg(f"[Second round cover matching: best score = {best_score}]")
            # now drop down into the rest of the processing

        if self.callback is not None:
            self.callback(99, 100)

        # now pare down the list, removing any item more than the specified distance from the top score
        for match_item in reversed(self.match_list):
            if match_item["distance"] > best_score + self.min_score_distance:
                self.match_list.remove(match_item)

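        # (Removing while iterating is safe here because the loop runs over
        # reversed(self.match_list): each remove() only shifts items that have
        # already been visited.)
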
        # One more test for the case choosing limited series first issue vs a trade with the same cover:
        # if we have a given issue count > 1 and the series from CV has count==1, remove it from the match list
        if len(self.match_list) >= 2 and keys["issue_count"] is not None and keys["issue_count"] != 1:
            new_list = []
            for match in self.match_list:
                if match["cv_issue_count"] != 1:
                    new_list.append(match)
                else:
                    self.log_msg(
                        f"Removing series {match['series']} [{match['series_id']}] from consideration (only 1 issue)"
                    )

            if len(new_list) > 0:
                self.match_list = new_list

        if len(self.match_list) == 1:
            self.log_msg("--------------------------------------------------------------------------")
            print_match(self.match_list[0])
            self.log_msg("--------------------------------------------------------------------------")
            self.search_result = self.result_one_good_match

        elif len(self.match_list) == 0:
            self.log_msg("--------------------------------------------------------------------------")
            self.log_msg("No matches found :(")
            self.log_msg("--------------------------------------------------------------------------")
            self.search_result = self.result_no_matches
        else:
            # we've got multiple good matches:
            self.log_msg("More than one likely candidate.")
            self.search_result = self.result_multiple_good_matches
            self.log_msg("--------------------------------------------------------------------------")
            for match_item in self.match_list:
                print_match(match_item)
            self.log_msg("--------------------------------------------------------------------------")

        return self.match_list