Merge branch 'unicode_search' into develop
This commit is contained in:
commit
99030fae6b
@ -26,6 +26,7 @@ from shutil import which # noqa: F401
|
||||
from typing import Any, Mapping
|
||||
|
||||
import pycountry
|
||||
import thefuzz.fuzz
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -108,6 +109,10 @@ def remove_articles(text: str) -> str:
|
||||
"the",
|
||||
"the",
|
||||
"with",
|
||||
"ms",
|
||||
"mrs",
|
||||
"mr",
|
||||
"dr",
|
||||
]
|
||||
new_text = ""
|
||||
for word in text.split(" "):
|
||||
@ -121,19 +126,41 @@ def remove_articles(text: str) -> str:
|
||||
|
||||
def sanitize_title(text: str, basic: bool = False) -> str:
|
||||
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
|
||||
text = unicodedata.normalize("NFKD", text)
|
||||
# comicvine keeps apostrophes as part of the word
|
||||
text = text.replace("'", "")
|
||||
text = text.replace('"', "")
|
||||
if not basic:
|
||||
# comicvine ignores punctuation and accents, TODO: only remove punctuation accents and similar
|
||||
text = re.sub(r"[^A-Za-z0-9]+", " ", text)
|
||||
text = unicodedata.normalize("NFKD", text).casefold()
|
||||
if basic:
|
||||
# comicvine keeps apostrophes as part of the word
|
||||
text = text.replace("'", "")
|
||||
text = text.replace('"', "")
|
||||
else:
|
||||
# comicvine ignores punctuation and accents
|
||||
# remove all characters that are not a letter, separator (space) or number
|
||||
# replace any "dash punctuation" with a space
|
||||
# makes sure that batman-superman and self-proclaimed stay separate words
|
||||
text = "".join(
|
||||
c if not unicodedata.category(c) in ("Pd") else " "
|
||||
for c in text
|
||||
if unicodedata.category(c)[0] in "LZN" or unicodedata.category(c) in ("Pd")
|
||||
)
|
||||
# remove extra space and articles and all lower case
|
||||
text = remove_articles(text).casefold().strip()
|
||||
text = remove_articles(text).strip()
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def titles_match(search_title: str, record_title: str, threshold: int = 90):
    """Report whether two titles are similar enough to count as a match.

    Both titles are run through ``sanitize_title`` and the sanitized forms
    are compared with ``thefuzz.fuzz.ratio``. The raw (unsanitized) titles,
    the resulting ratio and the threshold are logged at debug level.

    Args:
        search_title: The title that was searched for.
        record_title: The title of the candidate record.
        threshold: Minimum ratio that is accepted as a match.

    Returns:
        True when the ratio is greater than or equal to ``threshold``.
    """
    similarity = thefuzz.fuzz.ratio(
        sanitize_title(search_title),
        sanitize_title(record_title),
    )
    is_match = similarity >= threshold
    logger.debug(
        "search title: %s ; record title: %s ; ratio: %d ; match threshold: %d",
        search_title,
        record_title,
        similarity,
        threshold,
    )
    return is_match
|
||||
|
||||
|
||||
def unique_file(file_name: pathlib.Path) -> pathlib.Path:
|
||||
name = file_name.name
|
||||
counter = 1
|
||||
|
@ -17,7 +17,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from PyQt5 import QtCore, QtGui, QtWidgets, uic
|
||||
from PyQt5 import QtCore, QtWidgets, uic
|
||||
|
||||
from comictaggerlib.settings import ComicTaggerSettings
|
||||
|
||||
@ -39,7 +39,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
|
||||
self.cbxSpecifySearchString.setChecked(False)
|
||||
self.cbxSplitWords.setChecked(False)
|
||||
self.leNameLengthMatchTolerance.setText(str(self.settings.id_length_delta_thresh))
|
||||
self.sbNameMatchSearchThresh.setValue(self.settings.id_series_match_identify_thresh)
|
||||
self.leSearchString.setEnabled(False)
|
||||
|
||||
self.cbxSaveOnLowConfidence.setChecked(self.settings.save_on_low_confidence)
|
||||
@ -50,13 +50,12 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
self.cbxWaitForRateLimit.setChecked(self.settings.wait_and_retry_on_rate_limit)
|
||||
self.cbxAutoImprint.setChecked(self.settings.auto_imprint)
|
||||
|
||||
nlmt_tip = """ <html>The <b>Name Length Match Tolerance</b> is for eliminating automatic
|
||||
search matches that are too long compared to your series name search. The higher
|
||||
nlmt_tip = """<html>The <b>Name Match Ratio Threshold: Auto-Identify</b> is for eliminating automatic
|
||||
search matches that are too long compared to your series name search. The lower
|
||||
it is, the more likely to have a good match, but each search will take longer and
|
||||
use more bandwidth. Too low, and only the very closest lexical matches will be
|
||||
explored.</html>"""
|
||||
use more bandwidth. Too high, and only the very closest matches will be explored.</html>"""
|
||||
|
||||
self.leNameLengthMatchTolerance.setToolTip(nlmt_tip)
|
||||
self.sbNameMatchSearchThresh.setToolTip(nlmt_tip)
|
||||
|
||||
ss_tip = """<html>
|
||||
The <b>series search string</b> specifies the search string to be used for all selected archives.
|
||||
@ -66,9 +65,6 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
self.leSearchString.setToolTip(ss_tip)
|
||||
self.cbxSpecifySearchString.setToolTip(ss_tip)
|
||||
|
||||
validator = QtGui.QIntValidator(0, 99, self)
|
||||
self.leNameLengthMatchTolerance.setValidator(validator)
|
||||
|
||||
self.cbxSpecifySearchString.stateChanged.connect(self.search_string_toggle)
|
||||
|
||||
self.auto_save_on_low = False
|
||||
@ -78,7 +74,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
self.remove_after_success = False
|
||||
self.wait_and_retry_on_rate_limit = False
|
||||
self.search_string = ""
|
||||
self.name_length_match_tolerance = self.settings.id_length_delta_thresh
|
||||
self.name_length_match_tolerance = self.settings.id_series_match_search_thresh
|
||||
self.split_words = self.cbxSplitWords.isChecked()
|
||||
|
||||
def search_string_toggle(self) -> None:
|
||||
@ -93,7 +89,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
self.assume_issue_one = self.cbxAssumeIssueOne.isChecked()
|
||||
self.ignore_leading_digits_in_filename = self.cbxIgnoreLeadingDigitsInFilename.isChecked()
|
||||
self.remove_after_success = self.cbxRemoveAfterSuccess.isChecked()
|
||||
self.name_length_match_tolerance = int(self.leNameLengthMatchTolerance.text())
|
||||
self.name_length_match_tolerance = int(self.leNameMatchThresh.text())
|
||||
self.wait_and_retry_on_rate_limit = self.cbxWaitForRateLimit.isChecked()
|
||||
self.split_words = self.cbxSplitWords.isChecked()
|
||||
|
||||
|
@ -42,7 +42,7 @@ def actual_issue_data_fetch(
|
||||
) -> GenericMetadata:
|
||||
# now get the particular issue data
|
||||
try:
|
||||
comic_vine = ComicVineTalker()
|
||||
comic_vine = ComicVineTalker(settings.id_series_match_search_thresh)
|
||||
comic_vine.wait_for_rate_limit = opts.wait_on_cv_rate_limit
|
||||
cv_md = comic_vine.fetch_issue_data(match["volume_id"], match["issue_number"], settings)
|
||||
except ComicVineTalkerException:
|
||||
@ -375,7 +375,7 @@ def process_file_cli(
|
||||
if opts.issue_id is not None:
|
||||
# we were given the actual ID to search with
|
||||
try:
|
||||
comic_vine = ComicVineTalker()
|
||||
comic_vine = ComicVineTalker(settings.id_series_match_search_thresh)
|
||||
comic_vine.wait_for_rate_limit = opts.wait_on_cv_rate_limit
|
||||
cv_md = comic_vine.fetch_issue_data_by_issue_id(opts.issue_id, settings)
|
||||
except ComicVineTalkerException:
|
||||
|
@ -84,7 +84,8 @@ class ComicCacher:
|
||||
+ "image_url TEXT,"
|
||||
+ "description TEXT,"
|
||||
+ "timestamp DATE DEFAULT (datetime('now','localtime')),"
|
||||
+ "source_name TEXT NOT NULL)"
|
||||
+ "source_name TEXT NOT NULL,"
|
||||
+ "aliases TEXT)" # Newline separated
|
||||
)
|
||||
|
||||
cur.execute(
|
||||
@ -96,6 +97,7 @@ class ComicCacher:
|
||||
+ "start_year INT,"
|
||||
+ "timestamp DATE DEFAULT (datetime('now','localtime')), "
|
||||
+ "source_name TEXT NOT NULL,"
|
||||
+ "aliases TEXT," # Newline separated
|
||||
+ "PRIMARY KEY (id, source_name))"
|
||||
)
|
||||
|
||||
@ -105,6 +107,7 @@ class ComicCacher:
|
||||
+ "url_list TEXT,"
|
||||
+ "timestamp DATE DEFAULT (datetime('now','localtime')), "
|
||||
+ "source_name TEXT NOT NULL,"
|
||||
+ "aliases TEXT," # Newline separated
|
||||
+ "PRIMARY KEY (issue_id, source_name))"
|
||||
)
|
||||
|
||||
@ -121,6 +124,7 @@ class ComicCacher:
|
||||
+ "description TEXT,"
|
||||
+ "timestamp DATE DEFAULT (datetime('now','localtime')), "
|
||||
+ "source_name TEXT NOT NULL,"
|
||||
+ "aliases TEXT," # Newline separated
|
||||
+ "PRIMARY KEY (id, source_name))"
|
||||
)
|
||||
|
||||
@ -153,8 +157,8 @@ class ComicCacher:
|
||||
|
||||
cur.execute(
|
||||
"INSERT INTO VolumeSearchCache "
|
||||
+ "(source_name, search_term, id, name, start_year, publisher, count_of_issues, image_url, description) "
|
||||
+ "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
+ "(source_name, search_term, id, name, start_year, publisher, count_of_issues, image_url, description, aliases) "
|
||||
+ "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
(
|
||||
source_name,
|
||||
search_term.casefold(),
|
||||
@ -165,6 +169,7 @@ class ComicCacher:
|
||||
record["count_of_issues"],
|
||||
url,
|
||||
record["description"],
|
||||
record["aliases"],
|
||||
),
|
||||
)
|
||||
|
||||
@ -197,6 +202,7 @@ class ComicCacher:
|
||||
"description": record[7],
|
||||
"publisher": {"name": record[4]},
|
||||
"image": {"super_url": record[6]},
|
||||
"aliases": record[10],
|
||||
}
|
||||
)
|
||||
|
||||
@ -268,6 +274,7 @@ class ComicCacher:
|
||||
"count_of_issues": cv_volume_record["count_of_issues"],
|
||||
"start_year": cv_volume_record["start_year"],
|
||||
"timestamp": timestamp,
|
||||
"aliases": cv_volume_record["aliases"],
|
||||
}
|
||||
self.upsert(cur, "volumes", data)
|
||||
|
||||
@ -295,6 +302,7 @@ class ComicCacher:
|
||||
"thumb_url": issue["image"]["thumb_url"],
|
||||
"description": issue["description"],
|
||||
"timestamp": timestamp,
|
||||
"aliases": issue["aliases"],
|
||||
}
|
||||
self.upsert(cur, "issues", data)
|
||||
|
||||
@ -313,7 +321,8 @@ class ComicCacher:
|
||||
|
||||
# fetch
|
||||
cur.execute(
|
||||
"SELECT source_name,id,name,publisher,count_of_issues,start_year FROM Volumes WHERE id=? AND source_name=?",
|
||||
"SELECT source_name,id,name,publisher,count_of_issues,start_year,aliases FROM Volumes"
|
||||
" WHERE id=? AND source_name=?",
|
||||
[volume_id, source_name],
|
||||
)
|
||||
|
||||
@ -330,6 +339,7 @@ class ComicCacher:
|
||||
"count_of_issues": row[4],
|
||||
"start_year": row[5],
|
||||
"publisher": {"name": row[3]},
|
||||
"aliases": row[6],
|
||||
}
|
||||
)
|
||||
|
||||
@ -352,7 +362,7 @@ class ComicCacher:
|
||||
|
||||
cur.execute(
|
||||
(
|
||||
"SELECT source_name,id,name,issue_number,site_detail_url,cover_date,super_url,thumb_url,description"
|
||||
"SELECT source_name,id,name,issue_number,site_detail_url,cover_date,super_url,thumb_url,description,aliases"
|
||||
" FROM Issues WHERE volume_id=? AND source_name=?"
|
||||
),
|
||||
[volume_id, source_name],
|
||||
@ -370,6 +380,7 @@ class ComicCacher:
|
||||
"cover_date": row[5],
|
||||
"image": {"super_url": row[6], "thumb_url": row[7]},
|
||||
"description": row[8],
|
||||
"aliases": row[9],
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -91,11 +91,12 @@ class ComicVineTalker:
|
||||
|
||||
return "Comic Vine rate limit exceeded. Please wait a bit."
|
||||
|
||||
def __init__(self) -> None:
|
||||
def __init__(self, series_match_thresh: int = 90) -> None:
|
||||
# Identity name for the information source
|
||||
self.source_name = "comicvine"
|
||||
|
||||
self.wait_for_rate_limit = False
|
||||
self.series_match_thresh = series_match_thresh
|
||||
|
||||
# key that is registered to comictagger
|
||||
default_api_key = "27431e6787042105bd3e47e169a624521f89f3a4"
|
||||
@ -227,7 +228,7 @@ class ComicVineTalker:
|
||||
"format": "json",
|
||||
"resources": "volume",
|
||||
"query": search_series_name,
|
||||
"field_list": "volume,name,id,start_year,publisher,image,description,count_of_issues",
|
||||
"field_list": "volume,name,id,start_year,publisher,image,description,count_of_issues,aliases",
|
||||
"page": 1,
|
||||
"limit": 100,
|
||||
}
|
||||
@ -245,10 +246,8 @@ class ComicVineTalker:
|
||||
# ORed together, and we get thousands of results. Good news is the
|
||||
# results are sorted by relevance, so we can be smart about halting the search.
|
||||
# 1. Don't fetch more than some sane amount of pages.
|
||||
max_results = 500
|
||||
# 2. Halt when not all of our search terms are present in a result
|
||||
# 3. Halt when the results contain more (plus threshold) words than our search
|
||||
result_word_count_max = len(search_series_name.split()) + 3
|
||||
# 2. Halt when any result on the current page scores below a set match ratio (computed with thefuzz)
|
||||
max_results = 500 # 5 pages
|
||||
|
||||
total_result_count = min(total_result_count, max_results)
|
||||
|
||||
@ -267,19 +266,11 @@ class ComicVineTalker:
|
||||
while current_result_count < total_result_count:
|
||||
|
||||
if not literal:
|
||||
# Sanitize the series name for comicvine searching, comicvine search ignore symbols
|
||||
last_result = utils.sanitize_title(search_results[-1]["name"])
|
||||
|
||||
# See if the last result's name has all of the search terms.
|
||||
# If not, break out of this, loop, we're done.
|
||||
for term in search_series_name.split():
|
||||
if term not in last_result:
|
||||
stop_searching = True
|
||||
break
|
||||
|
||||
# Also, stop searching when the word count of last results is too much longer than our search terms list
|
||||
if len(last_result) > result_word_count_max:
|
||||
stop_searching = True
|
||||
# Stop searching once any entry falls below the threshold
|
||||
stop_searching = any(
|
||||
not utils.titles_match(search_series_name, volume["name"], self.series_match_thresh)
|
||||
for volume in cast(list[CVVolumeResults], cv_response["results"])
|
||||
)
|
||||
|
||||
if stop_searching:
|
||||
break
|
||||
@ -297,17 +288,6 @@ class ComicVineTalker:
|
||||
if callback is not None:
|
||||
callback(current_result_count, total_result_count)
|
||||
|
||||
# Literal searches simply return the matches and no extra processing is done
|
||||
if not literal:
|
||||
# Remove any search results that don't contain all the search terms (iterate backwards for easy removal)
|
||||
for record in reversed(search_results):
|
||||
# Sanitize the series name for comicvine searching, comicvine search ignore symbols
|
||||
record_name = utils.sanitize_title(record["name"])
|
||||
for term in search_series_name.split():
|
||||
if term not in record_name:
|
||||
search_results.remove(record)
|
||||
break
|
||||
|
||||
# Cache these search results, even if it's literal we cache the results
|
||||
# The most it will cause is extra processing time
|
||||
cvc.add_search_results(self.source_name, series_name, search_results)
|
||||
@ -328,7 +308,7 @@ class ComicVineTalker:
|
||||
params = {
|
||||
"api_key": self.api_key,
|
||||
"format": "json",
|
||||
"field_list": "name,id,start_year,publisher,count_of_issues",
|
||||
"field_list": "name,id,start_year,publisher,count_of_issues,aliases",
|
||||
}
|
||||
cv_response = self.get_cv_content(volume_url, params)
|
||||
|
||||
@ -351,7 +331,7 @@ class ComicVineTalker:
|
||||
"api_key": self.api_key,
|
||||
"filter": "volume:" + str(series_id),
|
||||
"format": "json",
|
||||
"field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description",
|
||||
"field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases",
|
||||
"offset": 0,
|
||||
}
|
||||
cv_response = self.get_cv_content(self.api_base_url + "/issues/", params)
|
||||
@ -395,7 +375,7 @@ class ComicVineTalker:
|
||||
params: dict[str, str | int] = {
|
||||
"api_key": self.api_key,
|
||||
"format": "json",
|
||||
"field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description",
|
||||
"field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases",
|
||||
"filter": flt,
|
||||
}
|
||||
|
||||
|
@ -95,7 +95,7 @@ class IssueIdentifier:
|
||||
|
||||
# used to eliminate series names that are too long based on our search
|
||||
# string
|
||||
self.length_delta_thresh = settings.id_length_delta_thresh
|
||||
self.series_match_thresh = settings.id_series_match_identify_thresh
|
||||
|
||||
# used to eliminate unlikely publishers
|
||||
self.publisher_filter = [s.strip().casefold() for s in settings.id_publisher_filter.split(",")]
|
||||
@ -120,8 +120,8 @@ class IssueIdentifier:
|
||||
def set_additional_metadata(self, md: GenericMetadata) -> None:
|
||||
self.additional_metadata = md
|
||||
|
||||
def set_name_length_delta_threshold(self, delta: int) -> None:
|
||||
self.length_delta_thresh = delta
|
||||
def set_name_series_match_threshold(self, delta: int) -> None:
|
||||
self.series_match_thresh = delta
|
||||
|
||||
def set_publisher_filter(self, flt: list[str]) -> None:
|
||||
self.publisher_filter = flt
|
||||
@ -398,7 +398,7 @@ class IssueIdentifier:
|
||||
if keys["month"] is not None:
|
||||
self.log_msg("\tMonth: " + str(keys["month"]))
|
||||
|
||||
comic_vine = ComicVineTalker()
|
||||
comic_vine = ComicVineTalker(self.settings.id_series_match_search_thresh)
|
||||
comic_vine.wait_for_rate_limit = self.wait_and_retry_on_rate_limit
|
||||
|
||||
comic_vine.set_log_func(self.output_function)
|
||||
@ -433,15 +433,13 @@ class IssueIdentifier:
|
||||
if int(keys["year"]) < int(item["start_year"]):
|
||||
date_approved = False
|
||||
|
||||
# assume that our search name is close to the actual name, say
|
||||
# within, e.g., 5 chars
|
||||
# sanitize both the search string and the result so that
|
||||
# we are comparing the same type of data
|
||||
shortened_key = utils.sanitize_title(keys["series"])
|
||||
shortened_item_name = utils.sanitize_title(item["name"])
|
||||
if len(shortened_item_name) < (len(shortened_key) + self.length_delta_thresh):
|
||||
length_approved = True
|
||||
|
||||
aliases = []
|
||||
if item["aliases"]:
|
||||
aliases = item["aliases"].split("\n")
|
||||
for name in [item["name"], *aliases]:
|
||||
if utils.titles_match(keys["series"], name, self.series_match_thresh):
|
||||
length_approved = True
|
||||
break
|
||||
# remove any series from publishers on the filter
|
||||
if item["publisher"] is not None:
|
||||
publisher = item["publisher"]["name"]
|
||||
|
@ -97,7 +97,7 @@ class IssueSelectionWindow(QtWidgets.QDialog):
|
||||
QtWidgets.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.CursorShape.WaitCursor))
|
||||
|
||||
try:
|
||||
comic_vine = ComicVineTalker()
|
||||
comic_vine = ComicVineTalker(self.settings.id_series_match_search_thresh)
|
||||
comic_vine.fetch_volume_data(self.series_id)
|
||||
self.issue_list = comic_vine.fetch_issues_by_volume(self.series_id)
|
||||
except ComicVineTalkerException as e:
|
||||
|
@ -93,6 +93,7 @@ class CVIssuesResults(TypedDict):
|
||||
name: str
|
||||
site_detail_url: str
|
||||
volume: NotRequired[CVVolume]
|
||||
aliases: str
|
||||
|
||||
|
||||
class CVPublisher(TypedDict, total=False):
|
||||
@ -110,6 +111,7 @@ class CVVolumeResults(TypedDict):
|
||||
publisher: CVPublisher
|
||||
start_year: str
|
||||
resource_type: NotRequired[str]
|
||||
aliases: NotRequired[str | None]
|
||||
|
||||
|
||||
class CVCredits(TypedDict):
|
||||
|
@ -78,7 +78,8 @@ class ComicTaggerSettings:
|
||||
self.last_filelist_sorted_order = 0
|
||||
|
||||
# identifier settings
|
||||
self.id_length_delta_thresh = 5
|
||||
self.id_series_match_search_thresh = 90
|
||||
self.id_series_match_identify_thresh = 91
|
||||
self.id_publisher_filter = "Panini Comics, Abril, Planeta DeAgostini, Editorial Televisa, Dino Comics"
|
||||
|
||||
# Show/ask dialog flags
|
||||
@ -217,8 +218,10 @@ class ComicTaggerSettings:
|
||||
if self.config.has_option("auto", "last_filelist_sorted_order"):
|
||||
self.last_filelist_sorted_order = self.config.getint("auto", "last_filelist_sorted_order")
|
||||
|
||||
if self.config.has_option("identifier", "id_length_delta_thresh"):
|
||||
self.id_length_delta_thresh = self.config.getint("identifier", "id_length_delta_thresh")
|
||||
if self.config.has_option("identifier", "id_series_match_search_thresh"):
|
||||
self.id_series_match_search_thresh = self.config.getint("identifier", "id_series_match_search_thresh")
|
||||
if self.config.has_option("identifier", "id_series_match_identify_thresh"):
|
||||
self.id_series_match_identify_thresh = self.config.getint("identifier", "id_series_match_identify_thresh")
|
||||
if self.config.has_option("identifier", "id_publisher_filter"):
|
||||
self.id_publisher_filter = self.config.get("identifier", "id_publisher_filter")
|
||||
|
||||
@ -352,7 +355,8 @@ class ComicTaggerSettings:
|
||||
if not self.config.has_section("identifier"):
|
||||
self.config.add_section("identifier")
|
||||
|
||||
self.config.set("identifier", "id_length_delta_thresh", self.id_length_delta_thresh)
|
||||
self.config.set("identifier", "id_series_match_search_thresh", self.id_series_match_search_thresh)
|
||||
self.config.set("identifier", "id_series_match_identify_thresh", self.id_series_match_identify_thresh)
|
||||
self.config.set("identifier", "id_publisher_filter", self.id_publisher_filter)
|
||||
|
||||
if not self.config.has_section("dialogflags"):
|
||||
|
@ -155,13 +155,16 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.lblDefaultSettings.setText("Revert to default " + self.name.casefold())
|
||||
self.btnResetSettings.setText("Default " + self.name)
|
||||
|
||||
nldt_tip = """<html>The <b>Default Name Length Match Tolerance</b> is for eliminating automatic
|
||||
search matches that are too long compared to your series name search. The higher
|
||||
nmit_tip = """<html>The <b>Name Match Ratio Threshold: Auto-Identify</b> is for eliminating automatic
|
||||
search matches that are too long compared to your series name search. The lower
|
||||
it is, the more likely to have a good match, but each search will take longer and
|
||||
use more bandwidth. Too low, and only the very closest lexical matches will be
|
||||
explored.</html>"""
|
||||
use more bandwidth. Too high, and only the very closest matches will be explored.</html>"""
|
||||
nmst_tip = """<html>The <b>Name Match Ratio Threshold: Search</b> is for reducing the total
|
||||
number of results that are returned from a search. The lower it is, the more pages will
|
||||
be returned (max 5 pages or 500 results)</html>"""
|
||||
|
||||
self.leNameLengthDeltaThresh.setToolTip(nldt_tip)
|
||||
self.sbNameMatchIdentifyThresh.setToolTip(nmit_tip)
|
||||
self.sbNameMatchSearchThresh.setToolTip(nmst_tip)
|
||||
|
||||
pbl_tip = """<html>
|
||||
The <b>Publisher Filter</b> is for eliminating automatic matches to certain publishers
|
||||
@ -173,9 +176,6 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
validator = QtGui.QIntValidator(1, 4, self)
|
||||
self.leIssueNumPadding.setValidator(validator)
|
||||
|
||||
validator = QtGui.QIntValidator(0, 99, self)
|
||||
self.leNameLengthDeltaThresh.setValidator(validator)
|
||||
|
||||
self.leRenameTemplate.setToolTip(f"<pre>{html.escape(template_tooltip)}</pre>")
|
||||
self.settings_to_form()
|
||||
self.rename_error: Exception | None = None
|
||||
@ -225,7 +225,8 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
def settings_to_form(self) -> None:
|
||||
# Copy values from settings to form
|
||||
self.leRarExePath.setText(self.settings.rar_exe_path)
|
||||
self.leNameLengthDeltaThresh.setText(str(self.settings.id_length_delta_thresh))
|
||||
self.sbNameMatchIdentifyThresh.setValue(self.settings.id_series_match_identify_thresh)
|
||||
self.sbNameMatchSearchThresh.setValue(self.settings.id_series_match_search_thresh)
|
||||
self.tePublisherFilter.setPlainText(self.settings.id_publisher_filter)
|
||||
|
||||
self.cbxCheckForNewVersion.setChecked(self.settings.check_for_new_version)
|
||||
@ -287,15 +288,13 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
if self.settings.rar_exe_path:
|
||||
utils.add_to_path(os.path.dirname(self.settings.rar_exe_path))
|
||||
|
||||
if not str(self.leNameLengthDeltaThresh.text()).isdigit():
|
||||
self.leNameLengthDeltaThresh.setText("0")
|
||||
|
||||
if not str(self.leIssueNumPadding.text()).isdigit():
|
||||
self.leIssueNumPadding.setText("0")
|
||||
|
||||
self.settings.check_for_new_version = self.cbxCheckForNewVersion.isChecked()
|
||||
|
||||
self.settings.id_length_delta_thresh = int(self.leNameLengthDeltaThresh.text())
|
||||
self.settings.id_series_match_identify_thresh = self.sbNameMatchIdentifyThresh.value()
|
||||
self.settings.id_series_match_search_thresh = self.sbNameMatchSearchThresh.value()
|
||||
self.settings.id_publisher_filter = str(self.tePublisherFilter.toPlainText())
|
||||
|
||||
self.settings.complicated_parser = self.cbxComplicatedParser.isChecked()
|
||||
|
@ -1064,7 +1064,7 @@ Have fun!
|
||||
self.form_to_metadata()
|
||||
|
||||
try:
|
||||
comic_vine = ComicVineTalker()
|
||||
comic_vine = ComicVineTalker(self.settings.id_series_match_search_thresh)
|
||||
new_metadata = comic_vine.fetch_issue_data(selector.volume_id, selector.issue_number, self.settings)
|
||||
except ComicVineTalkerException as e:
|
||||
QtWidgets.QApplication.restoreOverrideCursor()
|
||||
@ -1674,7 +1674,7 @@ Have fun!
|
||||
QtWidgets.QApplication.setOverrideCursor(QtGui.QCursor(QtCore.Qt.CursorShape.WaitCursor))
|
||||
|
||||
try:
|
||||
comic_vine = ComicVineTalker()
|
||||
comic_vine = ComicVineTalker(self.settings.id_series_match_search_thresh)
|
||||
comic_vine.wait_for_rate_limit = self.settings.wait_and_retry_on_rate_limit
|
||||
cv_md = comic_vine.fetch_issue_data(match["volume_id"], match["issue_number"], self.settings)
|
||||
except ComicVineTalkerException:
|
||||
@ -1743,7 +1743,7 @@ Have fun!
|
||||
ii.cover_page_index = md.get_cover_page_index_list()[0]
|
||||
if self.atprogdialog is not None:
|
||||
ii.set_cover_url_callback(self.atprogdialog.set_test_image)
|
||||
ii.set_name_length_delta_threshold(dlg.name_length_match_tolerance)
|
||||
ii.set_name_series_match_threshold(dlg.name_length_match_tolerance)
|
||||
|
||||
matches: list[IssueResult] = ii.search()
|
||||
|
||||
|
@ -44,6 +44,16 @@
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<layout class="QGridLayout" name="gridLayout">
|
||||
<item row="6" column="0">
|
||||
<widget class="QCheckBox" name="cbxAutoImprint">
|
||||
<property name="toolTip">
|
||||
<string>Checks the publisher against a list of imprints.</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Auto Imprint</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="9" column="0">
|
||||
<widget class="QCheckBox" name="cbxSpecifySearchString">
|
||||
<property name="sizePolicy">
|
||||
@ -57,19 +67,6 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="0">
|
||||
<widget class="QCheckBox" name="cbxSaveOnLowConfidence">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Save on low confidence match</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="4" column="0">
|
||||
<widget class="QCheckBox" name="cbxIgnoreLeadingDigitsInFilename">
|
||||
<property name="sizePolicy">
|
||||
@ -83,8 +80,8 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="0">
|
||||
<widget class="QCheckBox" name="cbxRemoveAfterSuccess">
|
||||
<item row="0" column="0">
|
||||
<widget class="QCheckBox" name="cbxSaveOnLowConfidence">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
@ -92,14 +89,43 @@
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Remove archives from list after successful tagging</string>
|
||||
<string>Save on low confidence match</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QCheckBox" name="cbxWaitForRateLimit">
|
||||
<item row="10" column="0">
|
||||
<widget class="QLineEdit" name="leSearchString">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="11" column="0">
|
||||
<widget class="QLabel" name="label_3">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Wait and retry when Comic Vine rate limit is exceeded (experimental)</string>
|
||||
<string>Adjust Name Match Ratio Threshold: Auto-Identify</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="8" column="0">
|
||||
<widget class="QCheckBox" name="cbxSplitWords">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Split words in filenames (e.g. 'judgedredd' to 'judge dredd') (Experimental)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
@ -129,16 +155,6 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="6" column="0">
|
||||
<widget class="QCheckBox" name="cbxAutoImprint">
|
||||
<property name="toolTip">
|
||||
<string>Checks the publisher against a list of imprints.</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Auto Imprint</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="7" column="0">
|
||||
<widget class="QCheckBox" name="cbxRemoveMetadata">
|
||||
<property name="toolTip">
|
||||
@ -149,8 +165,8 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="8" column="0">
|
||||
<widget class="QCheckBox" name="cbxSplitWords">
|
||||
<item row="5" column="0">
|
||||
<widget class="QCheckBox" name="cbxRemoveAfterSuccess">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
@ -158,46 +174,33 @@
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Split words in filenames (e.g. 'judgedredd' to 'judge dredd') (Experimental)</string>
|
||||
<string>Remove archives from list after successful tagging</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QCheckBox" name="cbxWaitForRateLimit">
|
||||
<property name="text">
|
||||
<string>Wait and retry when Comic Vine rate limit is exceeded (experimental)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="12" column="0">
|
||||
<widget class="QLineEdit" name="leNameLengthMatchTolerance">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<widget class="QSpinBox" name="sbNameMatchSearchThresh">
|
||||
<property name="maximumSize">
|
||||
<size>
|
||||
<width>50</width>
|
||||
<width>60</width>
|
||||
<height>16777215</height>
|
||||
</size>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="10" column="0">
|
||||
<widget class="QLineEdit" name="leSearchString">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
<property name="suffix">
|
||||
<string>%</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="11" column="0">
|
||||
<widget class="QLabel" name="label_3">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
<property name="minimum">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Adjust Name Length Match Tolerance:</string>
|
||||
<property name="maximum">
|
||||
<number>100</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
@ -139,7 +139,7 @@
|
||||
<item row="0" column="0">
|
||||
<widget class="QLabel" name="label_5">
|
||||
<property name="text">
|
||||
<string><html><head/><body><p>These settings are for the automatic issue identifier which searches online for matches. They will not affect &quot;manual&quot; searching.</p><p>Hover the mouse over an entry field for more info.</p></body></html></string>
|
||||
<string><html><head/><body><p>These settings are for the automatic issue identifier which searches online for matches. </p><p>Hover the mouse over an entry field for more info.</p></body></html></string>
|
||||
</property>
|
||||
<property name="wordWrap">
|
||||
<bool>true</bool>
|
||||
@ -164,47 +164,28 @@
|
||||
<string/>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Default Name Length Match Tolerance:</string>
|
||||
<string>Default Name Match Ratio Threshold: Search:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="1">
|
||||
<widget class="QLineEdit" name="leNameLengthDeltaThresh">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Maximum" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label">
|
||||
<property name="text">
|
||||
<string>Default Name Match Ratio Threshold: Auto-Identify:</string>
|
||||
</property>
|
||||
<property name="maximumSize">
|
||||
<size>
|
||||
<width>50</width>
|
||||
<height>16777215</height>
|
||||
</size>
|
||||
</property>
|
||||
<property name="toolTip">
|
||||
<string/>
|
||||
<property name="alignment">
|
||||
<set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<widget class="QLabel" name="label_4">
|
||||
<widget class="QLabel" name="label_9">
|
||||
<property name="text">
|
||||
<string>Publisher Filter:</string>
|
||||
<string>Always use Publisher Filter on "manual" searches:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1">
|
||||
<widget class="QPlainTextEdit" name="tePublisherFilter">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QCheckBox" name="cbxUseFilter">
|
||||
<property name="toolTip">
|
||||
<string><html><head/><body><p>Applies the <span style=" font-weight:600;">Publisher Filter</span> on all searches.<br/>The search window has a dynamic toggle to show the unfiltered results.</p></body></html></string>
|
||||
@ -214,10 +195,58 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QLabel" name="label_9">
|
||||
<item row="3" column="0">
|
||||
<widget class="QLabel" name="label_4">
|
||||
<property name="text">
|
||||
<string>Always use Publisher Filter on "manual" searches:</string>
|
||||
<string>Publisher Filter:</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="1">
|
||||
<widget class="QPlainTextEdit" name="tePublisherFilter">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="1">
|
||||
<widget class="QSpinBox" name="sbNameMatchSearchThresh">
|
||||
<property name="maximumSize">
|
||||
<size>
|
||||
<width>60</width>
|
||||
<height>16777215</height>
|
||||
</size>
|
||||
</property>
|
||||
<property name="suffix">
|
||||
<string>%</string>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<number>100</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QSpinBox" name="sbNameMatchIdentifyThresh">
|
||||
<property name="maximumSize">
|
||||
<size>
|
||||
<width>60</width>
|
||||
<height>16777215</height>
|
||||
</size>
|
||||
</property>
|
||||
<property name="suffix">
|
||||
<string>%</string>
|
||||
</property>
|
||||
<property name="minimum">
|
||||
<number>1</number>
|
||||
</property>
|
||||
<property name="maximum">
|
||||
<number>100</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
@ -42,7 +42,7 @@ class SearchThread(QtCore.QThread):
|
||||
searchComplete = pyqtSignal()
|
||||
progressUpdate = pyqtSignal(int, int)
|
||||
|
||||
def __init__(self, series_name: str, refresh: bool, literal: bool = False) -> None:
|
||||
def __init__(self, series_name: str, refresh: bool, literal: bool = False, series_match_thresh: int = 90) -> None:
|
||||
QtCore.QThread.__init__(self)
|
||||
self.series_name = series_name
|
||||
self.refresh: bool = refresh
|
||||
@ -50,9 +50,10 @@ class SearchThread(QtCore.QThread):
|
||||
self.cv_error = False
|
||||
self.cv_search_results: list[CVVolumeResults] = []
|
||||
self.literal = literal
|
||||
self.series_match_thresh = series_match_thresh
|
||||
|
||||
def run(self) -> None:
|
||||
comic_vine = ComicVineTalker()
|
||||
comic_vine = ComicVineTalker(self.series_match_thresh)
|
||||
try:
|
||||
self.cv_error = False
|
||||
self.cv_search_results = comic_vine.search_for_series(
|
||||
@ -176,7 +177,6 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
|
||||
self.perform_query(refresh=False)
|
||||
|
||||
def auto_select(self) -> None:
|
||||
|
||||
if self.comic_archive is None:
|
||||
QtWidgets.QMessageBox.information(self, "Auto-Select", "You need to load a comic first!")
|
||||
return
|
||||
@ -310,7 +310,9 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
|
||||
self.progdialog.canceled.connect(self.search_canceled)
|
||||
self.progdialog.setModal(True)
|
||||
self.progdialog.setMinimumDuration(300)
|
||||
self.search_thread = SearchThread(self.series_name, refresh, self.literal)
|
||||
self.search_thread = SearchThread(
|
||||
self.series_name, refresh, self.literal, self.settings.id_series_match_search_thresh
|
||||
)
|
||||
self.search_thread.searchComplete.connect(self.search_complete)
|
||||
self.search_thread.progressUpdate.connect(self.search_progress_update)
|
||||
self.search_thread.start()
|
||||
|
@ -7,5 +7,6 @@ py7zr
|
||||
pycountry
|
||||
requests==2.*
|
||||
text2digits
|
||||
thefuzz[speedup]>=0.19.0
|
||||
typing_extensions
|
||||
wordninja
|
||||
|
@ -15,6 +15,7 @@ search_results = [
|
||||
name="test",
|
||||
publisher=comictaggerlib.resulttypes.CVPublisher(name="test"),
|
||||
start_year="", # This is currently submitted as a string and returned as an int
|
||||
aliases=None,
|
||||
),
|
||||
comictaggerlib.resulttypes.CVVolumeResults(
|
||||
count_of_issues=1,
|
||||
@ -24,6 +25,7 @@ search_results = [
|
||||
name="test 2",
|
||||
publisher=comictaggerlib.resulttypes.CVPublisher(name="test"),
|
||||
start_year="", # This is currently submitted as a string and returned as an int
|
||||
aliases=None,
|
||||
),
|
||||
]
|
||||
|
||||
|
@ -98,6 +98,7 @@ cv_volume_result: dict[str, Any] = {
|
||||
"count_of_issues": 6,
|
||||
"id": 23437,
|
||||
"name": "Cory Doctorow's Futuristic Tales of the Here and Now",
|
||||
"aliases": None,
|
||||
"publisher": {
|
||||
"api_detail_url": "https://comicvine.gamespot.com/api/publisher/4010-1190/",
|
||||
"id": 1190,
|
||||
|
@ -39,7 +39,7 @@ def test_fetch_issues_by_volume_issue_num_and_year(comicvine_api):
|
||||
cv_expected = testing.comicvine.cv_issue_result["results"].copy()
|
||||
testing.comicvine.filter_field_list(
|
||||
cv_expected,
|
||||
{"params": {"field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description"}},
|
||||
{"params": {"field_list": "id,volume,issue_number,name,image,cover_date,site_detail_url,description,aliases"}},
|
||||
)
|
||||
assert cv[0] == cv_expected
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user