From db37ec720432ac59387a6d706f82b8372f6bbd13 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Tue, 7 Jun 2022 11:49:56 -0700 Subject: [PATCH] Add a literal search option --- comicapi/utils.py | 14 +++--- comictaggerlib/comicvinetalker.py | 64 ++++++++++++++----------- comictaggerlib/taggerwindow.py | 10 +++- comictaggerlib/ui/taggerwindow.ui | 9 ++++ comictaggerlib/volumeselectionwindow.py | 44 +++++++++++------ 5 files changed, 90 insertions(+), 51 deletions(-) diff --git a/comicapi/utils.py b/comicapi/utils.py index 6a1bb89..8907949 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -130,17 +130,17 @@ def remove_articles(text: str) -> str: return new_text -def sanitize_title(text: str) -> str: +def sanitize_title(text: str, basic: bool = False) -> str: # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2 - # this will probably cause issues with titles in other character sets e.g. chinese, japanese - text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii") + text = unicodedata.normalize("NFKD", text) # comicvine keeps apostrophes a part of the word text = text.replace("'", "") text = text.replace('"', "") - # comicvine ignores punctuation and accents - text = re.sub(r"[^A-Za-z0-9]+", " ", text) - # remove extra space and articles and all lower case - text = remove_articles(text).lower().strip() + if not basic: + # comicvine ignores punctuation and accents, TODO: only remove punctuation accents and similar + text = re.sub(r"[^A-Za-z0-9]+", " ", text) + # remove extra space and articles and all lower case + text = remove_articles(text).casefold().strip() return text diff --git a/comictaggerlib/comicvinetalker.py b/comictaggerlib/comicvinetalker.py index 6ca1219..92b6a66 100644 --- a/comictaggerlib/comicvinetalker.py +++ b/comictaggerlib/comicvinetalker.py @@ -200,15 +200,21 @@ class ComicVineTalker: raise ComicVineTalkerException(ComicVineTalkerException.Unknown, "Error on Comic Vine server") def search_for_series( - self, series_name: str, callback: Callable[[int, int], None] | None = None, refresh_cache: bool = False + self, + series_name: str, + callback: Callable[[int, int], None] | None = None, + refresh_cache: bool = False, + literal: bool = False, ) -> list[CVVolumeResults]: # Sanitize the series name for comicvine searching, comicvine search ignore symbols - search_series_name = utils.sanitize_title(series_name) + search_series_name = utils.sanitize_title(series_name, literal) + logger.info("Searching: %s", search_series_name) - # before we search online, look in our cache, since we might have done this same search recently + # Before we search online, look in our cache, since we might have done this same search recently + # For literal searches always retrieve from online cvc = ComicVineCacher() - if not refresh_cache: + if not refresh_cache and not literal: cached_search_results = cvc.get_search_results(series_name) if len(cached_search_results) > 0: @@ -258,25 +264,24 @@ class ComicVineTalker: stop_searching = False while current_result_count < total_result_count: - last_result = search_results[-1]["name"] + if not literal: + # Sanitize the series name for comicvine searching, comicvine search ignore symbols + last_result = utils.sanitize_title(search_results[-1]["name"]) - # Sanitize the series name for comicvine searching, comicvine search ignore symbols - last_result = utils.sanitize_title(last_result) + # See if the last result's name has all the of the search terms. + # If not, break out of this, loop, we're done. + for term in search_series_name.split(): + if term not in last_result: + stop_searching = True + break - # See if the last result's name has all the of the search terms. - # If not, break out of this, loop, we're done. - for term in search_series_name.split(): - if term not in last_result.lower(): + # Also, stop searching when the word count of last results is too much longer than our search terms list + if len(last_result) > result_word_count_max: stop_searching = True + + if stop_searching: break - # Also, stop searching when the word count of last results is too much longer than our search terms list - if len(last_result) > result_word_count_max: - stop_searching = True - - if stop_searching: - break - if callback is None: self.write_log(f"getting another page of results {current_result_count} of {total_result_count}...\n") page += 1 @@ -290,18 +295,19 @@ class ComicVineTalker: if callback is not None: callback(current_result_count, total_result_count) - # Remove any search results that don't contain all the search terms (iterate backwards for easy removal) - for i in range(len(search_results) - 1, -1, -1): - record = search_results[i] - # Sanitize the series name for comicvine searching, comicvine search ignore symbols - record_name = utils.sanitize_title(record["name"]) - for term in search_series_name.split(): + # Literal searches simply return the matches no extra processing is doneo + if not literal: + # Remove any search results that don't contain all the search terms (iterate backwards for easy removal) + for record in reversed(search_results): + # Sanitize the series name for comicvine searching, comicvine search ignore symbols + record_name = utils.sanitize_title(record["name"]) + for term in search_series_name.split(): + if term not in record_name: + search_results.remove(record) + break - if term not in record_name: - del search_results[i] - break - - # cache these search results + # Cache these search results, even if it's literal we cache the results + # The most it will cause is extra processing time cvc.add_search_results(series_name, search_results) return search_results diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index 63650c0..4eaddc6 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -385,6 +385,8 @@ Have fun! self.actionAutoIdentify.setShortcut("Ctrl+I") self.actionAutoIdentify.triggered.connect(self.auto_identify_search) + self.actionLiteralSearch.triggered.connect(self.literal_search) + self.actionApplyCBLTransform.setShortcut("Ctrl+L") self.actionApplyCBLTransform.setStatusTip("Modify tags specifically for CBL format") self.actionApplyCBLTransform.triggered.connect(self.apply_cbl_transform) @@ -424,6 +426,7 @@ Have fun! self.actionParse_Filename.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png"))) self.actionParse_Filename_split_words.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png"))) self.actionSearchOnline.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("search.png"))) + self.actionLiteralSearch.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("search.png"))) self.actionAutoIdentify.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("auto.png"))) self.actionAutoTag.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("autotag.png"))) self.actionAutoImprint.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("autotag.png"))) @@ -434,6 +437,7 @@ Have fun! self.toolBar.addAction(self.actionLoadFolder) self.toolBar.addAction(self.actionWrite_Tags) self.toolBar.addAction(self.actionSearchOnline) + self.toolBar.addAction(self.actionLiteralSearch) self.toolBar.addAction(self.actionAutoIdentify) self.toolBar.addAction(self.actionAutoTag) self.toolBar.addAction(self.actionClearEntryForm) @@ -1015,7 +1019,10 @@ Have fun! self.query_online(autoselect=True) - def query_online(self, autoselect: bool = False) -> None: + def literal_search(self): + self.query_online(autoselect=False, literal=True) + + def query_online(self, autoselect: bool = False, literal: bool = False) -> None: issue_number = str(self.leIssueNum.text()).strip() @@ -1046,6 +1053,7 @@ Have fun! cast(ComicArchive, self.comic_archive), self.settings, autoselect, + literal, ) selector.setWindowTitle(f"Search: '{series_name}' - Select Series") diff --git a/comictaggerlib/ui/taggerwindow.ui b/comictaggerlib/ui/taggerwindow.ui index 337d2d4..9a5ec15 100644 --- a/comictaggerlib/ui/taggerwindow.ui +++ b/comictaggerlib/ui/taggerwindow.ui @@ -1236,6 +1236,7 @@ + @@ -1461,6 +1462,14 @@ Show Log Window + + + Literal Search + + + perform a literal search on the series and return the first 50 results + + diff --git a/comictaggerlib/volumeselectionwindow.py b/comictaggerlib/volumeselectionwindow.py index ae67493..c0d8944 100644 --- a/comictaggerlib/volumeselectionwindow.py +++ b/comictaggerlib/volumeselectionwindow.py @@ -15,7 +15,9 @@ # limitations under the License. from __future__ import annotations +import itertools import logging +from collections import deque from PyQt5 import QtCore, QtWidgets, uic from PyQt5.QtCore import pyqtSignal @@ -40,20 +42,21 @@ class SearchThread(QtCore.QThread): searchComplete = pyqtSignal() progressUpdate = pyqtSignal(int, int) - def __init__(self, series_name: str, refresh: bool) -> None: + def __init__(self, series_name: str, refresh: bool, literal: bool = False) -> None: QtCore.QThread.__init__(self) self.series_name = series_name self.refresh: bool = refresh self.error_code: int | None = None self.cv_error = False self.cv_search_results: list[CVVolumeResults] = [] + self.literal = literal def run(self) -> None: comic_vine = ComicVineTalker() try: self.cv_error = False self.cv_search_results = comic_vine.search_for_series( - self.series_name, callback=self.prog_callback, refresh_cache=self.refresh + self.series_name, self.prog_callback, self.refresh, self.literal ) except ComicVineTalkerException as e: self.cv_search_results = [] @@ -101,6 +104,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog): comic_archive: ComicArchive, settings: ComicTaggerSettings, autoselect: bool = False, + literal: bool = False, ) -> None: super().__init__(parent) @@ -132,6 +136,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog): self.immediate_autoselect = autoselect self.cover_index_list = cover_index_list self.cv_search_results: list[CVVolumeResults] = [] + self.literal = literal self.ii: IssueIdentifier | None = None self.iddialog: IDProgressWindow | None = None self.id_thread: IdentifyThread | None = None @@ -155,7 +160,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog): self.twList.selectRow(0) def update_buttons(self) -> None: - enabled = bool(self.cv_search_results and len(self.cv_search_results) > 0) + enabled = bool(self.cv_search_results) self.btnRequery.setEnabled(enabled) self.btnIssues.setEnabled(enabled) @@ -305,7 +310,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog): self.progdialog.canceled.connect(self.search_canceled) self.progdialog.setModal(True) self.progdialog.setMinimumDuration(300) - self.search_thread = SearchThread(self.series_name, refresh) + self.search_thread = SearchThread(self.series_name, refresh, self.literal) self.search_thread.searchComplete.connect(self.search_complete) self.search_thread.progressUpdate.connect(self.search_progress_update) self.search_thread.start() @@ -382,14 +387,25 @@ class VolumeSelectionWindow(QtWidgets.QDialog): # move sanitized matches to the front if self.settings.exact_series_matches_first: try: - sanitized = utils.sanitize_title(self.series_name) - exact_matches = list( - filter(lambda d: utils.sanitize_title(str(d["name"])) in sanitized, self.cv_search_results) - ) - non_matches = list( - filter(lambda d: utils.sanitize_title(str(d["name"])) not in sanitized, self.cv_search_results) - ) - self.cv_search_results = exact_matches + non_matches + sanitized = utils.sanitize_title(self.series_name, False).casefold() + sanitized_no_articles = utils.sanitize_title(self.series_name, True).casefold() + + deques: list[deque[CVVolumeResults]] = [deque(), deque(), deque()] + + def categorize(result): + # We don't remove anything on this one so that we only get exact matches + if utils.sanitize_title(result["name"], True).casefold() == sanitized_no_articles: + return 0 + + # this ensures that 'The Joker' is near the top even if you search 'Joker' + if utils.sanitize_title(result["name"], False).casefold() in sanitized: + return 1 + return 2 + + for comic in self.cv_search_results: + deques[categorize(comic)].append(comic) + logger.info("Length: %d, %d, %d", len(deques[0]), len(deques[1]), len(deques[2])) + self.cv_search_results = list(itertools.chain.from_iterable(deques)) except Exception: logger.exception("bad data error filtering exact/near matches") @@ -436,12 +452,12 @@ class VolumeSelectionWindow(QtWidgets.QDialog): self.twList.selectRow(0) self.twList.resizeColumnsToContents() - if len(self.cv_search_results) == 0: + if not self.cv_search_results: QtCore.QCoreApplication.processEvents() QtWidgets.QMessageBox.information(self, "Search Result", "No matches found!") QtCore.QTimer.singleShot(200, self.close_me) - if self.immediate_autoselect and len(self.cv_search_results) > 0: + if self.immediate_autoselect and self.cv_search_results: # defer the immediate autoselect so this dialog has time to pop up QtCore.QCoreApplication.processEvents() QtCore.QTimer.singleShot(10, self.do_immediate_autoselect)