diff --git a/comicapi/utils.py b/comicapi/utils.py
index 6a1bb89..8907949 100644
--- a/comicapi/utils.py
+++ b/comicapi/utils.py
@@ -130,17 +130,17 @@ def remove_articles(text: str) -> str:
return new_text
-def sanitize_title(text: str) -> str:
+def sanitize_title(text: str, basic: bool = False) -> str:
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
- # this will probably cause issues with titles in other character sets e.g. chinese, japanese
- text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")
+ text = unicodedata.normalize("NFKD", text)
# comicvine keeps apostrophes a part of the word
text = text.replace("'", "")
text = text.replace('"', "")
- # comicvine ignores punctuation and accents
- text = re.sub(r"[^A-Za-z0-9]+", " ", text)
- # remove extra space and articles and all lower case
- text = remove_articles(text).lower().strip()
+ if not basic:
+ # comicvine ignores punctuation and accents, TODO: only remove punctuation, accents and similar
+ text = re.sub(r"[^A-Za-z0-9]+", " ", text)
+ # remove extra space and articles and all lower case
+ text = remove_articles(text).casefold().strip()
return text
diff --git a/comictaggerlib/comicvinetalker.py b/comictaggerlib/comicvinetalker.py
index 6ca1219..92b6a66 100644
--- a/comictaggerlib/comicvinetalker.py
+++ b/comictaggerlib/comicvinetalker.py
@@ -200,15 +200,21 @@ class ComicVineTalker:
raise ComicVineTalkerException(ComicVineTalkerException.Unknown, "Error on Comic Vine server")
def search_for_series(
- self, series_name: str, callback: Callable[[int, int], None] | None = None, refresh_cache: bool = False
+ self,
+ series_name: str,
+ callback: Callable[[int, int], None] | None = None,
+ refresh_cache: bool = False,
+ literal: bool = False,
) -> list[CVVolumeResults]:
# Sanitize the series name for comicvine searching, comicvine search ignore symbols
- search_series_name = utils.sanitize_title(series_name)
+ search_series_name = utils.sanitize_title(series_name, literal)
+ logger.info("Searching: %s", search_series_name)
- # before we search online, look in our cache, since we might have done this same search recently
+ # Before we search online, look in our cache, since we might have done this same search recently
+ # For literal searches always retrieve from online
cvc = ComicVineCacher()
- if not refresh_cache:
+ if not refresh_cache and not literal:
cached_search_results = cvc.get_search_results(series_name)
if len(cached_search_results) > 0:
@@ -258,25 +264,24 @@ class ComicVineTalker:
stop_searching = False
while current_result_count < total_result_count:
- last_result = search_results[-1]["name"]
+ if not literal:
+ # Sanitize the series name for comicvine searching, comicvine search ignore symbols
+ last_result = utils.sanitize_title(search_results[-1]["name"])
- # Sanitize the series name for comicvine searching, comicvine search ignore symbols
- last_result = utils.sanitize_title(last_result)
+ # See if the last result's name has all of the search terms.
+ # If not, break out of this loop, we're done.
+ for term in search_series_name.split():
+ if term not in last_result:
+ stop_searching = True
+ break
- # See if the last result's name has all the of the search terms.
- # If not, break out of this, loop, we're done.
- for term in search_series_name.split():
- if term not in last_result.lower():
+ # Also, stop searching when the word count of last results is too much longer than our search terms list
+ if len(last_result) > result_word_count_max:
stop_searching = True
+
+ if stop_searching:
break
- # Also, stop searching when the word count of last results is too much longer than our search terms list
- if len(last_result) > result_word_count_max:
- stop_searching = True
-
- if stop_searching:
- break
-
if callback is None:
self.write_log(f"getting another page of results {current_result_count} of {total_result_count}...\n")
page += 1
@@ -290,18 +295,19 @@ class ComicVineTalker:
if callback is not None:
callback(current_result_count, total_result_count)
- # Remove any search results that don't contain all the search terms (iterate backwards for easy removal)
- for i in range(len(search_results) - 1, -1, -1):
- record = search_results[i]
- # Sanitize the series name for comicvine searching, comicvine search ignore symbols
- record_name = utils.sanitize_title(record["name"])
- for term in search_series_name.split():
+ # Literal searches simply return the matches; no extra processing is done
+ if not literal:
+ # Remove any search results that don't contain all the search terms (iterate backwards for easy removal)
+ for record in reversed(search_results):
+ # Sanitize the series name for comicvine searching, comicvine search ignore symbols
+ record_name = utils.sanitize_title(record["name"])
+ for term in search_series_name.split():
+ if term not in record_name:
+ search_results.remove(record)
+ break
- if term not in record_name:
- del search_results[i]
- break
-
- # cache these search results
+ # Cache these search results, even if it's literal we cache the results
+ # The most it will cause is extra processing time
cvc.add_search_results(series_name, search_results)
return search_results
diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py
index 63650c0..4eaddc6 100644
--- a/comictaggerlib/taggerwindow.py
+++ b/comictaggerlib/taggerwindow.py
@@ -385,6 +385,8 @@ Have fun!
self.actionAutoIdentify.setShortcut("Ctrl+I")
self.actionAutoIdentify.triggered.connect(self.auto_identify_search)
+ self.actionLiteralSearch.triggered.connect(self.literal_search)
+
self.actionApplyCBLTransform.setShortcut("Ctrl+L")
self.actionApplyCBLTransform.setStatusTip("Modify tags specifically for CBL format")
self.actionApplyCBLTransform.triggered.connect(self.apply_cbl_transform)
@@ -424,6 +426,7 @@ Have fun!
self.actionParse_Filename.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png")))
self.actionParse_Filename_split_words.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png")))
self.actionSearchOnline.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("search.png")))
+ self.actionLiteralSearch.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("search.png")))
self.actionAutoIdentify.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("auto.png")))
self.actionAutoTag.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("autotag.png")))
self.actionAutoImprint.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("autotag.png")))
@@ -434,6 +437,7 @@ Have fun!
self.toolBar.addAction(self.actionLoadFolder)
self.toolBar.addAction(self.actionWrite_Tags)
self.toolBar.addAction(self.actionSearchOnline)
+ self.toolBar.addAction(self.actionLiteralSearch)
self.toolBar.addAction(self.actionAutoIdentify)
self.toolBar.addAction(self.actionAutoTag)
self.toolBar.addAction(self.actionClearEntryForm)
@@ -1015,7 +1019,10 @@ Have fun!
self.query_online(autoselect=True)
- def query_online(self, autoselect: bool = False) -> None:
+ def literal_search(self):
+ self.query_online(autoselect=False, literal=True)
+
+ def query_online(self, autoselect: bool = False, literal: bool = False) -> None:
issue_number = str(self.leIssueNum.text()).strip()
@@ -1046,6 +1053,7 @@ Have fun!
cast(ComicArchive, self.comic_archive),
self.settings,
autoselect,
+ literal,
)
selector.setWindowTitle(f"Search: '{series_name}' - Select Series")
diff --git a/comictaggerlib/ui/taggerwindow.ui b/comictaggerlib/ui/taggerwindow.ui
index 337d2d4..9a5ec15 100644
--- a/comictaggerlib/ui/taggerwindow.ui
+++ b/comictaggerlib/ui/taggerwindow.ui
@@ -1236,6 +1236,7 @@
+
@@ -1461,6 +1462,14 @@
Show Log Window
+
+
+ Literal Search
+
+
+ perform a literal search on the series and return the first 50 results
+
+
diff --git a/comictaggerlib/volumeselectionwindow.py b/comictaggerlib/volumeselectionwindow.py
index ae67493..c0d8944 100644
--- a/comictaggerlib/volumeselectionwindow.py
+++ b/comictaggerlib/volumeselectionwindow.py
@@ -15,7 +15,9 @@
# limitations under the License.
from __future__ import annotations
+import itertools
import logging
+from collections import deque
from PyQt5 import QtCore, QtWidgets, uic
from PyQt5.QtCore import pyqtSignal
@@ -40,20 +42,21 @@ class SearchThread(QtCore.QThread):
searchComplete = pyqtSignal()
progressUpdate = pyqtSignal(int, int)
- def __init__(self, series_name: str, refresh: bool) -> None:
+ def __init__(self, series_name: str, refresh: bool, literal: bool = False) -> None:
QtCore.QThread.__init__(self)
self.series_name = series_name
self.refresh: bool = refresh
self.error_code: int | None = None
self.cv_error = False
self.cv_search_results: list[CVVolumeResults] = []
+ self.literal = literal
def run(self) -> None:
comic_vine = ComicVineTalker()
try:
self.cv_error = False
self.cv_search_results = comic_vine.search_for_series(
- self.series_name, callback=self.prog_callback, refresh_cache=self.refresh
+ self.series_name, self.prog_callback, self.refresh, self.literal
)
except ComicVineTalkerException as e:
self.cv_search_results = []
@@ -101,6 +104,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
comic_archive: ComicArchive,
settings: ComicTaggerSettings,
autoselect: bool = False,
+ literal: bool = False,
) -> None:
super().__init__(parent)
@@ -132,6 +136,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
self.immediate_autoselect = autoselect
self.cover_index_list = cover_index_list
self.cv_search_results: list[CVVolumeResults] = []
+ self.literal = literal
self.ii: IssueIdentifier | None = None
self.iddialog: IDProgressWindow | None = None
self.id_thread: IdentifyThread | None = None
@@ -155,7 +160,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
self.twList.selectRow(0)
def update_buttons(self) -> None:
- enabled = bool(self.cv_search_results and len(self.cv_search_results) > 0)
+ enabled = bool(self.cv_search_results)
self.btnRequery.setEnabled(enabled)
self.btnIssues.setEnabled(enabled)
@@ -305,7 +310,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
self.progdialog.canceled.connect(self.search_canceled)
self.progdialog.setModal(True)
self.progdialog.setMinimumDuration(300)
- self.search_thread = SearchThread(self.series_name, refresh)
+ self.search_thread = SearchThread(self.series_name, refresh, self.literal)
self.search_thread.searchComplete.connect(self.search_complete)
self.search_thread.progressUpdate.connect(self.search_progress_update)
self.search_thread.start()
@@ -382,14 +387,25 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
# move sanitized matches to the front
if self.settings.exact_series_matches_first:
try:
- sanitized = utils.sanitize_title(self.series_name)
- exact_matches = list(
- filter(lambda d: utils.sanitize_title(str(d["name"])) in sanitized, self.cv_search_results)
- )
- non_matches = list(
- filter(lambda d: utils.sanitize_title(str(d["name"])) not in sanitized, self.cv_search_results)
- )
- self.cv_search_results = exact_matches + non_matches
+ sanitized = utils.sanitize_title(self.series_name, False).casefold()
+ sanitized_no_articles = utils.sanitize_title(self.series_name, True).casefold()
+
+ deques: list[deque[CVVolumeResults]] = [deque(), deque(), deque()]
+
+ def categorize(result):
+ # We don't remove anything on this one so that we only get exact matches
+ if utils.sanitize_title(result["name"], True).casefold() == sanitized_no_articles:
+ return 0
+
+ # this ensures that 'The Joker' is near the top even if you search 'Joker'
+ if utils.sanitize_title(result["name"], False).casefold() in sanitized:
+ return 1
+ return 2
+
+ for comic in self.cv_search_results:
+ deques[categorize(comic)].append(comic)
+ logger.info("Length: %d, %d, %d", len(deques[0]), len(deques[1]), len(deques[2]))
+ self.cv_search_results = list(itertools.chain.from_iterable(deques))
except Exception:
logger.exception("bad data error filtering exact/near matches")
@@ -436,12 +452,12 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
self.twList.selectRow(0)
self.twList.resizeColumnsToContents()
- if len(self.cv_search_results) == 0:
+ if not self.cv_search_results:
QtCore.QCoreApplication.processEvents()
QtWidgets.QMessageBox.information(self, "Search Result", "No matches found!")
QtCore.QTimer.singleShot(200, self.close_me)
- if self.immediate_autoselect and len(self.cv_search_results) > 0:
+ if self.immediate_autoselect and self.cv_search_results:
# defer the immediate autoselect so this dialog has time to pop up
QtCore.QCoreApplication.processEvents()
QtCore.QTimer.singleShot(10, self.do_immediate_autoselect)