Add a literal search option
This commit is contained in:
parent
470b5c0a17
commit
db37ec7204
@ -130,17 +130,17 @@ def remove_articles(text: str) -> str:
|
||||
return new_text
|
||||
|
||||
|
||||
def sanitize_title(text: str) -> str:
|
||||
def sanitize_title(text: str, basic: bool = False) -> str:
|
||||
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
|
||||
# this will probably cause issues with titles in other character sets e.g. chinese, japanese
|
||||
text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")
|
||||
text = unicodedata.normalize("NFKD", text)
|
||||
# comicvine keeps apostrophes as part of the word
|
||||
text = text.replace("'", "")
|
||||
text = text.replace('"', "")
|
||||
# comicvine ignores punctuation and accents
|
||||
text = re.sub(r"[^A-Za-z0-9]+", " ", text)
|
||||
# remove extra space and articles and all lower case
|
||||
text = remove_articles(text).lower().strip()
|
||||
if not basic:
|
||||
# comicvine ignores punctuation and accents, TODO: only remove punctuation accents and similar
|
||||
text = re.sub(r"[^A-Za-z0-9]+", " ", text)
|
||||
# remove extra space and articles and all lower case
|
||||
text = remove_articles(text).casefold().strip()
|
||||
|
||||
return text
|
||||
|
||||
|
@ -200,15 +200,21 @@ class ComicVineTalker:
|
||||
raise ComicVineTalkerException(ComicVineTalkerException.Unknown, "Error on Comic Vine server")
|
||||
|
||||
def search_for_series(
|
||||
self, series_name: str, callback: Callable[[int, int], None] | None = None, refresh_cache: bool = False
|
||||
self,
|
||||
series_name: str,
|
||||
callback: Callable[[int, int], None] | None = None,
|
||||
refresh_cache: bool = False,
|
||||
literal: bool = False,
|
||||
) -> list[CVVolumeResults]:
|
||||
|
||||
# Sanitize the series name for comicvine searching; comicvine search ignores symbols
|
||||
search_series_name = utils.sanitize_title(series_name)
|
||||
search_series_name = utils.sanitize_title(series_name, literal)
|
||||
logger.info("Searching: %s", search_series_name)
|
||||
|
||||
# before we search online, look in our cache, since we might have done this same search recently
|
||||
# Before we search online, look in our cache, since we might have done this same search recently
|
||||
# For literal searches always retrieve from online
|
||||
cvc = ComicVineCacher()
|
||||
if not refresh_cache:
|
||||
if not refresh_cache and not literal:
|
||||
cached_search_results = cvc.get_search_results(series_name)
|
||||
|
||||
if len(cached_search_results) > 0:
|
||||
@ -258,25 +264,24 @@ class ComicVineTalker:
|
||||
stop_searching = False
|
||||
while current_result_count < total_result_count:
|
||||
|
||||
last_result = search_results[-1]["name"]
|
||||
if not literal:
|
||||
# Sanitize the series name for comicvine searching; comicvine search ignores symbols
|
||||
last_result = utils.sanitize_title(search_results[-1]["name"])
|
||||
|
||||
# Sanitize the series name for comicvine searching; comicvine search ignores symbols
|
||||
last_result = utils.sanitize_title(last_result)
|
||||
# See if the last result's name has all of the search terms.
|
||||
# If not, break out of this loop; we're done.
|
||||
for term in search_series_name.split():
|
||||
if term not in last_result:
|
||||
stop_searching = True
|
||||
break
|
||||
|
||||
# See if the last result's name has all of the search terms.
|
||||
# If not, break out of this loop; we're done.
|
||||
for term in search_series_name.split():
|
||||
if term not in last_result.lower():
|
||||
# Also, stop searching when the word count of last results is too much longer than our search terms list
|
||||
if len(last_result) > result_word_count_max:
|
||||
stop_searching = True
|
||||
|
||||
if stop_searching:
|
||||
break
|
||||
|
||||
# Also, stop searching when the word count of last results is too much longer than our search terms list
|
||||
if len(last_result) > result_word_count_max:
|
||||
stop_searching = True
|
||||
|
||||
if stop_searching:
|
||||
break
|
||||
|
||||
if callback is None:
|
||||
self.write_log(f"getting another page of results {current_result_count} of {total_result_count}...\n")
|
||||
page += 1
|
||||
@ -290,18 +295,19 @@ class ComicVineTalker:
|
||||
if callback is not None:
|
||||
callback(current_result_count, total_result_count)
|
||||
|
||||
# Remove any search results that don't contain all the search terms (iterate backwards for easy removal)
|
||||
for i in range(len(search_results) - 1, -1, -1):
|
||||
record = search_results[i]
|
||||
# Sanitize the series name for comicvine searching; comicvine search ignores symbols
|
||||
record_name = utils.sanitize_title(record["name"])
|
||||
for term in search_series_name.split():
|
||||
# Literal searches simply return the matches; no extra processing is done
|
||||
if not literal:
|
||||
# Remove any search results that don't contain all the search terms (iterate backwards for easy removal)
|
||||
for record in reversed(search_results):
|
||||
# Sanitize the series name for comicvine searching; comicvine search ignores symbols
|
||||
record_name = utils.sanitize_title(record["name"])
|
||||
for term in search_series_name.split():
|
||||
if term not in record_name:
|
||||
search_results.remove(record)
|
||||
break
|
||||
|
||||
if term not in record_name:
|
||||
del search_results[i]
|
||||
break
|
||||
|
||||
# cache these search results
|
||||
# Cache these search results, even if it's literal we cache the results
|
||||
# The most it will cause is extra processing time
|
||||
cvc.add_search_results(series_name, search_results)
|
||||
|
||||
return search_results
|
||||
|
@ -385,6 +385,8 @@ Have fun!
|
||||
self.actionAutoIdentify.setShortcut("Ctrl+I")
|
||||
self.actionAutoIdentify.triggered.connect(self.auto_identify_search)
|
||||
|
||||
self.actionLiteralSearch.triggered.connect(self.literal_search)
|
||||
|
||||
self.actionApplyCBLTransform.setShortcut("Ctrl+L")
|
||||
self.actionApplyCBLTransform.setStatusTip("Modify tags specifically for CBL format")
|
||||
self.actionApplyCBLTransform.triggered.connect(self.apply_cbl_transform)
|
||||
@ -424,6 +426,7 @@ Have fun!
|
||||
self.actionParse_Filename.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png")))
|
||||
self.actionParse_Filename_split_words.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png")))
|
||||
self.actionSearchOnline.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("search.png")))
|
||||
self.actionLiteralSearch.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("search.png")))
|
||||
self.actionAutoIdentify.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("auto.png")))
|
||||
self.actionAutoTag.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("autotag.png")))
|
||||
self.actionAutoImprint.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("autotag.png")))
|
||||
@ -434,6 +437,7 @@ Have fun!
|
||||
self.toolBar.addAction(self.actionLoadFolder)
|
||||
self.toolBar.addAction(self.actionWrite_Tags)
|
||||
self.toolBar.addAction(self.actionSearchOnline)
|
||||
self.toolBar.addAction(self.actionLiteralSearch)
|
||||
self.toolBar.addAction(self.actionAutoIdentify)
|
||||
self.toolBar.addAction(self.actionAutoTag)
|
||||
self.toolBar.addAction(self.actionClearEntryForm)
|
||||
@ -1015,7 +1019,10 @@ Have fun!
|
||||
|
||||
self.query_online(autoselect=True)
|
||||
|
||||
def query_online(self, autoselect: bool = False) -> None:
|
||||
def literal_search(self):
|
||||
self.query_online(autoselect=False, literal=True)
|
||||
|
||||
def query_online(self, autoselect: bool = False, literal: bool = False) -> None:
|
||||
|
||||
issue_number = str(self.leIssueNum.text()).strip()
|
||||
|
||||
@ -1046,6 +1053,7 @@ Have fun!
|
||||
cast(ComicArchive, self.comic_archive),
|
||||
self.settings,
|
||||
autoselect,
|
||||
literal,
|
||||
)
|
||||
|
||||
selector.setWindowTitle(f"Search: '{series_name}' - Select Series")
|
||||
|
@ -1236,6 +1236,7 @@
|
||||
<addaction name="actionSearchOnline"/>
|
||||
<addaction name="actionAutoIdentify"/>
|
||||
<addaction name="actionAutoImprint"/>
|
||||
<addaction name="actionLiteralSearch"/>
|
||||
<addaction name="separator"/>
|
||||
<addaction name="actionApplyCBLTransform"/>
|
||||
<addaction name="actionReCalcPageDims"/>
|
||||
@ -1461,6 +1462,14 @@
|
||||
<string>Show Log Window</string>
|
||||
</property>
|
||||
</action>
|
||||
<action name="actionLiteralSearch">
|
||||
<property name="text">
|
||||
<string>Literal Search</string>
|
||||
</property>
|
||||
<property name="toolTip">
|
||||
<string>perform a literal search on the series and return the first 50 results</string>
|
||||
</property>
|
||||
</action>
|
||||
</widget>
|
||||
<layoutdefault spacing="6" margin="11"/>
|
||||
<resources/>
|
||||
|
@ -15,7 +15,9 @@
|
||||
# limitations under the License.
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import logging
|
||||
from collections import deque
|
||||
|
||||
from PyQt5 import QtCore, QtWidgets, uic
|
||||
from PyQt5.QtCore import pyqtSignal
|
||||
@ -40,20 +42,21 @@ class SearchThread(QtCore.QThread):
|
||||
searchComplete = pyqtSignal()
|
||||
progressUpdate = pyqtSignal(int, int)
|
||||
|
||||
def __init__(self, series_name: str, refresh: bool) -> None:
|
||||
def __init__(self, series_name: str, refresh: bool, literal: bool = False) -> None:
|
||||
QtCore.QThread.__init__(self)
|
||||
self.series_name = series_name
|
||||
self.refresh: bool = refresh
|
||||
self.error_code: int | None = None
|
||||
self.cv_error = False
|
||||
self.cv_search_results: list[CVVolumeResults] = []
|
||||
self.literal = literal
|
||||
|
||||
def run(self) -> None:
|
||||
comic_vine = ComicVineTalker()
|
||||
try:
|
||||
self.cv_error = False
|
||||
self.cv_search_results = comic_vine.search_for_series(
|
||||
self.series_name, callback=self.prog_callback, refresh_cache=self.refresh
|
||||
self.series_name, self.prog_callback, self.refresh, self.literal
|
||||
)
|
||||
except ComicVineTalkerException as e:
|
||||
self.cv_search_results = []
|
||||
@ -101,6 +104,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
|
||||
comic_archive: ComicArchive,
|
||||
settings: ComicTaggerSettings,
|
||||
autoselect: bool = False,
|
||||
literal: bool = False,
|
||||
) -> None:
|
||||
super().__init__(parent)
|
||||
|
||||
@ -132,6 +136,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
|
||||
self.immediate_autoselect = autoselect
|
||||
self.cover_index_list = cover_index_list
|
||||
self.cv_search_results: list[CVVolumeResults] = []
|
||||
self.literal = literal
|
||||
self.ii: IssueIdentifier | None = None
|
||||
self.iddialog: IDProgressWindow | None = None
|
||||
self.id_thread: IdentifyThread | None = None
|
||||
@ -155,7 +160,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
|
||||
self.twList.selectRow(0)
|
||||
|
||||
def update_buttons(self) -> None:
|
||||
enabled = bool(self.cv_search_results and len(self.cv_search_results) > 0)
|
||||
enabled = bool(self.cv_search_results)
|
||||
|
||||
self.btnRequery.setEnabled(enabled)
|
||||
self.btnIssues.setEnabled(enabled)
|
||||
@ -305,7 +310,7 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
|
||||
self.progdialog.canceled.connect(self.search_canceled)
|
||||
self.progdialog.setModal(True)
|
||||
self.progdialog.setMinimumDuration(300)
|
||||
self.search_thread = SearchThread(self.series_name, refresh)
|
||||
self.search_thread = SearchThread(self.series_name, refresh, self.literal)
|
||||
self.search_thread.searchComplete.connect(self.search_complete)
|
||||
self.search_thread.progressUpdate.connect(self.search_progress_update)
|
||||
self.search_thread.start()
|
||||
@ -382,14 +387,25 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
|
||||
# move sanitized matches to the front
|
||||
if self.settings.exact_series_matches_first:
|
||||
try:
|
||||
sanitized = utils.sanitize_title(self.series_name)
|
||||
exact_matches = list(
|
||||
filter(lambda d: utils.sanitize_title(str(d["name"])) in sanitized, self.cv_search_results)
|
||||
)
|
||||
non_matches = list(
|
||||
filter(lambda d: utils.sanitize_title(str(d["name"])) not in sanitized, self.cv_search_results)
|
||||
)
|
||||
self.cv_search_results = exact_matches + non_matches
|
||||
sanitized = utils.sanitize_title(self.series_name, False).casefold()
|
||||
sanitized_no_articles = utils.sanitize_title(self.series_name, True).casefold()
|
||||
|
||||
deques: list[deque[CVVolumeResults]] = [deque(), deque(), deque()]
|
||||
|
||||
def categorize(result):
|
||||
# We don't remove anything on this one so that we only get exact matches
|
||||
if utils.sanitize_title(result["name"], True).casefold() == sanitized_no_articles:
|
||||
return 0
|
||||
|
||||
# this ensures that 'The Joker' is near the top even if you search 'Joker'
|
||||
if utils.sanitize_title(result["name"], False).casefold() in sanitized:
|
||||
return 1
|
||||
return 2
|
||||
|
||||
for comic in self.cv_search_results:
|
||||
deques[categorize(comic)].append(comic)
|
||||
logger.info("Length: %d, %d, %d", len(deques[0]), len(deques[1]), len(deques[2]))
|
||||
self.cv_search_results = list(itertools.chain.from_iterable(deques))
|
||||
except Exception:
|
||||
logger.exception("bad data error filtering exact/near matches")
|
||||
|
||||
@ -436,12 +452,12 @@ class VolumeSelectionWindow(QtWidgets.QDialog):
|
||||
self.twList.selectRow(0)
|
||||
self.twList.resizeColumnsToContents()
|
||||
|
||||
if len(self.cv_search_results) == 0:
|
||||
if not self.cv_search_results:
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
QtWidgets.QMessageBox.information(self, "Search Result", "No matches found!")
|
||||
QtCore.QTimer.singleShot(200, self.close_me)
|
||||
|
||||
if self.immediate_autoselect and len(self.cv_search_results) > 0:
|
||||
if self.immediate_autoselect and self.cv_search_results:
|
||||
# defer the immediate autoselect so this dialog has time to pop up
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
QtCore.QTimer.singleShot(10, self.do_immediate_autoselect)
|
||||
|
Loading…
Reference in New Issue
Block a user