diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py
index 3842d21..129688e 100644
--- a/comicapi/comicarchive.py
+++ b/comicapi/comicarchive.py
@@ -28,6 +28,7 @@ import zipfile
import natsort
import py7zr
+import wordninja
try:
from unrar.cffi import rarfile
@@ -1134,12 +1135,17 @@ class ComicArchive:
remove_c2c: bool = False,
remove_fcbd: bool = False,
remove_publisher: bool = False,
+ split_words: bool = False,
) -> GenericMetadata:
metadata = GenericMetadata()
+ filename = self.path.name
+ if split_words:
+ filename = " ".join(wordninja.split(self.path.stem)) + self.path.suffix
+
if complicated_parser:
- lex = filenamelexer.Lex(self.path.name)
+ lex = filenamelexer.Lex(filename)
p = filenameparser.Parse(
lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher
)
@@ -1159,7 +1165,7 @@ class ComicArchive:
metadata.format = "Annual"
else:
fnp = filenameparser.FileNameParser()
- fnp.parse_filename(str(self.path))
+ fnp.parse_filename(filename)
if fnp.issue:
metadata.issue = fnp.issue
diff --git a/comictaggerlib/autotagstartwindow.py b/comictaggerlib/autotagstartwindow.py
index c178921..41937b6 100644
--- a/comictaggerlib/autotagstartwindow.py
+++ b/comictaggerlib/autotagstartwindow.py
@@ -44,6 +44,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
self.cbxRemoveAfterSuccess.setCheckState(QtCore.Qt.CheckState.Unchecked)
self.cbxSpecifySearchString.setCheckState(QtCore.Qt.CheckState.Unchecked)
self.cbxAutoImprint.setCheckState(QtCore.Qt.CheckState.Unchecked)
+ self.cbxSplitWords.setCheckState(QtCore.Qt.CheckState.Unchecked)
self.leNameLengthMatchTolerance.setText(str(self.settings.id_length_delta_thresh))
self.leSearchString.setEnabled(False)
@@ -91,6 +92,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
self.wait_and_retry_on_rate_limit = False
self.search_string = ""
self.name_length_match_tolerance = self.settings.id_length_delta_thresh
+ self.split_words = self.cbxSplitWords.isChecked()
def search_string_toggle(self) -> None:
enable = self.cbxSpecifySearchString.isChecked()
@@ -106,6 +108,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
self.remove_after_success = self.cbxRemoveAfterSuccess.isChecked()
self.name_length_match_tolerance = int(self.leNameLengthMatchTolerance.text())
self.wait_and_retry_on_rate_limit = self.cbxWaitForRateLimit.isChecked()
+ self.split_words = self.cbxSplitWords.isChecked()  # read by the tagger window as dlg.split_words
# persist some settings
self.settings.save_on_low_confidence = self.auto_save_on_low
diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py
index 9482446..409392b 100644
--- a/comictaggerlib/cli.py
+++ b/comictaggerlib/cli.py
@@ -185,7 +185,11 @@ def create_local_metadata(
# now, overlay the parsed filename info
if opts.parse_filename:
f_md = ca.metadata_from_filename(
- settings.complicated_parser, settings.remove_c2c, settings.remove_fcbd, settings.remove_publisher
+ settings.complicated_parser,
+ settings.remove_c2c,
+ settings.remove_fcbd,
+ settings.remove_publisher,
+ opts.split_words,
)
if opts.overwrite_metadata:
md = f_md
@@ -193,7 +197,8 @@ def create_local_metadata(
md.overlay(f_md)
if has_desired_tags:
- md = ca.read_metadata(opts.data_style if opts.data_style is not None else 0)
+ t_md = ca.read_metadata(opts.data_style if opts.data_style is not None else 0)
+ md.overlay(t_md)
# finally, use explicit stuff
if opts.overwrite_metadata and not opts.metadata.is_empty:
diff --git a/comictaggerlib/options.py b/comictaggerlib/options.py
index 79e9413..4e9247c 100644
--- a/comictaggerlib/options.py
+++ b/comictaggerlib/options.py
@@ -59,6 +59,8 @@ If no options are given, {0} will run in windowed mode.
-f, --parsefilename Parse the filename to get some info,
specifically series name, issue number,
volume, and publication year.
+ --split-words Splits words before parsing the filename.
+ e.g. 'judgedredd' to 'judge dredd'
-i, --interactive Interactively query the user when there are
multiple matches for an online search.
--nosummary Suppress the default summary after a save
@@ -149,6 +151,7 @@ For more help visit the wiki at: https://github.com/comictagger/comictagger/wiki
self.copy_source: Optional[int] = None
self.config_path = ""
self.overwrite_metadata = False
+ self.split_words = False
def display_msg_and_quit(self, msg: Optional[str], code: int, show_help: bool = False) -> None:
appname = os.path.basename(sys.argv[0])
@@ -296,6 +299,7 @@ For more help visit the wiki at: https://github.com/comictagger/comictagger/wiki
"darkmode",
"config=",
"overwrite",
+ "split-words",
],
)
@@ -352,6 +356,8 @@ For more help visit the wiki at: https://github.com/comictagger/comictagger/wiki
self.abort_export_on_conflict = True
if o in ("-f", "--parsefilename"):
self.parse_filename = True
+ if o == "--split-words":
+ self.split_words = True
if o in ("-w", "--wait-on-cv-rate-limit"):
self.wait_and_retry_on_rate_limit = True
if o == "--config":
diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py
index 42d5f0f..8a1960f 100644
--- a/comictaggerlib/taggerwindow.py
+++ b/comictaggerlib/taggerwindow.py
@@ -356,6 +356,10 @@ Have fun!
self.actionParse_Filename.setStatusTip("Try to extract tags from filename")
self.actionParse_Filename.triggered.connect(self.use_filename)
+ self.actionParse_Filename_split_words.setShortcut("Ctrl+Shift+F")
+ self.actionParse_Filename_split_words.setStatusTip("Try to extract tags from filename and split words")
+ self.actionParse_Filename_split_words.triggered.connect(self.use_filename_split)
+
self.actionSearchOnline.setShortcut("Ctrl+W")
self.actionSearchOnline.setStatusTip("Search online for tags")
self.actionSearchOnline.triggered.connect(self.query_online)
@@ -399,6 +403,7 @@ Have fun!
self.actionLoadFolder.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("longbox.png")))
self.actionWrite_Tags.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("save.png")))
self.actionParse_Filename.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png")))
+ self.actionParse_Filename_split_words.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png")))
self.actionSearchOnline.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("search.png")))
self.actionAutoIdentify.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("auto.png")))
self.actionAutoTag.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("autotag.png")))
@@ -572,7 +577,7 @@ Please choose options below, and select OK.
self.settings.complicated_parser,
self.settings.remove_c2c,
self.settings.remove_fcbd,
- remove_publisher=self.settings.remove_publisher,
+ self.settings.remove_publisher,
)
if len(self.metadata.pages) == 0 and self.comic_archive is not None:
self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages())
@@ -609,6 +614,7 @@ Please choose options below, and select OK.
self.actionViewRawCBLTags.setEnabled(False)
self.actionViewRawCRTags.setEnabled(False)
self.actionParse_Filename.setEnabled(False)
+ self.actionParse_Filename_split_words.setEnabled(False)
self.actionAutoIdentify.setEnabled(False)
self.actionRename.setEnabled(False)
self.actionApplyCBLTransform.setEnabled(False)
@@ -620,6 +626,7 @@ Please choose options below, and select OK.
has_cbi = self.comic_archive.has_cbi()
self.actionParse_Filename.setEnabled(True)
+ self.actionParse_Filename_split_words.setEnabled(True)
self.actionAutoIdentify.setEnabled(True)
self.actionAutoTag.setEnabled(True)
self.actionRename.setEnabled(True)
@@ -942,6 +949,9 @@ Please choose options below, and select OK.
self.metadata = md
def use_filename(self) -> None:
+ self._use_filename()
+
+ def _use_filename(self, split_words: bool = False) -> None:
if self.comic_archive is not None:
# copy the form onto metadata object
self.form_to_metadata()
@@ -949,12 +959,16 @@ Please choose options below, and select OK.
self.settings.complicated_parser,
self.settings.remove_c2c,
self.settings.remove_fcbd,
- remove_publisher=self.settings.remove_publisher,
+ self.settings.remove_publisher,
+ split_words,
)
if new_metadata is not None:
self.metadata.overlay(new_metadata)
self.metadata_to_form()
+ def use_filename_split(self) -> None:
+ self._use_filename(True)
+
def select_folder(self) -> None:
self.select_file(folder_mode=True)
@@ -1676,7 +1690,8 @@ Please choose options below, and select OK.
self.settings.complicated_parser,
self.settings.remove_c2c,
self.settings.remove_fcbd,
- remove_publisher=self.settings.remove_publisher,
+ self.settings.remove_publisher,
+ dlg.split_words,
)
if dlg.ignore_leading_digits_in_filename and md.series is not None:
# remove all leading numbers
diff --git a/comictaggerlib/ui/autotagstartwindow.ui b/comictaggerlib/ui/autotagstartwindow.ui
index 7bbe128..f2d252c 100644
--- a/comictaggerlib/ui/autotagstartwindow.ui
+++ b/comictaggerlib/ui/autotagstartwindow.ui
@@ -10,7 +10,7 @@
0
0
519
- 378
+ 440
@@ -44,7 +44,7 @@
-
-
-
+
-
@@ -149,7 +149,20 @@
- -
+
-
+
+
+
+ 0
+ 0
+
+
+
+ Split words in filenames (e.g. 'judgedredd' to 'judge dredd') (Experimental)
+
+
+
+ -
@@ -165,7 +178,7 @@
- -
+
-
@@ -175,7 +188,7 @@
- -
+
-
diff --git a/comictaggerlib/ui/taggerwindow.ui b/comictaggerlib/ui/taggerwindow.ui
index 2dc7c01..c8981e7 100644
--- a/comictaggerlib/ui/taggerwindow.ui
+++ b/comictaggerlib/ui/taggerwindow.ui
@@ -1232,6 +1232,7 @@
+
@@ -1446,6 +1447,14 @@
Normalize the publisher and map imprints to their parent publisher (e.g. Vertigo is an imprint of DC Comics)
+
+
+ Parse Filename and split words
+
+
+ Parse Filename and split words
+
+