diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py index 3842d21..129688e 100644 --- a/comicapi/comicarchive.py +++ b/comicapi/comicarchive.py @@ -28,6 +28,7 @@ import zipfile import natsort import py7zr +import wordninja try: from unrar.cffi import rarfile @@ -1134,12 +1135,17 @@ class ComicArchive: remove_c2c: bool = False, remove_fcbd: bool = False, remove_publisher: bool = False, + split_words: bool = False, ) -> GenericMetadata: metadata = GenericMetadata() + filename = self.path.name + if split_words: + filename = " ".join(wordninja.split(self.path.stem)) + self.path.suffix + if complicated_parser: - lex = filenamelexer.Lex(self.path.name) + lex = filenamelexer.Lex(filename) p = filenameparser.Parse( lex.items, remove_c2c=remove_c2c, remove_fcbd=remove_fcbd, remove_publisher=remove_publisher ) @@ -1159,7 +1165,7 @@ class ComicArchive: metadata.format = "Annual" else: fnp = filenameparser.FileNameParser() - fnp.parse_filename(str(self.path)) + fnp.parse_filename(filename) if fnp.issue: metadata.issue = fnp.issue diff --git a/comictaggerlib/autotagstartwindow.py b/comictaggerlib/autotagstartwindow.py index c178921..41937b6 100644 --- a/comictaggerlib/autotagstartwindow.py +++ b/comictaggerlib/autotagstartwindow.py @@ -44,6 +44,7 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.cbxRemoveAfterSuccess.setCheckState(QtCore.Qt.CheckState.Unchecked) self.cbxSpecifySearchString.setCheckState(QtCore.Qt.CheckState.Unchecked) self.cbxAutoImprint.setCheckState(QtCore.Qt.CheckState.Unchecked) + self.cbxSplitWords.setCheckState(QtCore.Qt.CheckState.Unchecked) self.leNameLengthMatchTolerance.setText(str(self.settings.id_length_delta_thresh)) self.leSearchString.setEnabled(False) @@ -91,6 +92,7 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.wait_and_retry_on_rate_limit = False self.search_string = "" self.name_length_match_tolerance = self.settings.id_length_delta_thresh + self.split_words = self.cbxSplitWords.isChecked() def search_string_toggle(self) -> None: 
enable = self.cbxSpecifySearchString.isChecked() @@ -106,6 +108,7 @@ class AutoTagStartWindow(QtWidgets.QDialog): self.remove_after_success = self.cbxRemoveAfterSuccess.isChecked() self.name_length_match_tolerance = int(self.leNameLengthMatchTolerance.text()) self.wait_and_retry_on_rate_limit = self.cbxWaitForRateLimit.isChecked() + self.split_words = self.cbxSplitWords.isChecked() # persist some settings self.settings.save_on_low_confidence = self.auto_save_on_low diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 9482446..409392b 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -185,7 +185,11 @@ def create_local_metadata( # now, overlay the parsed filename info if opts.parse_filename: f_md = ca.metadata_from_filename( - settings.complicated_parser, settings.remove_c2c, settings.remove_fcbd, settings.remove_publisher + settings.complicated_parser, + settings.remove_c2c, + settings.remove_fcbd, + settings.remove_publisher, + opts.split_words, ) if opts.overwrite_metadata: md = f_md @@ -193,7 +197,8 @@ def create_local_metadata( md.overlay(f_md) if has_desired_tags: - md = ca.read_metadata(opts.data_style if opts.data_style is not None else 0) + t_md = ca.read_metadata(opts.data_style if opts.data_style is not None else 0) + md.overlay(t_md) # finally, use explicit stuff if opts.overwrite_metadata and not opts.metadata.is_empty: diff --git a/comictaggerlib/options.py b/comictaggerlib/options.py index 79e9413..4e9247c 100644 --- a/comictaggerlib/options.py +++ b/comictaggerlib/options.py @@ -59,6 +59,8 @@ If no options are given, {0} will run in windowed mode. -f, --parsefilename Parse the filename to get some info, specifically series name, issue number, volume, and publication year. + --split-words Splits words before parsing the filename. + e.g. 'judgedredd' to 'judge dredd' -i, --interactive Interactively query the user when there are multiple matches for an online search. 
--nosummary Suppress the default summary after a save @@ -149,6 +151,7 @@ For more help visit the wiki at: https://github.com/comictagger/comictagger/wiki self.copy_source: Optional[int] = None self.config_path = "" self.overwrite_metadata = False + self.split_words = False def display_msg_and_quit(self, msg: Optional[str], code: int, show_help: bool = False) -> None: appname = os.path.basename(sys.argv[0]) @@ -296,6 +299,7 @@ For more help visit the wiki at: https://github.com/comictagger/comictagger/wiki "darkmode", "config=", "overwrite", + "split-words", ], ) @@ -352,6 +356,8 @@ For more help visit the wiki at: https://github.com/comictagger/comictagger/wiki self.abort_export_on_conflict = True if o in ("-f", "--parsefilename"): self.parse_filename = True + if o == "--split-words": + self.split_words = True if o in ("-w", "--wait-on-cv-rate-limit"): self.wait_and_retry_on_rate_limit = True if o == "--config": diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index 42d5f0f..8a1960f 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -356,6 +356,10 @@ Have fun! self.actionParse_Filename.setStatusTip("Try to extract tags from filename") self.actionParse_Filename.triggered.connect(self.use_filename) + self.actionParse_Filename_split_words.setShortcut("Ctrl+Shift+F") + self.actionParse_Filename_split_words.setStatusTip("Try to extract tags from filename and split words") + self.actionParse_Filename_split_words.triggered.connect(self.use_filename_split) + self.actionSearchOnline.setShortcut("Ctrl+W") self.actionSearchOnline.setStatusTip("Search online for tags") self.actionSearchOnline.triggered.connect(self.query_online) @@ -399,6 +403,7 @@ Have fun! 
self.actionLoadFolder.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("longbox.png"))) self.actionWrite_Tags.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("save.png"))) self.actionParse_Filename.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png"))) + self.actionParse_Filename_split_words.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("parse.png"))) self.actionSearchOnline.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("search.png"))) self.actionAutoIdentify.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("auto.png"))) self.actionAutoTag.setIcon(QtGui.QIcon(ComicTaggerSettings.get_graphic("autotag.png"))) @@ -572,7 +577,7 @@ Please choose options below, and select OK. self.settings.complicated_parser, self.settings.remove_c2c, self.settings.remove_fcbd, - remove_publisher=self.settings.remove_publisher, + self.settings.remove_publisher, ) if len(self.metadata.pages) == 0 and self.comic_archive is not None: self.metadata.set_default_page_list(self.comic_archive.get_number_of_pages()) @@ -609,6 +614,7 @@ Please choose options below, and select OK. self.actionViewRawCBLTags.setEnabled(False) self.actionViewRawCRTags.setEnabled(False) self.actionParse_Filename.setEnabled(False) + self.actionParse_Filename_split_words.setEnabled(False) self.actionAutoIdentify.setEnabled(False) self.actionRename.setEnabled(False) self.actionApplyCBLTransform.setEnabled(False) @@ -620,6 +626,7 @@ Please choose options below, and select OK. has_cbi = self.comic_archive.has_cbi() self.actionParse_Filename.setEnabled(True) + self.actionParse_Filename_split_words.setEnabled(True) self.actionAutoIdentify.setEnabled(True) self.actionAutoTag.setEnabled(True) self.actionRename.setEnabled(True) @@ -942,6 +949,9 @@ Please choose options below, and select OK. 
self.metadata = md def use_filename(self) -> None: + self._use_filename() + + def _use_filename(self, split_words: bool = False) -> None: if self.comic_archive is not None: # copy the form onto metadata object self.form_to_metadata() @@ -949,12 +959,16 @@ Please choose options below, and select OK. self.settings.complicated_parser, self.settings.remove_c2c, self.settings.remove_fcbd, - remove_publisher=self.settings.remove_publisher, + self.settings.remove_publisher, + split_words, ) if new_metadata is not None: self.metadata.overlay(new_metadata) self.metadata_to_form() + def use_filename_split(self) -> None: + self._use_filename(True) + def select_folder(self) -> None: self.select_file(folder_mode=True) @@ -1676,7 +1690,8 @@ Please choose options below, and select OK. self.settings.complicated_parser, self.settings.remove_c2c, self.settings.remove_fcbd, - remove_publisher=self.settings.remove_publisher, + self.settings.remove_publisher, + dlg.split_words, ) if dlg.ignore_leading_digits_in_filename and md.series is not None: # remove all leading numbers diff --git a/comictaggerlib/ui/autotagstartwindow.ui b/comictaggerlib/ui/autotagstartwindow.ui index 7bbe128..f2d252c 100644 --- a/comictaggerlib/ui/autotagstartwindow.ui +++ b/comictaggerlib/ui/autotagstartwindow.ui @@ -10,7 +10,7 @@ 0 0 519 - 378 + 440 @@ -44,7 +44,7 @@ - + @@ -149,7 +149,20 @@ - + + + + + 0 + 0 + + + + Split words in filenames (e.g. 'judgedredd' to 'judge dredd') (Experimental) + + + + @@ -165,7 +178,7 @@ - + @@ -175,7 +188,7 @@ - + diff --git a/comictaggerlib/ui/taggerwindow.ui b/comictaggerlib/ui/taggerwindow.ui index 2dc7c01..c8981e7 100644 --- a/comictaggerlib/ui/taggerwindow.ui +++ b/comictaggerlib/ui/taggerwindow.ui @@ -1232,6 +1232,7 @@ + @@ -1446,6 +1447,14 @@ Normalize the publisher and map imprints to their parent publisher (e.g. Vertigo is an imprint of DC Comics) + + + Parse Filename and split words + + + Parse Filename and split words + +