Add experimental word splitting to the filename parser
Adds a global setting as well as a setting that is only in effect during auto-tagging
This commit is contained in:
parent
444e67100c
commit
cdeca34791
@ -16,6 +16,7 @@
|
||||
|
||||
import zipfile
|
||||
import os
|
||||
import os.path
|
||||
import struct
|
||||
import sys
|
||||
import tempfile
|
||||
@ -23,6 +24,7 @@ import subprocess
|
||||
import platform
|
||||
import time
|
||||
import io
|
||||
import wordninja
|
||||
|
||||
import natsort
|
||||
from PyPDF2 import PdfFileReader
|
||||
@ -1080,11 +1082,14 @@ class ComicArchive:
|
||||
data = self.getPage(idx)
|
||||
p['ImageSize'] = str(len(data))
|
||||
|
||||
def metadataFromFilename(self, parse_scan_info=True):
|
||||
def metadataFromFilename(self, parse_scan_info=True, split_words=False):
|
||||
metadata = GenericMetadata()
|
||||
|
||||
fnp = FileNameParser()
|
||||
fnp.parseFilename(self.path)
|
||||
filename = self.path
|
||||
if split_words:
|
||||
filename = " ".join(wordninja.split(os.path.splitext(os.path.basename(self.path))[0]))
|
||||
fnp.parseFilename(filename)
|
||||
|
||||
if fnp.issue != "":
|
||||
metadata.issue = fnp.issue
|
||||
|
@ -180,10 +180,10 @@ class FileNameParser:
|
||||
series = re.sub("\(.*?\)", "", series)
|
||||
|
||||
# search for volume number
|
||||
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
|
||||
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)?\s*$', series)
|
||||
if match:
|
||||
series = match.group(1)
|
||||
volume = match.group(3)
|
||||
volume = match.group(3) or ""
|
||||
|
||||
# if a volume wasn't found, see if the last word is a year in parentheses
|
||||
# since that's a common way to designate the volume
|
||||
@ -283,6 +283,9 @@ class FileNameParser:
|
||||
self.volume,
|
||||
issue_end)
|
||||
|
||||
if self.issue == "" and self.volume != "":
|
||||
self.issue = self.volume
|
||||
|
||||
if self.issue != "":
|
||||
# strip off leading zeros
|
||||
self.issue = self.issue.lstrip("0")
|
||||
|
@ -45,6 +45,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
QtCore.Qt.Unchecked)
|
||||
self.cbxRemoveAfterSuccess.setCheckState(QtCore.Qt.Unchecked)
|
||||
self.cbxSpecifySearchString.setCheckState(QtCore.Qt.Unchecked)
|
||||
self.cbxSplitWords.setCheckState(QtCore.Qt.Unchecked)
|
||||
self.leNameLengthMatchTolerance.setText(
|
||||
str(self.settings.id_length_delta_thresh))
|
||||
self.leSearchString.setEnabled(False)
|
||||
@ -62,6 +63,8 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
self.cbxRemoveAfterSuccess.setCheckState(QtCore.Qt.Checked)
|
||||
if self.settings.wait_and_retry_on_rate_limit:
|
||||
self.cbxWaitForRateLimit.setCheckState(QtCore.Qt.Checked)
|
||||
if self.settings.split_words:
|
||||
self.cbxSplitWords.setCheckState(QtCore.Qt.Checked)
|
||||
|
||||
nlmtTip = (
|
||||
""" <html>The <b>Name Length Match Tolerance</b> is for eliminating automatic
|
||||
@ -96,6 +99,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
self.waitAndRetryOnRateLimit = False
|
||||
self.searchString = None
|
||||
self.nameLengthMatchTolerance = self.settings.id_length_delta_thresh
|
||||
self.splitWords = self.cbxSplitWords.isChecked()
|
||||
|
||||
def searchStringToggle(self):
|
||||
enable = self.cbxSpecifySearchString.isChecked()
|
||||
@ -112,6 +116,7 @@ class AutoTagStartWindow(QtWidgets.QDialog):
|
||||
self.nameLengthMatchTolerance = int(
|
||||
self.leNameLengthMatchTolerance.text())
|
||||
self.waitAndRetryOnRateLimit = self.cbxWaitForRateLimit.isChecked()
|
||||
self.splitWords = self.cbxSplitWords.isChecked()
|
||||
|
||||
# persist some settings
|
||||
self.settings.save_on_low_confidence = self.autoSaveOnLow
|
||||
|
@ -88,6 +88,7 @@ class ComicTaggerSettings:
|
||||
|
||||
# filename parsing settings
|
||||
self.parse_scan_info = True
|
||||
self.split_words = False
|
||||
|
||||
# Comic Vine settings
|
||||
self.use_series_start_as_volume = False
|
||||
@ -229,6 +230,9 @@ class ComicTaggerSettings:
|
||||
if self.config.has_option('filenameparser', 'parse_scan_info'):
|
||||
self.parse_scan_info = self.config.getboolean(
|
||||
'filenameparser', 'parse_scan_info')
|
||||
if self.config.has_option('filenameparser', 'split_words'):
|
||||
self.split_words = self.config.getboolean(
|
||||
'filenameparser', 'split_words')
|
||||
|
||||
if self.config.has_option('dialogflags', 'ask_about_cbi_in_rar'):
|
||||
self.ask_about_cbi_in_rar = self.config.getboolean(
|
||||
@ -396,6 +400,8 @@ class ComicTaggerSettings:
|
||||
|
||||
self.config.set(
|
||||
'filenameparser', 'parse_scan_info', self.parse_scan_info)
|
||||
self.config.set(
|
||||
'filenameparser', 'split_words', self.split_words)
|
||||
|
||||
if not self.config.has_section('comicvine'):
|
||||
self.config.add_section('comicvine')
|
||||
|
@ -128,6 +128,8 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
|
||||
if self.settings.parse_scan_info:
|
||||
self.cbxParseScanInfo.setCheckState(QtCore.Qt.Checked)
|
||||
if self.settings.split_words:
|
||||
self.cbxSplitWords.setCheckState(QtCore.Qt.Checked)
|
||||
|
||||
if self.settings.use_series_start_as_volume:
|
||||
self.cbxUseSeriesStartAsVolume.setCheckState(QtCore.Qt.Checked)
|
||||
@ -189,6 +191,7 @@ class SettingsWindow(QtWidgets.QDialog):
|
||||
self.tePublisherBlacklist.toPlainText())
|
||||
|
||||
self.settings.parse_scan_info = self.cbxParseScanInfo.isChecked()
|
||||
self.settings.split_words = self.cbxSplitWords.isChecked()
|
||||
|
||||
self.settings.use_series_start_as_volume = self.cbxUseSeriesStartAsVolume.isChecked()
|
||||
self.settings.clear_form_before_populating_from_cv = self.cbxClearFormBeforePopulating.isChecked()
|
||||
|
@ -595,7 +595,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
|
||||
def actualLoadCurrentArchive(self):
|
||||
if self.metadata.isEmpty:
|
||||
self.metadata = self.comic_archive.metadataFromFilename(
|
||||
self.settings.parse_scan_info)
|
||||
self.settings.parse_scan_info, self.settings.split_words)
|
||||
if len(self.metadata.pages) == 0:
|
||||
self.metadata.setDefaultPageList(
|
||||
self.comic_archive.getNumberOfPages())
|
||||
@ -965,7 +965,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
|
||||
# copy the form onto metadata object
|
||||
self.formToMetadata()
|
||||
new_metadata = self.comic_archive.metadataFromFilename(
|
||||
self.settings.parse_scan_info)
|
||||
self.settings.parse_scan_info, self.settings.split_words)
|
||||
if new_metadata is not None:
|
||||
self.metadata.overlay(new_metadata)
|
||||
self.metadataToForm()
|
||||
@ -1727,14 +1727,14 @@ class TaggerWindow(QtWidgets.QMainWindow):
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
QtCore.QCoreApplication.processEvents()
|
||||
|
||||
def identifyAndTagSingleArchive(self, ca, match_results, dlg):
|
||||
def identifyAndTagSingleArchive(self, ca, match_results, dlg, split_words):
|
||||
success = False
|
||||
ii = IssueIdentifier(ca, self.settings)
|
||||
|
||||
# read in metadata, and parse file name if not there
|
||||
md = ca.readMetadata(self.save_data_style)
|
||||
if md.isEmpty:
|
||||
md = ca.metadataFromFilename(self.settings.parse_scan_info)
|
||||
md = ca.metadataFromFilename(self.settings.parse_scan_info, split_words)
|
||||
if dlg.ignoreLeadingDigitsInFilename and md.series is not None:
|
||||
# remove all leading numbers
|
||||
md.series = re.sub("([\d.]*)(.*)", "\\2", md.series)
|
||||
@ -1892,7 +1892,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
|
||||
|
||||
if ca.isWritable():
|
||||
success, match_results = self.identifyAndTagSingleArchive(
|
||||
ca, match_results, atstartdlg)
|
||||
ca, match_results, atstartdlg, atstartdlg.splitWords)
|
||||
|
||||
if success and atstartdlg.removeAfterSuccess:
|
||||
archives_to_remove.append(ca)
|
||||
|
@ -10,7 +10,7 @@
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>519</width>
|
||||
<height>378</height>
|
||||
<height>420</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="sizePolicy">
|
||||
@ -44,7 +44,7 @@
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<layout class="QGridLayout" name="gridLayout">
|
||||
<item row="6" column="0">
|
||||
<item row="7" column="0">
|
||||
<widget class="QCheckBox" name="cbxSpecifySearchString">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
|
||||
@ -96,6 +96,19 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="6" column="0">
|
||||
<widget class="QCheckBox" name="cbxSplitWords">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Minimum" vsizetype="Fixed">
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Split words in filenames (e.g. 'judgedredd' to 'judge dredd') (Experimental)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QCheckBox" name="cbxWaitForRateLimit">
|
||||
<property name="text">
|
||||
@ -129,7 +142,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="10" column="0">
|
||||
<item row="11" column="0">
|
||||
<widget class="QLineEdit" name="leNameLengthMatchTolerance">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
||||
@ -145,7 +158,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="7" column="0">
|
||||
<item row="8" column="0">
|
||||
<widget class="QLineEdit" name="leSearchString">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
|
||||
@ -155,7 +168,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="8" column="0">
|
||||
<item row="9" column="0">
|
||||
<widget class="QLabel" name="label_3">
|
||||
<property name="sizePolicy">
|
||||
<sizepolicy hsizetype="Preferred" vsizetype="Fixed">
|
||||
|
@ -225,6 +225,19 @@
|
||||
<string>Parse Scan Info From Filename (Experimental)</string>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QCheckBox" name="cbxSplitWords">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>30</x>
|
||||
<y>60</y>
|
||||
<width>541</width>
|
||||
<height>25</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Split words apart in filenames (e.g. 'judgedredd' to 'judge dredd') (Experimental)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</widget>
|
||||
<widget class="QWidget" name="tab_3">
|
||||
<attribute name="title">
|
||||
|
Loading…
x
Reference in New Issue
Block a user