Add experimental word splitting to the filename parser

Adds a global setting for word splitting, as well as a separate setting
that takes effect only during auto-tagging.
This commit is contained in:
lordwelch 2021-08-29 17:09:57 -07:00 committed by Timmy Welch
parent 55cacf8f37
commit af84e4a6e8
2 changed files with 12 additions and 4 deletions

View File

@ -16,6 +16,7 @@
import zipfile import zipfile
import os import os
import os.path
import struct import struct
import sys import sys
import tempfile import tempfile
@ -23,6 +24,7 @@ import subprocess
import platform import platform
import time import time
import io import io
import wordninja
import natsort import natsort
from PyPDF2 import PdfFileReader from PyPDF2 import PdfFileReader
@ -1080,11 +1082,14 @@ class ComicArchive:
data = self.getPage(idx) data = self.getPage(idx)
p['ImageSize'] = str(len(data)) p['ImageSize'] = str(len(data))
def metadataFromFilename(self, parse_scan_info=True): def metadataFromFilename(self, parse_scan_info=True, split_words=False):
metadata = GenericMetadata() metadata = GenericMetadata()
fnp = FileNameParser() fnp = FileNameParser()
fnp.parseFilename(self.path) filename = self.path
if split_words:
filename = " ".join(wordninja.split(os.path.splitext(os.path.basename(self.path))[0]))
fnp.parseFilename(filename)
if fnp.issue != "": if fnp.issue != "":
metadata.issue = fnp.issue metadata.issue = fnp.issue

View File

@ -180,10 +180,10 @@ class FileNameParser:
series = re.sub("\(.*?\)", "", series) series = re.sub("\(.*?\)", "", series)
# search for volume number # search for volume number
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series) match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)?\s*$', series)
if match: if match:
series = match.group(1) series = match.group(1)
volume = match.group(3) volume = match.group(3) or ""
# if a volume wasn't found, see if the last word is a year in parentheses # if a volume wasn't found, see if the last word is a year in parentheses
# since that's a common way to designate the volume # since that's a common way to designate the volume
@ -283,6 +283,9 @@ class FileNameParser:
self.volume, self.volume,
issue_end) issue_end)
if self.issue == "" and self.volume != "":
self.issue = self.volume
if self.issue != "": if self.issue != "":
# strip off leading zeros # strip off leading zeros
self.issue = self.issue.lstrip("0") self.issue = self.issue.lstrip("0")