Add experimental word splitting to the filename parser
Adds a global setting, as well as a separate setting that takes effect only during auto-tagging.
This commit is contained in:
parent
55cacf8f37
commit
af84e4a6e8
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
import zipfile
|
import zipfile
|
||||||
import os
|
import os
|
||||||
|
import os.path
|
||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
@ -23,6 +24,7 @@ import subprocess
|
|||||||
import platform
|
import platform
|
||||||
import time
|
import time
|
||||||
import io
|
import io
|
||||||
|
import wordninja
|
||||||
|
|
||||||
import natsort
|
import natsort
|
||||||
from PyPDF2 import PdfFileReader
|
from PyPDF2 import PdfFileReader
|
||||||
@ -1080,11 +1082,14 @@ class ComicArchive:
|
|||||||
data = self.getPage(idx)
|
data = self.getPage(idx)
|
||||||
p['ImageSize'] = str(len(data))
|
p['ImageSize'] = str(len(data))
|
||||||
|
|
||||||
def metadataFromFilename(self, parse_scan_info=True):
|
def metadataFromFilename(self, parse_scan_info=True, split_words=False):
|
||||||
metadata = GenericMetadata()
|
metadata = GenericMetadata()
|
||||||
|
|
||||||
fnp = FileNameParser()
|
fnp = FileNameParser()
|
||||||
fnp.parseFilename(self.path)
|
filename = self.path
|
||||||
|
if split_words:
|
||||||
|
filename = " ".join(wordninja.split(os.path.splitext(os.path.basename(self.path))[0]))
|
||||||
|
fnp.parseFilename(filename)
|
||||||
|
|
||||||
if fnp.issue != "":
|
if fnp.issue != "":
|
||||||
metadata.issue = fnp.issue
|
metadata.issue = fnp.issue
|
||||||
|
@ -180,10 +180,10 @@ class FileNameParser:
|
|||||||
series = re.sub("\(.*?\)", "", series)
|
series = re.sub("\(.*?\)", "", series)
|
||||||
|
|
||||||
# search for volume number
|
# search for volume number
|
||||||
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
|
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)?\s*$', series)
|
||||||
if match:
|
if match:
|
||||||
series = match.group(1)
|
series = match.group(1)
|
||||||
volume = match.group(3)
|
volume = match.group(3) or ""
|
||||||
|
|
||||||
# if a volume wasn't found, see if the last word is a year in parentheses
|
# if a volume wasn't found, see if the last word is a year in parentheses
|
||||||
# since that's a common way to designate the volume
|
# since that's a common way to designate the volume
|
||||||
@ -283,6 +283,9 @@ class FileNameParser:
|
|||||||
self.volume,
|
self.volume,
|
||||||
issue_end)
|
issue_end)
|
||||||
|
|
||||||
|
if self.issue == "" and self.volume != "":
|
||||||
|
self.issue = self.volume
|
||||||
|
|
||||||
if self.issue != "":
|
if self.issue != "":
|
||||||
# strip off leading zeros
|
# strip off leading zeros
|
||||||
self.issue = self.issue.lstrip("0")
|
self.issue = self.issue.lstrip("0")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user