diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py index 02ee666..a9b2e42 100644 --- a/comicapi/comicarchive.py +++ b/comicapi/comicarchive.py @@ -16,6 +16,7 @@ import zipfile import os +import os.path import struct import sys import tempfile @@ -23,6 +24,7 @@ import subprocess import platform import time import io +import wordninja import natsort from PyPDF2 import PdfFileReader @@ -1080,11 +1082,14 @@ class ComicArchive: data = self.getPage(idx) p['ImageSize'] = str(len(data)) - def metadataFromFilename(self, parse_scan_info=True): + def metadataFromFilename(self, parse_scan_info=True, split_words=False): metadata = GenericMetadata() fnp = FileNameParser() - fnp.parseFilename(self.path) + filename = self.path + if split_words: + filename = " ".join(wordninja.split(os.path.splitext(os.path.basename(self.path))[0])) + fnp.parseFilename(filename) if fnp.issue != "": metadata.issue = fnp.issue diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py index 476d14b..dd29e24 100644 --- a/comicapi/filenameparser.py +++ b/comicapi/filenameparser.py @@ -180,10 +180,10 @@ class FileNameParser: series = re.sub("\(.*?\)", "", series) # search for volume number - match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series) + match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)?\s*$', series) if match: series = match.group(1) - volume = match.group(3) + volume = match.group(3) or "" # if a volume wasn't found, see if the last word is a year in parentheses # since that's a common way to designate the volume @@ -283,6 +283,9 @@ class FileNameParser: self.volume, issue_end) + if self.issue == "" and self.volume != "": + self.issue = self.volume + if self.issue != "": # strip off leading zeros self.issue = self.issue.lstrip("0")