various fixes after merging comicstream-integr
This commit is contained in:
parent
b2532ce03a
commit
d5a13a4206
@ -12,7 +12,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ -31,247 +31,256 @@ from urllib import unquote
|
||||
|
||||
class FileNameParser:
|
||||
|
||||
def repl(self, m):
|
||||
return ' ' * len(m.group())
|
||||
|
||||
def fixSpaces( self, string, remove_dashes=True ):
|
||||
if remove_dashes:
|
||||
placeholders = ['[-_]',' +']
|
||||
else:
|
||||
placeholders = ['[_]',' +']
|
||||
for ph in placeholders:
|
||||
string = re.sub(ph, self.repl, string )
|
||||
return string #.strip()
|
||||
def repl(self, m):
|
||||
return ' ' * len(m.group())
|
||||
|
||||
def fixSpaces( self, string, remove_dashes=True ):
|
||||
if remove_dashes:
|
||||
placeholders = ['[-_]',' +']
|
||||
else:
|
||||
placeholders = ['[_]',' +']
|
||||
for ph in placeholders:
|
||||
string = re.sub(ph, self.repl, string )
|
||||
return string #.strip()
|
||||
|
||||
|
||||
def getIssueCount( self,filename, issue_end ):
|
||||
def getIssueCount( self,filename, issue_end ):
|
||||
|
||||
count = ""
|
||||
filename = filename[issue_end:]
|
||||
|
||||
# replace any name seperators with spaces
|
||||
tmpstr = self.fixSpaces(filename)
|
||||
found = False
|
||||
|
||||
match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE)
|
||||
if match:
|
||||
count = match.group()
|
||||
found = True
|
||||
count = ""
|
||||
filename = filename[issue_end:]
|
||||
|
||||
if not found:
|
||||
match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
|
||||
if match:
|
||||
count = match.group()
|
||||
found = True
|
||||
|
||||
# replace any name seperators with spaces
|
||||
tmpstr = self.fixSpaces(filename)
|
||||
found = False
|
||||
|
||||
count = count.lstrip("0")
|
||||
match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE)
|
||||
if match:
|
||||
count = match.group()
|
||||
found = True
|
||||
|
||||
return count
|
||||
|
||||
def getIssueNumber( self, filename ):
|
||||
if not found:
|
||||
match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
|
||||
if match:
|
||||
count = match.group()
|
||||
found = True
|
||||
|
||||
# Returns a tuple of issue number string, and start and end indexs in the filename
|
||||
# (The indexes will be used to split the string up for further parsing)
|
||||
|
||||
found = False
|
||||
issue = ''
|
||||
start = 0
|
||||
end = 0
|
||||
|
||||
# first, look for multiple "--", this means it's formatted differently from most:
|
||||
if "--" in filename:
|
||||
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
|
||||
filename = re.sub("--.*", self.repl, filename)
|
||||
|
||||
elif "__" in filename:
|
||||
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
|
||||
filename = re.sub("__.*", self.repl, filename)
|
||||
|
||||
filename = filename.replace("+", " ")
|
||||
|
||||
# replace parenthetical phrases with spaces
|
||||
filename = re.sub( "\(.*?\)", self.repl, filename)
|
||||
filename = re.sub( "\[.*?\]", self.repl, filename)
|
||||
count = count.lstrip("0")
|
||||
|
||||
# replace any name seperators with spaces
|
||||
filename = self.fixSpaces(filename)
|
||||
return count
|
||||
|
||||
# remove any "of NN" phrase with spaces (problem: this could break on some titles)
|
||||
filename = re.sub( "of [\d]+", self.repl, filename)
|
||||
def getIssueNumber( self, filename ):
|
||||
|
||||
#print u"[{0}]".format(filename)
|
||||
|
||||
# we should now have a cleaned up filename version with all the words in
|
||||
# the same positions as original filename
|
||||
|
||||
# make a list of each word and its position
|
||||
word_list = list()
|
||||
for m in re.finditer("\S+", filename):
|
||||
word_list.append( (m.group(0), m.start(), m.end()) )
|
||||
|
||||
# remove the first word, since it can't be the issue number
|
||||
if len(word_list) > 1:
|
||||
word_list = word_list[1:]
|
||||
else:
|
||||
#only one word?? just bail.
|
||||
return issue, start, end
|
||||
|
||||
# Now try to search for the likely issue number word in the list
|
||||
|
||||
# first look for a word with "#" followed by digits with optional sufix
|
||||
# this is almost certainly the issue number
|
||||
for w in reversed(word_list):
|
||||
if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
|
||||
found = True
|
||||
break
|
||||
# Returns a tuple of issue number string, and start and end indexs in the filename
|
||||
# (The indexes will be used to split the string up for further parsing)
|
||||
|
||||
# same as above but w/o a '#', and only look at the last word in the list
|
||||
if not found:
|
||||
w = word_list[-1]
|
||||
if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
|
||||
found = True
|
||||
|
||||
# now try to look for a # followed by any characters
|
||||
if not found:
|
||||
for w in reversed(word_list):
|
||||
if re.match("#\S+", w[0]):
|
||||
found = True
|
||||
break
|
||||
|
||||
if found:
|
||||
issue = w[0]
|
||||
start = w[1]
|
||||
end = w[2]
|
||||
if issue[0] == '#':
|
||||
issue = issue[1:]
|
||||
|
||||
return issue, start, end
|
||||
|
||||
def getSeriesName(self, filename, issue_start ):
|
||||
|
||||
# use the issue number string index to split the filename string
|
||||
|
||||
if issue_start != 0:
|
||||
filename = filename[:issue_start]
|
||||
found = False
|
||||
issue = ''
|
||||
start = 0
|
||||
end = 0
|
||||
|
||||
# in case there is no issue number, remove some obvious stuff
|
||||
if "--" in filename:
|
||||
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
|
||||
filename = re.sub("--.*", self.repl, filename)
|
||||
|
||||
elif "__" in filename:
|
||||
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
|
||||
filename = re.sub("__.*", self.repl, filename)
|
||||
|
||||
filename = filename.replace("+", " ")
|
||||
tmpstr = self.fixSpaces(filename, remove_dashes=False)
|
||||
|
||||
series = tmpstr
|
||||
volume = ""
|
||||
# first, look for multiple "--", this means it's formatted differently from most:
|
||||
if "--" in filename:
|
||||
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
|
||||
filename = re.sub("--.*", self.repl, filename)
|
||||
|
||||
#save the last word
|
||||
try:
|
||||
last_word = series.split()[-1]
|
||||
except:
|
||||
last_word = ""
|
||||
|
||||
# remove any parenthetical phrases
|
||||
series = re.sub( "\(.*?\)", "", series)
|
||||
|
||||
# search for volume number
|
||||
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
|
||||
if match:
|
||||
series = match.group(1)
|
||||
volume = match.group(3)
|
||||
|
||||
# if a volume wasn't found, see if the last word is a year in parentheses
|
||||
# since that's a common way to designate the volume
|
||||
if volume == "":
|
||||
#match either (YEAR), (YEAR-), or (YEAR-YEAR2)
|
||||
match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
|
||||
if match:
|
||||
volume = match.group(2)
|
||||
elif "__" in filename:
|
||||
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
|
||||
filename = re.sub("__.*", self.repl, filename)
|
||||
|
||||
series = series.strip()
|
||||
filename = filename.replace("+", " ")
|
||||
|
||||
# if we don't have an issue number (issue_start==0), look
|
||||
# for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might
|
||||
# be removed to help search online
|
||||
if issue_start == 0:
|
||||
one_shot_words = [ "tpb", "os", "one-shot", "ogn", "gn" ]
|
||||
try:
|
||||
last_word = series.split()[-1]
|
||||
if last_word.lower() in one_shot_words:
|
||||
series = series.rsplit(' ', 1)[0]
|
||||
except:
|
||||
pass
|
||||
|
||||
return series, volume.strip()
|
||||
# replace parenthetical phrases with spaces
|
||||
filename = re.sub( "\(.*?\)", self.repl, filename)
|
||||
filename = re.sub( "\[.*?\]", self.repl, filename)
|
||||
|
||||
def getYear( self,filename, issue_end):
|
||||
|
||||
filename = filename[issue_end:]
|
||||
# replace any name seperators with spaces
|
||||
filename = self.fixSpaces(filename)
|
||||
|
||||
year = ""
|
||||
# look for four digit number with "(" ")" or "--" around it
|
||||
match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
|
||||
if match:
|
||||
year = match.group()
|
||||
# remove non-numerics
|
||||
year = re.sub("[^0-9]", "", year)
|
||||
return year
|
||||
# remove any "of NN" phrase with spaces (problem: this could break on some titles)
|
||||
filename = re.sub( "of [\d]+", self.repl, filename)
|
||||
|
||||
def getRemainder( self, filename, year, count, issue_end ):
|
||||
|
||||
#make a guess at where the the non-interesting stuff begins
|
||||
remainder = ""
|
||||
|
||||
if "--" in filename:
|
||||
remainder = filename.split("--",1)[1]
|
||||
elif "__" in filename:
|
||||
remainder = filename.split("__",1)[1]
|
||||
elif issue_end != 0:
|
||||
remainder = filename[issue_end:]
|
||||
#print u"[{0}]".format(filename)
|
||||
|
||||
remainder = self.fixSpaces(remainder, remove_dashes=False)
|
||||
if year != "":
|
||||
remainder = remainder.replace(year,"",1)
|
||||
if count != "":
|
||||
remainder = remainder.replace("of "+count,"",1)
|
||||
|
||||
remainder = remainder.replace("()","")
|
||||
|
||||
return remainder.strip()
|
||||
|
||||
def parseFilename( self, filename ):
|
||||
# we should now have a cleaned up filename version with all the words in
|
||||
# the same positions as original filename
|
||||
|
||||
# remove the path
|
||||
filename = os.path.basename(filename)
|
||||
# make a list of each word and its position
|
||||
word_list = list()
|
||||
for m in re.finditer("\S+", filename):
|
||||
word_list.append( (m.group(0), m.start(), m.end()) )
|
||||
|
||||
# remove the extension
|
||||
filename = os.path.splitext(filename)[0]
|
||||
# remove the first word, since it can't be the issue number
|
||||
if len(word_list) > 1:
|
||||
word_list = word_list[1:]
|
||||
else:
|
||||
#only one word?? just bail.
|
||||
return issue, start, end
|
||||
|
||||
#url decode, just in case
|
||||
filename = unquote(filename)
|
||||
# Now try to search for the likely issue number word in the list
|
||||
|
||||
# sometimes archives get messed up names from too many decodings
|
||||
# often url encodings will break and leave "_28" and "_29" in place
|
||||
# of "(" and ")" see if there are a number of these, and replace them
|
||||
if filename.count("_28") > 1 and filename.count("_29") > 1:
|
||||
filename = filename.replace("_28", "(")
|
||||
filename = filename.replace("_29", ")")
|
||||
|
||||
self.issue, issue_start, issue_end = self.getIssueNumber(filename)
|
||||
self.series, self.volume = self.getSeriesName(filename, issue_start)
|
||||
self.year = self.getYear(filename, issue_end)
|
||||
self.issue_count = self.getIssueCount(filename, issue_end)
|
||||
self.remainder = self.getRemainder( filename, self.year, self.issue_count, issue_end )
|
||||
|
||||
if self.issue != "":
|
||||
# strip off leading zeros
|
||||
self.issue = self.issue.lstrip("0")
|
||||
if self.issue == "":
|
||||
self.issue = "0"
|
||||
if self.issue[0] == ".":
|
||||
self.issue = "0" + self.issue
|
||||
# first look for a word with "#" followed by digits with optional sufix
|
||||
# this is almost certainly the issue number
|
||||
for w in reversed(word_list):
|
||||
if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
|
||||
found = True
|
||||
break
|
||||
|
||||
# same as above but w/o a '#', and only look at the last word in the list
|
||||
if not found:
|
||||
w = word_list[-1]
|
||||
if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
|
||||
found = True
|
||||
|
||||
# now try to look for a # followed by any characters
|
||||
if not found:
|
||||
for w in reversed(word_list):
|
||||
if re.match("#\S+", w[0]):
|
||||
found = True
|
||||
break
|
||||
|
||||
if found:
|
||||
issue = w[0]
|
||||
start = w[1]
|
||||
end = w[2]
|
||||
if issue[0] == '#':
|
||||
issue = issue[1:]
|
||||
|
||||
return issue, start, end
|
||||
|
||||
def getSeriesName(self, filename, issue_start ):
|
||||
|
||||
# use the issue number string index to split the filename string
|
||||
|
||||
if issue_start != 0:
|
||||
filename = filename[:issue_start]
|
||||
|
||||
# in case there is no issue number, remove some obvious stuff
|
||||
if "--" in filename:
|
||||
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
|
||||
filename = re.sub("--.*", self.repl, filename)
|
||||
|
||||
elif "__" in filename:
|
||||
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
|
||||
filename = re.sub("__.*", self.repl, filename)
|
||||
|
||||
filename = filename.replace("+", " ")
|
||||
tmpstr = self.fixSpaces(filename, remove_dashes=False)
|
||||
|
||||
series = tmpstr
|
||||
volume = ""
|
||||
|
||||
#save the last word
|
||||
try:
|
||||
last_word = series.split()[-1]
|
||||
except:
|
||||
last_word = ""
|
||||
|
||||
# remove any parenthetical phrases
|
||||
series = re.sub( "\(.*?\)", "", series)
|
||||
|
||||
# search for volume number
|
||||
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
|
||||
if match:
|
||||
series = match.group(1)
|
||||
volume = match.group(3)
|
||||
|
||||
# if a volume wasn't found, see if the last word is a year in parentheses
|
||||
# since that's a common way to designate the volume
|
||||
if volume == "":
|
||||
#match either (YEAR), (YEAR-), or (YEAR-YEAR2)
|
||||
match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
|
||||
if match:
|
||||
volume = match.group(2)
|
||||
|
||||
series = series.strip()
|
||||
|
||||
# if we don't have an issue number (issue_start==0), look
|
||||
# for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might
|
||||
# be removed to help search online
|
||||
if issue_start == 0:
|
||||
one_shot_words = [ "tpb", "os", "one-shot", "ogn", "gn" ]
|
||||
try:
|
||||
last_word = series.split()[-1]
|
||||
if last_word.lower() in one_shot_words:
|
||||
series = series.rsplit(' ', 1)[0]
|
||||
except:
|
||||
pass
|
||||
|
||||
return series, volume.strip()
|
||||
|
||||
def getYear( self,filename, issue_end):
|
||||
|
||||
filename = filename[issue_end:]
|
||||
|
||||
year = ""
|
||||
# look for four digit number with "(" ")" or "--" around it
|
||||
match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
|
||||
if match:
|
||||
year = match.group()
|
||||
# remove non-numerics
|
||||
year = re.sub("[^0-9]", "", year)
|
||||
return year
|
||||
|
||||
def getRemainder( self, filename, year, count, volume, issue_end ):
|
||||
|
||||
#make a guess at where the the non-interesting stuff begins
|
||||
remainder = ""
|
||||
|
||||
if "--" in filename:
|
||||
remainder = filename.split("--",1)[1]
|
||||
elif "__" in filename:
|
||||
remainder = filename.split("__",1)[1]
|
||||
elif issue_end != 0:
|
||||
remainder = filename[issue_end:]
|
||||
|
||||
remainder = self.fixSpaces(remainder, remove_dashes=False)
|
||||
if volume != "":
|
||||
remainder = remainder.replace("Vol."+volume,"",1)
|
||||
if year != "":
|
||||
remainder = remainder.replace(year,"",1)
|
||||
if count != "":
|
||||
remainder = remainder.replace("of "+count,"",1)
|
||||
|
||||
remainder = remainder.replace("()","")
|
||||
remainder = remainder.replace(" "," ") # cleans some whitespace mess
|
||||
|
||||
return remainder.strip()
|
||||
|
||||
def parseFilename( self, filename ):
|
||||
|
||||
# remove the path
|
||||
filename = os.path.basename(filename)
|
||||
|
||||
# remove the extension
|
||||
filename = os.path.splitext(filename)[0]
|
||||
|
||||
#url decode, just in case
|
||||
filename = unquote(filename)
|
||||
|
||||
# sometimes archives get messed up names from too many decodings
|
||||
# often url encodings will break and leave "_28" and "_29" in place
|
||||
# of "(" and ")" see if there are a number of these, and replace them
|
||||
if filename.count("_28") > 1 and filename.count("_29") > 1:
|
||||
filename = filename.replace("_28", "(")
|
||||
filename = filename.replace("_29", ")")
|
||||
|
||||
self.issue, issue_start, issue_end = self.getIssueNumber(filename)
|
||||
self.series, self.volume = self.getSeriesName(filename, issue_start)
|
||||
|
||||
|
||||
# provides proper value when the filename doesn't have a issue number
|
||||
if issue_end == 0:
|
||||
issue_end=len(self.series)
|
||||
|
||||
self.year = self.getYear(filename, issue_end)
|
||||
self.issue_count = self.getIssueCount(filename, issue_end)
|
||||
self.remainder = self.getRemainder( filename, self.year, self.issue_count, self.volume, issue_end )
|
||||
|
||||
if self.issue != "":
|
||||
# strip off leading zeros
|
||||
self.issue = self.issue.lstrip("0")
|
||||
if self.issue == "":
|
||||
self.issue = "0"
|
||||
if self.issue[0] == ".":
|
||||
self.issue = "0" + self.issue
|
||||
|
@ -55,8 +55,8 @@ class AutoTagMatchWindow(QtGui.QDialog):
|
||||
gridlayout.addWidget(self.archiveCoverWidget)
|
||||
gridlayout.setContentsMargins(0, 0, 0, 0)
|
||||
|
||||
utils.reduceWidgetFontSize(self.twList)
|
||||
utils.reduceWidgetFontSize(self.teDescription, 1)
|
||||
reduceWidgetFontSize(self.twList)
|
||||
reduceWidgetFontSize(self.teDescription, 1)
|
||||
|
||||
self.setWindowFlags(self.windowFlags() |
|
||||
QtCore.Qt.WindowSystemMenuHint |
|
||||
|
@ -55,7 +55,7 @@ class AutoTagProgressWindow(QtGui.QDialog):
|
||||
QtCore.Qt.WindowSystemMenuHint |
|
||||
QtCore.Qt.WindowMaximizeButtonHint)
|
||||
|
||||
utils.reduceWidgetFontSize(self.textEdit)
|
||||
reduceWidgetFontSize(self.textEdit)
|
||||
|
||||
def setArchiveImage(self, img_data):
|
||||
self.setCoverImage(img_data, self.archiveCoverWidget)
|
||||
|
@ -127,7 +127,7 @@ def display_match_set_for_choice(label, match_set, opts, settings):
|
||||
i = int(i) - 1
|
||||
# save the data!
|
||||
# we know at this point, that the file is all good to go
|
||||
ca = ComicArchive(match_set.filename, settings.rar_exe_path)
|
||||
ca = ComicArchive(match_set.filename, settings.rar_exe_path, ComicTaggerSettings.getGraphic('nocover.png'))
|
||||
md = create_local_metadata(
|
||||
opts, ca, ca.hasMetadata(opts.data_style))
|
||||
cv_md = actual_issue_data_fetch(
|
||||
@ -220,7 +220,7 @@ def process_file_cli(filename, opts, settings, match_results):
|
||||
|
||||
batch_mode = len(opts.file_list) > 1
|
||||
|
||||
ca = ComicArchive(filename, settings.rar_exe_path)
|
||||
ca = ComicArchive(filename, settings.rar_exe_path, ComicTaggerSettings.getGraphic('nocover.png'))
|
||||
|
||||
if not os.path.lexists(filename):
|
||||
print >> sys.stderr, "Cannot find " + filename
|
||||
|
@ -70,7 +70,7 @@ class CoverImageWidget(QWidget):
|
||||
|
||||
uic.loadUi(ComicTaggerSettings.getUIFile('coverimagewidget.ui'), self)
|
||||
|
||||
utils.reduceWidgetFontSize(self.label)
|
||||
reduceWidgetFontSize(self.label)
|
||||
|
||||
self.mode = mode
|
||||
self.comicVine = ComicVineTalker()
|
||||
@ -249,7 +249,7 @@ class CoverImageWidget(QWidget):
|
||||
|
||||
# called when the image is done loading from internet
|
||||
def coverRemoteFetchComplete(self, image_data, issue_id):
|
||||
img = utils.getQImageFromData(image_data)
|
||||
img = getQImageFromData(image_data)
|
||||
self.current_pixmap = QPixmap(img)
|
||||
self.setDisplayPixmap(0, 0)
|
||||
#print("ATB cover fetch complete!")
|
||||
|
@ -1,292 +1 @@
|
||||
"""
|
||||
Functions for parsing comic info from filename
|
||||
|
||||
This should probably be re-written, but, well, it mostly works!
|
||||
|
||||
"""
|
||||
|
||||
"""
|
||||
Copyright 2012-2014 Anthony Beville
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
# Some portions of this code were modified from pyComicMetaThis project
|
||||
# http://code.google.com/p/pycomicmetathis/
|
||||
|
||||
import re
|
||||
import os
|
||||
from urllib import unquote
|
||||
|
||||
|
||||
class FileNameParser:
|
||||
|
||||
def repl(self, m):
|
||||
return ' ' * len(m.group())
|
||||
|
||||
def fixSpaces(self, string, remove_dashes=True):
|
||||
if remove_dashes:
|
||||
placeholders = ['[-_]', ' +']
|
||||
else:
|
||||
placeholders = ['[_]', ' +']
|
||||
for ph in placeholders:
|
||||
string = re.sub(ph, self.repl, string)
|
||||
return string # .strip()
|
||||
|
||||
def getIssueCount(self, filename, issue_end):
|
||||
|
||||
count = ""
|
||||
filename = filename[issue_end:]
|
||||
|
||||
# replace any name seperators with spaces
|
||||
tmpstr = self.fixSpaces(filename)
|
||||
found = False
|
||||
|
||||
match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE)
|
||||
if match:
|
||||
count = match.group()
|
||||
found = True
|
||||
|
||||
if not found:
|
||||
match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE)
|
||||
if match:
|
||||
count = match.group()
|
||||
found = True
|
||||
|
||||
count = count.lstrip("0")
|
||||
|
||||
return count
|
||||
|
||||
def getIssueNumber(self, filename):
|
||||
|
||||
# Returns a tuple of issue number string, and start and end indexs in the filename
|
||||
# (The indexes will be used to split the string up for further parsing)
|
||||
|
||||
found = False
|
||||
issue = ''
|
||||
start = 0
|
||||
end = 0
|
||||
|
||||
# first, look for multiple "--", this means it's formatted differently
|
||||
# from most:
|
||||
if "--" in filename:
|
||||
# the pattern seems to be that anything to left of the first "--"
|
||||
# is the series name followed by issue
|
||||
filename = re.sub("--.*", self.repl, filename)
|
||||
|
||||
elif "__" in filename:
|
||||
# the pattern seems to be that anything to left of the first "__"
|
||||
# is the series name followed by issue
|
||||
filename = re.sub("__.*", self.repl, filename)
|
||||
|
||||
filename = filename.replace("+", " ")
|
||||
|
||||
# replace parenthetical phrases with spaces
|
||||
filename = re.sub("\(.*?\)", self.repl, filename)
|
||||
filename = re.sub("\[.*?\]", self.repl, filename)
|
||||
|
||||
# replace any name seperators with spaces
|
||||
filename = self.fixSpaces(filename)
|
||||
|
||||
# remove any "of NN" phrase with spaces (problem: this could break on
|
||||
# some titles)
|
||||
filename = re.sub("of [\d]+", self.repl, filename)
|
||||
|
||||
# print(u"[{0}]".format(filename))
|
||||
|
||||
# we should now have a cleaned up filename version with all the words in
|
||||
# the same positions as original filename
|
||||
|
||||
# make a list of each word and its position
|
||||
word_list = list()
|
||||
for m in re.finditer("\S+", filename):
|
||||
word_list.append((m.group(0), m.start(), m.end()))
|
||||
|
||||
# remove the first word, since it can't be the issue number
|
||||
if len(word_list) > 1:
|
||||
word_list = word_list[1:]
|
||||
else:
|
||||
# only one word?? just bail.
|
||||
return issue, start, end
|
||||
|
||||
# Now try to search for the likely issue number word in the list
|
||||
|
||||
# first look for a word with "#" followed by digits with optional sufix
|
||||
# this is almost certainly the issue number
|
||||
for w in reversed(word_list):
|
||||
if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
|
||||
found = True
|
||||
break
|
||||
|
||||
# same as above but w/o a '#', and only look at the last word in the
|
||||
# list
|
||||
if not found:
|
||||
w = word_list[-1]
|
||||
if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]):
|
||||
found = True
|
||||
|
||||
# now try to look for a # followed by any characters
|
||||
if not found:
|
||||
for w in reversed(word_list):
|
||||
if re.match("#\S+", w[0]):
|
||||
found = True
|
||||
break
|
||||
|
||||
if found:
|
||||
issue = w[0]
|
||||
start = w[1]
|
||||
end = w[2]
|
||||
if issue[0] == '#':
|
||||
issue = issue[1:]
|
||||
|
||||
return issue, start, end
|
||||
|
||||
def getSeriesName(self, filename, issue_start):
|
||||
|
||||
# use the issue number string index to split the filename string
|
||||
|
||||
if issue_start != 0:
|
||||
filename = filename[:issue_start]
|
||||
|
||||
# in case there is no issue number, remove some obvious stuff
|
||||
if "--" in filename:
|
||||
# the pattern seems to be that anything to left of the first "--"
|
||||
# is the series name followed by issue
|
||||
filename = re.sub("--.*", self.repl, filename)
|
||||
|
||||
elif "__" in filename:
|
||||
# the pattern seems to be that anything to left of the first "__"
|
||||
# is the series name followed by issue
|
||||
filename = re.sub("__.*", self.repl, filename)
|
||||
|
||||
filename = filename.replace("+", " ")
|
||||
tmpstr = self.fixSpaces(filename, remove_dashes=False)
|
||||
|
||||
series = tmpstr
|
||||
volume = ""
|
||||
|
||||
# save the last word
|
||||
try:
|
||||
last_word = series.split()[-1]
|
||||
except:
|
||||
last_word = ""
|
||||
|
||||
# remove any parenthetical phrases
|
||||
series = re.sub("\(.*?\)", "", series)
|
||||
|
||||
# search for volume number
|
||||
match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series)
|
||||
if match:
|
||||
series = match.group(1)
|
||||
volume = match.group(3)
|
||||
|
||||
# if a volume wasn't found, see if the last word is a year in parentheses
|
||||
# since that's a common way to designate the volume
|
||||
if volume == "":
|
||||
# match either (YEAR), (YEAR-), or (YEAR-YEAR2)
|
||||
match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
|
||||
if match:
|
||||
volume = match.group(2)
|
||||
|
||||
series = series.strip()
|
||||
|
||||
# if we don't have an issue number (issue_start==0), look
|
||||
# for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might
|
||||
# be removed to help search online
|
||||
if issue_start == 0:
|
||||
one_shot_words = ["tpb", "os", "one-shot", "ogn", "gn"]
|
||||
try:
|
||||
last_word = series.split()[-1]
|
||||
if last_word.lower() in one_shot_words:
|
||||
series = series.rsplit(' ', 1)[0]
|
||||
except:
|
||||
pass
|
||||
|
||||
return series, volume.strip()
|
||||
|
||||
def getYear(self, filename, issue_end):
|
||||
|
||||
filename = filename[issue_end:]
|
||||
|
||||
year = ""
|
||||
# look for four digit number with "(" ")" or "--" around it
|
||||
match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename)
|
||||
if match:
|
||||
year = match.group()
|
||||
# remove non-numerics
|
||||
year = re.sub("[^0-9]", "", year)
|
||||
return year
|
||||
|
||||
def getRemainder(self, filename, year, count, volume, issue_end):
|
||||
|
||||
# make a guess at where the the non-interesting stuff begins
|
||||
remainder = ""
|
||||
|
||||
if "--" in filename:
|
||||
remainder = filename.split("--", 1)[1]
|
||||
elif "__" in filename:
|
||||
remainder = filename.split("__", 1)[1]
|
||||
elif issue_end != 0:
|
||||
remainder = filename[issue_end:]
|
||||
|
||||
remainder = self.fixSpaces(remainder, remove_dashes=False)
|
||||
if volume != "":
|
||||
remainder = remainder.replace("Vol." + volume, "", 1)
|
||||
if year != "":
|
||||
remainder = remainder.replace(year, "", 1)
|
||||
if count != "":
|
||||
remainder = remainder.replace("of " + count, "", 1)
|
||||
|
||||
remainder = remainder.replace("()", "")
|
||||
# cleans some whitespace mess
|
||||
remainder = remainder.replace(" ", " ")
|
||||
|
||||
return remainder.strip()
|
||||
|
||||
def parseFilename(self, filename):
|
||||
|
||||
# remove the path
|
||||
filename = os.path.basename(filename)
|
||||
|
||||
# remove the extension
|
||||
filename = os.path.splitext(filename)[0]
|
||||
|
||||
# url decode, just in case
|
||||
filename = unquote(filename)
|
||||
|
||||
# sometimes archives get messed up names from too many decodings
|
||||
# often url encodings will break and leave "_28" and "_29" in place
|
||||
# of "(" and ")" see if there are a number of these, and replace them
|
||||
if filename.count("_28") > 1 and filename.count("_29") > 1:
|
||||
filename = filename.replace("_28", "(")
|
||||
filename = filename.replace("_29", ")")
|
||||
|
||||
self.issue, issue_start, issue_end = self.getIssueNumber(filename)
|
||||
self.series, self.volume = self.getSeriesName(filename, issue_start)
|
||||
|
||||
# provides proper value when the filename doesn't have a issue number
|
||||
if issue_end == 0:
|
||||
issue_end = len(self.series)
|
||||
|
||||
self.year = self.getYear(filename, issue_end)
|
||||
self.issue_count = self.getIssueCount(filename, issue_end)
|
||||
self.remainder = self.getRemainder(
|
||||
filename, self.year, self.issue_count, self.volume, issue_end)
|
||||
|
||||
if self.issue != "":
|
||||
# strip off leading zeros
|
||||
self.issue = self.issue.lstrip("0")
|
||||
if self.issue == "":
|
||||
self.issue = "0"
|
||||
if self.issue[0] == ".":
|
||||
self.issue = "0" + self.issue
|
||||
from comicapi.filenameparser import *
|
@ -71,7 +71,7 @@ class FileSelectionList(QWidget):
|
||||
|
||||
self.settings = settings
|
||||
|
||||
utils.reduceWidgetFontSize(self.twList)
|
||||
reduceWidgetFontSize(self.twList)
|
||||
|
||||
self.twList.setColumnCount(6)
|
||||
#self.twlist.setHorizontalHeaderLabels (["File", "Folder", "CR", "CBL", ""])
|
||||
@ -204,7 +204,7 @@ class FileSelectionList(QWidget):
|
||||
break
|
||||
progdialog.setValue(idx)
|
||||
progdialog.setLabelText(f)
|
||||
utils.centerWindowOnParent(progdialog)
|
||||
centerWindowOnParent(progdialog)
|
||||
QCoreApplication.processEvents()
|
||||
row = self.addPathItem(f)
|
||||
if firstAdded is None and row is not None:
|
||||
@ -282,7 +282,7 @@ class FileSelectionList(QWidget):
|
||||
if self.isListDupe(path):
|
||||
return self.getCurrentListRow(path)
|
||||
|
||||
ca = ComicArchive(path, self.settings.rar_exe_path)
|
||||
ca = ComicArchive(path, self.settings.rar_exe_path, ComicTaggerSettings.getGraphic('nocover.png'))
|
||||
|
||||
if ca.seemsToBeAComicArchive():
|
||||
row = self.twList.rowCount()
|
||||
|
@ -60,8 +60,8 @@ class IssueSelectionWindow(QtGui.QDialog):
|
||||
gridlayout.addWidget(self.coverWidget)
|
||||
gridlayout.setContentsMargins(0, 0, 0, 0)
|
||||
|
||||
utils.reduceWidgetFontSize(self.twList)
|
||||
utils.reduceWidgetFontSize(self.teDescription, 1)
|
||||
reduceWidgetFontSize(self.twList)
|
||||
reduceWidgetFontSize(self.teDescription, 1)
|
||||
|
||||
self.setWindowFlags(self.windowFlags() |
|
||||
QtCore.Qt.WindowSystemMenuHint |
|
||||
|
@ -68,7 +68,7 @@ class PageLoader(QtCore.QThread):
|
||||
return
|
||||
|
||||
if image_data is not None:
|
||||
img = utils.getQImageFromData(image_data)
|
||||
img = getQImageFromData(image_data)
|
||||
|
||||
if self.abandoned:
|
||||
return
|
||||
|
@ -192,6 +192,10 @@ class TaggerWindow(QtGui.QMainWindow):
|
||||
validator = QtGui.QIntValidator(1, 12, self)
|
||||
self.lePubMonth.setValidator(validator)
|
||||
|
||||
# TODO: for now keep it simple, ideally we should check the full date
|
||||
validator = QtGui.QIntValidator(1, 31, self)
|
||||
self.lePubDay.setValidator(validator)
|
||||
|
||||
validator = QtGui.QIntValidator(1, 99999, self)
|
||||
self.leIssueCount.setValidator(validator)
|
||||
self.leVolumeNum.setValidator(validator)
|
||||
@ -784,6 +788,7 @@ class TaggerWindow(QtGui.QMainWindow):
|
||||
assignText(self.lePublisher, md.publisher)
|
||||
assignText(self.lePubMonth, md.month)
|
||||
assignText(self.lePubYear, md.year)
|
||||
assignText(self.lePubDay, md.day)
|
||||
assignText(self.leGenre, md.genre)
|
||||
assignText(self.leImprint, md.imprint)
|
||||
assignText(self.teComments, md.comments)
|
||||
@ -910,6 +915,7 @@ class TaggerWindow(QtGui.QMainWindow):
|
||||
md.publisher = xlate(self.lePublisher.text(), "str")
|
||||
md.month = xlate(self.lePubMonth.text(), "int")
|
||||
md.year = xlate(self.lePubYear.text(), "int")
|
||||
md.day = xlate(self.lePubDay.text(), "int")
|
||||
md.genre = xlate(self.leGenre.text(), "str")
|
||||
md.imprint = xlate(self.leImprint.text(), "str")
|
||||
md.comments = xlate(self.teComments.toPlainText(), "str")
|
||||
|
Loading…
Reference in New Issue
Block a user