Improve issue identification

Move title sanitizing code to utils module
Update issue identifier to compare sanitized names
This commit is contained in:
lordwelch 2021-09-26 17:06:30 -07:00
parent 9abd8f0cc8
commit a884974a9c

View File

@ -21,6 +21,7 @@ import re
import platform import platform
import locale import locale
import codecs import codecs
import unicodedata
class UtilsVars: class UtilsVars:
@ -151,6 +152,21 @@ def removearticles(text):
return newText return newText
def sanitize_title(text):
    """Return a simplified form of a title suitable for name comparison.

    The title is folded to ASCII, stripped of quote characters, has every
    other run of non-alphanumeric characters collapsed to a single space,
    is passed through ``removearticles`` (defined elsewhere in this module),
    and is finally lower-cased with surrounding whitespace trimmed.

    Known limitations:
      * Compatibility decomposition is lossy for some characters; for
        example the vulgar fraction "½" ends up as "12" rather than "1/2".
      * Characters with no ASCII decomposition are dropped entirely, so
        titles written in other character sets (e.g. Chinese, Japanese)
        will probably not survive this step in a useful form.
    """
    # Decompose with NFKD so accented letters become base letter + combining
    # mark, then discard everything that is not plain ASCII.
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_text = decomposed.encode('ascii', 'ignore').decode('ascii')

    # Comic Vine keeps apostrophes as part of the word, so quotes are
    # removed outright instead of being turned into a word separator.
    for quote_char in ("'", "\""):
        ascii_text = ascii_text.replace(quote_char, "")

    # Comic Vine ignores punctuation and accents: any remaining run of
    # non-alphanumeric characters becomes a single space.
    spaced = re.sub(r'[^A-Za-z0-9]+',' ', ascii_text)

    # Drop articles, then normalize case and trim leftover edge whitespace.
    without_articles = removearticles(spaced)
    return without_articles.lower().strip()
def unique_file(file_name): def unique_file(file_name):
counter = 1 counter = 1
# returns ('/path/file', '.ext') # returns ('/path/file', '.ext')