Improve issue identification
Move title sanitizing code to utils module Update issue identifier to compare sanitized names
This commit is contained in:
parent
9abd8f0cc8
commit
a884974a9c
@ -21,6 +21,7 @@ import re
|
||||
import platform
|
||||
import locale
|
||||
import codecs
|
||||
import unicodedata
|
||||
|
||||
|
||||
class UtilsVars:
|
||||
@ -151,6 +152,21 @@ def removearticles(text):
|
||||
return newText
|
||||
|
||||
|
||||
def sanitize_title(text):
|
||||
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
|
||||
# this will probably cause issues with titles in other character sets e.g. chinese, japanese
|
||||
text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
|
||||
# comicvine keeps apostrophes a part of the word
|
||||
text = text.replace("'", "")
|
||||
text = text.replace("\"", "")
|
||||
# comicvine ignores punctuation and accents
|
||||
text = re.sub(r'[^A-Za-z0-9]+',' ', text)
|
||||
# remove extra space and articles and all lower case
|
||||
text = removearticles(text).lower().strip()
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def unique_file(file_name):
|
||||
counter = 1
|
||||
# returns ('/path/file', '.ext')
|
||||
|
Loading…
x
Reference in New Issue
Block a user