242 lines
6.2 KiB
Python
242 lines
6.2 KiB
Python
"""Some generic utilities"""
|
||
|
||
# Copyright 2012-2014 Anthony Beville
|
||
|
||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
# you may not use this file except in compliance with the License.
|
||
# You may obtain a copy of the License at
|
||
|
||
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
||
# Unless required by applicable law or agreed to in writing, software
|
||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
# See the License for the specific language governing permissions and
|
||
# limitations under the License.
|
||
|
||
import codecs
|
||
import locale
|
||
import logging
|
||
import os
|
||
import platform
|
||
import re
|
||
import sys
|
||
import unicodedata
|
||
from collections import defaultdict
|
||
|
||
import pycountry
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class UtilsVars:
    """Namespace for module-wide state shared by the helpers in this file."""

    # Set to True by fix_output_encoding() after it has adjusted stdout/stderr,
    # so that the adjustment is applied at most once per process.
    already_fixed_encoding: bool = False
|
||
|
||
|
||
def indent(elem, level=0):
    """Add whitespace to an element tree, in place, so the XML output is readable.

    Only ``text``/``tail`` values that are missing or whitespace-only are
    rewritten; real text content is left untouched.

    Args:
        elem: Element to format. It must support ``len()``, iteration and
            indexing over its children and expose ``text``/``tail``
            attributes (e.g. ``xml.etree.ElementTree.Element``).
        level: Nesting depth of ``elem``; 0 for the root.

    Returns:
        None. ``elem`` and its descendants are modified in place.
    """
    pad = "\n" + level * " "
    if len(elem):
        # Text before the first child: newline plus one level deeper.
        if not elem.text or not elem.text.strip():
            elem.text = pad + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
        for child in elem:
            indent(child, level + 1)
        # The recursive call gave the last child a tail at the children's
        # depth, but that tail sits right before *this* element's closing
        # tag, so it has to be pulled back to this element's depth.
        last_child = elem[-1]
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = pad
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf below the root: just terminate its line.
        elem.tail = pad
|
||
|
||
|
||
def get_actual_preferred_encoding():
    """Return the name of the text encoding to use for output.

    The locale's preferred encoding is queried first; on macOS
    (``platform.system() == "Darwin"``) that answer is discarded and
    ``"utf-8"`` is returned instead.
    """
    detected = locale.getpreferredencoding()
    return "utf-8" if platform.system() == "Darwin" else detected
|
||
|
||
|
||
def _stream_with_encoding(stream, encoding):
    """Return ``stream`` set up so that text written to it is encoded as ``encoding``."""
    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is not None:
        # Regular text streams can switch encoding in place.
        reconfigure(encoding=encoding)
        return stream
    raw = getattr(stream, "buffer", None)
    if raw is not None:
        # The codecs writer emits bytes, so it must sit on the binary buffer,
        # never on the text stream itself.
        return codecs.getwriter(encoding)(raw)
    # Nothing we know how to adapt (e.g. a replaced or missing stream).
    return stream


def fix_output_encoding():
    """Initialise the locale and make stdout/stderr use the preferred encoding.

    The work is done once per process; later calls return immediately
    because ``UtilsVars.already_fixed_encoding`` has been set.

    Wrapping ``sys.stdout`` itself in a ``codecs`` writer breaks on Python 3:
    the writer encodes to bytes and then fails writing them to a text
    stream. The streams are therefore reconfigured in place when possible,
    otherwise their underlying binary buffer is wrapped.

    Raises:
        locale.Error: if the environment names a locale that cannot be set.
        LookupError: if the preferred encoding is unknown to Python.
    """
    if UtilsVars.already_fixed_encoding:
        return

    # this reads the environment and inits the right locale
    locale.setlocale(locale.LC_ALL, "")

    # try to make stdout/stderr encodings happy for unicode printing
    preferred_encoding = get_actual_preferred_encoding()
    sys.stdout = _stream_with_encoding(sys.stdout, preferred_encoding)
    sys.stderr = _stream_with_encoding(sys.stderr, preferred_encoding)
    UtilsVars.already_fixed_encoding = True
|
||
|
||
|
||
def get_recursive_filelist(pathlist):
    """Expand a list of paths into a flat list of file paths.

    Each entry that is a directory is walked recursively and every file
    found beneath it is added. Any other entry is passed through unchanged,
    whether or not it exists on disk.

    Args:
        pathlist: Iterable of paths; entries that are not ``str`` are
            converted with ``str()`` first.

    Returns:
        A new list of path strings.
    """
    collected = []
    for entry in pathlist:
        # non-str entries are probably a QString
        path = entry if isinstance(entry, str) else str(entry)

        if not os.path.isdir(path):
            collected.append(path)
            continue

        # a folder: walk it recursively and take all files underneath
        for folder, _subdirs, names in os.walk(path):
            collected.extend(
                os.path.join(folder, name if isinstance(name, str) else str(name))
                for name in names
            )

    return collected
|
||
|
||
|
||
def list_to_string(lst):
    """Join the strings in ``lst`` into one ``", "``-separated string.

    ``None`` yields an empty string. A separator is only inserted once the
    result is non-empty, so empty strings at the start of the list leave no
    leading separator.
    """
    if lst is None:
        return ""

    joined = ""
    for piece in lst:
        joined = joined + ", " + piece if joined else joined + piece
    return joined
|
||
|
||
|
||
def add_to_path(dirname):
    """Prepend ``dirname`` to the ``PATH`` environment variable if it is absent.

    Nothing happens when ``dirname`` is ``None`` or empty, or when it is
    already one of the ``PATH`` entries. A missing ``PATH`` variable is
    treated as empty rather than raising ``KeyError``, and in that case
    ``PATH`` becomes just ``dirname`` with no trailing separator (a trailing
    separator would add an empty entry, which POSIX shells read as the
    current directory).

    Args:
        dirname: Directory to add, or ``None``/``""`` for a no-op.
    """
    if dirname is None or dirname == "":
        return

    current = os.environ.get("PATH", "")

    # verify that path doesn't already contain the given dirname
    pattern = r"(^|{sep}){dir}({sep}|$)".format(dir=re.escape(dirname), sep=re.escape(os.pathsep))
    if re.search(pattern, current):
        return

    os.environ["PATH"] = dirname + os.pathsep + current if current else dirname
|
||
|
||
|
||
def which(program):
    """Return the path of the executable ``program``, or ``None`` if not found.

    When ``program`` contains a directory component it is checked as given.
    Otherwise each directory in ``PATH`` is searched in order; if ``PATH``
    is not set, ``os.defpath`` is searched instead of raising ``KeyError``.

    Args:
        program: Executable name or path.

    Returns:
        The first matching path that is a regular file with execute
        permission, or ``None``.
    """

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # An explicit location was given; never fall back to a PATH search.
        return program if is_exe(program) else None

    for folder in os.environ.get("PATH", os.defpath).split(os.pathsep):
        candidate = os.path.join(folder, program)
        if is_exe(candidate):
            return candidate

    return None
|
||
|
||
|
||
def xlate(data, is_int=False):
    """Normalise a raw value to a string or an integer, or ``None`` if empty.

    Args:
        data: Value to convert. ``None`` and ``""`` both yield ``None``.
        is_int: When true, every character of ``str(data)`` other than the
            ASCII digits 0-9 is discarded (signs and decimal points
            included) before converting.

    Returns:
        * ``None`` for empty input, or when ``is_int`` is set and no digits
          remain;
        * the string ``"0"`` when ``is_int`` is set and the digits are
          exactly ``"0"``;
        * an ``int`` for any other digit sequence when ``is_int`` is set;
        * ``str(data)`` otherwise.
    """
    if data is None or data == "":
        return None

    if not is_int:
        return str(data)

    digits = "".join(ch for ch in str(data) if ch in "1234567890")
    if not digits:
        return None
    if digits == "0":
        # a lone zero is handed back as text, not as the integer 0
        return "0"
    return int(digits)
|
||
|
||
|
||
def remove_articles(text):
    """Lower-case ``text`` and drop common filler words from it.

    The text is split on single spaces, every word found in the stop list
    below is removed, and the remaining words are re-joined with single
    spaces. Empty "words" produced by consecutive spaces are kept.
    """
    stop_words = frozenset(
        (
            "&",
            "a",
            "am",
            "an",
            "and",
            "as",
            "at",
            "be",
            "but",
            "by",
            "for",
            "if",
            "is",
            "issue",
            "it",
            "it's",
            "its",
            "itself",
            "of",
            "or",
            "so",
            "the",
            "the",
            "with",
        )
    )
    kept = [word for word in text.lower().split(" ") if word not in stop_words]
    return " ".join(kept)
|
||
|
||
|
||
def sanitize_title(text):
    """Reduce a title to lower-case ASCII words for loose comparison.

    Steps: NFKD-normalise and drop everything that is not ASCII, delete
    apostrophes and double quotes, turn every run of non-alphanumeric
    characters into one space, remove filler words via ``remove_articles``
    and finally lower-case and strip the result.
    """
    # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
    # this will probably cause issues with titles in other character sets e.g. chinese, japanese
    ascii_text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")

    # comicvine keeps apostrophes a part of the word, so quotes are deleted
    # outright instead of becoming a word break
    for quote in ("'", '"'):
        ascii_text = ascii_text.replace(quote, "")

    # comicvine ignores punctuation and accents
    spaced = re.sub(r"[^A-Za-z0-9]+", " ", ascii_text)

    # remove extra space and articles and all lower case
    return remove_articles(spaced).lower().strip()
|
||
|
||
|
||
def unique_file(file_name):
    """Return a path based on ``file_name`` that does not currently exist.

    ``file_name`` itself is returned when nothing exists at that path
    (broken symlinks count as existing). Otherwise `` (1)``, `` (2)``, ...
    is inserted before the extension until an unused name is found.
    """
    stem, extension = os.path.splitext(file_name)
    candidate = file_name
    attempt = 0
    while os.path.lexists(candidate):
        attempt += 1
        candidate = stem + " (" + str(attempt) + ")" + extension
    return candidate
|
||
|
||
|
||
# Lookup tables keyed by pycountry's two-letter ``alpha_2`` code.
# Both are defaultdicts producing None, so indexing an unknown key returns
# None (and, as with any defaultdict, stores that key in the table).
countries = defaultdict(lambda: None)
for c in pycountry.countries:
    # skip records that carry no two-letter code
    if "alpha_2" not in c._fields:
        continue
    countries[c.alpha_2] = c.name

languages = defaultdict(lambda: None)
for lng in pycountry.languages:
    # skip records that carry no two-letter code
    if "alpha_2" not in lng._fields:
        continue
    languages[lng.alpha_2] = lng.name
|
||
|
||
|
||
def get_language_from_iso(iso: str):
    """Return the language name for a two-letter code, or ``None`` if unknown.

    The lookup uses ``.get()`` rather than indexing: ``languages`` is a
    ``defaultdict``, so indexing it with an unknown code would insert a
    ``None`` entry and pollute the shared table on every miss.

    Args:
        iso: Two-letter language code as stored in ``languages``.
    """
    return languages.get(iso)
|
||
|
||
|
||
def get_language(string):
    """Resolve a language code or name to its language name.

    The two-letter table is consulted first through
    ``get_language_from_iso``; on a miss the value is passed to
    ``pycountry.languages.lookup``.

    Args:
        string: Language code or name, or ``None``.

    Returns:
        The language name, or ``None`` when ``string`` is ``None`` or no
        matching language is found.
    """
    if string is None:
        return None

    lang = get_language_from_iso(string)
    if lang is not None:
        return lang

    try:
        return pycountry.languages.lookup(string).name
    except (LookupError, AttributeError):
        # LookupError: pycountry found no record for the value.
        # AttributeError: the value is not string-like (lookup lower-cases it).
        # A bare ``except`` here would also swallow KeyboardInterrupt/SystemExit.
        return None
|