264 lines
7.2 KiB
Python
264 lines
7.2 KiB
Python
"""Some generic utilities"""
|
||
|
||
# Copyright 2012-2014 Anthony Beville
|
||
|
||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
# you may not use this file except in compliance with the License.
|
||
# You may obtain a copy of the License at
|
||
|
||
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
||
# Unless required by applicable law or agreed to in writing, software
|
||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
# See the License for the specific language governing permissions and
|
||
# limitations under the License.
|
||
|
||
import json
|
||
import logging
|
||
import os
|
||
import pathlib
|
||
import re
|
||
import unicodedata
|
||
from collections import defaultdict
|
||
from typing import Any, Iterable, List, Optional, Union
|
||
|
||
import pycountry
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class UtilsVars:
    """Holder for module-wide mutable state."""

    # Boolean flag that starts out False. Nothing in this part of the file reads
    # or sets it. NOTE(review): presumably flipped once an encoding fix has been
    # applied so it is not repeated — confirm against the callers.
    already_fixed_encoding = False
|
||
|
||
|
||
def get_recursive_filelist(pathlist: List[str]) -> List[str]:
    """Expand every directory in *pathlist* into the files found beneath it.

    Entries that are not directories are passed through as-is (after being
    coerced to ``str``, no existence check is made). Directories are walked
    recursively and contribute the joined path of every file they contain.
    The result keeps the order in which the entries were given.
    """
    collected: List[str] = []
    for entry in pathlist:
        # anything that is not a str is probably a QString; coerce it up front
        entry = entry if isinstance(entry, str) else str(entry)

        if not os.path.isdir(entry):
            collected.append(entry)
            continue

        # a folder: gather every file underneath it, at any depth
        for dirpath, _, filenames in os.walk(entry):
            collected.extend(
                os.path.join(dirpath, name if isinstance(name, str) else str(name)) for name in filenames
            )

    return collected
|
||
|
||
|
||
def list_to_string(lst: List[Union[str, Any]]) -> str:
    """Join the items of *lst* into a single ``", "``-separated string.

    Every item is converted with ``str()`` first, so non-string items (which
    the signature allows) no longer raise ``TypeError`` during concatenation.

    Returns an empty string when *lst* is ``None`` or empty.
    """
    if lst is None:
        return ""

    joined = ""
    for item in lst:
        # The separator is only emitted once some text has accumulated, so
        # leading empty items contribute nothing (kept from the original).
        if joined:
            joined += ", "
        joined += str(item)
    return joined
|
||
|
||
|
||
def add_to_path(dirname: str) -> None:
    """Prepend *dirname* to the ``PATH`` environment variable unless already listed.

    Does nothing when *dirname* is ``None`` or empty. A missing ``PATH``
    variable is treated as empty instead of raising ``KeyError``, and in that
    case ``PATH`` becomes just *dirname* (no trailing separator, which would
    add an empty entry that POSIX shells interpret as the current directory).
    """
    if not dirname:
        return

    sep = os.pathsep
    current = os.environ.get("PATH", "")

    # Wrapping both sides in separators makes a plain substring test match
    # only whole entries, exactly like the separator/anchor-delimited regex did.
    if f"{sep}{dirname}{sep}" in f"{sep}{current}{sep}":
        return

    os.environ["PATH"] = f"{dirname}{sep}{current}" if current else dirname
|
||
|
||
|
||
def which(program: str) -> Optional[str]:
    """Return the path of the executable *program*, or ``None`` if not found.

    When *program* contains a directory component it is checked directly and
    returned unchanged if it is an executable file. Otherwise each directory
    listed in ``PATH`` is searched in order and the first executable match is
    returned. If ``PATH`` is not set, ``os.defpath`` is searched instead of
    raising ``KeyError``.
    """

    def is_exe(fpath: str) -> bool:
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # an explicit location was given: never fall back to a PATH search
        return program if is_exe(program) else None

    for path in os.environ.get("PATH", os.defpath).split(os.pathsep):
        candidate = os.path.join(path, program)
        if is_exe(candidate):
            return candidate

    return None
|
||
|
||
|
||
def xlate(data: Any, is_int: bool = False) -> Any:
    """Normalise a raw value to ``None``, a string, or an integer.

    ``None`` and the empty string become ``None``. Without *is_int* the value
    is returned as ``str(data)``. With *is_int* every character other than the
    ASCII digits 0-9 is discarded (so signs and separators are lost) and:

    * no digits left -> ``None``
    * exactly ``"0"`` left -> the string ``"0"`` (not the integer)
    * anything else -> ``int`` of the remaining digits
    """
    if data is None or data == "":
        return None

    if not is_int:
        return str(data)

    digits = "".join(ch for ch in str(data) if ch in "1234567890")
    if digits == "":
        return None
    if digits == "0":
        return "0"
    return int(digits)
|
||
|
||
|
||
def remove_articles(text: str) -> str:
    """Lower-case *text* and drop the common filler words from it.

    The text is split on single spaces, every word found in the stop list
    below is removed, and the survivors are re-joined with single spaces.
    Empty "words" produced by consecutive spaces are kept.
    """
    stop_words = {
        "&",
        "a",
        "am",
        "an",
        "and",
        "as",
        "at",
        "be",
        "but",
        "by",
        "for",
        "if",
        "is",
        "issue",
        "it",
        "it's",
        "its",
        "itself",
        "of",
        "or",
        "so",
        "the",
        "with",
    }
    kept = [word for word in text.lower().split(" ") if word not in stop_words]
    return " ".join(kept)
|
||
|
||
|
||
def sanitize_title(text: str) -> str:
    """Reduce a title to lower-case ASCII words with filler words removed.

    Steps, in order: NFKD-normalise and drop everything that is not ASCII,
    delete single and double quotes, collapse every run of non-alphanumeric
    characters into one space, remove articles, then lower-case and strip.
    """
    # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
    # this will probably cause issues with titles in other character sets e.g. chinese, japanese
    ascii_text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")

    # comicvine keeps apostrophes a part of the word, so quotes are deleted rather than spaced out
    for quote in ("'", '"'):
        ascii_text = ascii_text.replace(quote, "")

    # comicvine ignores punctuation and accents
    spaced = re.sub(r"[^A-Za-z0-9]+", " ", ascii_text)

    # remove extra space and articles and all lower case
    return remove_articles(spaced).lower().strip()
|
||
|
||
|
||
def unique_file(file_name: str) -> str:
    """Return *file_name*, or a numbered variant of it that does not exist yet.

    If the path is free it is returned unchanged. Otherwise `` (1)``,
    `` (2)``, ... is inserted before the extension until a path is found for
    which ``os.path.lexists`` is false.
    """
    stem, extension = os.path.splitext(file_name)
    candidate = file_name
    attempt = 0
    while os.path.lexists(candidate):
        attempt += 1
        candidate = f"{stem} ({attempt}){extension}"
    return candidate
|
||
|
||
|
||
# Lookup tables built once at import time from the pycountry databases:
# two-letter (alpha_2) code -> display name. Both are defaultdicts, so
# indexing with an unknown code yields None instead of raising KeyError.
countries: dict[Optional[str], Optional[str]] = defaultdict(lambda: None)
languages: dict[Optional[str], Optional[str]] = defaultdict(lambda: None)

for c in pycountry.countries:
    # records without a two-letter code are left out
    if "alpha_2" not in c._fields:
        continue
    countries[c.alpha_2] = c.name

for lng in pycountry.languages:
    # records without a two-letter code are left out
    if "alpha_2" not in lng._fields:
        continue
    languages[lng.alpha_2] = lng.name
|
||
|
||
|
||
def get_language_from_iso(iso: Optional[str]) -> Optional[str]:
    """Return the language name stored for the two-letter code *iso*, or ``None``.

    Uses ``.get`` rather than indexing: ``languages`` is a ``defaultdict``, so
    indexing an unknown code would insert a ``None`` entry into the shared
    table on every miss. The returned value is the same either way.
    """
    return languages.get(iso)
|
||
|
||
|
||
def get_language(string: Optional[str]) -> Optional[str]:
    """Resolve *string* to a language name, or ``None`` if it cannot be resolved.

    The two-letter code table is consulted first via ``get_language_from_iso``;
    if that yields nothing, ``pycountry.languages.lookup`` is tried as a
    fallback. ``None`` input returns ``None``.
    """
    if string is None:
        return None

    lang = get_language_from_iso(string)
    if lang is not None:
        return lang

    try:
        return str(pycountry.languages.lookup(string).name)
    except Exception:
        # Best-effort lookup: any failure means "unknown language". Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        return None
|
||
|
||
|
||
def get_publisher(publisher: str) -> tuple[str, str]:
    """Split a publisher name into ``(imprint, publisher)``.

    Each registered entry in ``publishers`` is asked about the name in turn;
    the first one that reports a match decides the result. When nothing
    matches, the values produced by the last lookup are returned (or
    ``("", publisher)`` if the registry is empty). ``None`` gives ``("", "")``.
    """
    if publisher is None:
        return ("", "")

    imprint = ""
    for imprint_map in publishers.values():
        imprint, publisher, matched = imprint_map[publisher]
        if matched:
            return (imprint, publisher)

    return (imprint, publisher)
|
||
|
||
|
||
def update_publishers(new_publishers: dict[str, dict[str, str]]) -> None:
    """Merge *new_publishers* into the module-level ``publishers`` registry.

    For a publisher that is already registered, its existing entry is updated
    with the new imprint mapping; for a new publisher, a fresh ``ImprintDict``
    is created from the mapping.
    """
    for name, imprints in new_publishers.items():
        if name not in publishers:
            publishers[name] = ImprintDict(name, imprints)
            continue
        publishers[name].update(imprints)
|
||
|
||
|
||
class ImprintDict(dict):
    """
    Mapping of lowercased imprint names to their proper imprint name, tied to
    one publisher.

    Retrieving a value with ``d[key]`` returns a tuple of
    (imprint, publisher, keyExists). The lookup casefolds the key first.
    If the key is the publisher's own name the imprint is empty; if the key
    does not exist the key is returned as the publisher unchanged.
    """

    def __init__(self, publisher: str, mapping: Iterable = (), **kwargs: Any):
        super().__init__(mapping, **kwargs)
        self.publisher = publisher

    def __missing__(self, key: str) -> None:
        # unknown keys resolve to None instead of raising KeyError
        return None

    def __getitem__(self, k: str) -> tuple[str, str, bool]:
        folded = k.casefold()
        imprint = super().__getitem__(folded)

        # the publisher's own name always matches, with no imprint
        if folded == self.publisher.casefold():
            return ("", self.publisher, True)

        if imprint is not None:
            return (imprint, self.publisher, True)

        # no such imprint: hand the key back untouched and flag the miss
        return ("", k, False)

    def copy(self) -> "ImprintDict":
        # dict.copy() would return a plain dict; rebuild to keep type and publisher
        return ImprintDict(self.publisher, super().copy())
|
||
|
||
|
||
# Module-level registry: publisher name -> ImprintDict holding that publisher's imprints.
# Starts empty; update_publishers() adds and merges entries.
publishers: dict[str, ImprintDict] = {}
|
||
|
||
|
||
def load_publishers() -> None:
    """Load ``data/publishers.json`` (next to this module) into the registry.

    The file is read as UTF-8, parsed as JSON and passed to
    ``update_publishers``. Any failure is logged with its traceback and
    otherwise ignored, leaving the registry as it was.
    """
    try:
        data_file = pathlib.Path(__file__).parent / "data" / "publishers.json"
        parsed = json.loads(data_file.read_text("utf-8"))
        update_publishers(parsed)
    except Exception:
        logger.exception("Failed to load publishers.json; The are no publishers or imprints loaded")
|