"""Some generic utilities"""
# Copyright 2012-2014 Anthony Beville
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
import os
import pathlib
import re
import unicodedata
from collections import defaultdict
from typing import Any, List, Optional, Union
import pycountry
logger = logging.getLogger(__name__)
class UtilsVars:
    """Holder for module-wide state flags shared between helpers."""

    # Starts out False; nothing in this class changes it.
    already_fixed_encoding: bool = False
def get_recursive_filelist(pathlist: List[str]) -> List[str]:
    """Get a recursive list of all files under all path items in the list.

    Each entry of ``pathlist`` that is a directory is walked recursively and
    every file found beneath it is added (joined with its containing folder).
    Any entry that is not a directory is passed through unchanged, so plain
    file paths given by the caller are part of the result as well.

    Args:
        pathlist: Paths to files and/or directories. Non-``str`` entries are
            converted with ``str()`` first.

    Returns:
        A flat list of file paths, in the order they were discovered.
    """
    filelist: List[str] = []
    for p in pathlist:
        if not isinstance(p, str):
            # it's probably a QString
            p = str(p)

        if os.path.isdir(p):
            # if path is a folder, walk it recursively, and all files underneath
            for root, _, files in os.walk(p):
                filelist.extend(os.path.join(root, str(f)) for f in files)
        else:
            # not a folder: the item itself is the file to report
            filelist.append(p)

    return filelist
def list_to_string(lst: List[Union[str, Any]]) -> str:
    """Join the items of ``lst`` into one ``", "``-separated string.

    Args:
        lst: Items to join; ``None`` is treated like an empty list. Items
            that are not strings are converted with ``str()``.

    Returns:
        The joined text, or ``""`` when there is nothing to join. A separator
        is only written once some text has been accumulated, so empty items
        at the very start of the list leave no leading ``", "``.
    """
    string = ""
    if lst is not None:
        for item in lst:
            if string:
                string += ", "
            # Items are typed as Any: convert explicitly so that a non-str
            # value does not raise TypeError on concatenation.
            string += str(item)
    return string
def add_to_path(dirname: str) -> None:
    """Prepend ``dirname`` to the ``PATH`` environment variable.

    Nothing happens when ``dirname`` is ``None``/empty or is already one of
    the ``os.pathsep``-separated entries of ``PATH``.

    A missing or empty ``PATH`` is handled by setting it to ``dirname``
    alone: this avoids a ``KeyError`` for an unset variable and avoids
    writing a trailing separator (i.e. an extra empty entry) into ``PATH``.

    Args:
        dirname: Directory to make searchable.
    """
    if not dirname:
        return

    current = os.environ.get("PATH", "")
    if not current:
        os.environ["PATH"] = dirname
        return

    # verify that path doesn't already contain the given dirname:
    # wrapping both sides in separators makes a plain substring test match
    # whole entries only (start/end of PATH included).
    sep = os.pathsep
    if f"{sep}{dirname}{sep}" in f"{sep}{current}{sep}":
        return

    os.environ["PATH"] = dirname + sep + current
def which(program: str) -> Optional[str]:
    """Returns path of the executable, if it exists.

    If ``program`` contains a directory component it is checked as given and
    ``PATH`` is not consulted. Otherwise each ``os.pathsep``-separated entry
    of ``PATH`` is tried in order.

    Args:
        program: Executable name, or a path to an executable.

    Returns:
        The path of the first match that is a regular file with execute
        permission, or ``None`` when there is no match (including when
        ``PATH`` is not set).
    """

    def is_exe(fpath: str) -> bool:
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # An explicit location was given: searching PATH for
        # "<entry>/<directory>/<name>" would find unrelated files.
        return program if is_exe(program) else None

    search_path = os.environ.get("PATH")
    if search_path is None:
        # no PATH at all: nothing to search (and no KeyError)
        return None

    for path in search_path.split(os.pathsep):
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file

    return None
def xlate(data: Any, is_int: bool = False) -> Any:
    """Normalise a raw value to a string, an int, or None.

    ``None`` and ``""`` always map to ``None``. Without ``is_int`` the value
    is returned as ``str(data)``. With ``is_int`` every character other than
    the ASCII digits 0-9 is discarded first; the outcome is then ``None``
    when no digit is left, the *string* ``"0"`` when exactly ``"0"`` is left,
    and ``int(...)`` of the remaining digits otherwise.
    """
    if data is None or data == "":
        return None

    if not is_int:
        return str(data)

    # keep only the ten ASCII digit characters
    digits = "".join(ch for ch in str(data) if ch in "1234567890")
    if digits == "":
        return None
    if digits == "0":
        return "0"
    return int(digits)
def remove_articles(text: str) -> str:
    # Lower-case `text`, split it on single spaces and rebuild it from the
    # words that are not listed in `articles`, joined by single spaces.
    text = text.lower()
    # NOTE(review): the entries and the closing bracket of this list literal
    # are not present in this view of the file - confirm against the full source.
    articles = [
    new_text = ""
    for word in text.split(" "):
        if word not in articles:
            new_text += word + " "

    # drop the trailing space appended after the last kept word
    new_text = new_text[:-1]

    return new_text
def sanitize_title(text: str) -> str:
    """Reduce a title to lower-case ASCII words separated by single spaces.

    The text is NFKD-normalised and stripped of non-ASCII characters, quote
    characters are deleted, every other run of non-alphanumeric characters
    becomes one space, and the result is passed through ``remove_articles``
    before being lower-cased and stripped of surrounding whitespace.
    """
    # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
    # this will probably cause issues with titles in other character sets e.g. chinese, japanese
    decomposed = unicodedata.normalize("NFKD", text)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")

    # comicvine keeps apostrophes a part of the word, so quotes are removed
    # outright instead of being turned into a word break
    unquoted = ascii_only.replace("'", "").replace('"', "")

    # comicvine ignores punctuation and accents
    spaced = re.sub(r"[^A-Za-z0-9]+", " ", unquoted)

    # remove extra space and articles and all lower case
    return remove_articles(spaced).lower().strip()
def unique_file(file_name: str) -> str:
    """Return a path based on ``file_name`` that does not exist yet.

    ``file_name`` itself is returned when nothing exists at that path.
    Otherwise " (1)", " (2)", ... is inserted between the name and its
    extension until a path is found for which ``os.path.lexists`` is false.
    """
    stem, extension = os.path.splitext(file_name)
    candidate = file_name
    attempt = 0
    while os.path.lexists(candidate):
        attempt += 1
        candidate = f"{stem} ({attempt}){extension}"
    return candidate
# Lookup tables from two-letter (alpha_2) code to display name, filled from
# pycountry at import time. Both are defaultdicts whose missing keys give None.
countries: dict[Optional[str], Optional[str]] = defaultdict(lambda: None)
languages: dict[Optional[str], Optional[str]] = defaultdict(lambda: None)

for lng in pycountry.languages:
    # records without an alpha_2 field are left out of the table
    if "alpha_2" not in lng._fields:
        continue
    languages[lng.alpha_2] = lng.name

for c in pycountry.countries:
    if "alpha_2" not in c._fields:
        continue
    countries[c.alpha_2] = c.name
def get_language_from_iso(iso: Optional[str]) -> Optional[str]:
    """Return the language name stored for the code ``iso``.

    Args:
        iso: Key to look up in the module-level ``languages`` table.

    Returns:
        The language name, or ``None`` when the code is not in the table.
    """
    # `languages` is a defaultdict: indexing it with an unknown code would
    # insert a None entry into the shared table on every miss. `.get()`
    # returns the same value without mutating the table.
    return languages.get(iso)
def get_language(string: Optional[str]) -> Optional[str]:
    """Resolve ``string`` to a language name.

    The value is first tried as a key of the ``languages`` table via
    ``get_language_from_iso``; if that yields nothing it is handed to
    ``pycountry.languages.lookup``.

    Args:
        string: Language code or name; ``None`` is allowed.

    Returns:
        The language name, or ``None`` when ``string`` is ``None`` or no
        language could be found for it.
    """
    if string is None:
        return None

    lang = get_language_from_iso(string)
    if lang is not None:
        return lang

    try:
        return str(pycountry.languages.lookup(string).name)
    except LookupError:
        # pycountry signals "no such record" with LookupError; report it as
        # "unknown language" instead of letting it escape to the caller.
        return None
def get_publisher(publisher: str) -> tuple[str, str]:
    """Split a publisher name into ``(imprint, publisher)``.

    Every ``ImprintDict`` in the module-level ``publishers`` table is asked
    about the name; the first one that recognises it decides the result.

    Args:
        publisher: Name of a publisher or imprint; ``None`` is tolerated.

    Returns:
        ``("", "")`` for ``None``. Otherwise the ``(imprint, publisher)``
        pair from the first table entry that reports a match, or
        ``("", publisher)`` with the name unchanged when none does.
    """
    if publisher is None:
        return ("", "")
    imprint = ""

    for pub in publishers.values():
        # on a miss the lookup hands back ("", <same name>, False), so
        # rebinding both names here is harmless
        imprint, publisher, ok = pub[publisher]
        if ok:
            break

    # always return a tuple, also when nothing matched (previously the
    # function fell off the end and returned None in that case)
    return (imprint, publisher)
def update_publishers(new_publishers: dict[str, dict[str, str]]) -> None:
    """Store the given publishers in the module-level ``publishers`` table.

    Each entry is wrapped in an ``ImprintDict`` and stored under its
    publisher name. A publisher that is already present is replaced; one
    that is not yet present is added. Previously unknown publishers were
    skipped, so an empty table could never be filled.

    Args:
        new_publishers: Mapping of publisher name to that publisher's
            imprint mapping (passed to ``ImprintDict`` unchanged).
    """
    for publisher, imprints in new_publishers.items():
        publishers[publisher] = ImprintDict(publisher, imprints)
class ImprintDict(dict):
    """
    ImprintDict takes a publisher and a dict or mapping of lowercased
    imprint names to the proper imprint name. Retrieving a value from an
    ImprintDict returns a tuple of (imprint, publisher, keyExists).
    if the key does not exist the key is returned as the publisher unchanged
    """

    def __init__(self, publisher: str, mapping=(), **kwargs) -> None:
        """Store ``publisher`` and fill the dict from ``mapping``/``kwargs``."""
        super().__init__(mapping, **kwargs)
        self.publisher = publisher

    def __missing__(self, key: str) -> None:
        # Called by dict.__getitem__ for absent keys: report None rather
        # than raising KeyError so __getitem__ can build its "not found" tuple.
        return None

    def __getitem__(self, k: str) -> tuple[str, str, bool]:
        """Look up ``k`` case-insensitively.

        Returns:
            ``("", publisher, True)`` when ``k`` is the publisher itself,
            ``(imprint, publisher, True)`` when ``k`` is a known imprint,
            ``("", k, False)`` otherwise.
        """
        folded = k.casefold()
        item = super().__getitem__(folded)
        if folded == self.publisher.casefold():
            return ("", self.publisher, True)
        if item is None:
            return ("", k, False)
        return (item, self.publisher, True)

    def copy(self) -> "ImprintDict":
        """Return a shallow copy that keeps the publisher name."""
        return ImprintDict(self.publisher, super().copy())
# Module-level table of known publishers, keyed by publisher name; each value
# is that publisher's ImprintDict. Starts empty.
publishers: dict[str, ImprintDict] = {}
def load_publishers() -> None:
    """Load ``data/publishers.json`` into the publishers table.

    The file is read as UTF-8 from the ``data`` directory next to this
    module and its parsed content is handed to ``update_publishers``.
    Loading is best effort: any failure is logged with its traceback and
    not re-raised.
    """
    try:
        publishers_file = pathlib.Path(__file__).parent / "data" / "publishers.json"
        update_publishers(json.loads(publishers_file.read_text("utf-8")))
    except Exception:
        logger.exception("Failed to load publishers.json; The are no publishers or imprints loaded")