comictagger/comicapi/utils.py
2022-05-24 11:45:17 -07:00

264 lines
7.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Some generic utilities"""
# Copyright 2012-2014 Anthony Beville
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
import os
import pathlib
import re
import unicodedata
from collections import defaultdict
from typing import Any, Iterable, List, Optional, Union
import pycountry
logger = logging.getLogger(__name__)
class UtilsVars:
already_fixed_encoding = False
def get_recursive_filelist(pathlist: List[str]) -> List[str]:
"""Get a recursive list of of all files under all path items in the list"""
filelist = []
for p in pathlist:
# if path is a folder, walk it recursively, and all files underneath
if not isinstance(p, str):
# it's probably a QString
p = str(p)
if os.path.isdir(p):
for root, _, files in os.walk(p):
for f in files:
if not isinstance(f, str):
# it's probably a QString
f = str(f)
filelist.append(os.path.join(root, f))
else:
filelist.append(p)
return filelist
def list_to_string(lst: List[Union[str, Any]]) -> str:
string = ""
if lst is not None:
for item in lst:
if len(string) > 0:
string += ", "
string += item
return string
def add_to_path(dirname: str) -> None:
if dirname is not None and dirname != "":
# verify that path doesn't already contain the given dirname
tmpdirname = re.escape(dirname)
pattern = r"(^|{sep}){dir}({sep}|$)".format(dir=tmpdirname, sep=os.pathsep)
match = re.search(pattern, os.environ["PATH"])
if not match:
os.environ["PATH"] = dirname + os.pathsep + os.environ["PATH"]
def which(program: str) -> Optional[str]:
"""Returns path of the executable, if it exists"""
def is_exe(fpath: str) -> bool:
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
fpath, _ = os.path.split(program)
if fpath:
if is_exe(program):
return program
else:
for path in os.environ["PATH"].split(os.pathsep):
exe_file = os.path.join(path, program)
if is_exe(exe_file):
return exe_file
return None
def xlate(data: Any, is_int: bool = False) -> Any:
if data is None or data == "":
return None
if is_int:
i = str(data).translate(defaultdict(lambda: None, zip((ord(c) for c in "1234567890"), "1234567890")))
if i == "0":
return "0"
if i == "":
return None
return int(i)
return str(data)
def remove_articles(text: str) -> str:
text = text.lower()
articles = [
"&",
"a",
"am",
"an",
"and",
"as",
"at",
"be",
"but",
"by",
"for",
"if",
"is",
"issue",
"it",
"it's",
"its",
"itself",
"of",
"or",
"so",
"the",
"the",
"with",
]
new_text = ""
for word in text.split(" "):
if word not in articles:
new_text += word + " "
new_text = new_text[:-1]
return new_text
def sanitize_title(text: str) -> str:
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 12 not 1/2
# this will probably cause issues with titles in other character sets e.g. chinese, japanese
text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")
# comicvine keeps apostrophes a part of the word
text = text.replace("'", "")
text = text.replace('"', "")
# comicvine ignores punctuation and accents
text = re.sub(r"[^A-Za-z0-9]+", " ", text)
# remove extra space and articles and all lower case
text = remove_articles(text).lower().strip()
return text
def unique_file(file_name: str) -> str:
counter = 1
file_name_parts = os.path.splitext(file_name)
while True:
if not os.path.lexists(file_name):
return file_name
file_name = file_name_parts[0] + " (" + str(counter) + ")" + file_name_parts[1]
counter += 1
languages: dict[Optional[str], Optional[str]] = defaultdict(lambda: None)
countries: dict[Optional[str], Optional[str]] = defaultdict(lambda: None)
for c in pycountry.countries:
if "alpha_2" in c._fields:
countries[c.alpha_2] = c.name
for lng in pycountry.languages:
if "alpha_2" in lng._fields:
languages[lng.alpha_2] = lng.name
def get_language_from_iso(iso: Optional[str]) -> Optional[str]:
return languages[iso]
def get_language(string: Optional[str]) -> Optional[str]:
if string is None:
return None
lang = get_language_from_iso(string)
if lang is None:
try:
return str(pycountry.languages.lookup(string).name)
except:
return None
return lang
def get_publisher(publisher: str) -> tuple[str, str]:
if publisher is None:
return ("", "")
imprint = ""
for pub in publishers.values():
imprint, publisher, ok = pub[publisher]
if ok:
break
return (imprint, publisher)
def update_publishers(new_publishers: dict[str, dict[str, str]]) -> None:
for publisher in new_publishers:
if publisher in publishers:
publishers[publisher].update(new_publishers[publisher])
else:
publishers[publisher] = ImprintDict(publisher, new_publishers[publisher])
class ImprintDict(dict):
"""
ImprintDict takes a publisher and a dict or mapping of lowercased
imprint names to the proper imprint name. Retreiving a value from an
ImprintDict returns a tuple of (imprint, publisher, keyExists).
if the key does not exist the key is returned as the publisher unchanged
"""
def __init__(self, publisher: str, mapping: Iterable = (), **kwargs: Any):
super().__init__(mapping, **kwargs)
self.publisher = publisher
def __missing__(self, key: str) -> None:
return None
def __getitem__(self, k: str) -> tuple[str, str, bool]:
item = super().__getitem__(k.casefold())
if k.casefold() == self.publisher.casefold():
return ("", self.publisher, True)
if item is None:
return ("", k, False)
else:
return (item, self.publisher, True)
def copy(self) -> "ImprintDict":
return ImprintDict(self.publisher, super().copy())
publishers: dict[str, ImprintDict] = {}
def load_publishers() -> None:
try:
update_publishers(json.loads((pathlib.Path(__file__).parent / "data" / "publishers.json").read_text("utf-8")))
except Exception:
logger.exception("Failed to load publishers.json; The are no publishers or imprints loaded")