comictagger/comicapi/utils.py
Timmy Welch e10f7dd7a7 Code cleanup
Remove no longer used google scripts
Remove convenience files from comictaggerlib and import comicapi directly
Add type-hints to facilitate auto-complete tools
Make PyQt5 code more compatible with PyQt6

Implement automatic tooling
isort and black for code formatting
Line length has been set to 120
flake8 for code standards with exceptions:
E203 - Whitespace before ':'  - format compatibility with black
E501 - Line too long          - flake8 line limit cannot be set
E722 - Do not use bare except - fixing bare except statements is a
                                lot of overhead and there are already
                                many in the codebase

These changes, along with some manual fixes, create much more readable code.
See examples below:

diff --git a/comicapi/comet.py b/comicapi/comet.py
index d1741c5..52dc195 100644
--- a/comicapi/comet.py
+++ b/comicapi/comet.py
@@ -166,7 +166,2 @@ class CoMet:

-            if credit['role'].lower() in set(self.editor_synonyms):
-                ET.SubElement(
-                    root,
-                    'editor').text = "{0}".format(
-                    credit['person'])

@@ -174,2 +169,4 @@ class CoMet:
         self.indent(root)
+            if credit["role"].lower() in set(self.editor_synonyms):
+                ET.SubElement(root, "editor").text = str(credit["person"])

diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py
index 4338176..9219f01 100644
--- a/comictaggerlib/autotagmatchwindow.py
+++ b/comictaggerlib/autotagmatchwindow.py
@@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
             self.skipButton, QtWidgets.QDialogButtonBox.ActionRole)
-        self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText(
-            "Accept and Write Tags")
+        self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags")

diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py
index 688907d..dbd0c2e 100644
--- a/comictaggerlib/cli.py
+++ b/comictaggerlib/cli.py
@@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results):
                 if opts.raw:
-                    print((
-                        "{0}".format(
-                            str(
-                                ca.readRawCIX(),
-                                errors='ignore'))))
+                    print(ca.read_raw_cix())
                 else:
2022-04-02 14:21:37 -07:00

239 lines
6.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Some generic utilities"""
# Copyright 2012-2014 Anthony Beville
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import codecs
import locale
import os
import platform
import re
import sys
import unicodedata
from collections import defaultdict
import pycountry
class UtilsVars:
    """Process-wide mutable flags shared by the helpers in this module."""

    # Flipped to True by fix_output_encoding() after it has adjusted stdout/stderr,
    # so that the adjustment is performed at most once.
    already_fixed_encoding: bool = False
def indent(elem, level=0):
    """Add whitespace to an element tree in place so the serialized XML is readable.

    Only ``text``/``tail`` values that are missing or whitespace-only are
    rewritten; real content is never touched.

    Args:
        elem: The element to format (anything with ``text``, ``tail``, ``len()``
            and child iteration, e.g. ``xml.etree.ElementTree.Element``).
        level: Nesting depth of ``elem``; callers normally leave it at 0.
    """
    # for making the XML output readable
    pad = "\n" + level * " "
    if len(elem):
        if not elem.text or not elem.text.strip():
            # Whitespace in front of the first child, one level deeper than elem.
            elem.text = pad + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
        for child in elem:
            indent(child, level + 1)
        # The last child's tail is what precedes elem's closing tag, so it has to be
        # pulled back to elem's own depth; otherwise the closing tag is over-indented.
        last_child = elem[-1]
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = pad
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf below the root: only the whitespace after it needs setting.
        elem.tail = pad
def get_actual_preferred_encoding():
    """Return the encoding name to use for console output.

    This is the locale's preferred encoding, except when ``platform.system()``
    reports "Darwin", where "utf-8" is always returned.
    """
    locale_encoding = locale.getpreferredencoding()
    return "utf-8" if platform.system() == "Darwin" else locale_encoding
def fix_output_encoding():
    """Make ``sys.stdout`` and ``sys.stderr`` encode with the preferred encoding.

    The work is done only on the first call; ``UtilsVars.already_fixed_encoding``
    records that it has happened. As part of that first call the process locale
    is initialised from the environment.
    """
    if UtilsVars.already_fixed_encoding:
        return

    # this reads the environment and inits the right locale
    locale.setlocale(locale.LC_ALL, "")

    # try to make stdout/stderr encodings happy for unicode printing
    preferred_encoding = get_actual_preferred_encoding()
    for stream_name in ("stdout", "stderr"):
        stream = getattr(sys, stream_name)
        if hasattr(stream, "reconfigure"):
            # Text streams take str, so change their encoding in place. Wrapping the
            # text stream itself in a codecs writer would hand it bytes and raise
            # TypeError on the first write.
            stream.reconfigure(encoding=preferred_encoding)
        elif hasattr(stream, "buffer"):
            # No reconfigure(): put the encoder in front of the underlying byte stream.
            setattr(sys, stream_name, codecs.getwriter(preferred_encoding)(stream.buffer))
        # Anything else (e.g. a stream that is None) is left untouched.

    UtilsVars.already_fixed_encoding = True
def get_recursive_filelist(pathlist):
    """Expand a list of paths into a flat list of file paths.

    Each entry that is a directory is walked recursively and replaced by every
    file found beneath it; any other entry is passed through unchanged (after
    conversion to ``str``), whether or not it exists.
    """
    collected = []
    for entry in pathlist:
        # non-str entries are probably QStrings
        path = entry if isinstance(entry, str) else str(entry)
        if not os.path.isdir(path):
            collected.append(path)
            continue
        for folder, _, names in os.walk(path):
            collected.extend(
                os.path.join(folder, name if isinstance(name, str) else str(name)) for name in names
            )
    return collected
def list_to_string(lst):
    """Join the strings in ``lst`` with ", "; ``None`` gives an empty string.

    Empty strings that come before the first non-empty item are dropped,
    because the separator is only written once some text has accumulated.
    """
    if lst is None:
        return ""

    kept = []
    for item in lst:
        if kept or item != "":
            kept.append(item)
    return ", ".join(kept)
def add_to_path(dirname):
    """Prepend ``dirname`` to the ``PATH`` environment variable if it is not already listed.

    ``None`` and the empty string are ignored. A missing or empty ``PATH`` is
    replaced by ``dirname`` alone.
    """
    if dirname is None or dirname == "":
        return

    current = os.environ.get("PATH", "")
    if not current:
        # Do not produce "dirname" + separator: the trailing empty entry would be
        # read as the current directory on POSIX systems.
        os.environ["PATH"] = dirname
        return

    # verify that path doesn't already contain the given dirname; wrapping both sides
    # in separators makes only whole entries match, never part of a longer name
    sep = os.pathsep
    if sep + dirname + sep not in sep + current + sep:
        os.environ["PATH"] = dirname + sep + current
def which(program):
    """Returns path of the executable, if it exists

    If ``program`` contains a directory component it is checked as given;
    otherwise each directory in ``PATH`` is tried in order. When ``PATH`` is not
    set, ``os.defpath`` is searched instead of raising ``KeyError``.

    Returns:
        The path to an existing, executable regular file, or ``None``.
    """

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    directory, _ = os.path.split(program)
    if directory:
        # An explicit location was given: never fall back to searching PATH.
        return program if is_exe(program) else None

    search_path = os.environ.get("PATH", os.defpath)
    for folder in search_path.split(os.pathsep):
        candidate = os.path.join(folder, program)
        if is_exe(candidate):
            return candidate

    return None
def xlate(data, is_int=False):
    """Normalise a raw field value to a string or, optionally, an integer.

    ``None`` and "" become ``None``. With ``is_int`` false the value is returned
    as ``str(data)``. With ``is_int`` true every character other than the ASCII
    digits is discarded first: no digits left gives ``None``, exactly "0" gives
    the string "0", and anything else is returned as an ``int``.
    """
    if data is None or data == "":
        return None
    if not is_int:
        return str(data)

    digits = "".join(ch for ch in str(data) if ch in "1234567890")
    if digits == "":
        return None
    if digits == "0":
        return "0"
    return int(digits)
def remove_articles(text):
    """Lower-case ``text`` and drop common filler words from it.

    The text is split on single spaces, words found in the stop list are
    removed, and the remaining words are re-joined with single spaces.
    """
    stop_words = {
        "&",
        "a",
        "am",
        "an",
        "and",
        "as",
        "at",
        "be",
        "but",
        "by",
        "for",
        "if",
        "is",
        "issue",
        "it",
        "it's",
        "its",
        "itself",
        "of",
        "or",
        "so",
        "the",
        "with",
    }
    return " ".join(word for word in text.lower().split(" ") if word not in stop_words)
def sanitize_title(text):
    """Reduce a title to lower-case ASCII words for comparison.

    Accents and non-ASCII characters are stripped, quotes are removed without
    leaving a gap, every other run of non-alphanumerics becomes one space, and
    the filler words handled by ``remove_articles`` are dropped.
    """
    # normalize unicode and convert to ascii. Does not work for everything eg ½ to 12 not 1/2
    # this will probably cause issues with titles in other character sets e.g. chinese, japanese
    ascii_only = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")

    # comicvine keeps apostrophes a part of the word
    for quote in ("'", '"'):
        ascii_only = ascii_only.replace(quote, "")

    # comicvine ignores punctuation and accents
    spaced = re.sub(r"[^A-Za-z0-9]+", " ", ascii_only)

    # remove extra space and articles and all lower case
    return remove_articles(spaced).lower().strip()
def unique_file(file_name):
    """Return a path based on ``file_name`` that does not currently exist.

    ``file_name`` itself is returned when nothing exists at that path.
    Otherwise " (1)", " (2)", ... is inserted before the extension until an
    unused name is found.
    """
    stem, extension = os.path.splitext(file_name)
    candidate = file_name
    attempt = 0
    while os.path.lexists(candidate):
        attempt += 1
        candidate = stem + " (" + str(attempt) + ")" + extension
    return candidate
# Lookup tables from a pycountry record's alpha_2 code to its name.
# Both are defaultdicts, so indexing with a missing key evaluates to None.
languages = defaultdict(lambda: None)
countries = defaultdict(lambda: None)

# Only records that actually carry an alpha_2 field are indexed.
countries.update(
    (country.alpha_2, country.name) for country in pycountry.countries if "alpha_2" in country._fields
)
languages.update(
    (language.alpha_2, language.name) for language in pycountry.languages if "alpha_2" in language._fields
)
def get_language_from_iso(iso: str):
    """Return the language name stored under the alpha_2 code ``iso``, or ``None`` if there is none."""
    # Use .get() instead of indexing: ``languages`` is a defaultdict, so ``languages[iso]``
    # would add a None entry to the shared table for every unknown code that is queried.
    return languages.get(iso)
def get_language(string):
    """Resolve ``string`` to a language name.

    The alpha_2 table is consulted first via ``get_language_from_iso``; if that
    yields nothing, ``pycountry.languages.lookup`` is tried.

    Returns:
        The language name, or ``None`` when ``string`` is ``None`` or cannot be
        resolved.
    """
    if string is None:
        return None

    lang = get_language_from_iso(string)
    if lang is not None:
        return lang

    try:
        return pycountry.languages.lookup(string).name
    except Exception:
        # Best effort: any lookup failure means "unknown language". Catching Exception
        # rather than using a bare except lets KeyboardInterrupt/SystemExit propagate.
        return None