comictagger/comicapi/utils.py

408 lines
11 KiB
Python
Raw Normal View History

"""Some generic utilities"""
# Copyright 2012-2014 ComicTagger Authors
2022-06-02 18:32:16 -07:00
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
2022-06-02 18:32:16 -07:00
#
# http://www.apache.org/licenses/LICENSE-2.0
2022-06-02 18:32:16 -07:00
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2022-06-02 18:32:16 -07:00
from __future__ import annotations
import json
2022-04-04 18:59:26 -07:00
import logging
import os
import pathlib
2023-04-22 22:00:26 -07:00
import platform
import unicodedata
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
from collections import defaultdict
2023-04-22 22:00:26 -07:00
from collections.abc import Iterable, Mapping
2022-06-06 16:58:27 -07:00
from shutil import which # noqa: F401
from typing import Any
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
import comicapi.data
from comicapi import filenamelexer, filenameparser
2023-04-22 22:00:26 -07:00
try:
import icu
del icu
icu_available = True
except ImportError:
icu_available = False
2022-04-04 18:59:26 -07:00
logger = logging.getLogger(__name__)
2023-04-22 22:00:26 -07:00
def _custom_key(tup):
2023-06-22 20:11:40 -07:00
import natsort
2023-04-22 22:00:26 -07:00
lst = []
for x in natsort.os_sort_keygen()(tup):
ret = x
if len(x) > 1 and isinstance(x[1], int) and isinstance(x[0], str) and x[0] == "":
ret = ("a", *x[1:])
lst.append(ret)
return tuple(lst)
def os_sorted(lst: Iterable) -> Iterable:
2023-06-22 20:11:40 -07:00
import natsort
2023-04-22 22:00:26 -07:00
key = _custom_key
if icu_available or platform.system() == "Windows":
key = natsort.os_sort_keygen()
return sorted(lst, key=key)
def parse_filename(
filename: str,
complicated_parser: bool = False,
remove_c2c: bool = False,
remove_fcbd: bool = False,
remove_publisher: bool = False,
split_words: bool = False,
allow_issue_start_with_letter: bool = False,
protofolius_issue_number_scheme: bool = False,
) -> filenameparser.FilenameInfo:
if not filename:
return filenameparser.FilenameInfo(
alternate="",
annual=False,
archive="",
c2c=False,
fcbd=False,
issue="",
issue_count="",
publisher="",
remainder="",
series="",
title="",
volume="",
volume_count="",
year="",
format="",
)
if split_words:
import wordninja
filename, ext = os.path.splitext(filename)
filename = " ".join(wordninja.split(filename)) + ext
if complicated_parser:
lex = filenamelexer.Lex(filename, allow_issue_start_with_letter)
p = filenameparser.Parse(
lex.items,
remove_c2c=remove_c2c,
remove_fcbd=remove_fcbd,
remove_publisher=remove_publisher,
protofolius_issue_number_scheme=protofolius_issue_number_scheme,
)
return p.filename_info
else:
fnp = filenameparser.FileNameParser()
fnp.parse_filename(filename)
fni = filenameparser.FilenameInfo(
alternate="",
annual=False,
archive="",
c2c=False,
fcbd=False,
issue=fnp.issue,
issue_count=fnp.issue_count,
publisher="",
remainder=fnp.remainder,
series=fnp.series,
title="",
volume=fnp.volume,
volume_count="",
year=fnp.year,
format="",
)
return fni
def combine_notes(existing_notes: str | None, new_notes: str | None, split: str) -> str:
2022-11-24 01:24:15 -08:00
split_notes, split_str, untouched_notes = (existing_notes or "").rpartition(split)
if split_notes or split_str:
return (split_notes + (new_notes or "")).strip()
else:
return (untouched_notes + "\n" + (new_notes or "")).strip()
def parse_date_str(date_str: str | None) -> tuple[int | None, int | None, int | None]:
2022-07-25 11:22:44 -07:00
day = None
month = None
year = None
if date_str:
parts = date_str.split("-")
year = xlate_int(parts[0])
2022-07-25 11:22:44 -07:00
if len(parts) > 1:
month = xlate_int(parts[1])
2022-07-25 11:22:44 -07:00
if len(parts) > 2:
day = xlate_int(parts[2])
2022-07-25 11:22:44 -07:00
return day, month, year
2022-06-02 18:32:16 -07:00
def get_recursive_filelist(pathlist: list[str]) -> list[str]:
"""Get a recursive list of of all files under all path items in the list"""
filelist: list[str] = []
for p in pathlist:
if os.path.isdir(p):
for root, _, files in os.walk(p):
for f in files:
filelist.append(os.path.join(root, f))
elif os.path.exists(p):
filelist.append(p)
return filelist
2022-05-17 13:57:04 -07:00
def add_to_path(dirname: str) -> None:
if dirname:
dirname = os.path.abspath(dirname)
Convert ComicIssue into GenericMetadata I could not find a good reason for ComicIssue to exist other than that it had more attributes than GenericMetadata, so it has been replaced. New attributes for GenericMetadata: series_id: a string uniquely identifying the series to tag_origin series_aliases: alternate series names that are not the canonical name title_aliases: alternate issue titles that are not the canonical name alternate_images: a list of urls to alternate cover images Updated attributes for GenericMetadata: genre -> genres: str -> list[str] comments -> description: str -> str story_arc -> story_arcs: str -> list[str] series_group -> series_groups: str -> list[str] character -> characters: str -> list[str] team -> teams: str -> list[str] location -> locations: str -> list[str] tag_origin -> tag_origin: str -> TagOrigin (tuple[str, str]) ComicSeries has been relocated to the ComicAPI package, currently has no usage within ComicAPI. CreditMetadata has been renamed to Credit and has replaced Credit from ComicTalker. fetch_series has been added to ComicTalker, this is currently only used in the GUI when a series is selected and does not already contain the needed fields, this function should always be cached. A new split function has been added to ComicAPI, all uses of split on single characters have been updated to use this cleanup_html and the corresponding setting are now only used in ComicTagger proper, for display we want any html directly from the upstream. When applying the metadata we then strip the description of any html. A new conversion has been added to the MetadataFormatter: j: joins any lists into a string with ', '. Note this is a valid operation on strings as well, it will add ', ' in between every character. parse_settings now assigns the given ComicTaggerPaths object to the result ensuring that the correct path is always used.
2023-08-02 09:00:04 -07:00
paths = [os.path.normpath(x) for x in split(os.environ["PATH"], os.pathsep)]
if dirname not in paths:
paths.insert(0, dirname)
os.environ["PATH"] = os.pathsep.join(paths)
def xlate_int(data: Any) -> int | None:
data = xlate_float(data)
if data is None:
return None
return int(data)
def xlate_float(data: Any) -> float | None:
if isinstance(data, str):
data = data.strip()
if data is None or data == "":
return None
i: str | int | float
if isinstance(data, (int, float)):
i = data
else:
i = str(data).translate(defaultdict(lambda: None, zip((ord(c) for c in "1234567890."), "1234567890.")))
if i == "":
return None
try:
return float(i)
except ValueError:
return None
def xlate(data: Any) -> str | None:
if data is None or isinstance(data, str) and data.strip() == "":
return None
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
Convert ComicIssue into GenericMetadata I could not find a good reason for ComicIssue to exist other than that it had more attributes than GenericMetadata, so it has been replaced. New attributes for GenericMetadata: series_id: a string uniquely identifying the series to tag_origin series_aliases: alternate series names that are not the canonical name title_aliases: alternate issue titles that are not the canonical name alternate_images: a list of urls to alternate cover images Updated attributes for GenericMetadata: genre -> genres: str -> list[str] comments -> description: str -> str story_arc -> story_arcs: str -> list[str] series_group -> series_groups: str -> list[str] character -> characters: str -> list[str] team -> teams: str -> list[str] location -> locations: str -> list[str] tag_origin -> tag_origin: str -> TagOrigin (tuple[str, str]) ComicSeries has been relocated to the ComicAPI package, currently has no usage within ComicAPI. CreditMetadata has been renamed to Credit and has replaced Credit from ComicTalker. fetch_series has been added to ComicTalker, this is currently only used in the GUI when a series is selected and does not already contain the needed fields, this function should always be cached. A new split function has been added to ComicAPI, all uses of split on single characters have been updated to use this cleanup_html and the corresponding setting are now only used in ComicTagger proper, for display we want any html directly from the upstream. When applying the metadata we then strip the description of any html. A new conversion has been added to the MetadataFormatter: j: joins any lists into a string with ', '. Note this is a valid operation on strings as well, it will add ', ' in between every character. parse_settings now assigns the given ComicTaggerPaths object to the result ensuring that the correct path is always used.
2023-08-02 09:00:04 -07:00
return str(data).strip()
def split(s: str | None, c: str) -> list[str]:
s = xlate(s)
if s:
return [x.strip() for x in s.strip().split(c) if x.strip()]
return []
2022-05-17 13:57:04 -07:00
def remove_articles(text: str) -> str:
text = text.casefold()
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
articles = [
"&",
"a",
"am",
"an",
"and",
"as",
"at",
"be",
"but",
"by",
"for",
"if",
"is",
"issue",
"it",
"it's",
"its",
"itself",
"of",
"or",
"so",
"the",
"the",
"with",
]
new_text = ""
for word in text.split():
if word not in articles:
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
new_text += word + " "
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
new_text = new_text[:-1]
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
return new_text
2022-06-07 11:49:56 -07:00
def sanitize_title(text: str, basic: bool = False) -> str:
# normalize unicode and convert to ascii. Does not work for everything eg ½ to 12 not 1/2
2022-07-09 23:26:30 -07:00
text = unicodedata.normalize("NFKD", text).casefold()
# comicvine keeps apostrophes a part of the word
text = text.replace("'", "")
text = text.replace('"', "")
if not basic:
2022-07-09 23:26:30 -07:00
# comicvine ignores punctuation and accents
# remove all characters that are not a letter, separator (space) or number
# replace any "dash punctuation" with a space
# makes sure that batman-superman and self-proclaimed stay separate words
text = "".join(
c if unicodedata.category(c)[0] not in "P" else " " for c in text if unicodedata.category(c)[0] in "LZNP"
2022-07-09 23:26:30 -07:00
)
2022-06-07 11:49:56 -07:00
# remove extra space and articles and all lower case
2022-07-09 23:26:30 -07:00
text = remove_articles(text).strip()
return text
def titles_match(search_title: str, record_title: str, threshold: int = 90) -> bool:
2023-06-22 20:11:40 -07:00
import rapidfuzz.fuzz
2022-07-09 23:26:30 -07:00
sanitized_search = sanitize_title(search_title)
sanitized_record = sanitize_title(record_title)
2023-02-09 19:33:10 -08:00
ratio = int(rapidfuzz.fuzz.ratio(sanitized_search, sanitized_record))
2022-07-08 12:33:00 -07:00
logger.debug(
"search title: %s ; record title: %s ; ratio: %d ; match threshold: %d",
search_title,
record_title,
ratio,
threshold,
)
return ratio >= threshold
2022-07-09 23:26:30 -07:00
def unique_file(file_name: pathlib.Path) -> pathlib.Path:
name = file_name.stem
counter = 1
while True:
if not file_name.exists():
return file_name
file_name = file_name.with_stem(name + " (" + str(counter) + ")")
counter += 1
2023-06-22 20:11:40 -07:00
_languages: dict[str | None, str | None] = defaultdict(lambda: None)
_countries: dict[str | None, str | None] = defaultdict(lambda: None)
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
2023-06-22 20:11:40 -07:00
def countries() -> dict[str | None, str | None]:
if not _countries:
import isocodes
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
for alpha_2, c in isocodes.countries.by_alpha_2:
_countries[alpha_2] = c["name"]
2023-06-22 20:11:40 -07:00
return _countries
def languages() -> dict[str | None, str | None]:
if not _languages:
import isocodes
2023-06-22 20:11:40 -07:00
for alpha_2, lng in isocodes.extendend_languages._sorted_by_index(index="alpha_2"):
_languages[alpha_2] = lng["name"]
2023-06-22 20:11:40 -07:00
return _languages
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
2022-06-02 18:32:16 -07:00
def get_language_from_iso(iso: str | None) -> str | None:
2023-06-22 20:11:40 -07:00
return languages()[iso]
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
2022-10-02 19:33:12 -07:00
def get_language_iso(string: str | None) -> str | None:
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
if string is None:
return None
import isocodes
2023-06-22 20:11:40 -07:00
# Return current string if all else fails
2022-10-02 19:33:12 -07:00
lang = string.casefold()
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
found = None
for lng in isocodes.extendend_languages.items:
for x in ("alpha_2", "alpha_3", "bibliographic", "common_name", "name"):
if x in lng and lng[x].casefold() == lang:
found = lng
if found:
break
if found:
return found.get("alpha_2", None)
Code cleanup Remove no longer used google scripts Remove convenience files from comicataggerlib and import comicapi directly Add type-hints to facilitate auto-complete tools Make PyQt5 code more compatible with PyQt6 Implement automatic tooling isort and black for code formatting Line length has been set to 120 flake8 for code standards with exceptions: E203 - Whitespace before ':' - format compatiblity with black E501 - Line too long - flake8 line limit cannot be set E722 - Do not use bare except - fixing bare except statements is a lot of overhead and there are already many in the codebase These changes, along with some manual fixes creates much more readable code. See examples below: diff --git a/comicapi/comet.py b/comicapi/comet.py index d1741c5..52dc195 100644 --- a/comicapi/comet.py +++ b/comicapi/comet.py @@ -166,7 +166,2 @@ class CoMet: - if credit['role'].lower() in set(self.editor_synonyms): - ET.SubElement( - root, - 'editor').text = "{0}".format( - credit['person']) @@ -174,2 +169,4 @@ class CoMet: self.indent(root) + if credit["role"].lower() in set(self.editor_synonyms): + ET.SubElement(root, "editor").text = str(credit["person"]) diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py index 4338176..9219f01 100644 --- a/comictaggerlib/autotagmatchwindow.py +++ b/comictaggerlib/autotagmatchwindow.py @@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog): self.skipButton, QtWidgets.QDialogButtonBox.ActionRole) - self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText( - "Accept and Write Tags") + self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags") diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 688907d..dbd0c2e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results): if opts.raw: - print(( - "{0}".format( - str( - ca.readRawCIX(), - errors='ignore')))) + print(ca.read_raw_cix()) else:
2022-04-01 16:50:46 -07:00
return lang
def get_country_from_iso(iso: str | None) -> str | None:
2023-06-22 20:11:40 -07:00
return countries()[iso]
def get_publisher(publisher: str) -> tuple[str, str]:
imprint = ""
for pub in publishers.values():
imprint, publisher, ok = pub[publisher]
if ok:
break
return imprint, publisher
2022-06-02 18:32:16 -07:00
def update_publishers(new_publishers: Mapping[str, Mapping[str, str]]) -> None:
for publisher in new_publishers:
if publisher in publishers:
publishers[publisher].update(new_publishers[publisher])
else:
publishers[publisher] = ImprintDict(publisher, new_publishers[publisher])
2022-12-06 00:20:01 -08:00
class ImprintDict(dict): # type: ignore
"""
ImprintDict takes a publisher and a dict or mapping of lowercased
imprint names to the proper imprint name. Retrieving a value from an
ImprintDict returns a tuple of (imprint, publisher, keyExists).
if the key does not exist the key is returned as the publisher unchanged
"""
2022-12-06 00:20:01 -08:00
def __init__(self, publisher: str, mapping: tuple | Mapping = (), **kwargs: dict) -> None: # type: ignore
super().__init__(mapping, **kwargs)
self.publisher = publisher
def __missing__(self, key: str) -> None:
return None
def __getitem__(self, k: str) -> tuple[str, str, bool]:
item = super().__getitem__(k.casefold())
if k.casefold() == self.publisher.casefold():
return "", self.publisher, True
if item is None:
return "", k, False
else:
return item, self.publisher, True
2022-06-02 18:32:16 -07:00
def copy(self) -> ImprintDict:
2022-05-21 00:16:45 -07:00
return ImprintDict(self.publisher, super().copy())
publishers: dict[str, ImprintDict] = {}
def load_publishers() -> None:
try:
update_publishers(json.loads((comicapi.data.data_path / "publishers.json").read_text("utf-8")))
except Exception:
logger.exception("Failed to load publishers.json; The are no publishers or imprints loaded")