Fix issues

Refactor add_to_path with tests
Fix type hints for titles_match
Use casefold in get_language
Fix using the recursive flag in cli mode
Add http status code to ComicVine exceptions
Fix parenthesis getting removed when renaming
Add more tests
This commit is contained in:
Timmy Welch 2022-08-08 18:03:29 -07:00
parent e07fe9e8d1
commit aadeb07c49
10 changed files with 144 additions and 53 deletions

View File

@ -67,17 +67,10 @@ class FileNameParser:
# replace any name separators with spaces
tmpstr = self.fix_spaces(filename)
found = False
match = re.search(r"(?<=\sof\s)\d+(?=\s)", tmpstr, re.IGNORECASE)
match = re.search(r"(?:\s\(?of\s)(\d+)(?: |\))", tmpstr, re.IGNORECASE)
if match:
count = match.group()
found = True
if not found:
match = re.search(r"(?<=\(of\s)\d+(?=\))", tmpstr, re.IGNORECASE)
if match:
count = match.group()
count = match.group(1)
return count.lstrip("0")
@ -180,10 +173,12 @@ class FileNameParser:
if "--" in filename:
# the pattern seems to be that anything to left of the first "--" is the series name followed by issue
filename = re.sub(r"--.*", self.repl, filename)
# never happens
elif "__" in filename:
# the pattern seems to be that anything to left of the first "__" is the series name followed by issue
filename = re.sub(r"__.*", self.repl, filename)
# never happens
filename = filename.replace("+", " ")
tmpstr = self.fix_spaces(filename, remove_dashes=False)
@ -425,6 +420,7 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:
likely_year = False
if p.firstItem and p.first_is_alt:
p.alt = True
p.firstItem = False
return parse_issue_number
# The issue number should hopefully not be in parentheses
@ -440,9 +436,6 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:
# Series has already been started/parsed,
# filters out leading alternate numbers leading alternate number
if len(p.series_parts) > 0:
# Unset first item
if p.firstItem:
p.firstItem = False
return parse_issue_number
else:
p.operator_rejected.append(item)
@ -506,9 +499,6 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:
p.series_parts.append(item)
p.used_items.append(item)
# Unset first item
if p.firstItem:
p.firstItem = False
p.get()
return parse_series
@ -590,6 +580,8 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:
if series_append:
p.series_parts.append(item)
p.used_items.append(item)
if p.firstItem:
p.firstItem = False
return parse_series
# We found text, it's probably the title or series
@ -602,6 +594,8 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:
# Usually the word 'of' eg 1 (of 6)
elif item.typ == filenamelexer.ItemType.InfoSpecifier:
if p.firstItem:
p.firstItem = False
return parse_info_specifier
# Operator is a symbol that acts as some sort of separator eg - : ;
@ -622,9 +616,13 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:
p.irrelevant.extend([item, p.input[p.pos], p.get()])
else:
p.backup()
if p.firstItem:
p.firstItem = False
return parse_series
# This is text that just happens to also be a month/day
else:
if p.firstItem:
p.firstItem = False
return parse_series
# Specifically '__' or '--', no further title/series parsing is done to keep compatibility with wiki
@ -853,10 +851,11 @@ def resolve_year(p: Parser) -> None:
p.used_items.append(vol)
# Remove volume from series and title
if selected_year in p.series_parts:
p.series_parts.remove(selected_year)
if selected_year in p.title_parts:
p.title_parts.remove(selected_year)
# note: this never happens
if vol in p.series_parts:
p.series_parts.remove(vol)
if vol in p.title_parts:
p.title_parts.remove(vol)
# Remove year from series and title
if selected_year in p.series_parts:

View File

@ -93,7 +93,7 @@ class GenericMetadata:
comments: str | None = None # use same way as Summary in CIX
volume_count: int | None = None
critical_rating: float | None = None # rating in cbl; CommunityRating in CIX
critical_rating: float | None = None # rating in CBL; CommunityRating in CIX
country: str | None = None
alternate_series: str | None = None

View File

@ -19,7 +19,6 @@ import json
import logging
import os
import pathlib
import re
import unicodedata
from collections import defaultdict
from shutil import which # noqa: F401
@ -57,22 +56,20 @@ def get_recursive_filelist(pathlist: list[str]) -> list[str]:
if os.path.isdir(p):
filelist.extend(x for x in glob.glob(f"{p}{os.sep}/**", recursive=True) if not os.path.isdir(x))
else:
filelist.append(p)
elif str(p) not in filelist:
filelist.append(str(p))
return filelist
def add_to_path(dirname: str) -> None:
if dirname is not None and dirname != "":
if dirname:
dirname = os.path.abspath(dirname)
paths = [os.path.normpath(x) for x in os.environ["PATH"].split(os.pathsep)]
# verify that path doesn't already contain the given dirname
tmpdirname = re.escape(dirname)
pattern = r"(^|{sep}){dir}({sep}|$)".format(dir=tmpdirname, sep=os.pathsep)
match = re.search(pattern, os.environ["PATH"])
if not match:
os.environ["PATH"] = dirname + os.pathsep + os.environ["PATH"]
if dirname not in paths:
paths.insert(0, dirname)
os.environ["PATH"] = os.pathsep.join(paths)
def xlate(data: Any, is_int: bool = False, is_float: bool = False) -> Any:
@ -161,10 +158,10 @@ def sanitize_title(text: str, basic: bool = False) -> str:
return text
def titles_match(search_title: str, record_title: str, threshold: int = 90) -> int:
def titles_match(search_title: str, record_title: str, threshold: int = 90) -> bool:
sanitized_search = sanitize_title(search_title)
sanitized_record = sanitize_title(record_title)
ratio = thefuzz.fuzz.ratio(sanitized_search, sanitized_record)
ratio: int = thefuzz.fuzz.ratio(sanitized_search, sanitized_record)
logger.debug(
"search title: %s ; record title: %s ; ratio: %d ; match threshold: %d",
search_title,
@ -205,6 +202,7 @@ def get_language_from_iso(iso: str | None) -> str | None:
def get_language(string: str | None) -> str | None:
if string is None:
return None
string = string.casefold()
lang = get_language_from_iso(string)
@ -217,8 +215,6 @@ def get_language(string: str | None) -> str | None:
def get_publisher(publisher: str) -> tuple[str, str]:
if publisher is None:
return ("", "")
imprint = ""
for pub in publishers.values():

View File

@ -165,13 +165,13 @@ def post_process_matches(
def cli_mode(opts: argparse.Namespace, settings: ComicTaggerSettings) -> None:
if len(opts.files) < 1:
if len(opts.file_list) < 1:
logger.error("You must specify at least one filename. Use the -h option for more info")
return
match_results = OnlineMatchResults()
for f in opts.files:
for f in opts.file_list:
process_file_cli(f, opts, settings, match_results)
sys.stdout.flush()
@ -212,7 +212,7 @@ def create_local_metadata(opts: argparse.Namespace, ca: ComicArchive, settings:
def process_file_cli(
filename: str, opts: argparse.Namespace, settings: ComicTaggerSettings, match_results: OnlineMatchResults
) -> None:
batch_mode = len(opts.files) > 1
batch_mode = len(opts.file_list) > 1
ca = ComicArchive(filename, settings.rar_exe_path, ComicTaggerSettings.get_graphic("nocover.png"))

View File

@ -184,8 +184,8 @@ class ComicVineTalker:
def get_url_content(self, url: str, params: dict[str, Any]) -> Any:
# connect to server:
# if there is a 500 error, try a few more times before giving up
# any other error, just bail
# if there is a 500 error, try a few more times before giving up
# any other error, just bail
for tries in range(3):
try:
resp = requests.get(url, params=params, headers={"user-agent": "comictagger/" + ctversion.version})
@ -199,10 +199,15 @@ class ComicVineTalker:
break
except requests.exceptions.RequestException as e:
self.write_log(str(e) + "\n")
self.write_log(f"{e}\n")
raise ComicVineTalkerException(ComicVineTalkerException.Network, "Network Error!") from e
except json.JSONDecodeError as e:
self.write_log(f"{e}\n")
raise ComicVineTalkerException(ComicVineTalkerException.Unknown, "ComicVine did not provide json")
raise ComicVineTalkerException(ComicVineTalkerException.Unknown, "Error on Comic Vine server")
raise ComicVineTalkerException(
ComicVineTalkerException.Unknown, f"Error on Comic Vine server: {resp.status_code}"
)
def search_for_series(
self,

View File

@ -109,11 +109,12 @@ class MetadataFormatter(string.Formatter):
# format the object and append to the result
fmt_obj = self.format_field(obj, format_spec)
if fmt_obj == "" and len(result) > 0 and self.smart_cleanup and literal_text:
lstrip = True
if fmt_obj == "" and result and self.smart_cleanup and literal_text:
if self.str_contains(result[-1], "({["):
lstrip = True
if result:
if " " in result[-1]:
result[-1], _, _ = result[-1].rpartition(" ")
result[-1], _, _ = result[-1].rstrip().rpartition(" ")
result[-1] = result[-1].rstrip("-_({[#")
if self.smart_cleanup:
fmt_obj = " ".join(fmt_obj.split())
@ -122,6 +123,12 @@ class MetadataFormatter(string.Formatter):
return "".join(result), False
def str_contains(self, chars: str, string: str) -> bool:
for char in chars:
if char in string:
return True
return False
class FileRenamer:
def __init__(self, metadata: GenericMetadata | None, platform: str = "auto") -> None:

View File

@ -405,6 +405,8 @@ def parse_cmd_line() -> argparse.Namespace:
opts.copy = opts.copy[0]
if opts.recursive:
opts.file_list = utils.get_recursive_filelist(opts.file_list)
opts.file_list = utils.get_recursive_filelist(opts.files)
else:
opts.file_list = opts.files
return opts

View File

@ -114,6 +114,7 @@ imprints = [
("marvel", ("", "Marvel")),
("marvel comics", ("", "Marvel")),
("aircel", ("Aircel Comics", "Marvel")),
("nothing", ("", "nothing")),
]
additional_imprints = [

View File

@ -733,6 +733,20 @@ fnames = [
},
True,
),
(
"Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz",
"full-date, issue in parenthesis",
{
"issue": "1",
"series": "Cory Doctorow's Futuristic Tales of the Here and Now",
"title": "Anda's Game",
"volume": "",
"year": "2007",
"remainder": "",
"issue_count": "",
},
True,
),
]
rnames = [
@ -795,7 +809,7 @@ rnames = [
(
r"{publisher}\ {series} #{issue} - {title} ({year})", # backslashes separate directories
False,
"universal",
"Linux",
"Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz",
does_not_raise(),
),
@ -807,10 +821,10 @@ rnames = [
does_not_raise(),
),
(
"{series} # {issue} - {locations} ({year})",
"{series} #{issue} - {locations} ({year})",
False,
"universal",
"Cory Doctorow's Futuristic Tales of the Here and Now # 001 - lonely cottage (2007).cbz",
"Cory Doctorow's Futuristic Tales of the Here and Now #001 - lonely cottage (2007).cbz",
does_not_raise(),
),
(
@ -848,6 +862,20 @@ rnames = [
"Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game {test} #001 (2007).cbz",
does_not_raise(),
),
(
"{series} - {title} #{issue} ({year} {price})", # Test null value in parenthesis with a non-null value
False,
"universal",
"Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game #001 (2007).cbz",
does_not_raise(),
),
(
"{series} - {title} #{issue} (of {price})", # null value with literal text in parenthesis
False,
"universal",
"Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game #001.cbz",
does_not_raise(),
),
(
"{series} - {title} {1} #{issue} ({year})", # Test numeric key
False,

View File

@ -1,5 +1,7 @@
from __future__ import annotations
import os
import pytest
import comicapi.utils
@ -22,13 +24,16 @@ def test_recursive_list_with_file(tmp_path) -> None:
temp_txt = tmp_path / "info.txt"
temp_txt.write_text("this is here")
expected_result = {str(foo_png), str(temp_cbr), str(temp_file), str(temp_txt)}
result = set(comicapi.utils.get_recursive_filelist([tmp_path]))
temp_txt2 = tmp_path / "info2.txt"
temp_txt2.write_text("this is here")
expected_result = {str(foo_png), str(temp_cbr), str(temp_file), str(temp_txt), str(temp_txt2)}
result = set(comicapi.utils.get_recursive_filelist([str(temp_txt2), tmp_path]))
assert result == expected_result
values = [
xlate_values = [
({"data": "", "is_int": False, "is_float": False}, None),
({"data": None, "is_int": False, "is_float": False}, None),
({"data": None, "is_int": True, "is_float": False}, None),
@ -52,6 +57,54 @@ values = [
]
@pytest.mark.parametrize("value, result", values)
@pytest.mark.parametrize("value, result", xlate_values)
def test_xlate(value, result):
assert comicapi.utils.xlate(**value) == result
language_values = [
("en", "English"),
("EN", "English"),
("En", "English"),
("", None),
(None, None),
]
@pytest.mark.parametrize("value, result", language_values)
def test_get_language(value, result):
assert result == comicapi.utils.get_language(value)
def test_unique_file(tmp_path):
file = tmp_path / "test"
assert file == comicapi.utils.unique_file(file)
file.mkdir()
assert (tmp_path / "test (1)") == comicapi.utils.unique_file(file)
def test_add_to_path(monkeypatch):
monkeypatch.setenv("PATH", "/usr/bin")
comicapi.utils.add_to_path("/bin")
assert os.environ["PATH"] == "/bin:/usr/bin"
comicapi.utils.add_to_path("/usr/bin")
comicapi.utils.add_to_path("/usr/bin/")
assert os.environ["PATH"] == "/bin:/usr/bin"
titles = [
(("", ""), True),
(("Conan el Barbaro", "Conan el Bárbaro"), True),
(("鋼の錬金術師", "鋼の錬金術師"), True),
(("钢之炼金术师", "鋼の錬金術師"), False),
(("batmans grave", "The Batman's Grave"), True),
(("batman grave", "The Batman's Grave"), True),
(("bats grave", "The Batman's Grave"), False),
]
@pytest.mark.parametrize("value, result", titles)
def test_titles_match(value, result):
assert comicapi.utils.titles_match(value[0], value[1]) == result