From aadeb07c49e306c3375058bf3188731128c9763a Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Mon, 8 Aug 2022 18:03:29 -0700 Subject: [PATCH] Fix issues Refactor add_to_path with tests Fix type hints for titles_match Use casefold in get_language Fix using the recursive flag in cli mode Add http status code to ComicVine exceptions Fix parenthesis getting removed when renaming Add more tests --- comicapi/filenameparser.py | 37 +++++++++---------- comicapi/genericmetadata.py | 2 +- comicapi/utils.py | 26 ++++++------- comictaggerlib/cli.py | 6 +-- comictaggerlib/comicvinetalker.py | 13 +++++-- comictaggerlib/filerenamer.py | 13 +++++-- comictaggerlib/options.py | 4 +- testing/comicdata.py | 1 + testing/filenames.py | 34 +++++++++++++++-- tests/utils_test.py | 61 +++++++++++++++++++++++++++++-- 10 files changed, 144 insertions(+), 53 deletions(-) diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py index c69102e..9b1d86f 100644 --- a/comicapi/filenameparser.py +++ b/comicapi/filenameparser.py @@ -67,17 +67,10 @@ class FileNameParser: # replace any name separators with spaces tmpstr = self.fix_spaces(filename) - found = False - match = re.search(r"(?<=\sof\s)\d+(?=\s)", tmpstr, re.IGNORECASE) + match = re.search(r"(?:\s\(?of\s)(\d+)(?: |\))", tmpstr, re.IGNORECASE) if match: - count = match.group() - found = True - - if not found: - match = re.search(r"(?<=\(of\s)\d+(?=\))", tmpstr, re.IGNORECASE) - if match: - count = match.group() + count = match.group(1) return count.lstrip("0") @@ -180,10 +173,12 @@ class FileNameParser: if "--" in filename: # the pattern seems to be that anything to left of the first "--" is the series name followed by issue filename = re.sub(r"--.*", self.repl, filename) + # never happens elif "__" in filename: # the pattern seems to be that anything to left of the first "__" is the series name followed by issue filename = re.sub(r"__.*", self.repl, filename) + # never happens filename = filename.replace("+", " ") tmpstr = self.fix_spaces(filename, remove_dashes=False) @@ -425,6 +420,7 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: likely_year = False if p.firstItem and p.first_is_alt: p.alt = True + p.firstItem = False return parse_issue_number # The issue number should hopefully not be in parentheses @@ -440,9 +436,6 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # Series has already been started/parsed, # filters out leading alternate numbers leading alternate number if len(p.series_parts) > 0: - # Unset first item - if p.firstItem: - p.firstItem = False return parse_issue_number else: p.operator_rejected.append(item) @@ -506,9 +499,6 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: p.series_parts.append(item) p.used_items.append(item) - # Unset first item - if p.firstItem: - p.firstItem = False p.get() return parse_series @@ -590,6 +580,8 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: if series_append: p.series_parts.append(item) p.used_items.append(item) + if p.firstItem: + p.firstItem = False return parse_series # We found text, it's probably the title or series @@ -602,6 +594,8 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: # Usually the word 'of' eg 1 (of 6) elif item.typ == filenamelexer.ItemType.InfoSpecifier: + if p.firstItem: + p.firstItem = False return parse_info_specifier # Operator is a symbol that acts as some sort of separator eg - : ; @@ -622,9 +616,13 @@ def parse(p: Parser) -> Callable[[Parser], Callable | None] | None: p.irrelevant.extend([item, p.input[p.pos], p.get()]) else: p.backup() + if p.firstItem: + p.firstItem = False return parse_series # This is text that just happens to also be a month/day else: + if p.firstItem: + p.firstItem = False return parse_series # Specifically '__' or '--', no further title/series parsing is done to keep compatibility with wiki @@ -853,10 +851,11 @@ def resolve_year(p: Parser) -> None: p.used_items.append(vol) # Remove volume from series and title - if selected_year in p.series_parts: - p.series_parts.remove(selected_year) - if selected_year in p.title_parts: - p.title_parts.remove(selected_year) + # note: this never happens + if vol in p.series_parts: + p.series_parts.remove(vol) + if vol in p.title_parts: + p.title_parts.remove(vol) # Remove year from series and title if selected_year in p.series_parts: diff --git a/comicapi/genericmetadata.py b/comicapi/genericmetadata.py index 7d0098a..77700d7 100644 --- a/comicapi/genericmetadata.py +++ b/comicapi/genericmetadata.py @@ -93,7 +93,7 @@ class GenericMetadata: comments: str | None = None # use same way as Summary in CIX volume_count: int | None = None - critical_rating: float | None = None # rating in cbl; CommunityRating in CIX + critical_rating: float | None = None # rating in CBL; CommunityRating in CIX country: str | None = None alternate_series: str | None = None diff --git a/comicapi/utils.py b/comicapi/utils.py index ab0d50b..5d18724 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -19,7 +19,6 @@ import json import logging import os import pathlib -import re import unicodedata from collections import defaultdict from shutil import which # noqa: F401 @@ -57,22 +56,20 @@ def get_recursive_filelist(pathlist: list[str]) -> list[str]: if os.path.isdir(p): filelist.extend(x for x in glob.glob(f"{p}{os.sep}/**", recursive=True) if not os.path.isdir(x)) - else: - filelist.append(p) + elif str(p) not in filelist: + filelist.append(str(p)) return filelist def add_to_path(dirname: str) -> None: - if dirname is not None and dirname != "": + if dirname: + dirname = os.path.abspath(dirname) + paths = [os.path.normpath(x) for x in os.environ["PATH"].split(os.pathsep)] - # verify that path doesn't already contain the given dirname - tmpdirname = re.escape(dirname) - pattern = r"(^|{sep}){dir}({sep}|$)".format(dir=tmpdirname, sep=os.pathsep) - - match = re.search(pattern, os.environ["PATH"]) - if not match: - os.environ["PATH"] = dirname + os.pathsep + os.environ["PATH"] + if dirname not in paths: + paths.insert(0, dirname) + os.environ["PATH"] = os.pathsep.join(paths) def xlate(data: Any, is_int: bool = False, is_float: bool = False) -> Any: @@ -161,10 +158,10 @@ def sanitize_title(text: str, basic: bool = False) -> str: return text -def titles_match(search_title: str, record_title: str, threshold: int = 90) -> int: +def titles_match(search_title: str, record_title: str, threshold: int = 90) -> bool: sanitized_search = sanitize_title(search_title) sanitized_record = sanitize_title(record_title) - ratio = thefuzz.fuzz.ratio(sanitized_search, sanitized_record) + ratio: int = thefuzz.fuzz.ratio(sanitized_search, sanitized_record) logger.debug( "search title: %s ; record title: %s ; ratio: %d ; match threshold: %d", search_title, @@ -205,6 +202,7 @@ def get_language_from_iso(iso: str | None) -> str | None: def get_language(string: str | None) -> str | None: if string is None: return None + string = string.casefold() lang = get_language_from_iso(string) @@ -217,8 +215,6 @@ def get_language(string: str | None) -> str | None: def get_publisher(publisher: str) -> tuple[str, str]: - if publisher is None: - return ("", "") imprint = "" for pub in publishers.values(): diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py index 481d36f..2f6058e 100644 --- a/comictaggerlib/cli.py +++ b/comictaggerlib/cli.py @@ -165,13 +165,13 @@ def post_process_matches( def cli_mode(opts: argparse.Namespace, settings: ComicTaggerSettings) -> None: - if len(opts.files) < 1: + if len(opts.file_list) < 1: logger.error("You must specify at least one filename. Use the -h option for more info") return match_results = OnlineMatchResults() - for f in opts.files: + for f in opts.file_list: process_file_cli(f, opts, settings, match_results) sys.stdout.flush() @@ -212,7 +212,7 @@ def create_local_metadata(opts: argparse.Namespace, ca: ComicArchive, settings: def process_file_cli( filename: str, opts: argparse.Namespace, settings: ComicTaggerSettings, match_results: OnlineMatchResults ) -> None: - batch_mode = len(opts.files) > 1 + batch_mode = len(opts.file_list) > 1 ca = ComicArchive(filename, settings.rar_exe_path, ComicTaggerSettings.get_graphic("nocover.png")) diff --git a/comictaggerlib/comicvinetalker.py b/comictaggerlib/comicvinetalker.py index 63f7ee5..9e76c4b 100644 --- a/comictaggerlib/comicvinetalker.py +++ b/comictaggerlib/comicvinetalker.py @@ -184,8 +184,8 @@ class ComicVineTalker: def get_url_content(self, url: str, params: dict[str, Any]) -> Any: # connect to server: - # if there is a 500 error, try a few more times before giving up - # any other error, just bail + # if there is a 500 error, try a few more times before giving up + # any other error, just bail for tries in range(3): try: resp = requests.get(url, params=params, headers={"user-agent": "comictagger/" + ctversion.version}) @@ -199,10 +199,15 @@ class ComicVineTalker: break except requests.exceptions.RequestException as e: - self.write_log(str(e) + "\n") + self.write_log(f"{e}\n") raise ComicVineTalkerException(ComicVineTalkerException.Network, "Network Error!") from e + except json.JSONDecodeError as e: + self.write_log(f"{e}\n") + raise ComicVineTalkerException(ComicVineTalkerException.Unknown, "ComicVine did not provide json") - raise ComicVineTalkerException(ComicVineTalkerException.Unknown, "Error on Comic Vine server") + raise ComicVineTalkerException( + ComicVineTalkerException.Unknown, f"Error on Comic Vine server: {resp.status_code}" + ) def search_for_series( self, diff --git a/comictaggerlib/filerenamer.py b/comictaggerlib/filerenamer.py index 975aacc..a296058 100644 --- a/comictaggerlib/filerenamer.py +++ b/comictaggerlib/filerenamer.py @@ -109,11 +109,12 @@ class MetadataFormatter(string.Formatter): # format the object and append to the result fmt_obj = self.format_field(obj, format_spec) - if fmt_obj == "" and len(result) > 0 and self.smart_cleanup and literal_text: - lstrip = True + if fmt_obj == "" and result and self.smart_cleanup and literal_text: + if self.str_contains(result[-1], "({["): + lstrip = True if result: if " " in result[-1]: - result[-1], _, _ = result[-1].rpartition(" ") + result[-1], _, _ = result[-1].rstrip().rpartition(" ") result[-1] = result[-1].rstrip("-_({[#") if self.smart_cleanup: fmt_obj = " ".join(fmt_obj.split()) @@ -122,6 +123,12 @@ class MetadataFormatter(string.Formatter): return "".join(result), False + def str_contains(self, chars: str, string: str) -> bool: + for char in chars: + if char in string: + return True + return False + class FileRenamer: def __init__(self, metadata: GenericMetadata | None, platform: str = "auto") -> None: diff --git a/comictaggerlib/options.py b/comictaggerlib/options.py index 03f52b4..884979c 100644 --- a/comictaggerlib/options.py +++ b/comictaggerlib/options.py @@ -405,6 +405,8 @@ def parse_cmd_line() -> argparse.Namespace: opts.copy = opts.copy[0] if opts.recursive: - opts.file_list = utils.get_recursive_filelist(opts.file_list) + opts.file_list = utils.get_recursive_filelist(opts.files) + else: + opts.file_list = opts.files return opts diff --git a/testing/comicdata.py b/testing/comicdata.py index 790b688..d2533fe 100644 --- a/testing/comicdata.py +++ b/testing/comicdata.py @@ -114,6 +114,7 @@ imprints = [ ("marvel", ("", "Marvel")), ("marvel comics", ("", "Marvel")), ("aircel", ("Aircel Comics", "Marvel")), + ("nothing", ("", "nothing")), ] additional_imprints = [ diff --git a/testing/filenames.py b/testing/filenames.py index 57902a6..25e56ab 100644 --- a/testing/filenames.py +++ b/testing/filenames.py @@ -733,6 +733,20 @@ fnames = [ }, True, ), + ( + "Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz", + "full-date, issue in parenthesis", + { + "issue": "1", + "series": "Cory Doctorow's Futuristic Tales of the Here and Now", + "title": "Anda's Game", + "volume": "", + "year": "2007", + "remainder": "", + "issue_count": "", + }, + True, + ), ] rnames = [ @@ -795,7 +809,7 @@ rnames = [ ( r"{publisher}\ {series} #{issue} - {title} ({year})", # backslashes separate directories False, - "universal", + "Linux", "Cory Doctorow's Futuristic Tales of the Here and Now #001 - Anda's Game (2007).cbz", does_not_raise(), ), @@ -807,10 +821,10 @@ rnames = [ does_not_raise(), ), ( - "{series} # {issue} - {locations} ({year})", + "{series} #{issue} - {locations} ({year})", False, "universal", - "Cory Doctorow's Futuristic Tales of the Here and Now # 001 - lonely cottage (2007).cbz", + "Cory Doctorow's Futuristic Tales of the Here and Now #001 - lonely cottage (2007).cbz", does_not_raise(), ), ( @@ -848,6 +862,20 @@ rnames = [ "Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game {test} #001 (2007).cbz", does_not_raise(), ), + ( + "{series} - {title} #{issue} ({year} {price})", # Test null value in parenthesis with a non-null value + False, + "universal", + "Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game #001 (2007).cbz", + does_not_raise(), + ), + ( + "{series} - {title} #{issue} (of {price})", # null value with literal text in parenthesis + False, + "universal", + "Cory Doctorow's Futuristic Tales of the Here and Now - Anda's Game #001.cbz", + does_not_raise(), + ), ( "{series} - {title} {1} #{issue} ({year})", # Test numeric key False, diff --git a/tests/utils_test.py b/tests/utils_test.py index d254028..2223de5 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -1,5 +1,7 @@ from __future__ import annotations +import os + import pytest import comicapi.utils @@ -22,13 +24,16 @@ def test_recursive_list_with_file(tmp_path) -> None: temp_txt = tmp_path / "info.txt" temp_txt.write_text("this is here") - expected_result = {str(foo_png), str(temp_cbr), str(temp_file), str(temp_txt)} - result = set(comicapi.utils.get_recursive_filelist([tmp_path])) + temp_txt2 = tmp_path / "info2.txt" + temp_txt2.write_text("this is here") + + expected_result = {str(foo_png), str(temp_cbr), str(temp_file), str(temp_txt), str(temp_txt2)} + result = set(comicapi.utils.get_recursive_filelist([str(temp_txt2), tmp_path])) assert result == expected_result -values = [ +xlate_values = [ ({"data": "", "is_int": False, "is_float": False}, None), ({"data": None, "is_int": False, "is_float": False}, None), ({"data": None, "is_int": True, "is_float": False}, None), @@ -52,6 +57,54 @@ values = [ ] -@pytest.mark.parametrize("value, result", values) +@pytest.mark.parametrize("value, result", xlate_values) def test_xlate(value, result): assert comicapi.utils.xlate(**value) == result + + +language_values = [ + ("en", "English"), + ("EN", "English"), + ("En", "English"), + ("", None), + (None, None), +] + + +@pytest.mark.parametrize("value, result", language_values) +def test_get_language(value, result): + assert result == comicapi.utils.get_language(value) + + +def test_unique_file(tmp_path): + file = tmp_path / "test" + assert file == comicapi.utils.unique_file(file) + + file.mkdir() + assert (tmp_path / "test (1)") == comicapi.utils.unique_file(file) + + +def test_add_to_path(monkeypatch): + monkeypatch.setenv("PATH", "/usr/bin") + comicapi.utils.add_to_path("/bin") + assert os.environ["PATH"] == "/bin:/usr/bin" + + comicapi.utils.add_to_path("/usr/bin") + comicapi.utils.add_to_path("/usr/bin/") + assert os.environ["PATH"] == "/bin:/usr/bin" + + +titles = [ + (("", ""), True), + (("Conan el Barbaro", "Conan el Bárbaro"), True), + (("鋼の錬金術師", "鋼の錬金術師"), True), + (("钢之炼金术师", "鋼の錬金術師"), False), + (("batmans grave", "The Batman's Grave"), True), + (("batman grave", "The Batman's Grave"), True), + (("bats grave", "The Batman's Grave"), False), +] + + +@pytest.mark.parametrize("value, result", titles) +def test_titles_match(value, result): + assert comicapi.utils.titles_match(value[0], value[1]) == result