# comictagger/comictalker/comictalker.py (238 lines, 8.6 KiB, Python)

# Copyright 2012-2014 ComicTagger Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import logging
import pathlib
from typing import Any, Callable
2023-01-01 17:04:15 -08:00
import settngs
from comicapi.genericmetadata import ComicSeries, GenericMetadata
from comictalker.talker_utils import fix_url
logger = logging.getLogger(__name__)
class TalkerError(Exception):
    """Base class exception for information sources.

    Attributes:
        code -- a numerical code
            1 - General
            2 - Network
            3 - Data
            4 - Other
        code_name -- the name looked up from `codes` for `code`
        sub_code -- numerical code for finer detail
        desc -- description of the error
        source -- the name of the source producing the error
    """

    codes = {1: "General", 2: "Network", 3: "Data", 4: "Other"}

    def __init__(self, source: str, desc: str = "Unknown", code: int = 4, sub_code: int = 0) -> None:
        """Store the error details; `code` selects the category name used by `__str__`."""
        super().__init__()
        self.desc = desc
        self.code = code
        # An unrecognised code must not raise KeyError while the exception itself is
        # being constructed, as that would mask the error actually being reported.
        # The given code is kept as-is; only the display name falls back to "Other".
        self.code_name = self.codes.get(code, self.codes[4])
        self.sub_code = sub_code
        self.source = source

    def __str__(self) -> str:
        """Return a one-line message naming the source, the error category and the description."""
        return f"{self.source} encountered a {self.code_name} error. {self.desc}"
class TalkerNetworkError(TalkerError):
    """Network class exception for information sources

    Attributes:
        sub_code -- numerical code for finer detail
            0 -- general network error
            1 -- connection refused
            2 -- api key
            3 -- rate limit
            4 -- timeout
            5 -- number of retries exceeded
    """

    net_codes = {
        0: "General network error.",
        1: "The connection was refused.",
        2: "An API key error occurred.",
        3: "Rate limit exceeded. Please wait a bit or enter a personal key if using the default.",
        4: "The connection timed out.",
        5: "Number of retries exceeded.",
    }

    def __init__(self, source: str = "", sub_code: int = 0, desc: str = "") -> None:
        """Create a network error (code 2); an empty `desc` is replaced by the message for `sub_code`."""
        if not desc:
            # Fall back to the general message for an unknown sub_code so that building
            # the exception can never itself fail with a KeyError.
            desc = self.net_codes.get(sub_code, self.net_codes[0])
        super().__init__(source, desc, 2, sub_code)
class TalkerDataError(TalkerError):
    """Data class exception for information sources

    Attributes:
        sub_code -- numerical code for finer detail
            0 -- general data error
            1 -- unexpected data
            2 -- malformed data
            3 -- missing data
    """

    data_codes = {
        0: "General data error.",
        1: "Unexpected data encountered.",
        2: "Malformed data encountered.",
        3: "Missing data encountered.",
    }

    def __init__(self, source: str = "", sub_code: int = 0, desc: str = "") -> None:
        """Create a data error (code 3); an empty `desc` is replaced by the message for `sub_code`."""
        if not desc:
            # Fall back to the general message for an unknown sub_code so that building
            # the exception can never itself fail with a KeyError.
            desc = self.data_codes.get(sub_code, self.data_codes[0])
        super().__init__(source, desc, 3, sub_code)
class ComicTalker:
    """The base class for all comic source talkers"""

    name: str = "Example"
    id: str = "example"
    website: str = "https://example.com"
    logo_url: str = f"{website}/logo.png"
    attribution: str = f"Metadata provided by <a href='{website}'>{name}</a>"
    about: str = f"General information about <a href='{website}'>{name}</a> and any important notes"

    def __init__(self, version: str, cache_folder: pathlib.Path) -> None:
        """Record the version and cache folder and start with empty API key/URL and defaults."""
        self.cache_folder = cache_folder
        self.version = version
        self.api_key = ""
        self.default_api_key = ""
        self.api_url = ""
        self.default_api_url = ""

    def register_settings(self, parser: settngs.Manager) -> None:
        """
        Allows registering settings using the settngs package with an argparse like interface.
        The order that settings are declared is the order they will be displayed.

        The base implementation registers nothing.
        """

    def parse_settings(self, settings: dict[str, Any]) -> dict[str, Any]:
        """
        settings is a dictionary of settings defined in register_settings.
        It is only guaranteed that the settings defined in register_settings will be present.

        The `<id>_key` and `<id>_url` entries are applied to this talker when they are not None.
        Whenever the resulting key or URL is empty or equal to the default, the default is used
        and the corresponding entry in `settings` is set to None.
        The same (mutated) dictionary is returned.
        """
        key_name = f"{self.id}_key"
        url_name = f"{self.id}_url"

        configured_key = settings.get(key_name)
        if configured_key is not None:
            self.api_key = configured_key

        configured_url = settings.get(url_name)
        if configured_url is not None:
            self.api_url = fix_url(configured_url)

        # A value equal to the default is stored as None in the settings.
        if self.api_key in ("", self.default_api_key):
            self.api_key = self.default_api_key
            settings[key_name] = None

        if self.api_url in ("", self.default_api_url):
            self.api_url = self.default_api_url
            settings[url_name] = None

        return settings

    def check_status(self, settings: dict[str, Any]) -> tuple[str, bool]:
        """
        This function should return (msg, True) if the given settings are valid,
        where msg is a message to display to the user.

        This function should return (msg, False) if the given settings are not valid,
        where msg is a message to display to the user.

        This function MUST NOT keep any values from the given settings, this is a test function only.

        If the Talker uses the network it should ensure that it can authenticate with the given settings.
        If settings provides an invalid URL, API key or other piece of information used to authenticate
        it MUST fail.
        The only time that defaults should be used is when the value is an empty string or None.

        If the Talker does not use the network it should validate that all local data has been
        configured correctly.

        Caching MUST NOT be implemented on this function.
        """
        raise NotImplementedError

    def search_for_series(
        self,
        series_name: str,
        callback: Callable[[int, int], None] | None = None,
        refresh_cache: bool = False,
        literal: bool = False,
        series_match_thresh: int = 90,
    ) -> list[ComicSeries]:
        """
        This function should return a list of series that match the given series name
        according to the source the Talker uses.

        Sanitizing the series name is the responsibility of the talker.

        If `literal` == True then it is requested that no filtering or
        transformation/sanitizing of the title or results be performed by the talker.

        A sensible amount of results should be returned.
        For example the `ComicVineTalker` stops requesting new pages after the results
        become too different from the `series_name` by use of the `titles_match` function
        provided by the `comicapi.utils` module, and only allows a maximum of 5 pages.

        Caching SHOULD be implemented on this function.
        """
        raise NotImplementedError

    def fetch_comic_data(
        self, issue_id: str | None = None, series_id: str | None = None, issue_number: str = ""
    ) -> GenericMetadata:
        """
        This function should return an instance of GenericMetadata for a single issue.
        It is guaranteed that either `issue_id` or (`series_id` and `issue_number`) is set.

        Caching MUST be implemented on this function.

        Below is an example of how this function might be implemented:

            if issue_number and series_id:
                return self.fetch_issue_data(series_id, issue_number)
            elif issue_id:
                return self.fetch_issue_data_by_issue_id(issue_id)
            else:
                return GenericMetadata()
        """
        raise NotImplementedError

    def fetch_series(self, series_id: str) -> ComicSeries:
        """
        This function should return an instance of ComicSeries from the given series ID.

        Caching MUST be implemented on this function.
        """
        raise NotImplementedError

    def fetch_issues_in_series(self, series_id: str) -> list[GenericMetadata]:
        """Expected to return a list of issues with a given series ID"""
        raise NotImplementedError

    def fetch_issues_by_series_issue_num_and_year(
        self, series_id_list: list[str], issue_number: str, year: int | None
    ) -> list[GenericMetadata]:
        """
        This function should return a single issue for each series id in
        the `series_id_list` and it should match the issue_number.

        Preferably it should also only return issues published in the given `year`.

        If there is no year given (`year` == None) or the Talker does not have issue publication info
        return the results unfiltered.

        Caching SHOULD be implemented on this function.
        """
        raise NotImplementedError