From 44e9a47a8b9a5f422491e5411352153eda8b4139 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sat, 17 Feb 2024 15:08:36 -0800 Subject: [PATCH 1/6] Support multiple web_links --- comicapi/_url.py | 468 +++++++++++++++++++++++++++++++ comicapi/genericmetadata.py | 22 +- comicapi/metadata/comicrack.py | 6 +- comicapi/utils.py | 20 ++ comictaggerlib/cbltransformer.py | 13 +- comictaggerlib/taggerwindow.py | 4 +- comictalker/talkers/comicvine.py | 9 +- testing/comicvine.py | 4 +- 8 files changed, 529 insertions(+), 17 deletions(-) create mode 100644 comicapi/_url.py diff --git a/comicapi/_url.py b/comicapi/_url.py new file mode 100644 index 0000000..2278829 --- /dev/null +++ b/comicapi/_url.py @@ -0,0 +1,468 @@ +# mypy: disable-error-code="no-redef" +from __future__ import annotations + +try: + from urllib3.exceptions import HTTPError, LocationParseError, LocationValueError + from urllib3.util import Url, parse_url +except ImportError: + + import re + import typing + + class HTTPError(Exception): + """Base exception used by this module.""" + + class LocationValueError(ValueError, HTTPError): + """Raised when there is something wrong with a given URL input.""" + + class LocationParseError(LocationValueError): + """Raised when get_host or similar fails to parse the URL input.""" + + def __init__(self, location: str) -> None: + message = f"Failed to parse: {location}" + super().__init__(message) + + self.location = location + + def to_str(x: str | bytes, encoding: str | None = None, errors: str | None = None) -> str: + if isinstance(x, str): + return x + elif not isinstance(x, bytes): + raise TypeError(f"not expecting type {type(x).__name__}") + if encoding or errors: + return x.decode(encoding or "utf-8", errors=errors or "strict") + return x.decode() + + # We only want to normalize urls with an HTTP(S) scheme. + # urllib3 infers URLs without a scheme (None) to be http. + _NORMALIZABLE_SCHEMES = ("http", "https", None) + + # Almost all of these patterns were derived from the + # 'rfc3986' module: https://github.com/python-hyper/rfc3986 + _PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}") + _SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)") + _URI_RE = re.compile( + r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?" r"(?://([^\\/?#]*))?" r"([^?#]*)" r"(?:\?([^#]*))?" r"(?:#(.*))?$", + re.UNICODE | re.DOTALL, + ) + + _IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}" + _HEX_PAT = "[0-9A-Fa-f]{1,4}" + _LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=_HEX_PAT, ipv4=_IPV4_PAT) + _subs = {"hex": _HEX_PAT, "ls32": _LS32_PAT} + _variations = [ + # 6( h16 ":" ) ls32 + "(?:%(hex)s:){6}%(ls32)s", + # "::" 5( h16 ":" ) ls32 + "::(?:%(hex)s:){5}%(ls32)s", + # [ h16 ] "::" 4( h16 ":" ) ls32 + "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s", + # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s", + # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s", + # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s", + # [ *4( h16 ":" ) h16 ] "::" ls32 + "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s", + # [ *5( h16 ":" ) h16 ] "::" h16 + "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s", + # [ *6( h16 ":" ) h16 ] "::" + "(?:(?:%(hex)s:){0,6}%(hex)s)?::", + ] + + _UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~" + _IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")" + _ZONE_ID_PAT = "(?:%25|%)(?:[" + _UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+" + _IPV6_ADDRZ_PAT = r"\[" + _IPV6_PAT + r"(?:" + _ZONE_ID_PAT + r")?\]" + _REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*" + _TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$") + + _IPV4_RE = re.compile("^" + _IPV4_PAT + "$") + _IPV6_RE = re.compile("^" + _IPV6_PAT + "$") + _IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT + "$") + _BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT[2:-2] + "$") + _ZONE_ID_RE = re.compile("(" + _ZONE_ID_PAT + r")\]$") + + _HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % ( + _REG_NAME_PAT, + _IPV4_PAT, + _IPV6_ADDRZ_PAT, + ) + _HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL) + + _UNRESERVED_CHARS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~") + _SUB_DELIM_CHARS = set("!$&'()*+,;=") + _USERINFO_CHARS = _UNRESERVED_CHARS | _SUB_DELIM_CHARS | {":"} + _PATH_CHARS = _USERINFO_CHARS | {"@", "/"} + _QUERY_CHARS = _FRAGMENT_CHARS = _PATH_CHARS | {"?"} + + class Url( + typing.NamedTuple( + "Url", + [ + ("scheme", typing.Optional[str]), + ("auth", typing.Optional[str]), + ("host", typing.Optional[str]), + ("port", typing.Optional[int]), + ("path", typing.Optional[str]), + ("query", typing.Optional[str]), + ("fragment", typing.Optional[str]), + ], + ) + ): + """ + Data structure for representing an HTTP URL. Used as a return value for + :func:`parse_url`. Both the scheme and host are normalized as they are + both case-insensitive according to RFC 3986. + """ + + def __new__( # type: ignore[no-untyped-def] + cls, + scheme: str | None = None, + auth: str | None = None, + host: str | None = None, + port: int | None = None, + path: str | None = None, + query: str | None = None, + fragment: str | None = None, + ): + if path and not path.startswith("/"): + path = "/" + path + if scheme is not None: + scheme = scheme.lower() + return super().__new__(cls, scheme, auth, host, port, path, query, fragment) + + @property + def hostname(self) -> str | None: + """For backwards-compatibility with urlparse. We're nice like that.""" + return self.host + + @property + def request_uri(self) -> str: + """Absolute path including the query string.""" + uri = self.path or "/" + + if self.query is not None: + uri += "?" + self.query + + return uri + + @property + def authority(self) -> str | None: + """ + Authority component as defined in RFC 3986 3.2. + This includes userinfo (auth), host and port. + + i.e. + userinfo@host:port + """ + userinfo = self.auth + netloc = self.netloc + if netloc is None or userinfo is None: + return netloc + else: + return f"{userinfo}@{netloc}" + + @property + def netloc(self) -> str | None: + """ + Network location including host and port. + + If you need the equivalent of urllib.parse's ``netloc``, + use the ``authority`` property instead. + """ + if self.host is None: + return None + if self.port: + return f"{self.host}:{self.port}" + return self.host + + @property + def url(self) -> str: + """ + Convert self into a url + + This function should more or less round-trip with :func:`.parse_url`. The + returned url may not be exactly the same as the url inputted to + :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls + with a blank port will have : removed). + + Example: + + .. code-block:: python + + import urllib3 + + U = urllib3.util.parse_url("https://google.com/mail/") + + print(U.url) + # "https://google.com/mail/" + + print( urllib3.util.Url("https", "username:password", + "host.com", 80, "/path", "query", "fragment" + ).url + ) + # "https://username:password@host.com:80/path?query#fragment" + """ + scheme, auth, host, port, path, query, fragment = self + url = "" + + # We use "is not None" we want things to happen with empty strings (or 0 port) + if scheme is not None: + url += scheme + "://" + if auth is not None: + url += auth + "@" + if host is not None: + url += host + if port is not None: + url += ":" + str(port) + if path is not None: + url += path + if query is not None: + url += "?" + query + if fragment is not None: + url += "#" + fragment + + return url + + def __str__(self) -> str: + return self.url + + @typing.overload + def _encode_invalid_chars(component: str, allowed_chars: typing.Container[str]) -> str: # Abstract + ... + + @typing.overload + def _encode_invalid_chars(component: None, allowed_chars: typing.Container[str]) -> None: # Abstract + ... + + def _encode_invalid_chars(component: str | None, allowed_chars: typing.Container[str]) -> str | None: + """Percent-encodes a URI component without reapplying + onto an already percent-encoded component. + """ + if component is None: + return component + + component = to_str(component) + + # Normalize existing percent-encoded bytes. + # Try to see if the component we're encoding is already percent-encoded + # so we can skip all '%' characters but still encode all others. + component, percent_encodings = _PERCENT_RE.subn(lambda match: match.group(0).upper(), component) + + uri_bytes = component.encode("utf-8", "surrogatepass") + is_percent_encoded = percent_encodings == uri_bytes.count(b"%") + encoded_component = bytearray() + + for i in range(0, len(uri_bytes)): + # Will return a single character bytestring + byte = uri_bytes[i : i + 1] + byte_ord = ord(byte) + if (is_percent_encoded and byte == b"%") or (byte_ord < 128 and byte.decode() in allowed_chars): + encoded_component += byte + continue + encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper())) + + return encoded_component.decode() + + def _remove_path_dot_segments(path: str) -> str: + # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code + segments = path.split("/") # Turn the path into a list of segments + output = [] # Initialize the variable to use to store output + + for segment in segments: + # '.' is the current directory, so ignore it, it is superfluous + if segment == ".": + continue + # Anything other than '..', should be appended to the output + if segment != "..": + output.append(segment) + # In this case segment == '..', if we can, we should pop the last + # element + elif output: + output.pop() + + # If the path starts with '/' and the output is empty or the first string + # is non-empty + if path.startswith("/") and (not output or output[0]): + output.insert(0, "") + + # If the path starts with '/.' or '/..' ensure we add one more empty + # string to add a trailing '/' + if path.endswith(("/.", "/..")): + output.append("") + + return "/".join(output) + + @typing.overload + def _normalize_host(host: None, scheme: str | None) -> None: ... + + @typing.overload + def _normalize_host(host: str, scheme: str | None) -> str: ... + + def _normalize_host(host: str | None, scheme: str | None) -> str | None: + if host: + if scheme in _NORMALIZABLE_SCHEMES: + is_ipv6 = _IPV6_ADDRZ_RE.match(host) + if is_ipv6: + # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as + # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID + # separator as necessary to return a valid RFC 4007 scoped IP. + match = _ZONE_ID_RE.search(host) + if match: + start, end = match.span(1) + zone_id = host[start:end] + + if zone_id.startswith("%25") and zone_id != "%25": + zone_id = zone_id[3:] + else: + zone_id = zone_id[1:] + zone_id = _encode_invalid_chars(zone_id, _UNRESERVED_CHARS) + return f"{host[:start].lower()}%{zone_id}{host[end:]}" + else: + return host.lower() + elif not _IPV4_RE.match(host): + return to_str( + b".".join([_idna_encode(label) for label in host.split(".")]), + "ascii", + ) + return host + + def _idna_encode(name: str) -> bytes: + if not name.isascii(): + try: + import idna + except ImportError: + raise LocationParseError("Unable to parse URL without the 'idna' module") from None + + try: + return idna.encode(name.lower(), strict=True, std3_rules=True) + except idna.IDNAError: + raise LocationParseError(f"Name '{name}' is not a valid IDNA label") from None + + return name.lower().encode("ascii") + + def _encode_target(target: str) -> str: + """Percent-encodes a request target so that there are no invalid characters + + Pre-condition for this function is that 'target' must start with '/'. + If that is the case then _TARGET_RE will always produce a match. + """ + match = _TARGET_RE.match(target) + if not match: # Defensive: + raise LocationParseError(f"{target!r} is not a valid request URI") + + path, query = match.groups() + encoded_target = _encode_invalid_chars(path, _PATH_CHARS) + if query is not None: + query = _encode_invalid_chars(query, _QUERY_CHARS) + encoded_target += "?" + query + return encoded_target + + def parse_url(url: str) -> Url: + """ + Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is + performed to parse incomplete urls. Fields not provided will be None. + This parser is RFC 3986 and RFC 6874 compliant. + + The parser logic and helper functions are based heavily on + work done in the ``rfc3986`` module. + + :param str url: URL to parse into a :class:`.Url` namedtuple. + + Partly backwards-compatible with :mod:`urllib.parse`. + + Example: + + .. code-block:: python + + import urllib3 + + print( urllib3.util.parse_url('http://google.com/mail/')) + # Url(scheme='http', host='google.com', port=None, path='/mail/', ...) + + print( urllib3.util.parse_url('google.com:80')) + # Url(scheme=None, host='google.com', port=80, path=None, ...) + + print( urllib3.util.parse_url('/foo?bar')) + # Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) + """ + if not url: + # Empty + return Url() + + source_url = url + if not _SCHEME_RE.search(url): + url = "//" + url + + scheme: str | None + authority: str | None + auth: str | None + host: str | None + port: str | None + port_int: int | None + path: str | None + query: str | None + fragment: str | None + + try: + scheme, authority, path, query, fragment = _URI_RE.match(url).groups() # type: ignore[union-attr] + normalize_uri = scheme is None or scheme.lower() in _NORMALIZABLE_SCHEMES + + if scheme: + scheme = scheme.lower() + + if authority: + auth, _, host_port = authority.rpartition("@") + auth = auth or None + host, port = _HOST_PORT_RE.match(host_port).groups() # type: ignore[union-attr] + if auth and normalize_uri: + auth = _encode_invalid_chars(auth, _USERINFO_CHARS) + if port == "": + port = None + else: + auth, host, port = None, None, None + + if port is not None: + port_int = int(port) + if not (0 <= port_int <= 65535): + raise LocationParseError(url) + else: + port_int = None + + host = _normalize_host(host, scheme) + + if normalize_uri and path: + path = _remove_path_dot_segments(path) + path = _encode_invalid_chars(path, _PATH_CHARS) + if normalize_uri and query: + query = _encode_invalid_chars(query, _QUERY_CHARS) + if normalize_uri and fragment: + fragment = _encode_invalid_chars(fragment, _FRAGMENT_CHARS) + + except (ValueError, AttributeError) as e: + raise LocationParseError(source_url) from e + + # For the sake of backwards compatibility we put empty + # string values for path if there are any defined values + # beyond the path in the URL. + # TODO: Remove this when we break backwards compatibility. + if not path: + if query is not None or fragment is not None: + path = "" + else: + path = None + + return Url( + scheme=scheme, + auth=auth, + host=host, + port=port_int, + path=path, + query=query, + fragment=fragment, + ) + + +__all__ = ("Url", "parse_url", "HTTPError", "LocationParseError", "LocationValueError") diff --git a/comicapi/genericmetadata.py b/comicapi/genericmetadata.py index 35c68cf..8767d9e 100644 --- a/comicapi/genericmetadata.py +++ b/comicapi/genericmetadata.py @@ -31,6 +31,8 @@ from typing_extensions import NamedTuple, Required from comicapi import utils +from ._url import Url, parse_url + logger = logging.getLogger(__name__) @@ -133,7 +135,7 @@ class GenericMetadata: year: int | None = None language: str | None = None # 2 letter iso code country: str | None = None - web_link: str | None = None + web_links: list[Url] = dataclasses.field(default_factory=list) format: str | None = None manga: str | None = None black_and_white: bool | None = None @@ -253,7 +255,7 @@ class GenericMetadata: assign("year", new_md.year) assign("language", new_md.language) assign("country", new_md.country) - assign("web_link", new_md.web_link) + assign("web_links", new_md.web_links) assign("format", new_md.format) assign("manga", new_md.manga) assign("black_and_white", new_md.black_and_white) @@ -487,7 +489,9 @@ md_test: GenericMetadata = GenericMetadata( alternate_count=7, imprint="craphound.com", notes="Tagged with ComicTagger 1.3.2a5 using info from Comic Vine on 2022-04-16 15:52:26. [Issue ID 140529]", - web_link="https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/", + web_links=[ + parse_url("https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/") + ], format="Series", manga="No", black_and_white=None, @@ -551,3 +555,15 @@ md_test: GenericMetadata = GenericMetadata( last_mark=None, _cover_image=None, ) + + +__all__ = ( + "Url", + "parse_url", + "PageType", + "ImageMetadata", + "Credit", + "ComicSeries", + "TagOrigin", + "GenericMetadata", +) diff --git a/comicapi/metadata/comicrack.py b/comicapi/metadata/comicrack.py index b9782d6..1b02826 100644 --- a/comicapi/metadata/comicrack.py +++ b/comicapi/metadata/comicrack.py @@ -57,7 +57,7 @@ class ComicRack(Metadata): "month", "year", "language", - "web_link", + "web_links", "format", "manga", "black_and_white", @@ -229,7 +229,7 @@ class ComicRack(Metadata): assign("Month", md.month) assign("Year", md.year) assign("LanguageISO", md.language) - assign("Web", md.web_link) + assign("Web", " ".join(u.url for u in md.web_links)) assign("Format", md.format) assign("Manga", md.manga) assign("BlackAndWhite", "Yes" if md.black_and_white else None) @@ -313,7 +313,7 @@ class ComicRack(Metadata): md.month = utils.xlate_int(get("Month")) md.year = utils.xlate_int(get("Year")) md.language = utils.xlate(get("LanguageISO")) - md.web_link = utils.xlate(get("Web")) + md.web_links = utils.split_urls(utils.xlate(get("Web"))) md.format = utils.xlate(get("Format")) md.manga = utils.xlate(get("Manga")) md.maturity_rating = utils.xlate(get("AgeRating")) diff --git a/comicapi/utils.py b/comicapi/utils.py index cb44cfe..9364c7d 100644 --- a/comicapi/utils.py +++ b/comicapi/utils.py @@ -29,6 +29,8 @@ from typing import Any, TypeVar, cast import comicapi.data from comicapi import filenamelexer, filenameparser +from ._url import Url, parse_url + try: import icu @@ -275,6 +277,24 @@ def split(s: str | None, c: str) -> list[str]: return [] +def split_urls(s: str | None) -> list[Url]: + if s is None: + return [] + # Find occurences of ' http' + if s.count("http") > 1 and s.count(" http") >= 1: + urls = [] + # Split urls out + url_strings = split(s, " http") + # Return the scheme 'http' and parse the url + for i, url_string in enumerate(url_strings): + if not url_string.startswith("http"): + url_string = "http" + url_string + urls.append(parse_url(url_string)) + return urls + else: + return [parse_url(s)] + + def remove_articles(text: str) -> str: text = text.casefold() articles = [ diff --git a/comictaggerlib/cbltransformer.py b/comictaggerlib/cbltransformer.py index f5674ef..0fe3256 100644 --- a/comictaggerlib/cbltransformer.py +++ b/comictaggerlib/cbltransformer.py @@ -78,12 +78,13 @@ class CBLTransformer: self.metadata.description += self.metadata.notes if self.config.Comic_Book_Lover__copy_weblink_to_comments: - if self.metadata.web_link is not None: - if self.metadata.description is None: - self.metadata.description = "" + for web_link in self.metadata.web_links: + temp_desc = self.metadata.description + if temp_desc is None: + temp_desc = "" else: - self.metadata.description += "\n\n" - if self.metadata.web_link not in self.metadata.description: - self.metadata.description += self.metadata.web_link + temp_desc += "\n\n" + if web_link.url and web_link.url not in temp_desc: + self.metadata.description = temp_desc + web_link.url return self.metadata diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index 9e31465..4f0f8ec 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -844,7 +844,7 @@ class TaggerWindow(QtWidgets.QMainWindow): assign_text(self.leAltSeries, md.alternate_series) assign_text(self.leAltIssueNum, md.alternate_number) assign_text(self.leAltIssueCount, md.alternate_count) - assign_text(self.leWebLink, md.web_link) + assign_text(self.leWebLink, " ".join(u.url for u in md.web_links)) assign_text(self.teCharacters, "\n".join(md.characters)) assign_text(self.teTeams, "\n".join(md.teams)) assign_text(self.teLocations, "\n".join(md.locations)) @@ -967,7 +967,7 @@ class TaggerWindow(QtWidgets.QMainWindow): md.scan_info = utils.xlate(self.leScanInfo.text()) md.series_groups = utils.split(self.leSeriesGroup.text(), ",") md.alternate_series = self.leAltSeries.text() - md.web_link = utils.xlate(self.leWebLink.text()) + md.web_links = utils.split_urls(utils.xlate(self.leWebLink.text())) md.characters = set(utils.split(self.teCharacters.toPlainText(), "\n")) md.teams = set(utils.split(self.teTeams.toPlainText(), "\n")) md.locations = set(utils.split(self.teLocations.toPlainText(), "\n")) diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py index 7611dd9..813b80a 100644 --- a/comictalker/talkers/comicvine.py +++ b/comictalker/talkers/comicvine.py @@ -29,6 +29,8 @@ import requests import settngs from pyrate_limiter import Limiter, RequestRate from typing_extensions import Required, TypedDict +from urllib3.exceptions import LocationParseError +from urllib3.util import parse_url from comicapi import utils from comicapi.genericmetadata import ComicSeries, GenericMetadata, TagOrigin @@ -643,10 +645,15 @@ class ComicVineTalker(ComicTalker): format=utils.xlate(series.format), volume_count=utils.xlate_int(series.count_of_volumes), title=utils.xlate(issue.get("name")), - web_link=utils.xlate(issue.get("site_detail_url")), series=utils.xlate(series.name), series_aliases=series.aliases, ) + url = utils.xlate(issue.get("site_detail_url")) + if url: + try: + md.web_links = [parse_url(url)] + except LocationParseError: + ... if issue.get("image") is None: md._cover_image = "" else: diff --git a/testing/comicvine.py b/testing/comicvine.py index 1edad87..72e79c7 100644 --- a/testing/comicvine.py +++ b/testing/comicvine.py @@ -185,7 +185,7 @@ comic_issue_result = comicapi.genericmetadata.GenericMetadata( issue=cv_issue_result["results"]["issue_number"], volume=None, title=cv_issue_result["results"]["name"], - web_link=cv_issue_result["results"]["site_detail_url"], + web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])], ) cv_md = comicapi.genericmetadata.GenericMetadata( @@ -213,7 +213,7 @@ cv_md = comicapi.genericmetadata.GenericMetadata( alternate_count=None, imprint=None, notes=None, - web_link=cv_issue_result["results"]["site_detail_url"], + web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])], format=None, manga=None, black_and_white=None, From b2d386948821b1fd8503d4781b18e9a2da57e635 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sat, 17 Feb 2024 17:40:21 -0800 Subject: [PATCH 2/6] Update filerenaming for web_links Ensure the j specifier in MetadataFormatter converts to str before joining Add a web_link variable to the filerenamer --- comictaggerlib/filerenamer.py | 6 +++++- testing/filenames.py | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/comictaggerlib/filerenamer.py b/comictaggerlib/filerenamer.py index ad52062..1760e19 100644 --- a/comictaggerlib/filerenamer.py +++ b/comictaggerlib/filerenamer.py @@ -69,7 +69,7 @@ class MetadataFormatter(string.Formatter): if conversion == "t": return str(value).title() if conversion == "j": - return ", ".join(list(value)) + return ", ".join(list(str(v) for v in value)) return cast(str, super().convert_field(value, conversion)) def handle_replacements(self, string: str, replacements: list[Replacement]) -> str: @@ -218,6 +218,10 @@ class FileRenamer: fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform, replacements=self.replacements) md_dict = vars(md) + md_dict["web_link"] = "" + if md.web_links: + md_dict["web_link"] = md.web_links[0] + md_dict["issue"] = IssueString(md.issue).as_string(pad=self.issue_zero_padding) for role in ["writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor"]: md_dict[role] = md.get_primary_credit(role) diff --git a/testing/filenames.py b/testing/filenames.py index ad0eb4e..399f2a8 100644 --- a/testing/filenames.py +++ b/testing/filenames.py @@ -1152,6 +1152,13 @@ rnames = [ "Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz", does_not_raise(), ), + ( + "{title} {web_links!j}", # Test that join forces str conversion + False, + "Linux", + "Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz", + does_not_raise(), + ), ( "{series}:{title} #{issue} ({year})", # on windows the ':' is replaced False, From 05423c827031655c8f6141e420a30caea97dda75 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sat, 24 Feb 2024 22:31:45 -0800 Subject: [PATCH 3/6] Use a QListWidget for web_links Fix web_link in md_attributes --- comictaggerlib/taggerwindow.py | 53 ++++++++++++++----- comictaggerlib/ui/taggerwindow.ui | 84 ++++++++++++++++++++++++++++--- 2 files changed, 119 insertions(+), 18 deletions(-) diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index 4f0f8ec..5222671 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -27,11 +27,12 @@ import sys import webbrowser from datetime import datetime from typing import Any, Callable -from urllib.parse import urlparse import natsort import settngs +import urllib3.util from PyQt5 import QtCore, QtGui, QtNetwork, QtWidgets, uic +from urllib3.util.url import LocationParseError import comictaggerlib.ui from comicapi import utils @@ -112,7 +113,7 @@ class TaggerWindow(QtWidgets.QMainWindow): "alternate_count": self.leAltIssueCount, "imprint": self.leImprint, "notes": self.teNotes, - "web_link": self.leWebLink, + "web_links": self.leWebLink, "format": self.cbFormat, "manga": self.cbManga, "black_and_white": self.cbBW, @@ -532,6 +533,28 @@ class TaggerWindow(QtWidgets.QMainWindow): self.toolBar.addAction(self.actionPageBrowser) self.toolBar.addAction(self.actionAutoImprint) + self.leWebLink.addAction(self.actionAddWebLink) + self.leWebLink.addAction(self.actionRemoveWebLink) + + self.actionAddWebLink.triggered.connect(self.add_weblink_item) + self.actionRemoveWebLink.triggered.connect(self.remove_weblink_item) + + def add_weblink_item(self, url: str = "") -> None: + item = "" + if isinstance(url, str): + item = url + self.leWebLink.addItem(item) + self.leWebLink.item(self.leWebLink.count() - 1).setFlags( + QtCore.Qt.ItemFlag.ItemIsEditable + | QtCore.Qt.ItemFlag.ItemIsEnabled + | QtCore.Qt.ItemFlag.ItemIsDragEnabled + | QtCore.Qt.ItemFlag.ItemIsSelectable + ) + + def remove_weblink_item(self) -> None: + item = self.leWebLink.takeItem(self.leWebLink.currentRow()) + del item + def repackage_archive(self) -> None: ca_list = self.fileSelectionList.get_selected_archive_list() non_zip_count = 0 @@ -844,7 +867,10 @@ class TaggerWindow(QtWidgets.QMainWindow): assign_text(self.leAltSeries, md.alternate_series) assign_text(self.leAltIssueNum, md.alternate_number) assign_text(self.leAltIssueCount, md.alternate_count) - assign_text(self.leWebLink, " ".join(u.url for u in md.web_links)) + self.leWebLink: QtWidgets.QListWidget + self.leWebLink.clear() + for u in md.web_links: + self.add_weblink_item(u.url) assign_text(self.teCharacters, "\n".join(md.characters)) assign_text(self.teTeams, "\n".join(md.teams)) assign_text(self.teLocations, "\n".join(md.locations)) @@ -967,7 +993,7 @@ class TaggerWindow(QtWidgets.QMainWindow): md.scan_info = utils.xlate(self.leScanInfo.text()) md.series_groups = utils.split(self.leSeriesGroup.text(), ",") md.alternate_series = self.leAltSeries.text() - md.web_links = utils.split_urls(utils.xlate(self.leWebLink.text())) + md.web_links = [urllib3.util.parse_url(self.leWebLink.item(i).text()) for i in range(self.leWebLink.count())] md.characters = set(utils.split(self.teCharacters.toPlainText(), "\n")) md.teams = set(utils.split(self.teTeams.toPlainText(), "\n")) md.locations = set(utils.split(self.teLocations.toPlainText(), "\n")) @@ -1336,14 +1362,17 @@ class TaggerWindow(QtWidgets.QMainWindow): self.set_dirty_flag() def open_web_link(self) -> None: - if self.leWebLink is not None: - web_link = self.leWebLink.text().strip() - try: - result = urlparse(web_link) - all([result.scheme in ["http", "https"], result.netloc]) - webbrowser.open_new_tab(web_link) - except ValueError: - QtWidgets.QMessageBox.warning(self, self.tr("Web Link"), self.tr("Web Link is invalid.")) + row = self.leWebLink.currentRow() + if row < 0: + if self.leWebLink.count() < 1: + return + row = 0 + web_link = self.leWebLink.item(row).text() + try: + urllib3.util.parse_url(web_link) + webbrowser.open_new_tab(web_link) + except LocationParseError: + QtWidgets.QMessageBox.warning(self, "Web Link", "Web Link is invalid.") def show_settings(self) -> None: settingswin = SettingsWindow(self, self.config, self.talkers) diff --git a/comictaggerlib/ui/taggerwindow.ui b/comictaggerlib/ui/taggerwindow.ui index 4d253a3..3cc996c 100644 --- a/comictaggerlib/ui/taggerwindow.ui +++ b/comictaggerlib/ui/taggerwindow.ui @@ -952,10 +952,32 @@ - - - - false + + + + Qt::ActionsContextMenu + + + true + + + QAbstractItemView::DropOnly + + + Qt::MoveAction + + + true + + + QAbstractItemView::SingleSelection + + + + + + + Delete Item @@ -975,6 +997,13 @@ + + + + Add Item + + + @@ -1169,7 +1198,7 @@ 0 0 1096 - 28 + 30 @@ -1459,6 +1488,16 @@ Open Folder as Comic + + + Add Item + + + + + Remove Web Link + + @@ -1469,5 +1508,38 @@ - + + + btnWebLinkAdd + clicked() + actionAddWebLink + trigger() + + + 900 + 536 + + + -1 + -1 + + + + + btnWebLinkRemove + clicked() + actionRemoveWebLink + trigger() + + + 900 + 576 + + + -1 + -1 + + + + From 3c3700838b527aeb2a452733ce77eba0716735ca Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sun, 25 Feb 2024 08:26:29 -0800 Subject: [PATCH 4/6] Select item on add and set the dirty flag on change --- comictaggerlib/taggerwindow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index 5222671..cd633ef 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -550,6 +550,7 @@ class TaggerWindow(QtWidgets.QMainWindow): | QtCore.Qt.ItemFlag.ItemIsDragEnabled | QtCore.Qt.ItemFlag.ItemIsSelectable ) + self.leWebLink.item(self.leWebLink.count() - 1).setSelected(True) def remove_weblink_item(self) -> None: item = self.leWebLink.takeItem(self.leWebLink.currentRow()) @@ -807,6 +808,8 @@ class TaggerWindow(QtWidgets.QMainWindow): widget.currentIndexChanged.connect(self.set_dirty_flag) if isinstance(widget, QtWidgets.QCheckBox): widget.stateChanged.connect(self.set_dirty_flag) + if isinstance(widget, QtWidgets.QListWidget): + widget.itemChanged.connect(self.set_dirty_flag) # recursive call on children for child in widget.children(): @@ -867,7 +870,6 @@ class TaggerWindow(QtWidgets.QMainWindow): assign_text(self.leAltSeries, md.alternate_series) assign_text(self.leAltIssueNum, md.alternate_number) assign_text(self.leAltIssueCount, md.alternate_count) - self.leWebLink: QtWidgets.QListWidget self.leWebLink.clear() for u in md.web_links: self.add_weblink_item(u.url) From 163cf4475156d81cb79da391d66c3a08d6f912c0 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Mon, 26 Feb 2024 19:04:33 -0800 Subject: [PATCH 5/6] Open the editor when adding a now web link --- comictaggerlib/taggerwindow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index cd633ef..ba46768 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -551,6 +551,8 @@ class TaggerWindow(QtWidgets.QMainWindow): | QtCore.Qt.ItemFlag.ItemIsSelectable ) self.leWebLink.item(self.leWebLink.count() - 1).setSelected(True) + if not url: + self.leWebLink.editItem(self.leWebLink.item(self.leWebLink.count() - 1)) def remove_weblink_item(self) -> None: item = self.leWebLink.takeItem(self.leWebLink.currentRow()) From ff2547e7f21b2798ccc0c5a8a103bb21b9bef969 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Fri, 1 Mar 2024 15:26:11 -0800 Subject: [PATCH 6/6] Disable buttons for add/remove weblink --- comictaggerlib/taggerwindow.py | 4 ++-- comictaggerlib/ui/qtutils.py | 5 ++++- comictaggerlib/ui/taggerwindow.ui | 8 ++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py index ba46768..edaaff6 100644 --- a/comictaggerlib/taggerwindow.py +++ b/comictaggerlib/taggerwindow.py @@ -113,7 +113,7 @@ class TaggerWindow(QtWidgets.QMainWindow): "alternate_count": self.leAltIssueCount, "imprint": self.leImprint, "notes": self.teNotes, - "web_links": self.leWebLink, + "web_links": (self.leWebLink, self.btnOpenWebLink, self.btnAddWebLink, self.btnRemoveWebLink), "format": self.cbFormat, "manga": self.cbManga, "black_and_white": self.cbBW, @@ -125,7 +125,7 @@ class TaggerWindow(QtWidgets.QMainWindow): "characters": self.teCharacters, "teams": self.teTeams, "locations": self.teLocations, - "credits": [self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit], + "credits": (self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit), "credits.person": 2, "credits.role": 1, "credits.primary": 0, diff --git a/comictaggerlib/ui/qtutils.py b/comictaggerlib/ui/qtutils.py index 7104b9b..23d4e54 100644 --- a/comictaggerlib/ui/qtutils.py +++ b/comictaggerlib/ui/qtutils.py @@ -6,6 +6,7 @@ import io import logging import traceback import webbrowser +from collections.abc import Sequence from PyQt5.QtCore import QUrl from PyQt5.QtWidgets import QWidget @@ -155,7 +156,7 @@ if qt_available: active_palette = None def enable_widget(widget: QtWidgets.QWidget | list[QtWidgets.QWidget], enable: bool) -> None: - if isinstance(widget, list): + if isinstance(widget, Sequence): for w in widget: _enable_widget(w, enable) else: @@ -214,6 +215,8 @@ if qt_available: widget.setReadOnly(True) widget.setPalette(inactive_palette[0]) elif isinstance(widget, QtWidgets.QListWidget): + inactive_palette = palettes() + widget.setPalette(inactive_palette[0]) widget.setMovement(QtWidgets.QListWidget.Static) def replaceWidget( diff --git a/comictaggerlib/ui/taggerwindow.ui b/comictaggerlib/ui/taggerwindow.ui index 3cc996c..b4074dc 100644 --- a/comictaggerlib/ui/taggerwindow.ui +++ b/comictaggerlib/ui/taggerwindow.ui @@ -975,7 +975,7 @@ - + Delete Item @@ -998,7 +998,7 @@ - + Add Item @@ -1510,7 +1510,7 @@ - btnWebLinkAdd + btnAddWebLink clicked() actionAddWebLink trigger() @@ -1526,7 +1526,7 @@ - btnWebLinkRemove + btnRemoveWebLink clicked() actionRemoveWebLink trigger()