diff --git a/comicapi/_url.py b/comicapi/_url.py
new file mode 100644
index 0000000..2278829
--- /dev/null
+++ b/comicapi/_url.py
@@ -0,0 +1,468 @@
+# mypy: disable-error-code="no-redef"
+from __future__ import annotations
+
+try:
+ from urllib3.exceptions import HTTPError, LocationParseError, LocationValueError
+ from urllib3.util import Url, parse_url
+except ImportError:
+
+ import re
+ import typing
+
+ class HTTPError(Exception):
+ """Base exception used by this module."""
+
+ class LocationValueError(ValueError, HTTPError):
+ """Raised when there is something wrong with a given URL input."""
+
+ class LocationParseError(LocationValueError):
+ """Raised when get_host or similar fails to parse the URL input."""
+
+ def __init__(self, location: str) -> None:
+ message = f"Failed to parse: {location}"
+ super().__init__(message)
+
+ self.location = location
+
+ def to_str(x: str | bytes, encoding: str | None = None, errors: str | None = None) -> str:
+ if isinstance(x, str):
+ return x
+ elif not isinstance(x, bytes):
+ raise TypeError(f"not expecting type {type(x).__name__}")
+ if encoding or errors:
+ return x.decode(encoding or "utf-8", errors=errors or "strict")
+ return x.decode()
+
+ # We only want to normalize urls with an HTTP(S) scheme.
+ # urllib3 infers URLs without a scheme (None) to be http.
+ _NORMALIZABLE_SCHEMES = ("http", "https", None)
+
+ # Almost all of these patterns were derived from the
+ # 'rfc3986' module: https://github.com/python-hyper/rfc3986
+ _PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
+ _SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
+ _URI_RE = re.compile(
+ r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?" r"(?://([^\\/?#]*))?" r"([^?#]*)" r"(?:\?([^#]*))?" r"(?:#(.*))?$",
+ re.UNICODE | re.DOTALL,
+ )
+
+ _IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
+ _HEX_PAT = "[0-9A-Fa-f]{1,4}"
+ _LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=_HEX_PAT, ipv4=_IPV4_PAT)
+ _subs = {"hex": _HEX_PAT, "ls32": _LS32_PAT}
+ _variations = [
+ # 6( h16 ":" ) ls32
+ "(?:%(hex)s:){6}%(ls32)s",
+ # "::" 5( h16 ":" ) ls32
+ "::(?:%(hex)s:){5}%(ls32)s",
+ # [ h16 ] "::" 4( h16 ":" ) ls32
+ "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
+ # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
+ "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
+ # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
+ "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
+ # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
+ "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
+ # [ *4( h16 ":" ) h16 ] "::" ls32
+ "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
+ # [ *5( h16 ":" ) h16 ] "::" h16
+ "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
+ # [ *6( h16 ":" ) h16 ] "::"
+ "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
+ ]
+
+ _UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
+ _IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
+ _ZONE_ID_PAT = "(?:%25|%)(?:[" + _UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
+ _IPV6_ADDRZ_PAT = r"\[" + _IPV6_PAT + r"(?:" + _ZONE_ID_PAT + r")?\]"
+ _REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
+ _TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")
+
+ _IPV4_RE = re.compile("^" + _IPV4_PAT + "$")
+ _IPV6_RE = re.compile("^" + _IPV6_PAT + "$")
+ _IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT + "$")
+ _BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT[2:-2] + "$")
+ _ZONE_ID_RE = re.compile("(" + _ZONE_ID_PAT + r")\]$")
+
+ _HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
+ _REG_NAME_PAT,
+ _IPV4_PAT,
+ _IPV6_ADDRZ_PAT,
+ )
+ _HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)
+
+ _UNRESERVED_CHARS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~")
+ _SUB_DELIM_CHARS = set("!$&'()*+,;=")
+ _USERINFO_CHARS = _UNRESERVED_CHARS | _SUB_DELIM_CHARS | {":"}
+ _PATH_CHARS = _USERINFO_CHARS | {"@", "/"}
+ _QUERY_CHARS = _FRAGMENT_CHARS = _PATH_CHARS | {"?"}
+
+ class Url(
+ typing.NamedTuple(
+ "Url",
+ [
+ ("scheme", typing.Optional[str]),
+ ("auth", typing.Optional[str]),
+ ("host", typing.Optional[str]),
+ ("port", typing.Optional[int]),
+ ("path", typing.Optional[str]),
+ ("query", typing.Optional[str]),
+ ("fragment", typing.Optional[str]),
+ ],
+ )
+ ):
+ """
+ Data structure for representing an HTTP URL. Used as a return value for
+ :func:`parse_url`. Both the scheme and host are normalized as they are
+ both case-insensitive according to RFC 3986.
+ """
+
+ def __new__( # type: ignore[no-untyped-def]
+ cls,
+ scheme: str | None = None,
+ auth: str | None = None,
+ host: str | None = None,
+ port: int | None = None,
+ path: str | None = None,
+ query: str | None = None,
+ fragment: str | None = None,
+ ):
+ if path and not path.startswith("/"):
+ path = "/" + path
+ if scheme is not None:
+ scheme = scheme.lower()
+ return super().__new__(cls, scheme, auth, host, port, path, query, fragment)
+
+ @property
+ def hostname(self) -> str | None:
+ """For backwards-compatibility with urlparse. We're nice like that."""
+ return self.host
+
+ @property
+ def request_uri(self) -> str:
+ """Absolute path including the query string."""
+ uri = self.path or "/"
+
+ if self.query is not None:
+ uri += "?" + self.query
+
+ return uri
+
+ @property
+ def authority(self) -> str | None:
+ """
+ Authority component as defined in RFC 3986 3.2.
+ This includes userinfo (auth), host and port.
+
+ i.e.
+ userinfo@host:port
+ """
+ userinfo = self.auth
+ netloc = self.netloc
+ if netloc is None or userinfo is None:
+ return netloc
+ else:
+ return f"{userinfo}@{netloc}"
+
+ @property
+ def netloc(self) -> str | None:
+ """
+ Network location including host and port.
+
+ If you need the equivalent of urllib.parse's ``netloc``,
+ use the ``authority`` property instead.
+ """
+ if self.host is None:
+ return None
+ if self.port:
+ return f"{self.host}:{self.port}"
+ return self.host
+
+ @property
+ def url(self) -> str:
+ """
+ Convert self into a url
+
+ This function should more or less round-trip with :func:`.parse_url`. The
+ returned url may not be exactly the same as the url inputted to
+ :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
+ with a blank port will have : removed).
+
+ Example:
+
+ .. code-block:: python
+
+ import urllib3
+
+ U = urllib3.util.parse_url("https://google.com/mail/")
+
+ print(U.url)
+ # "https://google.com/mail/"
+
+ print( urllib3.util.Url("https", "username:password",
+ "host.com", 80, "/path", "query", "fragment"
+ ).url
+ )
+ # "https://username:password@host.com:80/path?query#fragment"
+ """
+ scheme, auth, host, port, path, query, fragment = self
+ url = ""
+
+            # We use "is not None" because we want things to happen with empty strings (or 0 port)
+ if scheme is not None:
+ url += scheme + "://"
+ if auth is not None:
+ url += auth + "@"
+ if host is not None:
+ url += host
+ if port is not None:
+ url += ":" + str(port)
+ if path is not None:
+ url += path
+ if query is not None:
+ url += "?" + query
+ if fragment is not None:
+ url += "#" + fragment
+
+ return url
+
+ def __str__(self) -> str:
+ return self.url
+
+ @typing.overload
+ def _encode_invalid_chars(component: str, allowed_chars: typing.Container[str]) -> str: # Abstract
+ ...
+
+ @typing.overload
+ def _encode_invalid_chars(component: None, allowed_chars: typing.Container[str]) -> None: # Abstract
+ ...
+
+ def _encode_invalid_chars(component: str | None, allowed_chars: typing.Container[str]) -> str | None:
+ """Percent-encodes a URI component without reapplying
+ onto an already percent-encoded component.
+ """
+ if component is None:
+ return component
+
+ component = to_str(component)
+
+ # Normalize existing percent-encoded bytes.
+ # Try to see if the component we're encoding is already percent-encoded
+ # so we can skip all '%' characters but still encode all others.
+ component, percent_encodings = _PERCENT_RE.subn(lambda match: match.group(0).upper(), component)
+
+ uri_bytes = component.encode("utf-8", "surrogatepass")
+ is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
+ encoded_component = bytearray()
+
+ for i in range(0, len(uri_bytes)):
+ # Will return a single character bytestring
+ byte = uri_bytes[i : i + 1]
+ byte_ord = ord(byte)
+ if (is_percent_encoded and byte == b"%") or (byte_ord < 128 and byte.decode() in allowed_chars):
+ encoded_component += byte
+ continue
+ encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))
+
+ return encoded_component.decode()
+
+ def _remove_path_dot_segments(path: str) -> str:
+ # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
+ segments = path.split("/") # Turn the path into a list of segments
+ output = [] # Initialize the variable to use to store output
+
+ for segment in segments:
+ # '.' is the current directory, so ignore it, it is superfluous
+ if segment == ".":
+ continue
+ # Anything other than '..', should be appended to the output
+ if segment != "..":
+ output.append(segment)
+ # In this case segment == '..', if we can, we should pop the last
+ # element
+ elif output:
+ output.pop()
+
+ # If the path starts with '/' and the output is empty or the first string
+ # is non-empty
+ if path.startswith("/") and (not output or output[0]):
+ output.insert(0, "")
+
+ # If the path starts with '/.' or '/..' ensure we add one more empty
+ # string to add a trailing '/'
+ if path.endswith(("/.", "/..")):
+ output.append("")
+
+ return "/".join(output)
+
+ @typing.overload
+ def _normalize_host(host: None, scheme: str | None) -> None: ...
+
+ @typing.overload
+ def _normalize_host(host: str, scheme: str | None) -> str: ...
+
+ def _normalize_host(host: str | None, scheme: str | None) -> str | None:
+ if host:
+ if scheme in _NORMALIZABLE_SCHEMES:
+ is_ipv6 = _IPV6_ADDRZ_RE.match(host)
+ if is_ipv6:
+ # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as
+ # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID
+ # separator as necessary to return a valid RFC 4007 scoped IP.
+ match = _ZONE_ID_RE.search(host)
+ if match:
+ start, end = match.span(1)
+ zone_id = host[start:end]
+
+ if zone_id.startswith("%25") and zone_id != "%25":
+ zone_id = zone_id[3:]
+ else:
+ zone_id = zone_id[1:]
+ zone_id = _encode_invalid_chars(zone_id, _UNRESERVED_CHARS)
+ return f"{host[:start].lower()}%{zone_id}{host[end:]}"
+ else:
+ return host.lower()
+ elif not _IPV4_RE.match(host):
+ return to_str(
+ b".".join([_idna_encode(label) for label in host.split(".")]),
+ "ascii",
+ )
+ return host
+
+ def _idna_encode(name: str) -> bytes:
+ if not name.isascii():
+ try:
+ import idna
+ except ImportError:
+ raise LocationParseError("Unable to parse URL without the 'idna' module") from None
+
+ try:
+ return idna.encode(name.lower(), strict=True, std3_rules=True)
+ except idna.IDNAError:
+ raise LocationParseError(f"Name '{name}' is not a valid IDNA label") from None
+
+ return name.lower().encode("ascii")
+
+ def _encode_target(target: str) -> str:
+ """Percent-encodes a request target so that there are no invalid characters
+
+ Pre-condition for this function is that 'target' must start with '/'.
+ If that is the case then _TARGET_RE will always produce a match.
+ """
+ match = _TARGET_RE.match(target)
+ if not match: # Defensive:
+ raise LocationParseError(f"{target!r} is not a valid request URI")
+
+ path, query = match.groups()
+ encoded_target = _encode_invalid_chars(path, _PATH_CHARS)
+ if query is not None:
+ query = _encode_invalid_chars(query, _QUERY_CHARS)
+ encoded_target += "?" + query
+ return encoded_target
+
+ def parse_url(url: str) -> Url:
+ """
+ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+ performed to parse incomplete urls. Fields not provided will be None.
+ This parser is RFC 3986 and RFC 6874 compliant.
+
+ The parser logic and helper functions are based heavily on
+ work done in the ``rfc3986`` module.
+
+ :param str url: URL to parse into a :class:`.Url` namedtuple.
+
+ Partly backwards-compatible with :mod:`urllib.parse`.
+
+ Example:
+
+ .. code-block:: python
+
+ import urllib3
+
+ print( urllib3.util.parse_url('http://google.com/mail/'))
+ # Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+
+ print( urllib3.util.parse_url('google.com:80'))
+ # Url(scheme=None, host='google.com', port=80, path=None, ...)
+
+ print( urllib3.util.parse_url('/foo?bar'))
+ # Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+ """
+ if not url:
+ # Empty
+ return Url()
+
+ source_url = url
+ if not _SCHEME_RE.search(url):
+ url = "//" + url
+
+ scheme: str | None
+ authority: str | None
+ auth: str | None
+ host: str | None
+ port: str | None
+ port_int: int | None
+ path: str | None
+ query: str | None
+ fragment: str | None
+
+ try:
+ scheme, authority, path, query, fragment = _URI_RE.match(url).groups() # type: ignore[union-attr]
+ normalize_uri = scheme is None or scheme.lower() in _NORMALIZABLE_SCHEMES
+
+ if scheme:
+ scheme = scheme.lower()
+
+ if authority:
+ auth, _, host_port = authority.rpartition("@")
+ auth = auth or None
+ host, port = _HOST_PORT_RE.match(host_port).groups() # type: ignore[union-attr]
+ if auth and normalize_uri:
+ auth = _encode_invalid_chars(auth, _USERINFO_CHARS)
+ if port == "":
+ port = None
+ else:
+ auth, host, port = None, None, None
+
+ if port is not None:
+ port_int = int(port)
+ if not (0 <= port_int <= 65535):
+ raise LocationParseError(url)
+ else:
+ port_int = None
+
+ host = _normalize_host(host, scheme)
+
+ if normalize_uri and path:
+ path = _remove_path_dot_segments(path)
+ path = _encode_invalid_chars(path, _PATH_CHARS)
+ if normalize_uri and query:
+ query = _encode_invalid_chars(query, _QUERY_CHARS)
+ if normalize_uri and fragment:
+ fragment = _encode_invalid_chars(fragment, _FRAGMENT_CHARS)
+
+ except (ValueError, AttributeError) as e:
+ raise LocationParseError(source_url) from e
+
+ # For the sake of backwards compatibility we put empty
+ # string values for path if there are any defined values
+ # beyond the path in the URL.
+ # TODO: Remove this when we break backwards compatibility.
+ if not path:
+ if query is not None or fragment is not None:
+ path = ""
+ else:
+ path = None
+
+ return Url(
+ scheme=scheme,
+ auth=auth,
+ host=host,
+ port=port_int,
+ path=path,
+ query=query,
+ fragment=fragment,
+ )
+
+
+__all__ = ("Url", "parse_url", "HTTPError", "LocationParseError", "LocationValueError")
diff --git a/comicapi/genericmetadata.py b/comicapi/genericmetadata.py
index d5adb8e..74b0333 100644
--- a/comicapi/genericmetadata.py
+++ b/comicapi/genericmetadata.py
@@ -31,6 +31,8 @@ from typing_extensions import NamedTuple, Required
from comicapi import utils
+from ._url import Url, parse_url
+
logger = logging.getLogger(__name__)
@@ -133,7 +135,7 @@ class GenericMetadata:
year: int | None = None
language: str | None = None # 2 letter iso code
country: str | None = None
- web_link: str | None = None
+ web_links: list[Url] = dataclasses.field(default_factory=list)
format: str | None = None
manga: str | None = None
black_and_white: bool | None = None
@@ -253,7 +255,7 @@ class GenericMetadata:
assign("year", new_md.year)
assign("language", new_md.language)
assign("country", new_md.country)
- assign("web_link", new_md.web_link)
+ assign("web_links", new_md.web_links)
assign("format", new_md.format)
assign("manga", new_md.manga)
assign("black_and_white", new_md.black_and_white)
@@ -487,7 +489,9 @@ md_test: GenericMetadata = GenericMetadata(
alternate_count=7,
imprint="craphound.com",
notes="Tagged with ComicTagger 1.3.2a5 using info from Comic Vine on 2022-04-16 15:52:26. [Issue ID 140529]",
- web_link="https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/",
+ web_links=[
+ parse_url("https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/")
+ ],
format="Series",
manga="No",
black_and_white=None,
@@ -551,3 +555,15 @@ md_test: GenericMetadata = GenericMetadata(
last_mark=None,
_cover_image=None,
)
+
+
+__all__ = (
+ "Url",
+ "parse_url",
+ "PageType",
+ "ImageMetadata",
+ "Credit",
+ "ComicSeries",
+ "TagOrigin",
+ "GenericMetadata",
+)
diff --git a/comicapi/metadata/comicrack.py b/comicapi/metadata/comicrack.py
index b9782d6..1b02826 100644
--- a/comicapi/metadata/comicrack.py
+++ b/comicapi/metadata/comicrack.py
@@ -57,7 +57,7 @@ class ComicRack(Metadata):
"month",
"year",
"language",
- "web_link",
+ "web_links",
"format",
"manga",
"black_and_white",
@@ -229,7 +229,7 @@ class ComicRack(Metadata):
assign("Month", md.month)
assign("Year", md.year)
assign("LanguageISO", md.language)
- assign("Web", md.web_link)
+ assign("Web", " ".join(u.url for u in md.web_links))
assign("Format", md.format)
assign("Manga", md.manga)
assign("BlackAndWhite", "Yes" if md.black_and_white else None)
@@ -313,7 +313,7 @@ class ComicRack(Metadata):
md.month = utils.xlate_int(get("Month"))
md.year = utils.xlate_int(get("Year"))
md.language = utils.xlate(get("LanguageISO"))
- md.web_link = utils.xlate(get("Web"))
+ md.web_links = utils.split_urls(utils.xlate(get("Web")))
md.format = utils.xlate(get("Format"))
md.manga = utils.xlate(get("Manga"))
md.maturity_rating = utils.xlate(get("AgeRating"))
diff --git a/comicapi/utils.py b/comicapi/utils.py
index 4af4f48..b7fdb4d 100644
--- a/comicapi/utils.py
+++ b/comicapi/utils.py
@@ -33,6 +33,8 @@ from comicfn2dict import comicfn2dict
import comicapi.data
from comicapi import filenamelexer, filenameparser
+from ._url import Url, parse_url
+
try:
import icu
@@ -370,6 +372,24 @@ def split(s: str | None, c: str) -> list[str]:
return []
+def split_urls(s: str | None) -> list[Url]:
+ if s is None:
+ return []
+    # Find occurrences of ' http'
+ if s.count("http") > 1 and s.count(" http") >= 1:
+ urls = []
+ # Split urls out
+ url_strings = split(s, " http")
+ # Return the scheme 'http' and parse the url
+ for i, url_string in enumerate(url_strings):
+ if not url_string.startswith("http"):
+ url_string = "http" + url_string
+ urls.append(parse_url(url_string))
+ return urls
+ else:
+ return [parse_url(s)]
+
+
def remove_articles(text: str) -> str:
text = text.casefold()
articles = [
diff --git a/comictaggerlib/cbltransformer.py b/comictaggerlib/cbltransformer.py
index f5674ef..0fe3256 100644
--- a/comictaggerlib/cbltransformer.py
+++ b/comictaggerlib/cbltransformer.py
@@ -78,12 +78,13 @@ class CBLTransformer:
self.metadata.description += self.metadata.notes
if self.config.Comic_Book_Lover__copy_weblink_to_comments:
- if self.metadata.web_link is not None:
- if self.metadata.description is None:
- self.metadata.description = ""
+ for web_link in self.metadata.web_links:
+ temp_desc = self.metadata.description
+ if temp_desc is None:
+ temp_desc = ""
else:
- self.metadata.description += "\n\n"
- if self.metadata.web_link not in self.metadata.description:
- self.metadata.description += self.metadata.web_link
+ temp_desc += "\n\n"
+ if web_link.url and web_link.url not in temp_desc:
+ self.metadata.description = temp_desc + web_link.url
return self.metadata
diff --git a/comictaggerlib/filerenamer.py b/comictaggerlib/filerenamer.py
index ad52062..1760e19 100644
--- a/comictaggerlib/filerenamer.py
+++ b/comictaggerlib/filerenamer.py
@@ -69,7 +69,7 @@ class MetadataFormatter(string.Formatter):
if conversion == "t":
return str(value).title()
if conversion == "j":
- return ", ".join(list(value))
+ return ", ".join(list(str(v) for v in value))
return cast(str, super().convert_field(value, conversion))
def handle_replacements(self, string: str, replacements: list[Replacement]) -> str:
@@ -218,6 +218,10 @@ class FileRenamer:
fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform, replacements=self.replacements)
md_dict = vars(md)
+ md_dict["web_link"] = ""
+ if md.web_links:
+ md_dict["web_link"] = md.web_links[0]
+
md_dict["issue"] = IssueString(md.issue).as_string(pad=self.issue_zero_padding)
for role in ["writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor"]:
md_dict[role] = md.get_primary_credit(role)
diff --git a/comictaggerlib/taggerwindow.py b/comictaggerlib/taggerwindow.py
index ca968c4..a74e95a 100644
--- a/comictaggerlib/taggerwindow.py
+++ b/comictaggerlib/taggerwindow.py
@@ -27,11 +27,12 @@ import sys
import webbrowser
from datetime import datetime
from typing import Any, Callable
-from urllib.parse import urlparse
import natsort
import settngs
+import urllib3.util
from PyQt5 import QtCore, QtGui, QtNetwork, QtWidgets, uic
+from urllib3.util.url import LocationParseError
import comictaggerlib.ui
from comicapi import utils
@@ -112,7 +113,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
"alternate_count": self.leAltIssueCount,
"imprint": self.leImprint,
"notes": self.teNotes,
- "web_link": self.leWebLink,
+ "web_links": (self.leWebLink, self.btnOpenWebLink, self.btnAddWebLink, self.btnRemoveWebLink),
"format": self.cbFormat,
"manga": self.cbManga,
"black_and_white": self.cbBW,
@@ -124,7 +125,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
"characters": self.teCharacters,
"teams": self.teTeams,
"locations": self.teLocations,
- "credits": [self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit],
+ "credits": (self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit),
"credits.person": 2,
"credits.role": 1,
"credits.primary": 0,
@@ -532,6 +533,31 @@ class TaggerWindow(QtWidgets.QMainWindow):
self.toolBar.addAction(self.actionPageBrowser)
self.toolBar.addAction(self.actionAutoImprint)
+ self.leWebLink.addAction(self.actionAddWebLink)
+ self.leWebLink.addAction(self.actionRemoveWebLink)
+
+ self.actionAddWebLink.triggered.connect(self.add_weblink_item)
+ self.actionRemoveWebLink.triggered.connect(self.remove_weblink_item)
+
+ def add_weblink_item(self, url: str = "") -> None:
+ item = ""
+ if isinstance(url, str):
+ item = url
+ self.leWebLink.addItem(item)
+ self.leWebLink.item(self.leWebLink.count() - 1).setFlags(
+ QtCore.Qt.ItemFlag.ItemIsEditable
+ | QtCore.Qt.ItemFlag.ItemIsEnabled
+ | QtCore.Qt.ItemFlag.ItemIsDragEnabled
+ | QtCore.Qt.ItemFlag.ItemIsSelectable
+ )
+ self.leWebLink.item(self.leWebLink.count() - 1).setSelected(True)
+ if not url:
+ self.leWebLink.editItem(self.leWebLink.item(self.leWebLink.count() - 1))
+
+ def remove_weblink_item(self) -> None:
+ item = self.leWebLink.takeItem(self.leWebLink.currentRow())
+ del item
+
def repackage_archive(self) -> None:
ca_list = self.fileSelectionList.get_selected_archive_list()
non_zip_count = 0
@@ -784,6 +810,8 @@ class TaggerWindow(QtWidgets.QMainWindow):
widget.currentIndexChanged.connect(self.set_dirty_flag)
if isinstance(widget, QtWidgets.QCheckBox):
widget.stateChanged.connect(self.set_dirty_flag)
+ if isinstance(widget, QtWidgets.QListWidget):
+ widget.itemChanged.connect(self.set_dirty_flag)
# recursive call on children
for child in widget.children():
@@ -844,7 +872,9 @@ class TaggerWindow(QtWidgets.QMainWindow):
assign_text(self.leAltSeries, md.alternate_series)
assign_text(self.leAltIssueNum, md.alternate_number)
assign_text(self.leAltIssueCount, md.alternate_count)
- assign_text(self.leWebLink, md.web_link)
+ self.leWebLink.clear()
+ for u in md.web_links:
+ self.add_weblink_item(u.url)
assign_text(self.teCharacters, "\n".join(md.characters))
assign_text(self.teTeams, "\n".join(md.teams))
assign_text(self.teLocations, "\n".join(md.locations))
@@ -967,7 +997,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
md.scan_info = utils.xlate(self.leScanInfo.text())
md.series_groups = utils.split(self.leSeriesGroup.text(), ",")
md.alternate_series = self.leAltSeries.text()
- md.web_link = utils.xlate(self.leWebLink.text())
+ md.web_links = [urllib3.util.parse_url(self.leWebLink.item(i).text()) for i in range(self.leWebLink.count())]
md.characters = set(utils.split(self.teCharacters.toPlainText(), "\n"))
md.teams = set(utils.split(self.teTeams.toPlainText(), "\n"))
md.locations = set(utils.split(self.teLocations.toPlainText(), "\n"))
@@ -1343,14 +1373,17 @@ class TaggerWindow(QtWidgets.QMainWindow):
self.set_dirty_flag()
def open_web_link(self) -> None:
- if self.leWebLink is not None:
- web_link = self.leWebLink.text().strip()
- try:
- result = urlparse(web_link)
- all([result.scheme in ["http", "https"], result.netloc])
- webbrowser.open_new_tab(web_link)
- except ValueError:
- QtWidgets.QMessageBox.warning(self, self.tr("Web Link"), self.tr("Web Link is invalid."))
+ row = self.leWebLink.currentRow()
+ if row < 0:
+ if self.leWebLink.count() < 1:
+ return
+ row = 0
+ web_link = self.leWebLink.item(row).text()
+ try:
+ urllib3.util.parse_url(web_link)
+ webbrowser.open_new_tab(web_link)
+ except LocationParseError:
+ QtWidgets.QMessageBox.warning(self, "Web Link", "Web Link is invalid.")
def show_settings(self) -> None:
settingswin = SettingsWindow(self, self.config, self.talkers)
diff --git a/comictaggerlib/ui/qtutils.py b/comictaggerlib/ui/qtutils.py
index 7104b9b..23d4e54 100644
--- a/comictaggerlib/ui/qtutils.py
+++ b/comictaggerlib/ui/qtutils.py
@@ -6,6 +6,7 @@ import io
import logging
import traceback
import webbrowser
+from collections.abc import Sequence
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QWidget
@@ -155,7 +156,7 @@ if qt_available:
active_palette = None
def enable_widget(widget: QtWidgets.QWidget | list[QtWidgets.QWidget], enable: bool) -> None:
- if isinstance(widget, list):
+ if isinstance(widget, Sequence):
for w in widget:
_enable_widget(w, enable)
else:
@@ -214,6 +215,8 @@ if qt_available:
widget.setReadOnly(True)
widget.setPalette(inactive_palette[0])
elif isinstance(widget, QtWidgets.QListWidget):
+ inactive_palette = palettes()
+ widget.setPalette(inactive_palette[0])
widget.setMovement(QtWidgets.QListWidget.Static)
def replaceWidget(
diff --git a/comictaggerlib/ui/taggerwindow.ui b/comictaggerlib/ui/taggerwindow.ui
index ad7fdbd..cdf9bd3 100644
--- a/comictaggerlib/ui/taggerwindow.ui
+++ b/comictaggerlib/ui/taggerwindow.ui
@@ -958,10 +958,32 @@
-
-
-
-
-
- false
+
-
+
+
+ Qt::ActionsContextMenu
+
+
+ true
+
+
+ QAbstractItemView::DropOnly
+
+
+ Qt::MoveAction
+
+
+ true
+
+
+ QAbstractItemView::SingleSelection
+
+
+
+ -
+
+
+ Delete Item
@@ -981,6 +1003,13 @@
+ -
+
+
+ Add Item
+
+
+
-
@@ -1187,7 +1216,7 @@
0
0
1096
- 28
+ 30
@@ -1487,5 +1526,38 @@
-
+
+
+ btnAddWebLink
+ clicked()
+ actionAddWebLink
+ trigger()
+
+
+ 900
+ 536
+
+
+ -1
+ -1
+
+
+
+
+ btnRemoveWebLink
+ clicked()
+ actionRemoveWebLink
+ trigger()
+
+
+ 900
+ 576
+
+
+ -1
+ -1
+
+
+
+
diff --git a/comictalker/talkers/comicvine.py b/comictalker/talkers/comicvine.py
index 013adef..0a666f5 100644
--- a/comictalker/talkers/comicvine.py
+++ b/comictalker/talkers/comicvine.py
@@ -29,6 +29,8 @@ import requests
import settngs
from pyrate_limiter import Limiter, RequestRate
from typing_extensions import Required, TypedDict
+from urllib3.exceptions import LocationParseError
+from urllib3.util import parse_url
from comicapi import utils
from comicapi.genericmetadata import ComicSeries, GenericMetadata, TagOrigin
@@ -643,10 +645,15 @@ class ComicVineTalker(ComicTalker):
format=utils.xlate(series.format),
volume_count=utils.xlate_int(series.count_of_volumes),
title=utils.xlate(issue.get("name")),
- web_link=utils.xlate(issue.get("site_detail_url")),
series=utils.xlate(series.name),
series_aliases=series.aliases,
)
+ url = utils.xlate(issue.get("site_detail_url"))
+ if url:
+ try:
+ md.web_links = [parse_url(url)]
+ except LocationParseError:
+ ...
if issue.get("image") is None:
md._cover_image = ""
else:
diff --git a/testing/comicvine.py b/testing/comicvine.py
index e7265b1..72acfc6 100644
--- a/testing/comicvine.py
+++ b/testing/comicvine.py
@@ -185,7 +185,7 @@ comic_issue_result = comicapi.genericmetadata.GenericMetadata(
issue=cv_issue_result["results"]["issue_number"],
volume=None,
title=cv_issue_result["results"]["name"],
- web_link=cv_issue_result["results"]["site_detail_url"],
+ web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])],
)
cv_md = comicapi.genericmetadata.GenericMetadata(
@@ -213,7 +213,7 @@ cv_md = comicapi.genericmetadata.GenericMetadata(
alternate_count=None,
imprint=None,
notes=None,
- web_link=cv_issue_result["results"]["site_detail_url"],
+ web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])],
format=None,
manga=None,
black_and_white=None,
diff --git a/testing/filenames.py b/testing/filenames.py
index ad0eb4e..399f2a8 100644
--- a/testing/filenames.py
+++ b/testing/filenames.py
@@ -1152,6 +1152,13 @@ rnames = [
"Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
does_not_raise(),
),
+ (
+ "{title} {web_links!j}", # Test that join forces str conversion
+ False,
+ "Linux",
+ "Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
+ does_not_raise(),
+ ),
(
"{series}:{title} #{issue} ({year})", # on windows the ':' is replaced
False,