Merge branch 'web-links' into develop

commit 3df263858d

comicapi/_url.py (new file, 468 lines)

@@ -0,0 +1,468 @@
# mypy: disable-error-code="no-redef"
from __future__ import annotations

try:
    from urllib3.exceptions import HTTPError, LocationParseError, LocationValueError
    from urllib3.util import Url, parse_url
except ImportError:

    import re
    import typing

    class HTTPError(Exception):
        """Base exception used by this module."""

    class LocationValueError(ValueError, HTTPError):
        """Raised when there is something wrong with a given URL input."""

    class LocationParseError(LocationValueError):
        """Raised when get_host or similar fails to parse the URL input."""

        def __init__(self, location: str) -> None:
            message = f"Failed to parse: {location}"
            super().__init__(message)

            self.location = location

    def to_str(x: str | bytes, encoding: str | None = None, errors: str | None = None) -> str:
        if isinstance(x, str):
            return x
        elif not isinstance(x, bytes):
            raise TypeError(f"not expecting type {type(x).__name__}")
        if encoding or errors:
            return x.decode(encoding or "utf-8", errors=errors or "strict")
        return x.decode()

    # We only want to normalize urls with an HTTP(S) scheme.
    # urllib3 infers URLs without a scheme (None) to be http.
    _NORMALIZABLE_SCHEMES = ("http", "https", None)

    # Almost all of these patterns were derived from the
    # 'rfc3986' module: https://github.com/python-hyper/rfc3986
    _PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
    _SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
    _URI_RE = re.compile(
        r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?" r"(?://([^\\/?#]*))?" r"([^?#]*)" r"(?:\?([^#]*))?" r"(?:#(.*))?$",
        re.UNICODE | re.DOTALL,
    )

    _IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
    _HEX_PAT = "[0-9A-Fa-f]{1,4}"
    _LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=_HEX_PAT, ipv4=_IPV4_PAT)
    _subs = {"hex": _HEX_PAT, "ls32": _LS32_PAT}
    _variations = [
        # 6( h16 ":" ) ls32
        "(?:%(hex)s:){6}%(ls32)s",
        # "::" 5( h16 ":" ) ls32
        "::(?:%(hex)s:){5}%(ls32)s",
        # [ h16 ] "::" 4( h16 ":" ) ls32
        "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
        # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
        "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
        # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
        "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
        # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
        "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
        # [ *4( h16 ":" ) h16 ] "::" ls32
        "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
        # [ *5( h16 ":" ) h16 ] "::" h16
        "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
        # [ *6( h16 ":" ) h16 ] "::"
        "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
    ]

    _UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
    _IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
    _ZONE_ID_PAT = "(?:%25|%)(?:[" + _UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
    _IPV6_ADDRZ_PAT = r"\[" + _IPV6_PAT + r"(?:" + _ZONE_ID_PAT + r")?\]"
    _REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
    _TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")

    _IPV4_RE = re.compile("^" + _IPV4_PAT + "$")
    _IPV6_RE = re.compile("^" + _IPV6_PAT + "$")
    _IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT + "$")
    _BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT[2:-2] + "$")
    _ZONE_ID_RE = re.compile("(" + _ZONE_ID_PAT + r")\]$")

    _HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
        _REG_NAME_PAT,
        _IPV4_PAT,
        _IPV6_ADDRZ_PAT,
    )
    _HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)

    _UNRESERVED_CHARS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~")
    _SUB_DELIM_CHARS = set("!$&'()*+,;=")
    _USERINFO_CHARS = _UNRESERVED_CHARS | _SUB_DELIM_CHARS | {":"}
    _PATH_CHARS = _USERINFO_CHARS | {"@", "/"}
    _QUERY_CHARS = _FRAGMENT_CHARS = _PATH_CHARS | {"?"}

    class Url(
        typing.NamedTuple(
            "Url",
            [
                ("scheme", typing.Optional[str]),
                ("auth", typing.Optional[str]),
                ("host", typing.Optional[str]),
                ("port", typing.Optional[int]),
                ("path", typing.Optional[str]),
                ("query", typing.Optional[str]),
                ("fragment", typing.Optional[str]),
            ],
        )
    ):
        """
        Data structure for representing an HTTP URL. Used as a return value for
        :func:`parse_url`. Both the scheme and host are normalized as they are
        both case-insensitive according to RFC 3986.
        """

        def __new__(  # type: ignore[no-untyped-def]
            cls,
            scheme: str | None = None,
            auth: str | None = None,
            host: str | None = None,
            port: int | None = None,
            path: str | None = None,
            query: str | None = None,
            fragment: str | None = None,
        ):
            if path and not path.startswith("/"):
                path = "/" + path
            if scheme is not None:
                scheme = scheme.lower()
            return super().__new__(cls, scheme, auth, host, port, path, query, fragment)

        @property
        def hostname(self) -> str | None:
            """For backwards-compatibility with urlparse. We're nice like that."""
            return self.host

        @property
        def request_uri(self) -> str:
            """Absolute path including the query string."""
            uri = self.path or "/"

            if self.query is not None:
                uri += "?" + self.query

            return uri

        @property
        def authority(self) -> str | None:
            """
            Authority component as defined in RFC 3986 3.2.
            This includes userinfo (auth), host and port.

            i.e.
                userinfo@host:port
            """
            userinfo = self.auth
            netloc = self.netloc
            if netloc is None or userinfo is None:
                return netloc
            else:
                return f"{userinfo}@{netloc}"

        @property
        def netloc(self) -> str | None:
            """
            Network location including host and port.

            If you need the equivalent of urllib.parse's ``netloc``,
            use the ``authority`` property instead.
            """
            if self.host is None:
                return None
            if self.port:
                return f"{self.host}:{self.port}"
            return self.host

        @property
        def url(self) -> str:
            """
            Convert self into a url

            This function should more or less round-trip with :func:`.parse_url`. The
            returned url may not be exactly the same as the url inputted to
            :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
            with a blank port will have : removed).

            Example:

            .. code-block:: python

                import urllib3

                U = urllib3.util.parse_url("https://google.com/mail/")

                print(U.url)
                # "https://google.com/mail/"

                print( urllib3.util.Url("https", "username:password",
                                        "host.com", 80, "/path", "query", "fragment"
                                        ).url
                    )
                # "https://username:password@host.com:80/path?query#fragment"
            """
            scheme, auth, host, port, path, query, fragment = self
            url = ""

            # We use "is not None" we want things to happen with empty strings (or 0 port)
            if scheme is not None:
                url += scheme + "://"
            if auth is not None:
                url += auth + "@"
            if host is not None:
                url += host
            if port is not None:
                url += ":" + str(port)
            if path is not None:
                url += path
            if query is not None:
                url += "?" + query
            if fragment is not None:
                url += "#" + fragment

            return url

        def __str__(self) -> str:
            return self.url

    @typing.overload
    def _encode_invalid_chars(component: str, allowed_chars: typing.Container[str]) -> str:  # Abstract
        ...

    @typing.overload
    def _encode_invalid_chars(component: None, allowed_chars: typing.Container[str]) -> None:  # Abstract
        ...

    def _encode_invalid_chars(component: str | None, allowed_chars: typing.Container[str]) -> str | None:
        """Percent-encodes a URI component without reapplying
        onto an already percent-encoded component.
        """
        if component is None:
            return component

        component = to_str(component)

        # Normalize existing percent-encoded bytes.
        # Try to see if the component we're encoding is already percent-encoded
        # so we can skip all '%' characters but still encode all others.
        component, percent_encodings = _PERCENT_RE.subn(lambda match: match.group(0).upper(), component)

        uri_bytes = component.encode("utf-8", "surrogatepass")
        is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
        encoded_component = bytearray()

        for i in range(0, len(uri_bytes)):
            # Will return a single character bytestring
            byte = uri_bytes[i : i + 1]
            byte_ord = ord(byte)
            if (is_percent_encoded and byte == b"%") or (byte_ord < 128 and byte.decode() in allowed_chars):
                encoded_component += byte
                continue
            encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))

        return encoded_component.decode()

    def _remove_path_dot_segments(path: str) -> str:
        # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
        segments = path.split("/")  # Turn the path into a list of segments
        output = []  # Initialize the variable to use to store output

        for segment in segments:
            # '.' is the current directory, so ignore it, it is superfluous
            if segment == ".":
                continue
            # Anything other than '..', should be appended to the output
            if segment != "..":
                output.append(segment)
            # In this case segment == '..', if we can, we should pop the last
            # element
            elif output:
                output.pop()

        # If the path starts with '/' and the output is empty or the first string
        # is non-empty
        if path.startswith("/") and (not output or output[0]):
            output.insert(0, "")

        # If the path starts with '/.' or '/..' ensure we add one more empty
        # string to add a trailing '/'
        if path.endswith(("/.", "/..")):
            output.append("")

        return "/".join(output)

    @typing.overload
    def _normalize_host(host: None, scheme: str | None) -> None: ...

    @typing.overload
    def _normalize_host(host: str, scheme: str | None) -> str: ...

    def _normalize_host(host: str | None, scheme: str | None) -> str | None:
        if host:
            if scheme in _NORMALIZABLE_SCHEMES:
                is_ipv6 = _IPV6_ADDRZ_RE.match(host)
                if is_ipv6:
                    # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as
                    # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID
                    # separator as necessary to return a valid RFC 4007 scoped IP.
                    match = _ZONE_ID_RE.search(host)
                    if match:
                        start, end = match.span(1)
                        zone_id = host[start:end]

                        if zone_id.startswith("%25") and zone_id != "%25":
                            zone_id = zone_id[3:]
                        else:
                            zone_id = zone_id[1:]
                        zone_id = _encode_invalid_chars(zone_id, _UNRESERVED_CHARS)
                        return f"{host[:start].lower()}%{zone_id}{host[end:]}"
                    else:
                        return host.lower()
                elif not _IPV4_RE.match(host):
                    return to_str(
                        b".".join([_idna_encode(label) for label in host.split(".")]),
                        "ascii",
                    )
        return host

    def _idna_encode(name: str) -> bytes:
        if not name.isascii():
            try:
                import idna
            except ImportError:
                raise LocationParseError("Unable to parse URL without the 'idna' module") from None

            try:
                return idna.encode(name.lower(), strict=True, std3_rules=True)
            except idna.IDNAError:
                raise LocationParseError(f"Name '{name}' is not a valid IDNA label") from None

        return name.lower().encode("ascii")

    def _encode_target(target: str) -> str:
        """Percent-encodes a request target so that there are no invalid characters

        Pre-condition for this function is that 'target' must start with '/'.
        If that is the case then _TARGET_RE will always produce a match.
        """
        match = _TARGET_RE.match(target)
        if not match:  # Defensive:
            raise LocationParseError(f"{target!r} is not a valid request URI")

        path, query = match.groups()
        encoded_target = _encode_invalid_chars(path, _PATH_CHARS)
        if query is not None:
            query = _encode_invalid_chars(query, _QUERY_CHARS)
            encoded_target += "?" + query
        return encoded_target

    def parse_url(url: str) -> Url:
        """
        Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
        performed to parse incomplete urls. Fields not provided will be None.
        This parser is RFC 3986 and RFC 6874 compliant.

        The parser logic and helper functions are based heavily on
        work done in the ``rfc3986`` module.

        :param str url: URL to parse into a :class:`.Url` namedtuple.

        Partly backwards-compatible with :mod:`urllib.parse`.

        Example:

        .. code-block:: python

            import urllib3

            print( urllib3.util.parse_url('http://google.com/mail/'))
            # Url(scheme='http', host='google.com', port=None, path='/mail/', ...)

            print( urllib3.util.parse_url('google.com:80'))
            # Url(scheme=None, host='google.com', port=80, path=None, ...)

            print( urllib3.util.parse_url('/foo?bar'))
            # Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
        """
        if not url:
            # Empty
            return Url()

        source_url = url
        if not _SCHEME_RE.search(url):
            url = "//" + url

        scheme: str | None
        authority: str | None
        auth: str | None
        host: str | None
        port: str | None
        port_int: int | None
        path: str | None
        query: str | None
        fragment: str | None

        try:
            scheme, authority, path, query, fragment = _URI_RE.match(url).groups()  # type: ignore[union-attr]
            normalize_uri = scheme is None or scheme.lower() in _NORMALIZABLE_SCHEMES

            if scheme:
                scheme = scheme.lower()

            if authority:
                auth, _, host_port = authority.rpartition("@")
                auth = auth or None
                host, port = _HOST_PORT_RE.match(host_port).groups()  # type: ignore[union-attr]
                if auth and normalize_uri:
                    auth = _encode_invalid_chars(auth, _USERINFO_CHARS)
                if port == "":
                    port = None
            else:
                auth, host, port = None, None, None

            if port is not None:
                port_int = int(port)
                if not (0 <= port_int <= 65535):
                    raise LocationParseError(url)
            else:
                port_int = None

            host = _normalize_host(host, scheme)

            if normalize_uri and path:
                path = _remove_path_dot_segments(path)
                path = _encode_invalid_chars(path, _PATH_CHARS)
            if normalize_uri and query:
                query = _encode_invalid_chars(query, _QUERY_CHARS)
            if normalize_uri and fragment:
                fragment = _encode_invalid_chars(fragment, _FRAGMENT_CHARS)

        except (ValueError, AttributeError) as e:
            raise LocationParseError(source_url) from e

        # For the sake of backwards compatibility we put empty
        # string values for path if there are any defined values
        # beyond the path in the URL.
        # TODO: Remove this when we break backwards compatibility.
        if not path:
            if query is not None or fragment is not None:
                path = ""
            else:
                path = None

        return Url(
            scheme=scheme,
            auth=auth,
            host=host,
            port=port_int,
            path=path,
            query=query,
            fragment=fragment,
        )


__all__ = ("Url", "parse_url", "HTTPError", "LocationParseError", "LocationValueError")
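A quick sketch (not part of the commit) of what the new module provides: urllib3's names when urllib3 is installed, otherwise the vendored fallback. It assumes comicapi is importable; the URL is a hypothetical example.

# Scheme and host are case-insensitive per RFC 3986, so both get lowercased.
from comicapi._url import parse_url

u = parse_url("HTTPS://ComicVine.GameSpot.com:443/issue/?id=140529#top")
print(u.scheme, u.host, u.port)  # https comicvine.gamespot.com 443
print(u.url)                     # https://comicvine.gamespot.com:443/issue/?id=140529#top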
@@ -31,6 +31,8 @@ from typing_extensions import NamedTuple, Required
 
 from comicapi import utils
 
+from ._url import Url, parse_url
+
 logger = logging.getLogger(__name__)
 
 
@@ -133,7 +135,7 @@ class GenericMetadata:
     year: int | None = None
     language: str | None = None  # 2 letter iso code
     country: str | None = None
-    web_link: str | None = None
+    web_links: list[Url] = dataclasses.field(default_factory=list)
     format: str | None = None
     manga: str | None = None
     black_and_white: bool | None = None
@@ -253,7 +255,7 @@ class GenericMetadata:
         assign("year", new_md.year)
         assign("language", new_md.language)
         assign("country", new_md.country)
-        assign("web_link", new_md.web_link)
+        assign("web_links", new_md.web_links)
         assign("format", new_md.format)
         assign("manga", new_md.manga)
         assign("black_and_white", new_md.black_and_white)
@@ -487,7 +489,9 @@ md_test: GenericMetadata = GenericMetadata(
     alternate_count=7,
     imprint="craphound.com",
     notes="Tagged with ComicTagger 1.3.2a5 using info from Comic Vine on 2022-04-16 15:52:26. [Issue ID 140529]",
-    web_link="https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/",
+    web_links=[
+        parse_url("https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/")
+    ],
     format="Series",
     manga="No",
     black_and_white=None,
@@ -551,3 +555,15 @@ md_test: GenericMetadata = GenericMetadata(
     last_mark=None,
     _cover_image=None,
 )
+
+
+__all__ = (
+    "Url",
+    "parse_url",
+    "PageType",
+    "ImageMetadata",
+    "Credit",
+    "ComicSeries",
+    "TagOrigin",
+    "GenericMetadata",
+)
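A hedged sketch of the new field in use (not part of the commit); it relies only on the dataclass defaults visible above and on the re-exported parse_url.

# Every other field keeps its default; web_links holds parsed Url tuples.
from comicapi.genericmetadata import GenericMetadata, parse_url

md = GenericMetadata(
    web_links=[parse_url("https://comicvine.gamespot.com/4000-140529/")],
)
print(md.web_links[0].host)  # comicvine.gamespot.com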
@@ -57,7 +57,7 @@ class ComicRack(Metadata):
         "month",
         "year",
         "language",
-        "web_link",
+        "web_links",
         "format",
         "manga",
         "black_and_white",
@@ -229,7 +229,7 @@ class ComicRack(Metadata):
         assign("Month", md.month)
         assign("Year", md.year)
         assign("LanguageISO", md.language)
-        assign("Web", md.web_link)
+        assign("Web", " ".join(u.url for u in md.web_links))
         assign("Format", md.format)
         assign("Manga", md.manga)
         assign("BlackAndWhite", "Yes" if md.black_and_white else None)
@@ -313,7 +313,7 @@ class ComicRack(Metadata):
         md.month = utils.xlate_int(get("Month"))
         md.year = utils.xlate_int(get("Year"))
         md.language = utils.xlate(get("LanguageISO"))
-        md.web_link = utils.xlate(get("Web"))
+        md.web_links = utils.split_urls(utils.xlate(get("Web")))
         md.format = utils.xlate(get("Format"))
         md.manga = utils.xlate(get("Manga"))
         md.maturity_rating = utils.xlate(get("AgeRating"))
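The two hunks above imply a round trip through ComicRack's single Web element: links are joined with spaces on write and recovered with split_urls on read. A sketch only, assuming comicapi is importable; the example.com links are hypothetical.

from comicapi import utils

links = utils.split_urls("https://example.com/a https://example.com/b")
web_value = " ".join(u.url for u in links)   # what assign("Web", ...) writes
print(utils.split_urls(web_value) == links)  # True: the round trip is lossless here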
@@ -33,6 +33,8 @@ from comicfn2dict import comicfn2dict
 import comicapi.data
 from comicapi import filenamelexer, filenameparser
 
+from ._url import Url, parse_url
+
 try:
     import icu
 
@@ -370,6 +372,24 @@ def split(s: str | None, c: str) -> list[str]:
     return []
 
 
+def split_urls(s: str | None) -> list[Url]:
+    if s is None:
+        return []
+    # Find occurrences of ' http'
+    if s.count("http") > 1 and s.count(" http") >= 1:
+        urls = []
+        # Split urls out
+        url_strings = split(s, " http")
+        # Restore the scheme 'http' and parse the url
+        for i, url_string in enumerate(url_strings):
+            if not url_string.startswith("http"):
+                url_string = "http" + url_string
+            urls.append(parse_url(url_string))
+        return urls
+    else:
+        return [parse_url(s)]
+
+
 def remove_articles(text: str) -> str:
     text = text.casefold()
     articles = [
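A short demonstration of both branches of split_urls (a sketch with hypothetical URLs): splitting on " http" strips the scheme from every URL after the first, which the loop above puts back before parsing.

from comicapi.utils import split_urls

print([u.url for u in split_urls("https://example.com/a https://example.com/b")])
# ['https://example.com/a', 'https://example.com/b']
print(len(split_urls("https://example.com/a")))
# 1: a single URL with no ' http' separator takes the else branch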
@@ -78,12 +78,13 @@ class CBLTransformer:
             self.metadata.description += self.metadata.notes
 
         if self.config.Comic_Book_Lover__copy_weblink_to_comments:
-            if self.metadata.web_link is not None:
-                if self.metadata.description is None:
-                    self.metadata.description = ""
+            for web_link in self.metadata.web_links:
+                temp_desc = self.metadata.description
+                if temp_desc is None:
+                    temp_desc = ""
                 else:
-                    self.metadata.description += "\n\n"
-                if self.metadata.web_link not in self.metadata.description:
-                    self.metadata.description += self.metadata.web_link
+                    temp_desc += "\n\n"
+                if web_link.url and web_link.url not in temp_desc:
+                    self.metadata.description = temp_desc + web_link.url
 
         return self.metadata
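To make the dedupe rule in the rewritten loop easier to see, here is the same logic restated as a standalone function (a sketch; copy_weblinks is a hypothetical name, not part of the commit): a link already present in the description is not appended a second time.

from comicapi._url import Url, parse_url

def copy_weblinks(description: str | None, web_links: list[Url]) -> str | None:
    for web_link in web_links:
        temp_desc = description
        if temp_desc is None:
            temp_desc = ""
        else:
            temp_desc += "\n\n"
        if web_link.url and web_link.url not in temp_desc:
            description = temp_desc + web_link.url
    return description

link = parse_url("https://example.com/issue/1")
print(copy_weblinks("A description", [link, link]))  # the URL is appended once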
@@ -69,7 +69,7 @@ class MetadataFormatter(string.Formatter):
         if conversion == "t":
             return str(value).title()
         if conversion == "j":
-            return ", ".join(list(value))
+            return ", ".join(list(str(v) for v in value))
         return cast(str, super().convert_field(value, conversion))
 
     def handle_replacements(self, string: str, replacements: list[Replacement]) -> str:
@@ -218,6 +218,10 @@ class FileRenamer:
 
         fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform, replacements=self.replacements)
         md_dict = vars(md)
+        md_dict["web_link"] = ""
+        if md.web_links:
+            md_dict["web_link"] = md.web_links[0]
+
         md_dict["issue"] = IssueString(md.issue).as_string(pad=self.issue_zero_padding)
         for role in ["writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor"]:
             md_dict[role] = md.get_primary_credit(role)
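Why the "!j" change matters: web_links now holds Url objects rather than strings, and str.join raises TypeError on non-str items, so the conversion coerces each item with str() (Url.__str__ returns .url). A standalone sketch with a minimal string.Formatter subclass; JoinFormatter is a hypothetical name.

import string

class JoinFormatter(string.Formatter):
    def convert_field(self, value, conversion):
        # Custom "!j" conversion: join any iterable, coercing items to str.
        if conversion == "j":
            return ", ".join(str(v) for v in value)
        return super().convert_field(value, conversion)

print(JoinFormatter().format("{links!j}", links=[1, 2, 3]))  # 1, 2, 3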
@@ -27,11 +27,12 @@ import sys
 import webbrowser
 from datetime import datetime
 from typing import Any, Callable
-from urllib.parse import urlparse
 
 import natsort
 import settngs
+import urllib3.util
 from PyQt5 import QtCore, QtGui, QtNetwork, QtWidgets, uic
+from urllib3.util.url import LocationParseError
 
 import comictaggerlib.ui
 from comicapi import utils
@@ -112,7 +113,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
             "alternate_count": self.leAltIssueCount,
             "imprint": self.leImprint,
             "notes": self.teNotes,
-            "web_link": self.leWebLink,
+            "web_links": (self.leWebLink, self.btnOpenWebLink, self.btnAddWebLink, self.btnRemoveWebLink),
             "format": self.cbFormat,
             "manga": self.cbManga,
             "black_and_white": self.cbBW,
@@ -124,7 +125,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
             "characters": self.teCharacters,
             "teams": self.teTeams,
             "locations": self.teLocations,
-            "credits": [self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit],
+            "credits": (self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit),
             "credits.person": 2,
             "credits.role": 1,
             "credits.primary": 0,
@@ -532,6 +533,31 @@ class TaggerWindow(QtWidgets.QMainWindow):
         self.toolBar.addAction(self.actionPageBrowser)
         self.toolBar.addAction(self.actionAutoImprint)
 
+        self.leWebLink.addAction(self.actionAddWebLink)
+        self.leWebLink.addAction(self.actionRemoveWebLink)
+
+        self.actionAddWebLink.triggered.connect(self.add_weblink_item)
+        self.actionRemoveWebLink.triggered.connect(self.remove_weblink_item)
+
+    def add_weblink_item(self, url: str = "") -> None:
+        item = ""
+        if isinstance(url, str):
+            item = url
+        self.leWebLink.addItem(item)
+        self.leWebLink.item(self.leWebLink.count() - 1).setFlags(
+            QtCore.Qt.ItemFlag.ItemIsEditable
+            | QtCore.Qt.ItemFlag.ItemIsEnabled
+            | QtCore.Qt.ItemFlag.ItemIsDragEnabled
+            | QtCore.Qt.ItemFlag.ItemIsSelectable
+        )
+        self.leWebLink.item(self.leWebLink.count() - 1).setSelected(True)
+        if not url:
+            self.leWebLink.editItem(self.leWebLink.item(self.leWebLink.count() - 1))
+
+    def remove_weblink_item(self) -> None:
+        item = self.leWebLink.takeItem(self.leWebLink.currentRow())
+        del item
+
     def repackage_archive(self) -> None:
         ca_list = self.fileSelectionList.get_selected_archive_list()
         non_zip_count = 0
@@ -784,6 +810,8 @@ class TaggerWindow(QtWidgets.QMainWindow):
             widget.currentIndexChanged.connect(self.set_dirty_flag)
         if isinstance(widget, QtWidgets.QCheckBox):
             widget.stateChanged.connect(self.set_dirty_flag)
+        if isinstance(widget, QtWidgets.QListWidget):
+            widget.itemChanged.connect(self.set_dirty_flag)
 
         # recursive call on children
         for child in widget.children():
@@ -844,7 +872,9 @@ class TaggerWindow(QtWidgets.QMainWindow):
         assign_text(self.leAltSeries, md.alternate_series)
         assign_text(self.leAltIssueNum, md.alternate_number)
         assign_text(self.leAltIssueCount, md.alternate_count)
-        assign_text(self.leWebLink, md.web_link)
+        self.leWebLink.clear()
+        for u in md.web_links:
+            self.add_weblink_item(u.url)
         assign_text(self.teCharacters, "\n".join(md.characters))
         assign_text(self.teTeams, "\n".join(md.teams))
         assign_text(self.teLocations, "\n".join(md.locations))
@@ -967,7 +997,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
         md.scan_info = utils.xlate(self.leScanInfo.text())
         md.series_groups = utils.split(self.leSeriesGroup.text(), ",")
         md.alternate_series = self.leAltSeries.text()
-        md.web_link = utils.xlate(self.leWebLink.text())
+        md.web_links = [urllib3.util.parse_url(self.leWebLink.item(i).text()) for i in range(self.leWebLink.count())]
         md.characters = set(utils.split(self.teCharacters.toPlainText(), "\n"))
         md.teams = set(utils.split(self.teTeams.toPlainText(), "\n"))
         md.locations = set(utils.split(self.teLocations.toPlainText(), "\n"))
@@ -1343,14 +1373,17 @@ class TaggerWindow(QtWidgets.QMainWindow):
         self.set_dirty_flag()
 
     def open_web_link(self) -> None:
-        if self.leWebLink is not None:
-            web_link = self.leWebLink.text().strip()
-            try:
-                result = urlparse(web_link)
-                all([result.scheme in ["http", "https"], result.netloc])
-                webbrowser.open_new_tab(web_link)
-            except ValueError:
-                QtWidgets.QMessageBox.warning(self, self.tr("Web Link"), self.tr("Web Link is invalid."))
+        row = self.leWebLink.currentRow()
+        if row < 0:
+            if self.leWebLink.count() < 1:
+                return
+            row = 0
+        web_link = self.leWebLink.item(row).text()
+        try:
+            urllib3.util.parse_url(web_link)
+            webbrowser.open_new_tab(web_link)
+        except LocationParseError:
+            QtWidgets.QMessageBox.warning(self, "Web Link", "Web Link is invalid.")
 
     def show_settings(self) -> None:
         settingswin = SettingsWindow(self, self.config, self.talkers)
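The validation pattern open_web_link now relies on, as a standalone sketch: urllib3's parse_url raises LocationParseError for input it cannot interpret (an unmatched bracket is one reliable trigger), and anything that parses is handed to the browser. The candidate URLs are hypothetical.

import urllib3.util
from urllib3.util.url import LocationParseError

for candidate in ("https://example.com/comic/1", "http://[oops"):
    try:
        urllib3.util.parse_url(candidate)
        print("opens:", candidate)
    except LocationParseError:
        print("warns:", candidate)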
@@ -6,6 +6,7 @@ import io
 import logging
 import traceback
 import webbrowser
+from collections.abc import Sequence
 
 from PyQt5.QtCore import QUrl
 from PyQt5.QtWidgets import QWidget
@@ -155,7 +156,7 @@ if qt_available:
     active_palette = None
 
     def enable_widget(widget: QtWidgets.QWidget | list[QtWidgets.QWidget], enable: bool) -> None:
-        if isinstance(widget, list):
+        if isinstance(widget, Sequence):
             for w in widget:
                 _enable_widget(w, enable)
         else:
@@ -214,6 +215,8 @@
             widget.setReadOnly(True)
             widget.setPalette(inactive_palette[0])
         elif isinstance(widget, QtWidgets.QListWidget):
+            inactive_palette = palettes()
+            widget.setPalette(inactive_palette[0])
             widget.setMovement(QtWidgets.QListWidget.Static)
 
     def replaceWidget(
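Why list became Sequence: the widget map above now stores tuples as well as lists, and a tuple fails an isinstance list check but passes a collections.abc.Sequence check. A minimal sketch with hypothetical placeholder strings standing in for widgets:

from collections.abc import Sequence

group = ("leWebLink", "btnOpenWebLink", "btnAddWebLink", "btnRemoveWebLink")
print(isinstance(group, list))      # False: the old check skipped tuples
print(isinstance(group, Sequence))  # True: the new check iterates the whole group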
@@ -958,10 +958,32 @@
       </item>
       <item row="2" column="1">
        <layout class="QGridLayout" name="gridLayout_7">
-        <item row="0" column="0">
-         <widget class="QLineEdit" name="leWebLink">
-          <property name="acceptDrops">
-           <bool>false</bool>
-          </property>
-         </widget>
-        </item>
+        <item row="0" column="0" rowspan="3">
+         <widget class="QListWidget" name="leWebLink">
+          <property name="contextMenuPolicy">
+           <enum>Qt::ActionsContextMenu</enum>
+          </property>
+          <property name="dragEnabled">
+           <bool>true</bool>
+          </property>
+          <property name="dragDropMode">
+           <enum>QAbstractItemView::DropOnly</enum>
+          </property>
+          <property name="defaultDropAction">
+           <enum>Qt::MoveAction</enum>
+          </property>
+          <property name="alternatingRowColors">
+           <bool>true</bool>
+          </property>
+          <property name="selectionMode">
+           <enum>QAbstractItemView::SingleSelection</enum>
+          </property>
+         </widget>
+        </item>
+        <item row="2" column="1">
+         <widget class="QPushButton" name="btnRemoveWebLink">
+          <property name="text">
+           <string>Delete Item</string>
+          </property>
+         </widget>
+        </item>
@@ -981,6 +1003,13 @@
          </property>
         </widget>
        </item>
+       <item row="1" column="1">
+        <widget class="QPushButton" name="btnAddWebLink">
+         <property name="text">
+          <string>Add Item</string>
+         </property>
+        </widget>
+       </item>
       </layout>
      </item>
      <item row="4" column="0">
@@ -1187,7 +1216,7 @@
     <x>0</x>
     <y>0</y>
     <width>1096</width>
-    <height>28</height>
+    <height>30</height>
    </rect>
   </property>
   <widget class="QMenu" name="menuComicTagger">
@@ -1477,6 +1506,16 @@
     <string>Open Folder as Comic</string>
    </property>
   </action>
+  <action name="actionAddWebLink">
+   <property name="text">
+    <string>Add Item</string>
+   </property>
+  </action>
+  <action name="actionRemoveWebLink">
+   <property name="text">
+    <string>Remove Web Link</string>
+   </property>
+  </action>
  </widget>
 <layoutdefault spacing="6" margin="11"/>
 <customwidgets>
@@ -1487,5 +1526,38 @@
   </customwidget>
  </customwidgets>
 <resources/>
-<connections/>
+<connections>
+ <connection>
+  <sender>btnAddWebLink</sender>
+  <signal>clicked()</signal>
+  <receiver>actionAddWebLink</receiver>
+  <slot>trigger()</slot>
+  <hints>
+   <hint type="sourcelabel">
+    <x>900</x>
+    <y>536</y>
+   </hint>
+   <hint type="destinationlabel">
+    <x>-1</x>
+    <y>-1</y>
+   </hint>
+  </hints>
+ </connection>
+ <connection>
+  <sender>btnRemoveWebLink</sender>
+  <signal>clicked()</signal>
+  <receiver>actionRemoveWebLink</receiver>
+  <slot>trigger()</slot>
+  <hints>
+   <hint type="sourcelabel">
+    <x>900</x>
+    <y>576</y>
+   </hint>
+   <hint type="destinationlabel">
+    <x>-1</x>
+    <y>-1</y>
+   </hint>
+  </hints>
+ </connection>
+</connections>
 </ui>
@@ -29,6 +29,8 @@ import requests
 import settngs
 from pyrate_limiter import Limiter, RequestRate
 from typing_extensions import Required, TypedDict
+from urllib3.exceptions import LocationParseError
+from urllib3.util import parse_url
 
 from comicapi import utils
 from comicapi.genericmetadata import ComicSeries, GenericMetadata, TagOrigin
@@ -643,10 +645,15 @@ class ComicVineTalker(ComicTalker):
             format=utils.xlate(series.format),
             volume_count=utils.xlate_int(series.count_of_volumes),
             title=utils.xlate(issue.get("name")),
-            web_link=utils.xlate(issue.get("site_detail_url")),
             series=utils.xlate(series.name),
             series_aliases=series.aliases,
         )
+        url = utils.xlate(issue.get("site_detail_url"))
+        if url:
+            try:
+                md.web_links = [parse_url(url)]
+            except LocationParseError:
+                ...
         if issue.get("image") is None:
            md._cover_image = ""
         else:
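A sketch of the guard above (with hypothetical URLs): a malformed site_detail_url coming back from the API is dropped silently rather than failing the whole metadata fetch.

from urllib3.exceptions import LocationParseError
from urllib3.util import parse_url

web_links = []
for candidate in ("https://comicvine.gamespot.com/issue/4000-140529/", "http://[bad"):
    try:
        web_links.append(parse_url(candidate))
    except LocationParseError:
        pass  # drop silently, as the talker does
print(len(web_links))  # 1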
@@ -185,7 +185,7 @@ comic_issue_result = comicapi.genericmetadata.GenericMetadata(
     issue=cv_issue_result["results"]["issue_number"],
     volume=None,
     title=cv_issue_result["results"]["name"],
-    web_link=cv_issue_result["results"]["site_detail_url"],
+    web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])],
 )
 
 cv_md = comicapi.genericmetadata.GenericMetadata(
@@ -213,7 +213,7 @@ cv_md = comicapi.genericmetadata.GenericMetadata(
     alternate_count=None,
     imprint=None,
     notes=None,
-    web_link=cv_issue_result["results"]["site_detail_url"],
+    web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])],
     format=None,
     manga=None,
     black_and_white=None,
|
||||
"Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
|
||||
does_not_raise(),
|
||||
),
|
||||
(
|
||||
"{title} {web_links!j}", # Test that join forces str conversion
|
||||
False,
|
||||
"Linux",
|
||||
"Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
|
||||
does_not_raise(),
|
||||
),
|
||||
(
|
||||
"{series}:{title} #{issue} ({year})", # on windows the ':' is replaced
|
||||
False,
|
||||
|