Merge branch 'web-links' into develop

commit 3df263858d

comicapi/_url.py (new file, 468 lines)

@@ -0,0 +1,468 @@
# mypy: disable-error-code="no-redef"
from __future__ import annotations

try:
    from urllib3.exceptions import HTTPError, LocationParseError, LocationValueError
    from urllib3.util import Url, parse_url
except ImportError:

    import re
    import typing

    class HTTPError(Exception):
        """Base exception used by this module."""

    class LocationValueError(ValueError, HTTPError):
        """Raised when there is something wrong with a given URL input."""

    class LocationParseError(LocationValueError):
        """Raised when get_host or similar fails to parse the URL input."""

        def __init__(self, location: str) -> None:
            message = f"Failed to parse: {location}"
            super().__init__(message)

            self.location = location

    def to_str(x: str | bytes, encoding: str | None = None, errors: str | None = None) -> str:
        if isinstance(x, str):
            return x
        elif not isinstance(x, bytes):
            raise TypeError(f"not expecting type {type(x).__name__}")
        if encoding or errors:
            return x.decode(encoding or "utf-8", errors=errors or "strict")
        return x.decode()

    # We only want to normalize urls with an HTTP(S) scheme.
    # urllib3 infers URLs without a scheme (None) to be http.
    _NORMALIZABLE_SCHEMES = ("http", "https", None)

    # Almost all of these patterns were derived from the
    # 'rfc3986' module: https://github.com/python-hyper/rfc3986
    _PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
    _SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
    _URI_RE = re.compile(
        r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?" r"(?://([^\\/?#]*))?" r"([^?#]*)" r"(?:\?([^#]*))?" r"(?:#(.*))?$",
        re.UNICODE | re.DOTALL,
    )

    _IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
    _HEX_PAT = "[0-9A-Fa-f]{1,4}"
    _LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=_HEX_PAT, ipv4=_IPV4_PAT)
    _subs = {"hex": _HEX_PAT, "ls32": _LS32_PAT}
    _variations = [
        # 6( h16 ":" ) ls32
        "(?:%(hex)s:){6}%(ls32)s",
        # "::" 5( h16 ":" ) ls32
        "::(?:%(hex)s:){5}%(ls32)s",
        # [ h16 ] "::" 4( h16 ":" ) ls32
        "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
        # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
        "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
        # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
        "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
        # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
        "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
        # [ *4( h16 ":" ) h16 ] "::" ls32
        "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
        # [ *5( h16 ":" ) h16 ] "::" h16
        "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
        # [ *6( h16 ":" ) h16 ] "::"
        "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
    ]

    _UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
    _IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
    _ZONE_ID_PAT = "(?:%25|%)(?:[" + _UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
    _IPV6_ADDRZ_PAT = r"\[" + _IPV6_PAT + r"(?:" + _ZONE_ID_PAT + r")?\]"
    _REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
    _TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")

    _IPV4_RE = re.compile("^" + _IPV4_PAT + "$")
    _IPV6_RE = re.compile("^" + _IPV6_PAT + "$")
    _IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT + "$")
    _BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT[2:-2] + "$")
    _ZONE_ID_RE = re.compile("(" + _ZONE_ID_PAT + r")\]$")

    _HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
        _REG_NAME_PAT,
        _IPV4_PAT,
        _IPV6_ADDRZ_PAT,
    )
    _HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)

    _UNRESERVED_CHARS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~")
    _SUB_DELIM_CHARS = set("!$&'()*+,;=")
    _USERINFO_CHARS = _UNRESERVED_CHARS | _SUB_DELIM_CHARS | {":"}
    _PATH_CHARS = _USERINFO_CHARS | {"@", "/"}
    _QUERY_CHARS = _FRAGMENT_CHARS = _PATH_CHARS | {"?"}

    class Url(
        typing.NamedTuple(
            "Url",
            [
                ("scheme", typing.Optional[str]),
                ("auth", typing.Optional[str]),
                ("host", typing.Optional[str]),
                ("port", typing.Optional[int]),
                ("path", typing.Optional[str]),
                ("query", typing.Optional[str]),
                ("fragment", typing.Optional[str]),
            ],
        )
    ):
        """
        Data structure for representing an HTTP URL. Used as a return value for
        :func:`parse_url`. Both the scheme and host are normalized as they are
        both case-insensitive according to RFC 3986.
        """

        def __new__(  # type: ignore[no-untyped-def]
            cls,
            scheme: str | None = None,
            auth: str | None = None,
            host: str | None = None,
            port: int | None = None,
            path: str | None = None,
            query: str | None = None,
            fragment: str | None = None,
        ):
            if path and not path.startswith("/"):
                path = "/" + path
            if scheme is not None:
                scheme = scheme.lower()
            return super().__new__(cls, scheme, auth, host, port, path, query, fragment)

        @property
        def hostname(self) -> str | None:
            """For backwards-compatibility with urlparse. We're nice like that."""
            return self.host

        @property
        def request_uri(self) -> str:
            """Absolute path including the query string."""
            uri = self.path or "/"

            if self.query is not None:
                uri += "?" + self.query

            return uri

        @property
        def authority(self) -> str | None:
            """
            Authority component as defined in RFC 3986 3.2.
            This includes userinfo (auth), host and port.

            i.e.
                userinfo@host:port
            """
            userinfo = self.auth
            netloc = self.netloc
            if netloc is None or userinfo is None:
                return netloc
            else:
                return f"{userinfo}@{netloc}"

        @property
        def netloc(self) -> str | None:
            """
            Network location including host and port.

            If you need the equivalent of urllib.parse's ``netloc``,
            use the ``authority`` property instead.
            """
            if self.host is None:
                return None
            if self.port:
                return f"{self.host}:{self.port}"
            return self.host

        @property
        def url(self) -> str:
            """
            Convert self into a url

            This function should more or less round-trip with :func:`.parse_url`. The
            returned url may not be exactly the same as the url inputted to
            :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
            with a blank port will have : removed).

            Example:

            .. code-block:: python

                import urllib3

                U = urllib3.util.parse_url("https://google.com/mail/")

                print(U.url)
                # "https://google.com/mail/"

                print( urllib3.util.Url("https", "username:password",
                                        "host.com", 80, "/path", "query", "fragment"
                                        ).url
                    )
                # "https://username:password@host.com:80/path?query#fragment"
            """
            scheme, auth, host, port, path, query, fragment = self
            url = ""

            # We use "is not None" we want things to happen with empty strings (or 0 port)
            if scheme is not None:
                url += scheme + "://"
            if auth is not None:
                url += auth + "@"
            if host is not None:
                url += host
            if port is not None:
                url += ":" + str(port)
            if path is not None:
                url += path
            if query is not None:
                url += "?" + query
            if fragment is not None:
                url += "#" + fragment

            return url

        def __str__(self) -> str:
            return self.url

    @typing.overload
    def _encode_invalid_chars(component: str, allowed_chars: typing.Container[str]) -> str:  # Abstract
        ...

    @typing.overload
    def _encode_invalid_chars(component: None, allowed_chars: typing.Container[str]) -> None:  # Abstract
        ...

    def _encode_invalid_chars(component: str | None, allowed_chars: typing.Container[str]) -> str | None:
        """Percent-encodes a URI component without reapplying
        onto an already percent-encoded component.
        """
        if component is None:
            return component

        component = to_str(component)

        # Normalize existing percent-encoded bytes.
        # Try to see if the component we're encoding is already percent-encoded
        # so we can skip all '%' characters but still encode all others.
        component, percent_encodings = _PERCENT_RE.subn(lambda match: match.group(0).upper(), component)

        uri_bytes = component.encode("utf-8", "surrogatepass")
        is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
        encoded_component = bytearray()

        for i in range(0, len(uri_bytes)):
            # Will return a single character bytestring
            byte = uri_bytes[i : i + 1]
            byte_ord = ord(byte)
            if (is_percent_encoded and byte == b"%") or (byte_ord < 128 and byte.decode() in allowed_chars):
                encoded_component += byte
                continue
            encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))

        return encoded_component.decode()

    def _remove_path_dot_segments(path: str) -> str:
        # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
        segments = path.split("/")  # Turn the path into a list of segments
        output = []  # Initialize the variable to use to store output

        for segment in segments:
            # '.' is the current directory, so ignore it, it is superfluous
            if segment == ".":
                continue
            # Anything other than '..', should be appended to the output
            if segment != "..":
                output.append(segment)
            # In this case segment == '..', if we can, we should pop the last
            # element
            elif output:
                output.pop()

        # If the path starts with '/' and the output is empty or the first string
        # is non-empty
        if path.startswith("/") and (not output or output[0]):
            output.insert(0, "")

        # If the path starts with '/.' or '/..' ensure we add one more empty
        # string to add a trailing '/'
        if path.endswith(("/.", "/..")):
            output.append("")

        return "/".join(output)

    @typing.overload
    def _normalize_host(host: None, scheme: str | None) -> None: ...

    @typing.overload
    def _normalize_host(host: str, scheme: str | None) -> str: ...

    def _normalize_host(host: str | None, scheme: str | None) -> str | None:
        if host:
            if scheme in _NORMALIZABLE_SCHEMES:
                is_ipv6 = _IPV6_ADDRZ_RE.match(host)
                if is_ipv6:
                    # IPv6 hosts of the form 'a::b%zone' are encoded in a URL as
                    # such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID
                    # separator as necessary to return a valid RFC 4007 scoped IP.
                    match = _ZONE_ID_RE.search(host)
                    if match:
                        start, end = match.span(1)
                        zone_id = host[start:end]

                        if zone_id.startswith("%25") and zone_id != "%25":
                            zone_id = zone_id[3:]
                        else:
                            zone_id = zone_id[1:]
                        zone_id = _encode_invalid_chars(zone_id, _UNRESERVED_CHARS)
                        return f"{host[:start].lower()}%{zone_id}{host[end:]}"
                    else:
                        return host.lower()
                elif not _IPV4_RE.match(host):
                    return to_str(
                        b".".join([_idna_encode(label) for label in host.split(".")]),
                        "ascii",
                    )
        return host

    def _idna_encode(name: str) -> bytes:
        if not name.isascii():
            try:
                import idna
            except ImportError:
                raise LocationParseError("Unable to parse URL without the 'idna' module") from None

            try:
                return idna.encode(name.lower(), strict=True, std3_rules=True)
            except idna.IDNAError:
                raise LocationParseError(f"Name '{name}' is not a valid IDNA label") from None

        return name.lower().encode("ascii")

    def _encode_target(target: str) -> str:
        """Percent-encodes a request target so that there are no invalid characters

        Pre-condition for this function is that 'target' must start with '/'.
        If that is the case then _TARGET_RE will always produce a match.
        """
        match = _TARGET_RE.match(target)
        if not match:  # Defensive:
            raise LocationParseError(f"{target!r} is not a valid request URI")

        path, query = match.groups()
        encoded_target = _encode_invalid_chars(path, _PATH_CHARS)
        if query is not None:
            query = _encode_invalid_chars(query, _QUERY_CHARS)
            encoded_target += "?" + query
        return encoded_target

    def parse_url(url: str) -> Url:
        """
        Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
        performed to parse incomplete urls. Fields not provided will be None.
        This parser is RFC 3986 and RFC 6874 compliant.

        The parser logic and helper functions are based heavily on
        work done in the ``rfc3986`` module.

        :param str url: URL to parse into a :class:`.Url` namedtuple.

        Partly backwards-compatible with :mod:`urllib.parse`.

        Example:

        .. code-block:: python

            import urllib3

            print( urllib3.util.parse_url('http://google.com/mail/'))
            # Url(scheme='http', host='google.com', port=None, path='/mail/', ...)

            print( urllib3.util.parse_url('google.com:80'))
            # Url(scheme=None, host='google.com', port=80, path=None, ...)

            print( urllib3.util.parse_url('/foo?bar'))
            # Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
        """
        if not url:
            # Empty
            return Url()

        source_url = url
        if not _SCHEME_RE.search(url):
            url = "//" + url

        scheme: str | None
        authority: str | None
        auth: str | None
        host: str | None
        port: str | None
        port_int: int | None
        path: str | None
        query: str | None
        fragment: str | None

        try:
            scheme, authority, path, query, fragment = _URI_RE.match(url).groups()  # type: ignore[union-attr]
            normalize_uri = scheme is None or scheme.lower() in _NORMALIZABLE_SCHEMES

            if scheme:
                scheme = scheme.lower()

            if authority:
                auth, _, host_port = authority.rpartition("@")
                auth = auth or None
                host, port = _HOST_PORT_RE.match(host_port).groups()  # type: ignore[union-attr]
                if auth and normalize_uri:
                    auth = _encode_invalid_chars(auth, _USERINFO_CHARS)
                if port == "":
                    port = None
            else:
                auth, host, port = None, None, None

            if port is not None:
                port_int = int(port)
                if not (0 <= port_int <= 65535):
                    raise LocationParseError(url)
            else:
                port_int = None

            host = _normalize_host(host, scheme)

            if normalize_uri and path:
                path = _remove_path_dot_segments(path)
                path = _encode_invalid_chars(path, _PATH_CHARS)
            if normalize_uri and query:
                query = _encode_invalid_chars(query, _QUERY_CHARS)
            if normalize_uri and fragment:
                fragment = _encode_invalid_chars(fragment, _FRAGMENT_CHARS)

        except (ValueError, AttributeError) as e:
            raise LocationParseError(source_url) from e

        # For the sake of backwards compatibility we put empty
        # string values for path if there are any defined values
        # beyond the path in the URL.
        # TODO: Remove this when we break backwards compatibility.
        if not path:
            if query is not None or fragment is not None:
                path = ""
            else:
                path = None

        return Url(
            scheme=scheme,
            auth=auth,
            host=host,
            port=port_int,
            path=path,
            query=query,
            fragment=fragment,
        )


__all__ = ("Url", "parse_url", "HTTPError", "LocationParseError", "LocationValueError")
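A quick sketch (not part of the commit) of what the new module provides: urllib3's names when urllib3 is installed, otherwise the vendored fallback. It assumes comicapi is importable; the URL is a hypothetical example.

# Scheme and host are case-insensitive per RFC 3986, so both get lowercased.
from comicapi._url import parse_url

u = parse_url("HTTPS://ComicVine.GameSpot.com:443/issue/?id=140529#top")
print(u.scheme, u.host, u.port)  # https comicvine.gamespot.com 443
print(u.url)                     # https://comicvine.gamespot.com:443/issue/?id=140529#top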
@@ -31,6 +31,8 @@ from typing_extensions import NamedTuple, Required
 
 from comicapi import utils
 
+from ._url import Url, parse_url
+
 logger = logging.getLogger(__name__)
 
 
@@ -133,7 +135,7 @@ class GenericMetadata:
     year: int | None = None
     language: str | None = None  # 2 letter iso code
     country: str | None = None
-    web_link: str | None = None
+    web_links: list[Url] = dataclasses.field(default_factory=list)
     format: str | None = None
     manga: str | None = None
     black_and_white: bool | None = None
@@ -253,7 +255,7 @@ class GenericMetadata:
         assign("year", new_md.year)
         assign("language", new_md.language)
         assign("country", new_md.country)
-        assign("web_link", new_md.web_link)
+        assign("web_links", new_md.web_links)
         assign("format", new_md.format)
         assign("manga", new_md.manga)
         assign("black_and_white", new_md.black_and_white)
@@ -487,7 +489,9 @@ md_test: GenericMetadata = GenericMetadata(
     alternate_count=7,
     imprint="craphound.com",
     notes="Tagged with ComicTagger 1.3.2a5 using info from Comic Vine on 2022-04-16 15:52:26. [Issue ID 140529]",
-    web_link="https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/",
+    web_links=[
+        parse_url("https://comicvine.gamespot.com/cory-doctorows-futuristic-tales-of-the-here-and-no/4000-140529/")
+    ],
     format="Series",
     manga="No",
     black_and_white=None,
@@ -551,3 +555,15 @@ md_test: GenericMetadata = GenericMetadata(
     last_mark=None,
     _cover_image=None,
 )
+
+
+__all__ = (
+    "Url",
+    "parse_url",
+    "PageType",
+    "ImageMetadata",
+    "Credit",
+    "ComicSeries",
+    "TagOrigin",
+    "GenericMetadata",
+)
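A hedged sketch of the new field in use (not part of the commit); it relies only on the dataclass defaults visible above and on the re-exported parse_url.

# Every other field keeps its default; web_links holds parsed Url tuples.
from comicapi.genericmetadata import GenericMetadata, parse_url

md = GenericMetadata(
    web_links=[parse_url("https://comicvine.gamespot.com/4000-140529/")],
)
print(md.web_links[0].host)  # comicvine.gamespot.com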
@@ -57,7 +57,7 @@ class ComicRack(Metadata):
         "month",
         "year",
         "language",
-        "web_link",
+        "web_links",
         "format",
         "manga",
         "black_and_white",
@@ -229,7 +229,7 @@ class ComicRack(Metadata):
         assign("Month", md.month)
         assign("Year", md.year)
         assign("LanguageISO", md.language)
-        assign("Web", md.web_link)
+        assign("Web", " ".join(u.url for u in md.web_links))
         assign("Format", md.format)
         assign("Manga", md.manga)
         assign("BlackAndWhite", "Yes" if md.black_and_white else None)
@@ -313,7 +313,7 @@ class ComicRack(Metadata):
         md.month = utils.xlate_int(get("Month"))
         md.year = utils.xlate_int(get("Year"))
         md.language = utils.xlate(get("LanguageISO"))
-        md.web_link = utils.xlate(get("Web"))
+        md.web_links = utils.split_urls(utils.xlate(get("Web")))
         md.format = utils.xlate(get("Format"))
         md.manga = utils.xlate(get("Manga"))
         md.maturity_rating = utils.xlate(get("AgeRating"))
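The two hunks above imply a round trip through ComicRack's single Web element: links are joined with spaces on write and recovered with split_urls on read. A sketch only, assuming comicapi is importable; the example.com links are hypothetical.

from comicapi import utils

links = utils.split_urls("https://example.com/a https://example.com/b")
web_value = " ".join(u.url for u in links)   # what assign("Web", ...) writes
print(utils.split_urls(web_value) == links)  # True: the round trip is lossless here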
@@ -33,6 +33,8 @@ from comicfn2dict import comicfn2dict
 import comicapi.data
 from comicapi import filenamelexer, filenameparser
 
+from ._url import Url, parse_url
+
 try:
     import icu
 
@@ -370,6 +372,24 @@ def split(s: str | None, c: str) -> list[str]:
     return []
 
 
+def split_urls(s: str | None) -> list[Url]:
+    if s is None:
+        return []
+    # Find occurrences of ' http'
+    if s.count("http") > 1 and s.count(" http") >= 1:
+        urls = []
+        # Split urls out
+        url_strings = split(s, " http")
+        # Restore the scheme 'http' and parse the url
+        for i, url_string in enumerate(url_strings):
+            if not url_string.startswith("http"):
+                url_string = "http" + url_string
+            urls.append(parse_url(url_string))
+        return urls
+    else:
+        return [parse_url(s)]
+
+
 def remove_articles(text: str) -> str:
     text = text.casefold()
     articles = [
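A short demonstration of both branches of split_urls (a sketch with hypothetical URLs): splitting on " http" strips the scheme from every URL after the first, which the loop above puts back before parsing.

from comicapi.utils import split_urls

print([u.url for u in split_urls("https://example.com/a https://example.com/b")])
# ['https://example.com/a', 'https://example.com/b']
print(len(split_urls("https://example.com/a")))
# 1: a single URL with no ' http' separator takes the else branch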
@@ -78,12 +78,13 @@ class CBLTransformer:
             self.metadata.description += self.metadata.notes
 
         if self.config.Comic_Book_Lover__copy_weblink_to_comments:
-            if self.metadata.web_link is not None:
-                if self.metadata.description is None:
-                    self.metadata.description = ""
+            for web_link in self.metadata.web_links:
+                temp_desc = self.metadata.description
+                if temp_desc is None:
+                    temp_desc = ""
                 else:
-                    self.metadata.description += "\n\n"
-                if self.metadata.web_link not in self.metadata.description:
-                    self.metadata.description += self.metadata.web_link
+                    temp_desc += "\n\n"
+                if web_link.url and web_link.url not in temp_desc:
+                    self.metadata.description = temp_desc + web_link.url
 
         return self.metadata
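To make the dedupe rule in the rewritten loop easier to see, here is the same logic restated as a standalone function (a sketch; copy_weblinks is a hypothetical name, not part of the commit): a link already present in the description is not appended a second time.

from comicapi._url import Url, parse_url

def copy_weblinks(description: str | None, web_links: list[Url]) -> str | None:
    for web_link in web_links:
        temp_desc = description
        if temp_desc is None:
            temp_desc = ""
        else:
            temp_desc += "\n\n"
        if web_link.url and web_link.url not in temp_desc:
            description = temp_desc + web_link.url
    return description

link = parse_url("https://example.com/issue/1")
print(copy_weblinks("A description", [link, link]))  # the URL is appended once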
@@ -69,7 +69,7 @@ class MetadataFormatter(string.Formatter):
         if conversion == "t":
             return str(value).title()
         if conversion == "j":
-            return ", ".join(list(value))
+            return ", ".join(list(str(v) for v in value))
         return cast(str, super().convert_field(value, conversion))
 
     def handle_replacements(self, string: str, replacements: list[Replacement]) -> str:
@@ -218,6 +218,10 @@ class FileRenamer:
 
         fmt = MetadataFormatter(self.smart_cleanup, platform=self.platform, replacements=self.replacements)
         md_dict = vars(md)
+        md_dict["web_link"] = ""
+        if md.web_links:
+            md_dict["web_link"] = md.web_links[0]
+
         md_dict["issue"] = IssueString(md.issue).as_string(pad=self.issue_zero_padding)
         for role in ["writer", "penciller", "inker", "colorist", "letterer", "cover artist", "editor"]:
             md_dict[role] = md.get_primary_credit(role)
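Why the "!j" change matters: web_links now holds Url objects rather than strings, and str.join raises TypeError on non-str items, so the conversion coerces each item with str() (Url.__str__ returns .url). A standalone sketch with a minimal string.Formatter subclass; JoinFormatter is a hypothetical name.

import string

class JoinFormatter(string.Formatter):
    def convert_field(self, value, conversion):
        # Custom "!j" conversion: join any iterable, coercing items to str.
        if conversion == "j":
            return ", ".join(str(v) for v in value)
        return super().convert_field(value, conversion)

print(JoinFormatter().format("{links!j}", links=[1, 2, 3]))  # 1, 2, 3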
@@ -27,11 +27,12 @@ import sys
 import webbrowser
 from datetime import datetime
 from typing import Any, Callable
-from urllib.parse import urlparse
 
 import natsort
 import settngs
+import urllib3.util
 from PyQt5 import QtCore, QtGui, QtNetwork, QtWidgets, uic
+from urllib3.util.url import LocationParseError
 
 import comictaggerlib.ui
 from comicapi import utils
@@ -112,7 +113,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
             "alternate_count": self.leAltIssueCount,
             "imprint": self.leImprint,
             "notes": self.teNotes,
-            "web_link": self.leWebLink,
+            "web_links": (self.leWebLink, self.btnOpenWebLink, self.btnAddWebLink, self.btnRemoveWebLink),
             "format": self.cbFormat,
             "manga": self.cbManga,
             "black_and_white": self.cbBW,
@@ -124,7 +125,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
             "characters": self.teCharacters,
             "teams": self.teTeams,
             "locations": self.teLocations,
-            "credits": [self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit],
+            "credits": (self.twCredits, self.btnAddCredit, self.btnEditCredit, self.btnRemoveCredit),
             "credits.person": 2,
             "credits.role": 1,
             "credits.primary": 0,
@@ -532,6 +533,31 @@ class TaggerWindow(QtWidgets.QMainWindow):
         self.toolBar.addAction(self.actionPageBrowser)
         self.toolBar.addAction(self.actionAutoImprint)
 
+        self.leWebLink.addAction(self.actionAddWebLink)
+        self.leWebLink.addAction(self.actionRemoveWebLink)
+
+        self.actionAddWebLink.triggered.connect(self.add_weblink_item)
+        self.actionRemoveWebLink.triggered.connect(self.remove_weblink_item)
+
+    def add_weblink_item(self, url: str = "") -> None:
+        item = ""
+        if isinstance(url, str):
+            item = url
+        self.leWebLink.addItem(item)
+        self.leWebLink.item(self.leWebLink.count() - 1).setFlags(
+            QtCore.Qt.ItemFlag.ItemIsEditable
+            | QtCore.Qt.ItemFlag.ItemIsEnabled
+            | QtCore.Qt.ItemFlag.ItemIsDragEnabled
+            | QtCore.Qt.ItemFlag.ItemIsSelectable
+        )
+        self.leWebLink.item(self.leWebLink.count() - 1).setSelected(True)
+        if not url:
+            self.leWebLink.editItem(self.leWebLink.item(self.leWebLink.count() - 1))
+
+    def remove_weblink_item(self) -> None:
+        item = self.leWebLink.takeItem(self.leWebLink.currentRow())
+        del item
+
     def repackage_archive(self) -> None:
         ca_list = self.fileSelectionList.get_selected_archive_list()
         non_zip_count = 0
@@ -784,6 +810,8 @@ class TaggerWindow(QtWidgets.QMainWindow):
             widget.currentIndexChanged.connect(self.set_dirty_flag)
         if isinstance(widget, QtWidgets.QCheckBox):
             widget.stateChanged.connect(self.set_dirty_flag)
+        if isinstance(widget, QtWidgets.QListWidget):
+            widget.itemChanged.connect(self.set_dirty_flag)
 
         # recursive call on children
         for child in widget.children():
@@ -844,7 +872,9 @@ class TaggerWindow(QtWidgets.QMainWindow):
         assign_text(self.leAltSeries, md.alternate_series)
         assign_text(self.leAltIssueNum, md.alternate_number)
         assign_text(self.leAltIssueCount, md.alternate_count)
-        assign_text(self.leWebLink, md.web_link)
+        self.leWebLink.clear()
+        for u in md.web_links:
+            self.add_weblink_item(u.url)
         assign_text(self.teCharacters, "\n".join(md.characters))
         assign_text(self.teTeams, "\n".join(md.teams))
         assign_text(self.teLocations, "\n".join(md.locations))
@@ -967,7 +997,7 @@ class TaggerWindow(QtWidgets.QMainWindow):
         md.scan_info = utils.xlate(self.leScanInfo.text())
         md.series_groups = utils.split(self.leSeriesGroup.text(), ",")
         md.alternate_series = self.leAltSeries.text()
-        md.web_link = utils.xlate(self.leWebLink.text())
+        md.web_links = [urllib3.util.parse_url(self.leWebLink.item(i).text()) for i in range(self.leWebLink.count())]
         md.characters = set(utils.split(self.teCharacters.toPlainText(), "\n"))
         md.teams = set(utils.split(self.teTeams.toPlainText(), "\n"))
         md.locations = set(utils.split(self.teLocations.toPlainText(), "\n"))
@@ -1343,14 +1373,17 @@ class TaggerWindow(QtWidgets.QMainWindow):
         self.set_dirty_flag()
 
     def open_web_link(self) -> None:
-        if self.leWebLink is not None:
-            web_link = self.leWebLink.text().strip()
-            try:
-                result = urlparse(web_link)
-                all([result.scheme in ["http", "https"], result.netloc])
-                webbrowser.open_new_tab(web_link)
-            except ValueError:
-                QtWidgets.QMessageBox.warning(self, self.tr("Web Link"), self.tr("Web Link is invalid."))
+        row = self.leWebLink.currentRow()
+        if row < 0:
+            if self.leWebLink.count() < 1:
+                return
+            row = 0
+        web_link = self.leWebLink.item(row).text()
+        try:
+            urllib3.util.parse_url(web_link)
+            webbrowser.open_new_tab(web_link)
+        except LocationParseError:
+            QtWidgets.QMessageBox.warning(self, "Web Link", "Web Link is invalid.")
 
     def show_settings(self) -> None:
         settingswin = SettingsWindow(self, self.config, self.talkers)
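The validation pattern open_web_link now relies on, as a standalone sketch: urllib3's parse_url raises LocationParseError for input it cannot interpret (an unmatched bracket is one reliable trigger), and anything that parses is handed to the browser. The candidate URLs are hypothetical.

import urllib3.util
from urllib3.util.url import LocationParseError

for candidate in ("https://example.com/comic/1", "http://[oops"):
    try:
        urllib3.util.parse_url(candidate)
        print("opens:", candidate)
    except LocationParseError:
        print("warns:", candidate)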
@@ -6,6 +6,7 @@ import io
 import logging
 import traceback
 import webbrowser
+from collections.abc import Sequence
 
 from PyQt5.QtCore import QUrl
 from PyQt5.QtWidgets import QWidget
@@ -155,7 +156,7 @@ if qt_available:
     active_palette = None
 
     def enable_widget(widget: QtWidgets.QWidget | list[QtWidgets.QWidget], enable: bool) -> None:
-        if isinstance(widget, list):
+        if isinstance(widget, Sequence):
             for w in widget:
                 _enable_widget(w, enable)
         else:
@@ -214,6 +215,8 @@
             widget.setReadOnly(True)
             widget.setPalette(inactive_palette[0])
         elif isinstance(widget, QtWidgets.QListWidget):
+            inactive_palette = palettes()
+            widget.setPalette(inactive_palette[0])
             widget.setMovement(QtWidgets.QListWidget.Static)
 
     def replaceWidget(
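Why list became Sequence: the widget map above now stores tuples as well as lists, and a tuple fails an isinstance list check but passes a collections.abc.Sequence check. A minimal sketch with hypothetical placeholder strings standing in for widgets:

from collections.abc import Sequence

group = ("leWebLink", "btnOpenWebLink", "btnAddWebLink", "btnRemoveWebLink")
print(isinstance(group, list))      # False: the old check skipped tuples
print(isinstance(group, Sequence))  # True: the new check iterates the whole group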
@@ -958,10 +958,32 @@
       </item>
       <item row="2" column="1">
        <layout class="QGridLayout" name="gridLayout_7">
-        <item row="0" column="0">
-         <widget class="QLineEdit" name="leWebLink">
-          <property name="acceptDrops">
-           <bool>false</bool>
-          </property>
-         </widget>
-        </item>
+        <item row="0" column="0" rowspan="3">
+         <widget class="QListWidget" name="leWebLink">
+          <property name="contextMenuPolicy">
+           <enum>Qt::ActionsContextMenu</enum>
+          </property>
+          <property name="dragEnabled">
+           <bool>true</bool>
+          </property>
+          <property name="dragDropMode">
+           <enum>QAbstractItemView::DropOnly</enum>
+          </property>
+          <property name="defaultDropAction">
+           <enum>Qt::MoveAction</enum>
+          </property>
+          <property name="alternatingRowColors">
+           <bool>true</bool>
+          </property>
+          <property name="selectionMode">
+           <enum>QAbstractItemView::SingleSelection</enum>
+          </property>
+         </widget>
+        </item>
+        <item row="2" column="1">
+         <widget class="QPushButton" name="btnRemoveWebLink">
+          <property name="text">
+           <string>Delete Item</string>
+          </property>
+         </widget>
+        </item>
@@ -981,6 +1003,13 @@
          </property>
         </widget>
        </item>
+       <item row="1" column="1">
+        <widget class="QPushButton" name="btnAddWebLink">
+         <property name="text">
+          <string>Add Item</string>
+         </property>
+        </widget>
+       </item>
       </layout>
      </item>
      <item row="4" column="0">
@@ -1187,7 +1216,7 @@
     <x>0</x>
     <y>0</y>
     <width>1096</width>
-    <height>28</height>
+    <height>30</height>
    </rect>
   </property>
   <widget class="QMenu" name="menuComicTagger">
@@ -1477,6 +1506,16 @@
     <string>Open Folder as Comic</string>
    </property>
   </action>
+  <action name="actionAddWebLink">
+   <property name="text">
+    <string>Add Item</string>
+   </property>
+  </action>
+  <action name="actionRemoveWebLink">
+   <property name="text">
+    <string>Remove Web Link</string>
+   </property>
+  </action>
  </widget>
 <layoutdefault spacing="6" margin="11"/>
 <customwidgets>
@@ -1487,5 +1526,38 @@
   </customwidget>
  </customwidgets>
 <resources/>
-<connections/>
+<connections>
+ <connection>
+  <sender>btnAddWebLink</sender>
+  <signal>clicked()</signal>
+  <receiver>actionAddWebLink</receiver>
+  <slot>trigger()</slot>
+  <hints>
+   <hint type="sourcelabel">
+    <x>900</x>
+    <y>536</y>
+   </hint>
+   <hint type="destinationlabel">
+    <x>-1</x>
+    <y>-1</y>
+   </hint>
+  </hints>
+ </connection>
+ <connection>
+  <sender>btnRemoveWebLink</sender>
+  <signal>clicked()</signal>
+  <receiver>actionRemoveWebLink</receiver>
+  <slot>trigger()</slot>
+  <hints>
+   <hint type="sourcelabel">
+    <x>900</x>
+    <y>576</y>
+   </hint>
+   <hint type="destinationlabel">
+    <x>-1</x>
+    <y>-1</y>
+   </hint>
+  </hints>
+ </connection>
+</connections>
 </ui>
@@ -29,6 +29,8 @@ import requests
 import settngs
 from pyrate_limiter import Limiter, RequestRate
 from typing_extensions import Required, TypedDict
+from urllib3.exceptions import LocationParseError
+from urllib3.util import parse_url
 
 from comicapi import utils
 from comicapi.genericmetadata import ComicSeries, GenericMetadata, TagOrigin
@@ -643,10 +645,15 @@ class ComicVineTalker(ComicTalker):
             format=utils.xlate(series.format),
             volume_count=utils.xlate_int(series.count_of_volumes),
             title=utils.xlate(issue.get("name")),
-            web_link=utils.xlate(issue.get("site_detail_url")),
             series=utils.xlate(series.name),
             series_aliases=series.aliases,
         )
+        url = utils.xlate(issue.get("site_detail_url"))
+        if url:
+            try:
+                md.web_links = [parse_url(url)]
+            except LocationParseError:
+                ...
         if issue.get("image") is None:
            md._cover_image = ""
         else:
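A sketch of the guard above (with hypothetical URLs): a malformed site_detail_url coming back from the API is dropped silently rather than failing the whole metadata fetch.

from urllib3.exceptions import LocationParseError
from urllib3.util import parse_url

web_links = []
for candidate in ("https://comicvine.gamespot.com/issue/4000-140529/", "http://[bad"):
    try:
        web_links.append(parse_url(candidate))
    except LocationParseError:
        pass  # drop silently, as the talker does
print(len(web_links))  # 1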
@@ -185,7 +185,7 @@ comic_issue_result = comicapi.genericmetadata.GenericMetadata(
     issue=cv_issue_result["results"]["issue_number"],
     volume=None,
     title=cv_issue_result["results"]["name"],
-    web_link=cv_issue_result["results"]["site_detail_url"],
+    web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])],
 )
 
 cv_md = comicapi.genericmetadata.GenericMetadata(
@@ -213,7 +213,7 @@ cv_md = comicapi.genericmetadata.GenericMetadata(
     alternate_count=None,
     imprint=None,
     notes=None,
-    web_link=cv_issue_result["results"]["site_detail_url"],
+    web_links=[comicapi.genericmetadata.parse_url(cv_issue_result["results"]["site_detail_url"])],
     format=None,
     manga=None,
     black_and_white=None,
|
||||
"Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
|
||||
does_not_raise(),
|
||||
),
|
||||
(
|
||||
"{title} {web_links!j}", # Test that join forces str conversion
|
||||
False,
|
||||
"Linux",
|
||||
"Anda's Game https:--comicvine.gamespot.com-cory-doctorows-futuristic-tales-of-the-here-and-no-4000-140529-.cbz",
|
||||
does_not_raise(),
|
||||
),
|
||||
(
|
||||
"{series}:{title} #{issue} ({year})", # on windows the ':' is replaced
|
||||
False,
|
||||
|