2015-02-21 18:30:32 -08:00
|
|
|
"""A class to manage creating image content hashes, and calculate hamming distances"""
|
2024-01-29 09:14:25 -08:00
|
|
|
|
2022-06-02 18:32:16 -07:00
|
|
|
#
|
2023-02-16 17:23:13 -08:00
|
|
|
# Copyright 2013 ComicTagger Authors
|
2022-06-02 18:32:16 -07:00
|
|
|
#
|
2015-02-21 18:30:32 -08:00
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
2022-06-02 18:32:16 -07:00
|
|
|
#
|
2015-02-21 18:30:32 -08:00
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
2022-06-02 18:32:16 -07:00
|
|
|
#
|
2015-02-21 18:30:32 -08:00
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2022-06-02 18:32:16 -07:00
|
|
|
from __future__ import annotations
|
2015-02-13 15:08:07 -08:00
|
|
|
|
2018-09-19 13:05:39 -07:00
|
|
|
import io
|
2024-02-06 18:01:26 -08:00
|
|
|
import itertools
|
2022-04-04 18:59:26 -07:00
|
|
|
import logging
|
2023-06-25 17:54:26 -07:00
|
|
|
import math
|
2023-11-23 15:58:00 -08:00
|
|
|
from collections.abc import Sequence
|
2023-06-25 17:54:26 -07:00
|
|
|
from statistics import median
|
2023-06-27 14:44:08 -07:00
|
|
|
from typing import TypeVar
|
2012-11-07 09:29:45 -08:00
|
|
|
|
2015-02-12 14:57:46 -08:00
|
|
|
# Record whether Pillow could be imported; the flag starts optimistic and is
# cleared only if the import itself fails.
pil_available = True
try:
    from PIL import Image
except ImportError:
    pil_available = False

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
2012-11-27 10:00:27 -08:00
|
|
|
|
|
|
|
|
Code cleanup
Remove no longer used google scripts
Remove convenience files from comicataggerlib and import comicapi directly
Add type-hints to facilitate auto-complete tools
Make PyQt5 code more compatible with PyQt6
Implement automatic tooling
isort and black for code formatting
Line length has been set to 120
flake8 for code standards with exceptions:
E203 - Whitespace before ':' - format compatiblity with black
E501 - Line too long - flake8 line limit cannot be set
E722 - Do not use bare except - fixing bare except statements is a
lot of overhead and there are already
many in the codebase
These changes, along with some manual fixes creates much more readable code.
See examples below:
diff --git a/comicapi/comet.py b/comicapi/comet.py
index d1741c5..52dc195 100644
--- a/comicapi/comet.py
+++ b/comicapi/comet.py
@@ -166,7 +166,2 @@ class CoMet:
- if credit['role'].lower() in set(self.editor_synonyms):
- ET.SubElement(
- root,
- 'editor').text = "{0}".format(
- credit['person'])
@@ -174,2 +169,4 @@ class CoMet:
self.indent(root)
+ if credit["role"].lower() in set(self.editor_synonyms):
+ ET.SubElement(root, "editor").text = str(credit["person"])
diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py
index 4338176..9219f01 100644
--- a/comictaggerlib/autotagmatchwindow.py
+++ b/comictaggerlib/autotagmatchwindow.py
@@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
self.skipButton, QtWidgets.QDialogButtonBox.ActionRole)
- self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText(
- "Accept and Write Tags")
+ self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags")
diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py
index 688907d..dbd0c2e 100644
--- a/comictaggerlib/cli.py
+++ b/comictaggerlib/cli.py
@@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results):
if opts.raw:
- print((
- "{0}".format(
- str(
- ca.readRawCIX(),
- errors='ignore'))))
+ print(ca.read_raw_cix())
else:
2022-04-01 16:50:46 -07:00
|
|
|
class ImageHasher:
    """Compute content hashes for an image and compare hashes by Hamming distance.

    The image is opened once in the constructor; ``average_hash`` and ``p_hash``
    each derive an integer hash from it.
    """

    def __init__(self, path: str | None = None, data: bytes = b"", width: int = 8, height: int = 8) -> None:
        """Open the image to be hashed.

        Args:
            path: File to open. Takes precedence over ``data`` when given.
            data: Encoded image bytes, used only when ``path`` is None.
            width: Width the image is resized to by ``average_hash``.
            height: Height the image is resized to by ``average_hash``.

        Raises:
            OSError: If neither ``path`` nor ``data`` is supplied.
        """
        self.width = width
        self.height = height

        if path is None and not data:
            raise OSError("ImageHasher requires either a path or image data")

        try:
            if path is not None:
                self.image = Image.open(path)
            else:
                self.image = Image.open(io.BytesIO(data))
        except Exception:
            # Best effort: an unreadable image is logged and replaced by a
            # bogus 1x1 greyscale image so the hash methods still work.
            logger.exception("Image data seems corrupted!")
            self.image = Image.new("L", (1, 1))

    def average_hash(self) -> int:
        """Return the average hash of the image as an integer.

        The image is resized to ``width`` x ``height`` and converted to
        greyscale. Each pixel contributes one bit (1 when the pixel is brighter
        than the mean), with the first pixel as the most significant bit.

        Returns:
            The hash, or 0 if the image could not be resized/converted
            (the error is logged).
        """
        try:
            image = self.image.resize((self.width, self.height), Image.Resampling.LANCZOS).convert("L")
        except Exception:
            logger.exception("average_hash error")
            return 0

        pixels = list(image.getdata())
        avg = sum(pixels) / len(pixels)

        # Accumulate bits directly instead of building a "0101..." string.
        result = 0
        for p in pixels:
            result = (result << 1) | int(p > avg)

        return result

    def average_hash2(self) -> None:
        """Unimplemented placeholder; always returns None.

        This method previously carried only a disabled numpy/scipy
        ``convolve2d`` experiment written for Python 2, which was never
        executed. It is kept so existing references to the name keep working.
        """

    def p_hash(self) -> int:
        """
        Pure python version of Perceptual Hash computation of https://github.com/JohannesBuchner/imagehash/tree/master
        Implementation follows http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html

        The image is converted to greyscale, resized to 32x32 and transformed
        with a 2D DCT. The top-left 8x8 coefficients are compared against their
        median, yielding one bit each (first coefficient most significant).

        Returns:
            The hash, or 0 if the image could not be converted/resized
            (the error is logged).
        """

        def dct_basis(size: int) -> list[list[float]]:
            """Return the cosine table ``basis[k][n]`` for a ``size``-point DCT."""
            return [[math.cos(math.pi * k * (2 * n + 1) / (2 * size)) for n in range(size)] for k in range(size)]

        def dct1(block: Sequence[float], basis: Sequence[Sequence[float]]) -> list[float]:
            """Perform 1D Discrete Cosine Transform (DCT) on a given block."""
            dct_block = []
            for cos_row in basis:
                # Plain left-to-right accumulation keeps the floating point
                # result identical to computing each cosine inline.
                sum_val = 0.0
                for value, cos_val in zip(block, cos_row):
                    sum_val += value * cos_val
                dct_block.append(sum_val)
            return dct_block

        def generate_dct2(block: Sequence[Sequence[float]], axis: int = 0) -> list[list[float]]:
            """Perform 2D Discrete Cosine Transform (DCT) on a given block along the specified axis."""
            rows = len(block)
            cols = len(block[0])

            if axis == 0:
                # Apply 1D DCT on each row; the cosine table is shared by all rows.
                basis = dct_basis(cols)
                return [dct1(row, basis) for row in block]

            if axis == 1:
                # Apply 1D DCT on each column, then transpose back to row-major.
                basis = dct_basis(rows)
                dct_columns = [dct1([row[j] for row in block], basis) for j in range(cols)]
                return [[dct_columns[j][i] for j in range(cols)] for i in range(rows)]

            raise ValueError("Invalid axis value. Must be either 0 or 1.")

        def convert_image_to_ndarray(image: Image.Image) -> Sequence[Sequence[float]]:
            """Return the image's pixels as a list of rows (top to bottom)."""
            width, height = image.size
            return [[image.getpixel((x, y)) for x in range(width)] for y in range(height)]

        highfreq_factor = 4
        img_size = 8 * highfreq_factor

        try:
            image = self.image.convert("L").resize((img_size, img_size), Image.Resampling.LANCZOS)
        except Exception:
            logger.exception("p_hash error converting to greyscale and resizing")
            return 0

        pixels = convert_image_to_ndarray(image)
        dct = generate_dct2(generate_dct2(pixels, axis=0), axis=1)
        # Keep only the 8x8 block of lowest-frequency coefficients.
        dctlowfreq = list(itertools.chain.from_iterable(row[:8] for row in dct[:8]))
        med = median(dctlowfreq)

        # Convert to bits: 1 where the coefficient exceeds the median.
        result = 0
        for item in dctlowfreq:
            result = (result << 1) | int(item > med)

        return result

    T = TypeVar("T", int, str)

    @staticmethod
    def hamming_distance(h1: T, h2: T) -> int:
        """Return the Hamming distance between two hashes.

        Args:
            h1: A hash as an int, or as a hexadecimal string.
            h2: A hash as an int, or as a hexadecimal string.

        Returns:
            The number of bit positions in which the two hashes differ.

        Raises:
            ValueError: If a string argument is not valid hexadecimal.
        """
        n1 = h1 if isinstance(h1, int) else int(h1, 16)
        n2 = h2 if isinstance(h2, int) else int(h2, 16)

        # xor the two numbers, then count up the 1's in the binary string
        return bin(n1 ^ n2).count("1")
|