// Copyright 2017 The goimagehash Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package goimagehash
import (
	"encoding/binary"
	"encoding/gob"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
)
// errNoOther is the error ImageHash.Distance returns when the hash to compare
// against is nil.
var errNoOther = errors.New("other should not be nil")
// Kind describes the kinds of hash.
//
// It is an integer enumeration; the zero value is Unknown.
type Kind int
2017-07-28 10:18:24 -07:00
2017-07-31 07:24:04 -07:00
// ImageHash is a struct of hash computation.
2017-07-28 10:18:24 -07:00
type ImageHash struct {
hash uint64
kind Kind
2017-07-28 10:18:24 -07:00
}
// ExtImageHash is a struct of big hash computation.
//
// Unlike ImageHash it stores the hash as a sequence of 64-bit words and
// records the bit size explicitly.
type ExtImageHash struct {
	// hash holds the hash value as 64-bit words.
	hash []uint64
	// kind tags the hash with its Kind.
	kind Kind
	// bits is the hash bit size reported by Bits.
	bits int
}
2017-07-28 10:18:24 -07:00
const (
// Unknown is a enum value of the unknown hash.
Unknown Kind = iota
// AHash is a enum value of the average hash.
AHash
// DHash is a enum value of the difference hash.
DHash
2024-08-11 19:57:15 -07:00
// PHash is a enum value of the perceptual hash.
PHash
// WHash is a enum value of the wavelet hash.
WHash
2017-07-28 10:18:24 -07:00
)
2024-08-11 19:57:15 -07:00
var (
	// kindNames maps every Kind to the name used for it in text and JSON.
	kindNames = map[Kind]string{
		Unknown: "Unknown",
		AHash:   "ahash",
		DHash:   "dhash",
		PHash:   "phash",
		WHash:   "whash",
	}

	// nameKinds is the inverse of kindNames: it maps a name back to its Kind.
	// It is derived from kindNames so the two tables cannot drift apart.
	nameKinds = func() map[string]Kind {
		inverse := make(map[string]Kind, len(kindNames))
		for kind, name := range kindNames {
			inverse[name] = kind
		}
		return inverse
	}()
)
2017-07-31 07:24:04 -07:00
// NewImageHash function creates a new image hash.
func NewImageHash(hash uint64, kind Kind) *ImageHash {
2017-07-28 10:18:24 -07:00
return &ImageHash{hash: hash, kind: kind}
}
2024-08-11 19:57:15 -07:00
// String returns the name registered for k in kindNames, or "Unknown" when k
// has no entry there.
func (k Kind) String() string {
	name, known := kindNames[k]
	if !known {
		return "Unknown"
	}
	return name
}
// UnmarshalJSON decodes a JSON string and stores the Kind registered under
// that name in nameKinds.
//
// It returns the decoding error when b is not a JSON string. A name that is
// not in nameKinds is not an error: the lookup yields the zero value, Unknown.
func (k *Kind) UnmarshalJSON(b []byte) error {
	var name string
	err := json.Unmarshal(b, &name)
	if err != nil {
		return err
	}
	*k = nameKinds[name]
	return nil
}
// MarshalJSON encodes the kind as a JSON string holding its String name.
//
// The receiver is a value, not a pointer, so that encoding/json finds the
// method for every Kind it meets, including non-addressable ones such as a
// Kind passed by value or a field of a struct passed by value. With a pointer
// receiver those were silently encoded as bare integers. Calls through a
// *Kind keep working because the pointer's method set includes this method.
func (k Kind) MarshalJSON() ([]byte, error) {
	return json.Marshal(k.String())
}
// Bits method returns the hash bit size, which is always 64 for an ImageHash.
func (h *ImageHash) Bits() int {
	const size = 64
	return size
}
2017-07-31 07:24:04 -07:00
// Distance method returns a distance between two hashes.
2017-07-28 10:18:24 -07:00
func (h *ImageHash) Distance(other *ImageHash) (int, error) {
if other == nil {
return -1, errNoOther
}
2017-07-28 10:18:24 -07:00
if h.GetKind() != other.GetKind() {
2024-08-04 18:24:04 -07:00
return -1, errors.New("image hashes's kind should be identical")
2017-07-28 10:18:24 -07:00
}
lhash := h.GetHash()
rhash := other.GetHash()
hamming := lhash ^ rhash
return popcnt(hamming), nil
2017-07-28 10:18:24 -07:00
}
2017-07-31 07:24:04 -07:00
// GetHash method returns a 64bits hash value.
2017-07-28 10:18:24 -07:00
func (h *ImageHash) GetHash() uint64 {
return h.hash
}
2017-07-31 07:24:04 -07:00
// GetKind method returns a kind of image hash.
func (h *ImageHash) GetKind() Kind {
2017-07-28 10:18:24 -07:00
return h.kind
}
2019-03-16 01:22:57 -07:00
func (h *ImageHash) leftShiftSet(idx int) {
2017-07-28 10:18:24 -07:00
h.hash |= 1 << uint(idx)
}
2024-08-04 18:24:04 -07:00
// Format strings for the "<kind letter>:<hash>" text form of an ImageHash.
const (
	// strFmtHex has a one-character kind, a colon, then the hash as 16
	// zero-padded hex digits. ImageHashFromString parses with it.
	strFmtHex = "%1s:%016x"
	// strFmtBin has a one-character kind, a colon, then the hash as 64
	// zero-padded binary digits.
	strFmtBin = "%1s:%064b"
)
// Dump method writes a binary serialization into w io.Writer.
//
// The hash value and kind are copied into a local struct with exported fields
// (gob only encodes exported fields) and gob-encoded to w. Any encoding or
// write error is returned unchanged.
func (h *ImageHash) Dump(w io.Writer) error {
	type D struct {
		Hash uint64
		Kind Kind
	}

	payload := D{Hash: h.hash, Kind: h.kind}
	return gob.NewEncoder(w).Encode(payload)
}
// LoadImageHash method loads a ImageHash from io.Reader.
//
// It gob-decodes one value with Hash and Kind fields from b, the layout
// written by ImageHash.Dump, and builds an ImageHash from it. On a decoding
// error it returns nil and that error.
func LoadImageHash(b io.Reader) (*ImageHash, error) {
	type E struct {
		Hash uint64
		Kind Kind
	}

	var decoded E
	if err := gob.NewDecoder(b).Decode(&decoded); err != nil {
		return nil, err
	}
	return &ImageHash{hash: decoded.Hash, kind: decoded.Kind}, nil
}
2017-11-16 11:32:11 -08:00
// ImageHashFromString returns an image hash from a hex representation
//
// Deprecated: Use goimagehash.LoadImageHash instead.
2017-11-16 11:32:11 -08:00
func ImageHashFromString(s string) (*ImageHash, error) {
var kindStr string
var hash uint64
2024-04-05 16:29:03 -07:00
_, err := fmt.Sscanf(s, strFmtHex, &kindStr, &hash)
if err != nil {
return nil, errors.New("Couldn't parse string " + s)
}
kind := Unknown
switch kindStr {
case "a":
kind = AHash
case "p":
kind = PHash
case "d":
kind = DHash
case "w":
kind = WHash
}
return NewImageHash(hash, kind), nil
}
2024-04-05 16:29:03 -07:00
// String returns a hex representation of the hash
2024-05-01 17:59:11 -07:00
func (h *ImageHash) String() string {
return fmt.Sprintf("%016x", h.hash)
}
// BinString returns a binary representation of the hash
//
// The value is formatted as 64 zero-padded binary digits. The kind is not
// part of the output.
func (h *ImageHash) BinString() string {
	const layout = "%064b"
	return fmt.Sprintf(layout, h.hash)
}
// NewExtImageHash function creates a new big hash
//
// The hash slice is stored as given, without copying, together with the kind
// and the bit size.
func NewExtImageHash(hash []uint64, kind Kind, bits int) *ExtImageHash {
	ext := new(ExtImageHash)
	ext.hash = hash
	ext.kind = kind
	ext.bits = bits
	return ext
}
2024-05-01 17:59:11 -07:00
// Bits method returns the hash bit size
func (h *ExtImageHash) Bits() int {
return h.bits
}
// Distance method returns a distance between two big hashes
func (h *ExtImageHash) Distance(other *ExtImageHash) (int, error) {
if h.GetKind() != other.GetKind() {
2024-08-04 18:24:04 -07:00
return -1, errors.New("extended Image hashes's kind should be identical")
}
if h.Bits() != other.Bits() {
2024-08-04 18:24:04 -07:00
msg := fmt.Sprintf("extended image hash should has an identical bit size but got %v vs %v", h.Bits(), other.Bits())
return -1, errors.New(msg)
}
lHash := h.GetHash()
rHash := other.GetHash()
if len(lHash) != len(rHash) {
2024-08-04 18:24:04 -07:00
return -1, errors.New("extended Image hashes's size should be identical")
}
distance := 0
for idx, lh := range lHash {
rh := rHash[idx]
hamming := lh ^ rh
distance += popcnt(hamming)
}
return distance, nil
}
// GetHash method returns a big hash value
//
// The returned slice is the one held by the hash, not a copy.
func (h *ExtImageHash) GetHash() []uint64 {
	words := h.hash
	return words
}
// GetKind method returns a kind of big hash
func (h *ExtImageHash) GetKind() Kind {
	kind := h.kind
	return kind
}
// Dump method writes a binary serialization into w io.Writer.
//
// The hash words, kind and bit size are copied into a local struct with
// exported fields (gob only encodes exported fields) and gob-encoded to w.
// Any encoding or write error is returned unchanged.
func (h *ExtImageHash) Dump(w io.Writer) error {
	type D struct {
		Hash []uint64
		Kind Kind
		Bits int
	}

	payload := D{Hash: h.hash, Kind: h.kind, Bits: h.bits}
	return gob.NewEncoder(w).Encode(payload)
}
// LoadExtImageHash method loads a ExtImageHash from io.Reader.
//
// It gob-decodes one value with Hash, Kind and Bits fields from b, the layout
// written by ExtImageHash.Dump, and builds an ExtImageHash from it. On a
// decoding error it returns nil and that error.
func LoadExtImageHash(b io.Reader) (*ExtImageHash, error) {
	type E struct {
		Hash []uint64
		Kind Kind
		Bits int
	}

	var decoded E
	if err := gob.NewDecoder(b).Decode(&decoded); err != nil {
		return nil, err
	}
	return &ExtImageHash{hash: decoded.Hash, kind: decoded.Kind, bits: decoded.Bits}, nil
}
// extStrFmt is the text layout of an ExtImageHash: a one-character kind,
// a colon, then the hash as a hex string. ExtImageHashFromString parses with
// it and ExtImageHash.String formats with it.
const extStrFmt = "%1s:%s"
// ExtImageHashFromString returns a big hash from a hex representation
//
// The input is scanned with the format "%1s:%s": a one-character kind,
// a colon, then the hash as a hex string. The kind letters "a", "p", "d" and
// "w" select AHash, PHash, DHash and WHash; any other letter yields Unknown.
// The hex string is decoded to bytes and every complete group of 8 bytes
// becomes one big-endian 64-bit word; trailing bytes that do not fill a group
// are ignored. The bit size of the result is 64 times the number of words.
//
// It returns nil and an error when the string does not match the format or
// the hash part is not valid hex.
//
// Deprecated: Use goimagehash.LoadExtImageHash instead.
func ExtImageHashFromString(s string) (*ExtImageHash, error) {
	var kindStr, hashStr string
	if _, err := fmt.Sscanf(s, extStrFmt, &kindStr, &hashStr); err != nil {
		return nil, errors.New("Couldn't parse string " + s)
	}

	raw, err := hex.DecodeString(hashStr)
	if err != nil {
		return nil, err
	}

	const wordSize = 8
	var words []uint64
	for off := 0; off+wordSize <= len(raw); off += wordSize {
		words = append(words, binary.BigEndian.Uint64(raw[off:off+wordSize]))
	}

	kind := Unknown
	switch kindStr {
	case "a":
		kind = AHash
	case "p":
		kind = PHash
	case "d":
		kind = DHash
	case "w":
		kind = WHash
	}
	return NewExtImageHash(words, kind, len(words)*64), nil
}
2024-04-05 16:29:03 -07:00
// String returns a hex representation of big hash
func (h *ExtImageHash) String() string {
var hexBytes []byte
for _, hash := range h.hash {
hashBytes := make([]byte, 8)
binary.BigEndian.PutUint64(hashBytes, hash)
hexBytes = append(hexBytes, hashBytes...)
}
hexStr := hex.EncodeToString(hexBytes)
kindStr := ""
switch h.kind {
case AHash:
kindStr = "a"
case PHash:
kindStr = "p"
case DHash:
kindStr = "d"
case WHash:
kindStr = "w"
}
return fmt.Sprintf(extStrFmt, kindStr, hexStr)
}