d730de8fe1
Make a pip installable package for quick-tag. Save and load hashes from msgpack/json. Embed hashes of all thumb_urls from ComicVine. Allow associating new IDs to old IDs. Support multiple domains. Support hashing local covers. Support simple list of ids and distance and results including the hash.
160 lines
2.8 KiB
Go
160 lines
2.8 KiB
Go
package ch
|
|
|
|
import (
|
|
"cmp"
|
|
_ "embed"
|
|
"fmt"
|
|
"image"
|
|
"log"
|
|
"math/bits"
|
|
"runtime"
|
|
"slices"
|
|
|
|
"gitea.narnian.us/lordwelch/goimagehash"
|
|
)
|
|
|
|
//go:embed hashes.gz
|
|
var Hashes []byte
|
|
|
|
// H0 through H7 are byte masks over a 64-bit hash. H0 selects the least
// significant byte (bits 0-7) and each following constant selects the next
// higher byte, up to H7 for the most significant byte (bits 56-63).
const (
	H0 uint64 = 0xFF << (iota * 8)
	H1
	H2
	H3
	H4
	H5
	H6
	H7
)
|
|
|
|
// Shift0 through Shift7 are the bit offsets of bytes 0 through 7 of a 64-bit
// value: 0, 8, 16, ..., 56. They are untyped integer constants.
const (
	Shift0 = iota * 8
	Shift1
	Shift2
	Shift3
	Shift4
	Shift5
	Shift6
	Shift7
)
|
|
|
|
// Source is the name of a domain that IDs and hashes are associated with.
type Source string

// Known sources.
const (
	// ComicVine is the ComicVine domain.
	ComicVine Source = "comicvine.gamespot.com"
)
|
|
|
|
// Match is one stored hash together with its bit distance from a search hash.
//
// The field order is significant: Match values are built with unkeyed
// composite literals in this package.
type Match struct {
	// Distance is the number of bits in which Hash differs from the search hash.
	Distance int

	// Hash is the stored hash that matched.
	Hash uint64
}
|
|
|
|
// ID identifies an item by the domain it belongs to and its ID within that
// domain. Both parts are plain strings.
type ID struct {
	Domain string
	ID     string
}
|
|
|
|
type Result struct {
|
|
IDs []string // domain:id
|
|
Distance int
|
|
Hash ImageHash
|
|
}
|
|
|
|
type Im struct {
|
|
Im image.Image
|
|
Format string
|
|
Domain Source
|
|
ID, Path string
|
|
}
|
|
|
|
type Hash struct {
|
|
Ahash *goimagehash.ImageHash
|
|
Dhash *goimagehash.ImageHash
|
|
Phash *goimagehash.ImageHash
|
|
Domain Source
|
|
ID string
|
|
}
|
|
|
|
type ImageHash struct {
|
|
Hash uint64
|
|
Kind goimagehash.Kind
|
|
}
|
|
|
|
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
|
|
matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
|
|
for _, storedHash := range hashes {
|
|
distance := bits.OnesCount64(searchHash ^ storedHash)
|
|
if distance <= maxDistance {
|
|
matchingHashes = append(matchingHashes, Match{distance, storedHash})
|
|
}
|
|
}
|
|
return matchingHashes
|
|
}
|
|
|
|
// Insert adds item to slice at its sorted position and returns the resulting
// slice. If item is already present the slice is returned unchanged, so the
// result contains no duplicates of item.
//
// slice must already be sorted in ascending order, as slices.BinarySearch
// requires. The insertion may reuse and modify the backing array of slice, so
// callers should use only the returned value afterwards.
func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
	if pos, present := slices.BinarySearch(slice, item); !present {
		return slices.Insert(slice, pos, item)
	}
	return slice
}
|
|
|
|
// MemStats returns the Alloc field of a fresh runtime.MemStats snapshot, i.e.
// the number of bytes of allocated heap objects at the time of the call.
func MemStats() uint64 {
	stats := runtime.MemStats{}
	runtime.ReadMemStats(&stats)
	return stats.Alloc
}
|
|
|
|
func HashImage(i Im) Hash {
|
|
if i.Format == "webp" {
|
|
i.Im = goimagehash.FancyUpscale(i.Im.(*image.YCbCr))
|
|
}
|
|
|
|
var (
|
|
err error = nil
|
|
ahash *goimagehash.ImageHash
|
|
dhash *goimagehash.ImageHash
|
|
phash *goimagehash.ImageHash
|
|
)
|
|
|
|
ahash, err = goimagehash.AverageHash(i.Im)
|
|
if err != nil {
|
|
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
|
|
log.Println(msg)
|
|
return Hash{}
|
|
}
|
|
dhash, err = goimagehash.DifferenceHash(i.Im)
|
|
if err != nil {
|
|
msg := fmt.Sprintf("Failed to dhash Image: %s", err)
|
|
log.Println(msg)
|
|
return Hash{}
|
|
}
|
|
phash, err = goimagehash.PerceptionHash(i.Im)
|
|
if err != nil {
|
|
msg := fmt.Sprintf("Failed to phash Image: %s", err)
|
|
log.Println(msg)
|
|
return Hash{}
|
|
}
|
|
return Hash{
|
|
Ahash: ahash,
|
|
Dhash: dhash,
|
|
Phash: phash,
|
|
Domain: i.Domain,
|
|
ID: i.ID,
|
|
}
|
|
}
|
|
|
|
// SplitHash breaks a 64-bit hash into its eight bytes, most significant byte
// first: element 0 holds bits 56-63 and element 7 holds bits 0-7.
func SplitHash(hash uint64) [8]uint8 {
	var parts [8]uint8
	for i := range parts {
		// Byte i sits 8*(7-i) bits above the bottom; converting to uint8 keeps
		// only the low eight bits after the shift.
		parts[i] = uint8(hash >> (56 - 8*i))
	}
	return parts
}
|
|
|
|
// IDList maps a domain to the list of IDs known for it, e.g.
// IDList["comicvine.gamespot.com"] = []string{"1235"}.
type IDList map[Source][]string
|