comic-hasher/hashing.go
Timmy Welch d730de8fe1 Finish POC
Make a pip installable package for quick-tag
Save and load hashes from msgpack/json
Embed hashes of all thumb_urls from ComicVine
Allow associating new IDs to old IDs
Support multiple domains
Support hashing local covers
Support simple list of ids and distance and results including the hash
2024-08-11 20:46:41 -07:00

160 lines
2.8 KiB
Go

package ch
import (
"cmp"
_ "embed"
"fmt"
"image"
"log"
"math/bits"
"runtime"
"slices"
"gitea.narnian.us/lordwelch/goimagehash"
)
// Hashes holds the raw bytes of the hashes.gz file, embedded into the binary
// at build time by the go:embed directive below.
// NOTE(review): the .gz extension suggests gzip-compressed data, but nothing
// in this file decompresses it — confirm the format against the consumer.
//
//go:embed hashes.gz
var Hashes []byte
// H0 through H7 are byte masks over a 64-bit hash. Hn selects byte n, counting
// from the least significant byte: H0 is 0x00000000000000FF and H7 is
// 0xFF00000000000000.
const (
	H0 uint64 = 0xFF << (iota * 8)
	H1
	H2
	H3
	H4
	H5
	H6
	H7
)
// Shift0 through Shift7 are the bit offsets of each byte in a 64-bit hash:
// Shiftn is 8*n, the right-shift that moves byte n into the lowest 8 bits.
const (
	Shift0 = iota * 8
	Shift1
	Shift2
	Shift3
	Shift4
	Shift5
	Shift6
	Shift7
)
// Source names the domain that an image, hash or ID belongs to.
type Source string

// ComicVine is the Source value for the comicvine.gamespot.com domain.
const ComicVine Source = "comicvine.gamespot.com"
// Match is one entry of the slice returned by Atleast: a stored hash together
// with its bit distance from the hash that was searched for.
type Match struct {
// Distance is the number of bits in which Hash differs from the searched hash.
Distance int
// Hash is the stored hash that was within the requested distance.
Hash uint64
}
// ID pairs a domain with an ID string.
// NOTE(review): presumably the ID is only meaningful within its Domain, and
// Domain holds the same values as Source even though it is a plain string
// here — confirm against callers.
type ID struct {
Domain, ID string
}
// Result groups a list of IDs with a distance and the hash they relate to.
// NOTE(review): nothing in this file constructs a Result; presumably it is a
// single search hit handed back to callers — confirm against the producer.
type Result struct {
// IDs lists the matching IDs, each formatted as "domain:id".
IDs []string
// Distance is presumably the bit distance between Hash and the searched hash — TODO confirm.
Distance int
// Hash is the hash value and kind this result refers to.
Hash ImageHash
}
// Im is the input to HashImage: a decoded image plus the metadata that is
// carried over to the resulting Hash.
type Im struct {
// Im is the decoded image to hash.
Im image.Image
// Format is the image format name; HashImage treats "webp" specially.
Format string
// Domain is copied into the resulting Hash by HashImage.
Domain Source
// ID is copied into the resulting Hash by HashImage. Path is not read
// anywhere in this file; presumably it is the location the image was
// loaded from — TODO confirm.
ID, Path string
}
// Hash is the output of HashImage: the three hashes computed for one image
// together with the Domain and ID of the image they were computed from.
// HashImage returns the zero Hash (all hash pointers nil) when hashing fails.
type Hash struct {
// Ahash is the result of goimagehash.AverageHash.
Ahash *goimagehash.ImageHash
// Dhash is the result of goimagehash.DifferenceHash.
Dhash *goimagehash.ImageHash
// Phash is the result of goimagehash.PerceptionHash.
Phash *goimagehash.ImageHash
// Domain and ID are copied from the Im that was hashed.
Domain Source
ID string
}
// ImageHash pairs a 64-bit hash value with a goimagehash.Kind.
// NOTE(review): presumably Kind records which algorithm (average, difference,
// perception) produced Hash — confirm against goimagehash.
type ImageHash struct {
Hash uint64
Kind goimagehash.Kind
}
// Atleast returns a Match for every entry of hashes whose bit distance from
// searchHash (the number of differing bits) is no greater than maxDistance.
// Matches appear in the same order as in hashes. The result is never nil, even
// when nothing matches.
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
	// Reserve room for half of the candidates up front, in the hope that not
	// every stored hash matches.
	found := make([]Match, 0, len(hashes)/2)
	for idx := range hashes {
		candidate := hashes[idx]
		d := bits.OnesCount64(candidate ^ searchHash)
		if d > maxDistance {
			continue
		}
		found = append(found, Match{Distance: d, Hash: candidate})
	}
	return found
}
// Insert adds item to slice at the position found by binary search and
// returns the resulting slice. If item is already present, slice is returned
// unchanged. slice must be sorted in ascending order for the binary search to
// be meaningful. The insertion may reuse and modify the backing array of slice.
func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
	pos, exists := slices.BinarySearch(slice, item)
	if !exists {
		slice = slices.Insert(slice, pos, item)
	}
	return slice
}
// MemStats reads the runtime memory statistics and returns the Alloc field,
// i.e. the number of bytes of currently allocated heap objects.
func MemStats() uint64 {
	stats := new(runtime.MemStats)
	runtime.ReadMemStats(stats)
	return stats.Alloc
}
// HashImage computes the average, difference and perception hashes of i.Im
// and returns them together with the Domain and ID copied from i.
//
// When i.Format is "webp" and the decoded image is a *image.YCbCr, the image
// is first passed through goimagehash.FancyUpscale. A "webp" image of any
// other concrete type (decoders can produce other types, e.g. for images with
// an alpha channel) is hashed as it is; previously this case panicked on an
// unchecked type assertion.
//
// If any of the three hashes cannot be computed, the failure is logged and the
// zero Hash is returned, so callers can detect failure by checking for nil
// hash fields.
func HashImage(i Im) Hash {
	if i.Format == "webp" {
		// Use the comma-ok form: a plain assertion panics when the image is
		// nil or is not backed by *image.YCbCr.
		if ycbcr, ok := i.Im.(*image.YCbCr); ok {
			i.Im = goimagehash.FancyUpscale(ycbcr)
		}
	}

	ahash, err := goimagehash.AverageHash(i.Im)
	if err != nil {
		msg := fmt.Sprintf("Failed to ahash Image: %s", err)
		log.Println(msg)
		return Hash{}
	}

	dhash, err := goimagehash.DifferenceHash(i.Im)
	if err != nil {
		msg := fmt.Sprintf("Failed to dhash Image: %s", err)
		log.Println(msg)
		return Hash{}
	}

	phash, err := goimagehash.PerceptionHash(i.Im)
	if err != nil {
		msg := fmt.Sprintf("Failed to phash Image: %s", err)
		log.Println(msg)
		return Hash{}
	}

	return Hash{
		Ahash:  ahash,
		Dhash:  dhash,
		Phash:  phash,
		Domain: i.Domain,
		ID:     i.ID,
	}
}
// SplitHash breaks a 64-bit hash into its eight bytes, most significant byte
// first: element 0 holds bits 56-63 and element 7 holds bits 0-7.
func SplitHash(hash uint64) [8]uint8 {
	var parts [8]uint8
	for idx := range parts {
		// Walk from the top byte down; the uint8 conversion keeps only the
		// lowest 8 bits of the shifted value.
		shift := uint(8 * (len(parts) - 1 - idx))
		parts[idx] = uint8(hash >> shift)
	}
	return parts
}
// IDList maps a domain to the list of IDs for that domain,
// e.g. IDList{"comicvine.gamespot.com": []string{"1235"}}.
type IDList map[Source][]string