2024-08-04 18:12:00 -07:00
|
|
|
package ch
|
2024-05-01 18:09:02 -07:00
|
|
|
|
|
|
|
import (
|
2024-08-04 18:12:00 -07:00
|
|
|
"cmp"
|
2024-08-11 20:46:41 -07:00
|
|
|
_ "embed"
|
2024-05-01 18:09:02 -07:00
|
|
|
"fmt"
|
|
|
|
"image"
|
|
|
|
"log"
|
2024-08-04 18:12:00 -07:00
|
|
|
"math/bits"
|
2024-07-31 11:35:17 -07:00
|
|
|
"runtime"
|
2024-08-04 18:12:00 -07:00
|
|
|
"slices"
|
2024-05-01 18:09:02 -07:00
|
|
|
|
2024-07-31 11:35:17 -07:00
|
|
|
"gitea.narnian.us/lordwelch/goimagehash"
|
2024-05-01 18:09:02 -07:00
|
|
|
)
|
|
|
|
|
2024-08-11 20:46:41 -07:00
|
|
|
// Hashes holds the raw contents of the hashes.gz file, embedded into the
// binary at build time. The bytes are stored exactly as they are in the file;
// judging by the extension they are presumably gzip-compressed — confirm
// against the code that reads them.
//
//go:embed hashes.gz
var Hashes []byte
|
|
|
|
|
2024-05-01 18:09:02 -07:00
|
|
|
// H0 through H7 are byte masks over a 64-bit hash. Hn selects bits 8n to 8n+7,
// so H0 covers the least significant byte and H7 the most significant one.
const (
	H0 uint64 = 0x00000000000000FF
	H1 uint64 = 0x000000000000FF00
	H2 uint64 = 0x0000000000FF0000
	H3 uint64 = 0x00000000FF000000
	H4 uint64 = 0x000000FF00000000
	H5 uint64 = 0x0000FF0000000000
	H6 uint64 = 0x00FF000000000000
	H7 uint64 = 0xFF00000000000000
)
|
|
|
|
|
|
|
|
// Shift0 through Shift7 are the bit offsets of the eight bytes of a 64-bit
// hash: ShiftN is 8*N, the amount to shift right to bring byte N down to the
// lowest eight bits. They are untyped constants.
const (
	Shift0 = 0
	Shift1 = 8
	Shift2 = 16
	Shift3 = 24
	Shift4 = 32
	Shift5 = 40
	Shift6 = 48
	Shift7 = 56
)
|
|
|
|
|
2024-08-05 13:54:00 -07:00
|
|
|
// Source is the domain name of a site that IDs come from. It is used as the
// Domain of an ID and as the key type of IDList.
type Source string

// ComicVine is the Source value for comicvine.gamespot.com.
const ComicVine Source = "comicvine.gamespot.com"
|
2024-05-01 18:09:02 -07:00
|
|
|
|
2024-08-04 18:12:00 -07:00
|
|
|
// Match is a stored hash paired with its distance from a search hash.
// Atleast produces these values.
type Match struct {
	// Distance is the number of bits in which Hash differs from the hash that
	// was searched for (as computed by Atleast).
	Distance int
	// Hash is the stored 64-bit hash that matched.
	Hash uint64
}
|
|
|
|
|
2024-08-11 20:46:41 -07:00
|
|
|
// ID identifies an item within a particular source domain.
type ID struct {
	// Domain is the source the identifier belongs to, e.g. ComicVine.
	Domain Source
	// ID is the identifier string within Domain.
	ID string
}
|
|
|
|
|
2024-08-04 18:12:00 -07:00
|
|
|
// Result is one entry of the slice returned by HashStorage.GetMatches.
type Result struct {
	// IDs lists identifiers grouped by source domain.
	IDs IDList
	// Distance is presumably the bit distance between Hash and the hash that
	// was searched for — confirm against the HashStorage implementations.
	Distance int
	// Hash is the hash (value and kind) this result refers to.
	Hash Hash
}
|
|
|
|
|
2024-08-04 18:12:00 -07:00
|
|
|
// Im bundles a decoded image with the metadata needed to hash it; it is the
// input to HashImage.
type Im struct {
	// Im is the decoded image.
	Im image.Image
	// Format is the image format name; HashImage special-cases "webp".
	Format string
	// ID is copied unchanged into the ImageHash that HashImage returns.
	ID ID
	// NewOnly is not read anywhere in this file.
	// NOTE(review): presumably tells the caller to store only previously
	// unseen entries — confirm against the code that consumes it.
	NewOnly bool
}
|
|
|
|
|
2024-09-01 18:13:47 -07:00
|
|
|
// ImageHash is the set of hashes computed for one image together with the ID
// of that image. HashImage returns it with the average, difference and
// perception hashes, in that order.
type ImageHash struct {
	// Hashes holds the computed hashes, each tagged with its kind.
	Hashes []Hash
	// ID identifies the image the hashes were computed from.
	ID ID
}
|
|
|
|
|
2024-09-01 18:13:47 -07:00
|
|
|
// Hash is a single 64-bit image hash tagged with the algorithm that made it.
type Hash struct {
	// Hash is the 64-bit hash value.
	Hash uint64
	// Kind is the goimagehash kind of the hash. SavedHashes.InsertHash uses
	// int(Kind)-1 as the index into SavedHashes.Hashes.
	Kind goimagehash.Kind
}
|
|
|
|
|
2024-09-07 14:51:18 -07:00
|
|
|
// IDList maps a source domain to the IDs known for it,
// e.g. IDs["comicvine.gamespot.com"] = []string{"1235"}.
//
// Maps are extremely expensive in Go for small collections, so this type
// should only be used to return information to a user; internal code should
// not use it.
type IDList map[Source][]string
|
|
|
|
|
|
|
|
// OldSavedHashes is the previous saved-hash layout: source domain -> ID
// string -> three hash values. ConvertSavedHashes turns it into a SavedHashes,
// using each array index as the index into SavedHashes.Hashes.
type OldSavedHashes map[Source]map[string][3]uint64
|
|
|
|
|
|
|
|
// SavedHashes is the current saved-hash layout: a table of ID lists plus, for
// each of three hash types, a map from hash value to a row of that table.
type SavedHashes struct {
	// IDs is the table of ID lists; the values stored in Hashes are indexes
	// into this slice.
	IDs [][]ID
	// Hashes has one map per hash type, each mapping a 64-bit hash value to
	// an index into IDs. InsertHash selects the map with int(Hash.Kind)-1.
	Hashes [3]map[uint64]int
}
|
|
|
|
|
|
|
|
func ToIDList(ids []ID) IDList {
|
|
|
|
idlist := IDList{}
|
|
|
|
for _, id := range ids {
|
|
|
|
idlist[id.Domain] = Insert(idlist[id.Domain], id.ID)
|
|
|
|
}
|
|
|
|
return idlist
|
|
|
|
}
|
|
|
|
func InsertID(ids []ID, id ID) []ID {
|
|
|
|
index, itemFound := slices.BinarySearchFunc(ids, id, func(e ID, t ID) int {
|
|
|
|
return cmp.Or(
|
|
|
|
cmp.Compare(e.Domain, t.Domain),
|
|
|
|
cmp.Compare(e.ID, t.ID),
|
|
|
|
)
|
|
|
|
})
|
|
|
|
if itemFound {
|
|
|
|
return ids
|
|
|
|
}
|
|
|
|
return slices.Insert(ids, index, id)
|
|
|
|
}
|
|
|
|
func (s *SavedHashes) InsertHash(hash Hash, id ID) {
|
|
|
|
for i, h := range s.Hashes {
|
|
|
|
if h == nil {
|
|
|
|
s.Hashes[i] = make(map[uint64]int)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
hashType := int(hash.Kind) - 1
|
|
|
|
idx, hashFound := s.Hashes[hashType][hash.Hash]
|
|
|
|
if !hashFound {
|
|
|
|
idx = len(s.IDs)
|
|
|
|
s.IDs = append(s.IDs, make([]ID, 0, 3))
|
|
|
|
}
|
|
|
|
s.IDs[idx] = InsertID(s.IDs[idx], id)
|
|
|
|
s.Hashes[hashType][hash.Hash] = idx
|
|
|
|
}
|
|
|
|
|
|
|
|
func ConvertSavedHashes(oldHashes OldSavedHashes) SavedHashes {
|
|
|
|
t := SavedHashes{}
|
|
|
|
idcount := 0
|
|
|
|
for _, ids := range oldHashes {
|
|
|
|
idcount += len(ids)
|
|
|
|
}
|
|
|
|
t.IDs = make([][]ID, 0, idcount)
|
|
|
|
t.Hashes[0] = make(map[uint64]int, idcount)
|
|
|
|
t.Hashes[1] = make(map[uint64]int, idcount)
|
|
|
|
t.Hashes[2] = make(map[uint64]int, idcount)
|
|
|
|
for domain, sourceHashes := range oldHashes {
|
|
|
|
for id, hashes := range sourceHashes {
|
|
|
|
idx := len(t.IDs)
|
|
|
|
t.IDs = append(t.IDs, []ID{{domain, id}})
|
|
|
|
for hashType, hash := range hashes {
|
|
|
|
t.Hashes[hashType][hash] = idx
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fmt.Println("Expected number of IDs", idcount)
|
|
|
|
idcount = 0
|
|
|
|
for _, ids := range t.IDs {
|
|
|
|
idcount += len(ids)
|
|
|
|
}
|
|
|
|
fmt.Println("length of hashes", len(t.Hashes[0])+len(t.Hashes[1])+len(t.Hashes[2]))
|
|
|
|
fmt.Println("Length of ID lists", len(t.IDs))
|
|
|
|
fmt.Println("Total number of IDs", idcount)
|
|
|
|
return t
|
|
|
|
}
|
2024-09-01 18:13:47 -07:00
|
|
|
|
|
|
|
// NewIDs pairs two IDs; a slice of these is the argument to
// HashStorage.AssociateIDs.
// NOTE(review): presumably NewID is to be associated with the hashes already
// known for OldID — confirm against the HashStorage implementations.
type NewIDs struct {
	// OldID is the first ID of the pair.
	OldID ID
	// NewID is the second ID of the pair.
	NewID ID
}
|
|
|
|
|
|
|
|
// HashStorage is the set of operations a hash store must provide.
//
// NOTE(review): no implementation is visible in this file, so the method
// notes below are inferred from names and signatures only — confirm them
// against the implementations.
type HashStorage interface {
	// GetMatches looks up the given hashes and returns the results found.
	// Presumably max bounds the accepted distance and exactOnly restricts the
	// search to exact matches.
	GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error)
	// MapHashes presumably records the hashes of an ImageHash under its ID.
	MapHashes(ImageHash)
	// DecodeHashes presumably loads the store's contents from hashes.
	DecodeHashes(hashes SavedHashes) error
	// EncodeHashes presumably exports the store's contents as a SavedHashes.
	EncodeHashes() (SavedHashes, error)
	// AssociateIDs takes pairs of old and new IDs; presumably each NewID is
	// attached to the hashes already known for its OldID.
	AssociateIDs(newIDs []NewIDs) error
	// GetIDs returns an IDList for the given id; presumably the IDs that
	// share hashes with it.
	GetIDs(id ID) IDList
}
|
|
|
|
|
2024-08-04 18:26:20 -07:00
|
|
|
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
|
|
|
|
matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
|
|
|
|
for _, storedHash := range hashes {
|
|
|
|
distance := bits.OnesCount64(searchHash ^ storedHash)
|
2024-08-04 18:12:00 -07:00
|
|
|
if distance <= maxDistance {
|
2024-08-04 18:26:20 -07:00
|
|
|
matchingHashes = append(matchingHashes, Match{distance, storedHash})
|
2024-05-01 18:09:02 -07:00
|
|
|
}
|
|
|
|
}
|
2024-08-04 18:26:20 -07:00
|
|
|
return matchingHashes
|
2024-07-31 11:35:17 -07:00
|
|
|
}
|
|
|
|
|
2024-08-04 18:12:00 -07:00
|
|
|
// Insert adds item to slice, which must already be sorted in ascending order,
// at the position that keeps it sorted, and returns the resulting slice.
// If item is already present, slice is returned unchanged.
func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
	if pos, present := slices.BinarySearch(slice, item); !present {
		return slices.Insert(slice, pos, item)
	}
	return slice
}
|
|
|
|
|
2024-09-01 18:13:47 -07:00
|
|
|
// InsertIdx behaves like Insert but also reports the index at which item sits
// in the returned slice: the position of the existing element when item was
// already present, or the position of the newly inserted one otherwise.
// slice must already be sorted in ascending order.
func InsertIdx[S ~[]E, E cmp.Ordered](slice S, item E) (S, int) {
	pos, present := slices.BinarySearch(slice, item)
	if !present {
		slice = slices.Insert(slice, pos, item)
	}
	return slice, pos
}
|
|
|
|
|
2024-07-31 11:35:17 -07:00
|
|
|
// MemStats returns the Alloc field of a fresh runtime.MemStats snapshot,
// i.e. the bytes of allocated heap objects reported by the Go runtime.
func MemStats() uint64 {
	stats := runtime.MemStats{}
	runtime.ReadMemStats(&stats)
	return stats.Alloc
}
|
|
|
|
|
2024-09-01 18:13:47 -07:00
|
|
|
func HashImage(i Im) ImageHash {
|
2024-08-04 18:12:00 -07:00
|
|
|
if i.Format == "webp" {
|
|
|
|
i.Im = goimagehash.FancyUpscale(i.Im.(*image.YCbCr))
|
2024-07-31 11:35:17 -07:00
|
|
|
}
|
2024-05-01 18:09:02 -07:00
|
|
|
|
|
|
|
var (
|
2024-09-01 18:13:47 -07:00
|
|
|
err error
|
2024-05-01 18:09:02 -07:00
|
|
|
)
|
|
|
|
|
2024-09-01 18:13:47 -07:00
|
|
|
ahash, err := goimagehash.AverageHash(i.Im)
|
2024-05-01 18:09:02 -07:00
|
|
|
if err != nil {
|
|
|
|
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
|
|
|
|
log.Println(msg)
|
2024-09-01 18:13:47 -07:00
|
|
|
return ImageHash{}
|
2024-05-01 18:09:02 -07:00
|
|
|
}
|
2024-09-01 18:13:47 -07:00
|
|
|
dhash, err := goimagehash.DifferenceHash(i.Im)
|
2024-05-01 18:09:02 -07:00
|
|
|
if err != nil {
|
|
|
|
msg := fmt.Sprintf("Failed to dhash Image: %s", err)
|
|
|
|
log.Println(msg)
|
2024-09-01 18:13:47 -07:00
|
|
|
return ImageHash{}
|
2024-05-01 18:09:02 -07:00
|
|
|
}
|
2024-09-01 18:13:47 -07:00
|
|
|
phash, err := goimagehash.PerceptionHash(i.Im)
|
2024-05-01 18:09:02 -07:00
|
|
|
if err != nil {
|
|
|
|
msg := fmt.Sprintf("Failed to phash Image: %s", err)
|
|
|
|
log.Println(msg)
|
2024-09-01 18:13:47 -07:00
|
|
|
return ImageHash{}
|
2024-05-01 18:09:02 -07:00
|
|
|
}
|
2024-09-01 18:13:47 -07:00
|
|
|
return ImageHash{
|
|
|
|
Hashes: []Hash{{ahash.GetHash(), ahash.GetKind()}, {dhash.GetHash(), dhash.GetKind()}, {phash.GetHash(), phash.GetKind()}},
|
2024-08-04 18:12:00 -07:00
|
|
|
ID: i.ID,
|
2024-05-01 18:09:02 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func SplitHash(hash uint64) [8]uint8 {
|
|
|
|
return [8]uint8{
|
2024-08-04 18:26:20 -07:00
|
|
|
uint8((hash & H7) >> Shift7),
|
|
|
|
uint8((hash & H6) >> Shift6),
|
|
|
|
uint8((hash & H5) >> Shift5),
|
|
|
|
uint8((hash & H4) >> Shift4),
|
|
|
|
uint8((hash & H3) >> Shift3),
|
|
|
|
uint8((hash & H2) >> Shift2),
|
|
|
|
uint8((hash & H1) >> Shift1),
|
|
|
|
uint8((hash & H0) >> Shift0),
|
2024-05-01 18:09:02 -07:00
|
|
|
}
|
|
|
|
}
|