package ch import ( "cmp" _ "embed" "fmt" "image" "log" "math/bits" "runtime" "slices" "gitea.narnian.us/lordwelch/goimagehash" ) //go:embed hashes.gz var Hashes []byte const ( H0 uint64 = 0b11111111 << (8 * iota) H1 H2 H3 H4 H5 H6 H7 ) const ( Shift0 = (8 * iota) Shift1 Shift2 Shift3 Shift4 Shift5 Shift6 Shift7 ) const ( ComicVine Source = "comicvine.gamespot.com" ) type Source string type Match struct { Distance int Hash uint64 } type ID struct { Domain Source ID string } type Result struct { IDs IDList Distance int Hash Hash } type Im struct { Im image.Image Format string Path string ID ID } type ImageHash struct { Hashes []Hash ID ID } type Hash struct { Hash uint64 Kind goimagehash.Kind } type SavedHashes map[Source]map[string][3]uint64 type NewIDs struct { OldID ID NewID ID } type HashStorage interface { GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) MapHashes(ImageHash) DecodeHashes(hashes SavedHashes) error EncodeHashes() (SavedHashes, error) AssociateIDs(newIDs []NewIDs) GetIDs(id ID) IDList } func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match { matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them for _, storedHash := range hashes { distance := bits.OnesCount64(searchHash ^ storedHash) if distance <= maxDistance { matchingHashes = append(matchingHashes, Match{distance, storedHash}) } } return matchingHashes } func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S { index, itemFound := slices.BinarySearch(slice, item) if itemFound { return slice } return slices.Insert(slice, index, item) } func InsertIdx[S ~[]E, E cmp.Ordered](slice S, item E) (S, int) { index, itemFound := slices.BinarySearch(slice, item) if itemFound { return slice, index } return slices.Insert(slice, index, item), index } func MemStats() uint64 { var m runtime.MemStats runtime.ReadMemStats(&m) return m.Alloc } func HashImage(i Im) ImageHash { if i.Format == "webp" { i.Im = goimagehash.FancyUpscale(i.Im.(*image.YCbCr)) } var ( err error ) ahash, err := goimagehash.AverageHash(i.Im) if err != nil { msg := fmt.Sprintf("Failed to ahash Image: %s", err) log.Println(msg) return ImageHash{} } dhash, err := goimagehash.DifferenceHash(i.Im) if err != nil { msg := fmt.Sprintf("Failed to dhash Image: %s", err) log.Println(msg) return ImageHash{} } phash, err := goimagehash.PerceptionHash(i.Im) if err != nil { msg := fmt.Sprintf("Failed to phash Image: %s", err) log.Println(msg) return ImageHash{} } return ImageHash{ Hashes: []Hash{{ahash.GetHash(), ahash.GetKind()}, {dhash.GetHash(), dhash.GetKind()}, {phash.GetHash(), phash.GetKind()}}, ID: i.ID, } } func SplitHash(hash uint64) [8]uint8 { return [8]uint8{ uint8((hash & H7) >> Shift7), uint8((hash & H6) >> Shift6), uint8((hash & H5) >> Shift5), uint8((hash & H4) >> Shift4), uint8((hash & H3) >> Shift3), uint8((hash & H2) >> Shift2), uint8((hash & H1) >> Shift1), uint8((hash & H0) >> Shift0), } } type IDList map[Source][]string // IDs is a map of domain to ID eg IDs['comicvine.gamespot.com'] = []string{"1235"}