264 lines
6.8 KiB
Go
264 lines
6.8 KiB
Go
package ch
|
|
|
|
import (
|
|
"cmp"
|
|
"errors"
|
|
"fmt"
|
|
"math/bits"
|
|
"slices"
|
|
"sync"
|
|
|
|
"gitea.narnian.us/lordwelch/goimagehash"
|
|
)
|
|
|
|
type basicMapStorage struct {
|
|
hashMutex *sync.RWMutex
|
|
|
|
ids map[ID]*[]ID
|
|
aHashes []SavedHash
|
|
dHashes []SavedHash
|
|
pHashes []SavedHash
|
|
}
|
|
|
|
// atleast must have a read lock before using
|
|
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
|
|
matchingHashes := make([]Result, 0, 20) // hope that we don't need more
|
|
|
|
mappedIds := map[*[]ID]bool{}
|
|
for _, storedHash := range *b.getCurrentHashes(kind) {
|
|
distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash)
|
|
if distance <= maxDistance {
|
|
ids := b.ids[storedHash.ID]
|
|
if mappedIds[ids] {
|
|
continue
|
|
}
|
|
mappedIds[ids] = true
|
|
matchingHashes = append(matchingHashes, Result{ToIDList(*b.ids[storedHash.ID]), distance, storedHash.Hash})
|
|
}
|
|
}
|
|
return matchingHashes
|
|
}
|
|
|
|
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
|
var (
|
|
foundMatches []Result
|
|
tl timeLog
|
|
)
|
|
tl.resetTime()
|
|
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
|
|
b.hashMutex.RLock()
|
|
defer b.hashMutex.RUnlock()
|
|
|
|
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
|
for _, hash := range hashes {
|
|
mappedIds := map[*[]ID]bool{}
|
|
|
|
index, count := b.findHash(hash)
|
|
if count > 0 {
|
|
for _, storedHash := range (*b.getCurrentHashes(hash.Kind))[index : index+count] {
|
|
ids := b.ids[storedHash.ID]
|
|
if mappedIds[ids] {
|
|
continue
|
|
}
|
|
mappedIds[ids] = true
|
|
|
|
foundMatches = append(foundMatches, Result{
|
|
Distance: 0,
|
|
Hash: storedHash.Hash,
|
|
IDs: ToIDList(*b.ids[storedHash.ID]),
|
|
})
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
tl.logTime("Search Exact")
|
|
|
|
return foundMatches, nil
|
|
}
|
|
|
|
foundHashes := make(map[uint64]struct{})
|
|
totalPartialHashes := 0
|
|
|
|
for _, hash := range hashes {
|
|
foundMatches = append(foundMatches, b.atleast(hash.Kind, max, hash.Hash)...)
|
|
|
|
}
|
|
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
|
|
return foundMatches, nil
|
|
}
|
|
|
|
// getCurrentHashes must have a read lock before using
|
|
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]SavedHash {
|
|
if kind == goimagehash.AHash {
|
|
return &b.aHashes
|
|
}
|
|
if kind == goimagehash.DHash {
|
|
return &b.dHashes
|
|
}
|
|
if kind == goimagehash.PHash {
|
|
return &b.pHashes
|
|
}
|
|
panic("Unknown hash type: " + kind.String())
|
|
}
|
|
|
|
// findHash must have a read lock before using
|
|
// return value is index, count
|
|
// if count < 1 then no results were found
|
|
func (b *basicMapStorage) findHash(hash Hash) (int, int) {
|
|
currentHashes := *b.getCurrentHashes(hash.Kind)
|
|
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing SavedHash, target Hash) int {
|
|
return cmp.Compare(existing.Hash.Hash, target.Hash)
|
|
})
|
|
if !found {
|
|
return index, 0
|
|
}
|
|
count := 0
|
|
for i := index + 1; i < len(currentHashes) && currentHashes[i].Hash.Hash == hash.Hash; i++ {
|
|
count++
|
|
}
|
|
return index, count
|
|
}
|
|
|
|
// insertHash must already have a lock
|
|
func (b *basicMapStorage) insertHash(hash Hash, id ID) {
|
|
currentHashes := b.getCurrentHashes(hash.Kind)
|
|
index, count := b.findHash(hash)
|
|
max := index + count
|
|
for ; index < max; index++ {
|
|
if (*currentHashes)[index].ID == id {
|
|
return
|
|
}
|
|
}
|
|
|
|
*currentHashes = slices.Insert(*currentHashes, index, SavedHash{hash, id})
|
|
if _, mapped := b.ids[id]; !mapped {
|
|
b.ids[id] = &[]ID{id}
|
|
}
|
|
}
|
|
|
|
func (b *basicMapStorage) MapHashes(hash ImageHash) {
|
|
b.hashMutex.Lock()
|
|
defer b.hashMutex.Unlock()
|
|
for _, ih := range hash.Hashes {
|
|
b.insertHash(ih, hash.ID)
|
|
}
|
|
}
|
|
|
|
// DecodeHashes must already have a lock
|
|
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
|
|
b.ids = make(map[ID]*[]ID, len(hashes.Hashes))
|
|
|
|
// Initialize all the known equal IDs
|
|
for _, ids := range hashes.IDs {
|
|
for _, id := range ids {
|
|
b.ids[id] = &ids
|
|
}
|
|
}
|
|
|
|
slices.SortFunc(hashes.Hashes, func(existing, target SavedHash) int {
|
|
return cmp.Or(
|
|
cmp.Compare(existing.Hash.Kind, target.Hash.Kind),
|
|
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
|
|
cmp.Compare(existing.ID.Domain, target.ID.Domain),
|
|
cmp.Compare(existing.ID.ID, target.ID.ID),
|
|
)
|
|
})
|
|
|
|
// Assume they are probably fairly equally split between hash types
|
|
b.aHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
|
|
b.dHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
|
|
b.pHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
|
|
for _, savedHash := range hashes.Hashes {
|
|
|
|
if savedHash.Hash.Kind == goimagehash.AHash {
|
|
b.aHashes = append(b.aHashes, savedHash)
|
|
}
|
|
if savedHash.Hash.Kind == goimagehash.DHash {
|
|
b.dHashes = append(b.dHashes, savedHash)
|
|
}
|
|
if savedHash.Hash.Kind == goimagehash.PHash {
|
|
b.pHashes = append(b.pHashes, savedHash)
|
|
}
|
|
|
|
if savedHash.ID == (ID{}) {
|
|
fmt.Println("Empty ID detected")
|
|
panic(savedHash)
|
|
}
|
|
// All known equal IDs are already mapped we can add any missing ones from hashes
|
|
if _, ok := b.ids[savedHash.ID]; !ok {
|
|
b.ids[savedHash.ID] = &[]ID{savedHash.ID}
|
|
}
|
|
}
|
|
|
|
hashCmp := func(existing, target SavedHash) int {
|
|
return cmp.Or(
|
|
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
|
|
cmp.Compare(existing.ID.Domain, target.ID.Domain),
|
|
cmp.Compare(existing.ID.ID, target.ID.ID),
|
|
)
|
|
}
|
|
slices.SortFunc(b.aHashes, hashCmp)
|
|
slices.SortFunc(b.dHashes, hashCmp)
|
|
slices.SortFunc(b.pHashes, hashCmp)
|
|
|
|
return nil
|
|
}
|
|
|
|
// EncodeHashes should already have a lock
|
|
func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
|
|
savedHashes := SavedHashes{
|
|
Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
|
|
}
|
|
|
|
// Only keep groups >1 as they will be mapped in SavedHashes.Hashes
|
|
for _, ids := range b.ids {
|
|
if len(*ids) > 1 {
|
|
savedHashes.IDs = append(savedHashes.IDs, *ids)
|
|
}
|
|
}
|
|
|
|
savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...)
|
|
savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...)
|
|
savedHashes.Hashes = append(savedHashes.Hashes, b.pHashes...)
|
|
|
|
return savedHashes, nil
|
|
}
|
|
|
|
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
|
|
for _, newid := range newids {
|
|
b.hashMutex.RLock()
|
|
ids, found := b.ids[newid.OldID]
|
|
b.hashMutex.RUnlock()
|
|
if !found {
|
|
msg := "ID not found on this server"
|
|
return errors.New(msg)
|
|
}
|
|
b.hashMutex.Lock()
|
|
*ids = InsertID(*ids, newid.NewID)
|
|
b.hashMutex.Unlock()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (b *basicMapStorage) GetIDs(id ID) IDList {
|
|
b.hashMutex.RLock()
|
|
defer b.hashMutex.RUnlock()
|
|
ids, found := b.ids[id]
|
|
if !found {
|
|
return nil
|
|
}
|
|
return ToIDList(*ids)
|
|
}
|
|
|
|
func NewBasicMapStorage() (HashStorage, error) {
|
|
storage := &basicMapStorage{
|
|
hashMutex: &sync.RWMutex{},
|
|
ids: make(map[ID]*[]ID),
|
|
aHashes: []SavedHash{},
|
|
dHashes: []SavedHash{},
|
|
pHashes: []SavedHash{},
|
|
}
|
|
return storage, nil
|
|
}
|