2024-09-01 18:13:47 -07:00
|
|
|
package ch
|
|
|
|
|
|
|
|
import (
|
2024-09-12 11:42:29 -07:00
|
|
|
"cmp"
|
2024-09-07 14:51:18 -07:00
|
|
|
"fmt"
|
2024-09-01 18:13:47 -07:00
|
|
|
"slices"
|
|
|
|
"sync"
|
|
|
|
)
|
|
|
|
|
2024-09-07 14:51:18 -07:00
|
|
|
type MapStorage struct {
|
|
|
|
basicMapStorage
|
|
|
|
partialHash [3][8]map[uint8][]uint64
|
2024-09-01 18:13:47 -07:00
|
|
|
}
|
|
|
|
|
2024-09-07 14:51:18 -07:00
|
|
|
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
2024-09-01 18:13:47 -07:00
|
|
|
var foundMatches []Result
|
|
|
|
m.hashMutex.RLock()
|
|
|
|
defer m.hashMutex.RUnlock()
|
2024-09-07 14:51:18 -07:00
|
|
|
resetTime()
|
2024-09-12 11:42:29 -07:00
|
|
|
defer logTime("Search Complete")
|
2024-09-01 18:13:47 -07:00
|
|
|
|
|
|
|
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
|
|
|
for _, hash := range hashes {
|
|
|
|
hashType := int(hash.Kind) - 1
|
2024-09-12 11:42:29 -07:00
|
|
|
index, hashFound := m.findHash(hashType, hash.Hash)
|
|
|
|
if hashFound {
|
2024-09-07 14:51:18 -07:00
|
|
|
foundMatches = append(foundMatches, Result{
|
|
|
|
Distance: 0,
|
|
|
|
Hash: hash,
|
2024-09-12 11:42:29 -07:00
|
|
|
IDs: ToIDList(*m.hashes[hashType][index].ids),
|
2024-09-07 14:51:18 -07:00
|
|
|
})
|
2024-09-01 18:13:47 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we have exact matches don't bother with other matches
|
2024-09-12 11:42:29 -07:00
|
|
|
logTime("Search Exact")
|
2024-09-01 18:13:47 -07:00
|
|
|
if len(foundMatches) > 0 && exactOnly {
|
|
|
|
return foundMatches, nil
|
|
|
|
}
|
2024-09-07 14:51:18 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
totalPartialHashes := 0
|
|
|
|
for _, searchHash := range hashes {
|
|
|
|
foundHashes := make(map[uint64]struct{})
|
|
|
|
hashType := int(searchHash.Kind) - 1
|
|
|
|
for i, partialHash := range SplitHash(searchHash.Hash) {
|
|
|
|
partialHashes := m.partialHash[hashType][i][partialHash]
|
|
|
|
totalPartialHashes += len(partialHashes)
|
|
|
|
for _, match := range Atleast(max, searchHash.Hash, partialHashes) {
|
|
|
|
_, alreadyMatched := foundHashes[match.Hash]
|
2024-09-12 11:42:29 -07:00
|
|
|
if index, hashFound := m.findHash(hashType, match.Hash); hashFound && !alreadyMatched {
|
2024-09-07 14:51:18 -07:00
|
|
|
foundHashes[match.Hash] = struct{}{}
|
2024-09-12 11:42:29 -07:00
|
|
|
foundMatches = append(foundMatches, Result{IDs: ToIDList(*m.hashes[hashType][index].ids), Distance: match.Distance, Hash: Hash{Hash: match.Hash, Kind: searchHash.Kind}})
|
2024-09-01 18:13:47 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2024-09-07 14:51:18 -07:00
|
|
|
fmt.Println("Total partial hashes tested:", totalPartialHashes)
|
|
|
|
go m.printSizes()
|
2024-09-01 18:13:47 -07:00
|
|
|
return foundMatches, nil
|
|
|
|
}
|
|
|
|
|
2024-09-07 14:51:18 -07:00
|
|
|
func (m *MapStorage) MapHashes(hash ImageHash) {
|
|
|
|
m.basicMapStorage.MapHashes(hash)
|
2024-09-01 18:13:47 -07:00
|
|
|
for _, hash := range hash.Hashes {
|
2024-09-07 14:51:18 -07:00
|
|
|
hashType := int(hash.Kind) - 1
|
2024-09-01 18:13:47 -07:00
|
|
|
for i, partialHash := range SplitHash(hash.Hash) {
|
2024-09-07 14:51:18 -07:00
|
|
|
m.partialHash[hashType][i][partialHash] = Insert(m.partialHash[hashType][i][partialHash], hash.Hash)
|
2024-09-01 18:13:47 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-07 14:51:18 -07:00
|
|
|
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
|
|
|
|
for hashType, sourceHashes := range hashes.Hashes {
|
2024-09-12 11:42:29 -07:00
|
|
|
m.hashes[hashType] = make([]structHash, len(sourceHashes))
|
2024-09-07 14:51:18 -07:00
|
|
|
for savedHash, idlistLocation := range sourceHashes {
|
2024-09-12 11:42:29 -07:00
|
|
|
m.hashes[hashType] = append(m.hashes[hashType], structHash{savedHash, &hashes.IDs[idlistLocation]})
|
2024-09-01 18:13:47 -07:00
|
|
|
}
|
|
|
|
}
|
2024-09-12 11:42:29 -07:00
|
|
|
for hashType := range m.hashes {
|
|
|
|
slices.SortFunc(m.hashes[hashType], func(a, b structHash) int {
|
|
|
|
return cmp.Compare(a.hash, b.hash)
|
|
|
|
})
|
|
|
|
}
|
2024-09-07 14:51:18 -07:00
|
|
|
m.printSizes()
|
|
|
|
for _, partialHashes := range m.partialHash {
|
|
|
|
for _, partMap := range partialHashes {
|
|
|
|
for part, hashes := range partMap {
|
|
|
|
slices.Sort(hashes)
|
|
|
|
partMap[part] = slices.Compact(hashes)
|
2024-09-01 18:13:47 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2024-09-07 14:51:18 -07:00
|
|
|
m.printSizes()
|
|
|
|
return nil
|
2024-09-01 18:13:47 -07:00
|
|
|
}
|
|
|
|
|
2024-09-07 14:51:18 -07:00
|
|
|
func (m *MapStorage) printSizes() {
|
|
|
|
fmt.Println("Length of hashes:", len(m.hashes[0])+len(m.hashes[1])+len(m.hashes[2]))
|
|
|
|
// fmt.Println("Size of", "hashes:", size.Of(m.hashes)/1024/1024, "MB")
|
|
|
|
// fmt.Println("Size of", "ids:", size.Of(m.ids)/1024/1024, "MB")
|
|
|
|
// fmt.Println("Size of", "MapStorage:", size.Of(m)/1024/1024, "MB")
|
2024-09-01 18:13:47 -07:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewMapStorage() (HashStorage, error) {
|
2024-09-07 14:51:18 -07:00
|
|
|
storage := &MapStorage{
|
|
|
|
basicMapStorage: basicMapStorage{
|
2024-10-16 17:56:19 -07:00
|
|
|
hashMutex: &sync.RWMutex{},
|
2024-09-12 11:42:29 -07:00
|
|
|
hashes: [3][]structHash{
|
|
|
|
[]structHash{},
|
|
|
|
[]structHash{},
|
|
|
|
[]structHash{},
|
2024-09-07 14:51:18 -07:00
|
|
|
},
|
|
|
|
},
|
|
|
|
partialHash: [3][8]map[uint8][]uint64{
|
|
|
|
{
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
},
|
|
|
|
{
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
},
|
|
|
|
{
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
make(map[uint8][]uint64),
|
|
|
|
},
|
2024-09-01 18:13:47 -07:00
|
|
|
},
|
|
|
|
}
|
|
|
|
return storage, nil
|
|
|
|
}
|