Optimize iterating hashes
This commit is contained in:
parent
0928ed6ccf
commit
095c78f0e7
65
BasicMap.go
65
BasicMap.go
@ -1,8 +1,10 @@
|
|||||||
package ch
|
package ch
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"cmp"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/bits"
|
"math/bits"
|
||||||
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"gitea.narnian.us/lordwelch/goimagehash"
|
"gitea.narnian.us/lordwelch/goimagehash"
|
||||||
@ -12,16 +14,21 @@ type basicMapStorage struct {
|
|||||||
hashMutex sync.RWMutex
|
hashMutex sync.RWMutex
|
||||||
|
|
||||||
ids map[ID]*[]ID
|
ids map[ID]*[]ID
|
||||||
hashes [3]map[uint64]*[]ID
|
hashes [3][]structHash
|
||||||
|
}
|
||||||
|
|
||||||
|
type structHash struct {
|
||||||
|
hash uint64
|
||||||
|
ids *[]ID
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *basicMapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
|
func (b *basicMapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
|
||||||
hashType := int(hashKind) - 1
|
hashType := int(hashKind) - 1
|
||||||
matchingHashes := make([]Result, 0, 100) // hope that we don't need all of them
|
matchingHashes := make([]Result, 0, 100) // hope that we don't need all of them
|
||||||
for storedHash, ids := range b.hashes[hashType] {
|
for _, storedHash := range b.hashes[hashType] {
|
||||||
distance := bits.OnesCount64(searchHash ^ storedHash)
|
distance := bits.OnesCount64(searchHash ^ storedHash.hash)
|
||||||
if distance <= maxDistance {
|
if distance <= maxDistance {
|
||||||
matchingHashes = append(matchingHashes, Result{ToIDList(*ids), distance, Hash{storedHash, hashKind}})
|
matchingHashes = append(matchingHashes, Result{ToIDList(*storedHash.ids), distance, Hash{storedHash.hash, hashKind}})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return matchingHashes
|
return matchingHashes
|
||||||
@ -31,25 +38,26 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
|
|||||||
b.hashMutex.RLock()
|
b.hashMutex.RLock()
|
||||||
defer b.hashMutex.RUnlock()
|
defer b.hashMutex.RUnlock()
|
||||||
resetTime()
|
resetTime()
|
||||||
|
defer logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
|
||||||
|
|
||||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||||
for _, hash := range hashes {
|
for _, hash := range hashes {
|
||||||
hashType := int(hash.Kind) - 1
|
hashType := int(hash.Kind) - 1
|
||||||
ids := b.hashes[hashType][hash.Hash]
|
index, hashFound := b.findHash(hashType, hash.Hash)
|
||||||
if ids != nil && len(*ids) > 0 {
|
if hashFound {
|
||||||
foundMatches = append(foundMatches, Result{
|
foundMatches = append(foundMatches, Result{
|
||||||
Distance: 0,
|
Distance: 0,
|
||||||
Hash: hash,
|
Hash: hash,
|
||||||
IDs: ToIDList(*ids),
|
IDs: ToIDList(*b.hashes[hashType][index].ids),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logTime("Search Exact")
|
||||||
// If we have exact matches don't bother with other matches
|
// If we have exact matches don't bother with other matches
|
||||||
if len(foundMatches) > 0 && exactOnly {
|
if len(foundMatches) > 0 && exactOnly {
|
||||||
return foundMatches, nil
|
return foundMatches, nil
|
||||||
}
|
}
|
||||||
logTime("Search Exact")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
foundHashes := make(map[uint64]struct{})
|
foundHashes := make(map[uint64]struct{})
|
||||||
@ -66,28 +74,49 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
|
|||||||
|
|
||||||
}
|
}
|
||||||
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
|
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
|
||||||
logTime("Search Complete")
|
|
||||||
go b.printSizes()
|
go b.printSizes()
|
||||||
return foundMatches, nil
|
return foundMatches, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *basicMapStorage) findHash(hashType int, hash uint64) (int, bool) {
|
||||||
|
return slices.BinarySearchFunc(b.hashes[hashType], hash, func(e structHash, t uint64) int {
|
||||||
|
return cmp.Compare(e.hash, t)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
func (b *basicMapStorage) InsertHash(hashType int, hash uint64, ids *[]ID) {
|
||||||
|
index, hashFound := b.findHash(hashType, hash)
|
||||||
|
if hashFound {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
slices.Insert(b.hashes[hashType], index, structHash{hash, ids})
|
||||||
|
}
|
||||||
|
|
||||||
func (b *basicMapStorage) MapHashes(hash ImageHash) {
|
func (b *basicMapStorage) MapHashes(hash ImageHash) {
|
||||||
for _, ih := range hash.Hashes {
|
for _, ih := range hash.Hashes {
|
||||||
var (
|
var (
|
||||||
hashType = int(ih.Kind) - 1
|
hashType = int(ih.Kind) - 1
|
||||||
)
|
)
|
||||||
|
ids, ok := b.ids[hash.ID]
|
||||||
|
if !ok {
|
||||||
|
ids = &[]ID{hash.ID}
|
||||||
|
}
|
||||||
|
|
||||||
*b.hashes[hashType][ih.Hash] = InsertID((*b.hashes[hashType][ih.Hash]), hash.ID)
|
b.InsertHash(hashType, ih.Hash, ids)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
|
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
|
||||||
for hashType, sourceHashes := range hashes.Hashes {
|
for hashType, sourceHashes := range hashes.Hashes {
|
||||||
b.hashes[hashType] = make(map[uint64]*[]ID, len(sourceHashes))
|
b.hashes[hashType] = make([]structHash, len(sourceHashes))
|
||||||
for savedHash, idlistLocation := range sourceHashes {
|
for savedHash, idlistLocation := range sourceHashes {
|
||||||
b.hashes[hashType][savedHash] = &hashes.IDs[idlistLocation]
|
b.hashes[hashType] = append(b.hashes[hashType], structHash{savedHash, &hashes.IDs[idlistLocation]})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for hashType := range b.hashes {
|
||||||
|
slices.SortFunc(b.hashes[hashType], func(a, b structHash) int {
|
||||||
|
return cmp.Compare(a.hash, b.hash)
|
||||||
|
})
|
||||||
|
}
|
||||||
b.printSizes()
|
b.printSizes()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -110,8 +139,8 @@ func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
|
|||||||
idmap[ids] = len(hashes.IDs)
|
idmap[ids] = len(hashes.IDs)
|
||||||
}
|
}
|
||||||
for hashType, hashToID := range b.hashes {
|
for hashType, hashToID := range b.hashes {
|
||||||
for hash, ids := range hashToID {
|
for _, hash := range hashToID {
|
||||||
hashes.Hashes[hashType][hash] = idmap[ids]
|
hashes.Hashes[hashType][hash.hash] = idmap[hash.ids]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return hashes, nil
|
return hashes, nil
|
||||||
@ -141,10 +170,10 @@ func NewBasicMapStorage() (HashStorage, error) {
|
|||||||
storage := &basicMapStorage{
|
storage := &basicMapStorage{
|
||||||
hashMutex: sync.RWMutex{},
|
hashMutex: sync.RWMutex{},
|
||||||
|
|
||||||
hashes: [3]map[uint64]*[]ID{
|
hashes: [3][]structHash{
|
||||||
make(map[uint64]*[]ID),
|
[]structHash{},
|
||||||
make(map[uint64]*[]ID),
|
[]structHash{},
|
||||||
make(map[uint64]*[]ID),
|
[]structHash{},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
return storage, nil
|
return storage, nil
|
||||||
|
35
map.go
35
map.go
@ -1,6 +1,7 @@
|
|||||||
package ch
|
package ch
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"cmp"
|
||||||
"fmt"
|
"fmt"
|
||||||
"slices"
|
"slices"
|
||||||
"sync"
|
"sync"
|
||||||
@ -16,25 +17,26 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
|
|||||||
m.hashMutex.RLock()
|
m.hashMutex.RLock()
|
||||||
defer m.hashMutex.RUnlock()
|
defer m.hashMutex.RUnlock()
|
||||||
resetTime()
|
resetTime()
|
||||||
|
defer logTime("Search Complete")
|
||||||
|
|
||||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||||
for _, hash := range hashes {
|
for _, hash := range hashes {
|
||||||
hashType := int(hash.Kind) - 1
|
hashType := int(hash.Kind) - 1
|
||||||
idlist := m.hashes[hashType][hash.Hash]
|
index, hashFound := m.findHash(hashType, hash.Hash)
|
||||||
if idlist != nil && len(*idlist) > 0 {
|
if hashFound {
|
||||||
foundMatches = append(foundMatches, Result{
|
foundMatches = append(foundMatches, Result{
|
||||||
Distance: 0,
|
Distance: 0,
|
||||||
Hash: hash,
|
Hash: hash,
|
||||||
IDs: ToIDList(*idlist),
|
IDs: ToIDList(*m.hashes[hashType][index].ids),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we have exact matches don't bother with other matches
|
// If we have exact matches don't bother with other matches
|
||||||
|
logTime("Search Exact")
|
||||||
if len(foundMatches) > 0 && exactOnly {
|
if len(foundMatches) > 0 && exactOnly {
|
||||||
return foundMatches, nil
|
return foundMatches, nil
|
||||||
}
|
}
|
||||||
logTime("Search Exact")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
totalPartialHashes := 0
|
totalPartialHashes := 0
|
||||||
@ -46,15 +48,14 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
|
|||||||
totalPartialHashes += len(partialHashes)
|
totalPartialHashes += len(partialHashes)
|
||||||
for _, match := range Atleast(max, searchHash.Hash, partialHashes) {
|
for _, match := range Atleast(max, searchHash.Hash, partialHashes) {
|
||||||
_, alreadyMatched := foundHashes[match.Hash]
|
_, alreadyMatched := foundHashes[match.Hash]
|
||||||
if matchedResults, ok := m.hashes[hashType][match.Hash]; ok && !alreadyMatched {
|
if index, hashFound := m.findHash(hashType, match.Hash); hashFound && !alreadyMatched {
|
||||||
foundHashes[match.Hash] = struct{}{}
|
foundHashes[match.Hash] = struct{}{}
|
||||||
foundMatches = append(foundMatches, Result{IDs: ToIDList(*matchedResults), Distance: match.Distance, Hash: Hash{Hash: match.Hash, Kind: searchHash.Kind}})
|
foundMatches = append(foundMatches, Result{IDs: ToIDList(*m.hashes[hashType][index].ids), Distance: match.Distance, Hash: Hash{Hash: match.Hash, Kind: searchHash.Kind}})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fmt.Println("Total partial hashes tested:", totalPartialHashes)
|
fmt.Println("Total partial hashes tested:", totalPartialHashes)
|
||||||
logTime("Search Complete")
|
|
||||||
go m.printSizes()
|
go m.printSizes()
|
||||||
return foundMatches, nil
|
return foundMatches, nil
|
||||||
}
|
}
|
||||||
@ -71,14 +72,16 @@ func (m *MapStorage) MapHashes(hash ImageHash) {
|
|||||||
|
|
||||||
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
|
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
|
||||||
for hashType, sourceHashes := range hashes.Hashes {
|
for hashType, sourceHashes := range hashes.Hashes {
|
||||||
m.hashes[hashType] = make(map[uint64]*[]ID, len(sourceHashes))
|
m.hashes[hashType] = make([]structHash, len(sourceHashes))
|
||||||
for savedHash, idlistLocation := range sourceHashes {
|
for savedHash, idlistLocation := range sourceHashes {
|
||||||
for i, partialHash := range SplitHash(savedHash) {
|
m.hashes[hashType] = append(m.hashes[hashType], structHash{savedHash, &hashes.IDs[idlistLocation]})
|
||||||
m.partialHash[hashType][i][partialHash] = append(m.partialHash[hashType][i][partialHash], savedHash)
|
|
||||||
}
|
|
||||||
m.hashes[hashType][savedHash] = &hashes.IDs[idlistLocation]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for hashType := range m.hashes {
|
||||||
|
slices.SortFunc(m.hashes[hashType], func(a, b structHash) int {
|
||||||
|
return cmp.Compare(a.hash, b.hash)
|
||||||
|
})
|
||||||
|
}
|
||||||
m.printSizes()
|
m.printSizes()
|
||||||
for _, partialHashes := range m.partialHash {
|
for _, partialHashes := range m.partialHash {
|
||||||
for _, partMap := range partialHashes {
|
for _, partMap := range partialHashes {
|
||||||
@ -104,10 +107,10 @@ func NewMapStorage() (HashStorage, error) {
|
|||||||
storage := &MapStorage{
|
storage := &MapStorage{
|
||||||
basicMapStorage: basicMapStorage{
|
basicMapStorage: basicMapStorage{
|
||||||
hashMutex: sync.RWMutex{},
|
hashMutex: sync.RWMutex{},
|
||||||
hashes: [3]map[uint64]*[]ID{
|
hashes: [3][]structHash{
|
||||||
make(map[uint64]*[]ID),
|
[]structHash{},
|
||||||
make(map[uint64]*[]ID),
|
[]structHash{},
|
||||||
make(map[uint64]*[]ID),
|
[]structHash{},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
partialHash: [3][8]map[uint8][]uint64{
|
partialHash: [3][8]map[uint8][]uint64{
|
||||||
|
Loading…
Reference in New Issue
Block a user