Fix locking for map storage

This commit is contained in:
Timmy Welch 2024-10-16 17:56:19 -07:00
parent da54b3a454
commit 4922ceb678
2 changed files with 28 additions and 16 deletions

View File

@ -12,7 +12,7 @@ import (
) )
type basicMapStorage struct { type basicMapStorage struct {
hashMutex sync.RWMutex hashMutex *sync.RWMutex
ids map[ID]*[]ID ids map[ID]*[]ID
hashes [3][]structHash hashes [3][]structHash
@ -26,6 +26,8 @@ type structHash struct {
func (b *basicMapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64) []Result { func (b *basicMapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
hashType := int(hashKind) - 1 hashType := int(hashKind) - 1
matchingHashes := make([]Result, 0, 100) // hope that we don't need all of them matchingHashes := make([]Result, 0, 100) // hope that we don't need all of them
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
for _, storedHash := range b.hashes[hashType] { for _, storedHash := range b.hashes[hashType] {
distance := bits.OnesCount64(searchHash ^ storedHash.hash) distance := bits.OnesCount64(searchHash ^ storedHash.hash)
if distance <= maxDistance { if distance <= maxDistance {
@ -36,14 +38,13 @@ func (b *basicMapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, se
} }
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) { func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var foundMatches []Result var foundMatches []Result
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
resetTime() resetTime()
defer logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly)) defer logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes { for _, hash := range hashes {
hashType := int(hash.Kind) - 1 hashType := int(hash.Kind) - 1
b.hashMutex.RLock()
index, hashFound := b.findHash(hashType, hash.Hash) index, hashFound := b.findHash(hashType, hash.Hash)
if hashFound { if hashFound {
foundMatches = append(foundMatches, Result{ foundMatches = append(foundMatches, Result{
@ -52,6 +53,7 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
IDs: ToIDList(*b.hashes[hashType][index].ids), IDs: ToIDList(*b.hashes[hashType][index].ids),
}) })
} }
b.hashMutex.RUnlock()
} }
logTime("Search Exact") logTime("Search Exact")
@ -75,21 +77,27 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
} }
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes)) fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
go b.printSizes()
return foundMatches, nil return foundMatches, nil
} }
// findHash must have a read lock before using
func (b *basicMapStorage) findHash(hashType int, hash uint64) (int, bool) { func (b *basicMapStorage) findHash(hashType int, hash uint64) (int, bool) {
return slices.BinarySearchFunc(b.hashes[hashType], hash, func(e structHash, t uint64) int { return slices.BinarySearchFunc(b.hashes[hashType], hash, func(e structHash, t uint64) int {
return cmp.Compare(e.hash, t) return cmp.Compare(e.hash, t)
}) })
} }
func (b *basicMapStorage) InsertHash(hashType int, hash uint64, ids *[]ID) {
// insertHash will take a write lock if the hash is not found
func (b *basicMapStorage) insertHash(hashType int, hash uint64, ids *[]ID) {
b.hashMutex.RLock()
index, hashFound := b.findHash(hashType, hash) index, hashFound := b.findHash(hashType, hash)
b.hashMutex.RUnlock()
if hashFound { if hashFound {
return return
} }
b.hashMutex.Lock()
b.hashes[hashType] = slices.Insert(b.hashes[hashType], index, structHash{hash, ids}) b.hashes[hashType] = slices.Insert(b.hashes[hashType], index, structHash{hash, ids})
b.hashMutex.Unlock()
} }
func (b *basicMapStorage) MapHashes(hash ImageHash) { func (b *basicMapStorage) MapHashes(hash ImageHash) {
@ -97,16 +105,21 @@ func (b *basicMapStorage) MapHashes(hash ImageHash) {
var ( var (
hashType = int(ih.Kind) - 1 hashType = int(ih.Kind) - 1
) )
b.hashMutex.RLock()
ids, ok := b.ids[hash.ID] ids, ok := b.ids[hash.ID]
b.hashMutex.RUnlock()
if !ok { if !ok {
b.hashMutex.Lock()
ids = &[]ID{hash.ID} ids = &[]ID{hash.ID}
b.ids[hash.ID] = ids b.ids[hash.ID] = ids
b.hashMutex.Unlock()
} }
b.InsertHash(hashType, ih.Hash, ids) b.insertHash(hashType, ih.Hash, ids)
} }
} }
// DecodeHashes expects the caller to already hold the lock
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error { func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
for hashType, sourceHashes := range hashes.Hashes { for hashType, sourceHashes := range hashes.Hashes {
b.hashes[hashType] = make([]structHash, len(sourceHashes)) b.hashes[hashType] = make([]structHash, len(sourceHashes))
@ -122,17 +135,10 @@ func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
return cmp.Compare(a.hash, b.hash) return cmp.Compare(a.hash, b.hash)
}) })
} }
b.printSizes()
return nil return nil
} }
func (b *basicMapStorage) printSizes() { // EncodeHashes should already have a lock
// fmt.Println("Size of", "hashes:", size.Of(b.hashes)/1024/1024, "MB")
// fmt.Println("Size of", "ids:", size.Of(b.ids)/1024/1024, "MB")
// fmt.Println("Size of", "basicMapStorage:", size.Of(b)/1024/1024, "MB")
}
func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) { func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
hashes := SavedHashes{ hashes := SavedHashes{
Hashes: [3]map[uint64]int{ Hashes: [3]map[uint64]int{
@ -161,17 +167,23 @@ func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error { func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
for _, newid := range newids { for _, newid := range newids {
b.hashMutex.RLock()
ids, found := b.ids[newid.OldID] ids, found := b.ids[newid.OldID]
b.hashMutex.RUnlock()
if !found { if !found {
msg := "No IDs belonging to " + string(newid.OldID.Domain) + " exist on this server" msg := "No IDs belonging to " + string(newid.OldID.Domain) + " exist on this server"
return errors.New(msg) return errors.New(msg)
} }
b.hashMutex.Lock()
*ids = InsertID(*ids, newid.NewID) *ids = InsertID(*ids, newid.NewID)
b.hashMutex.Unlock()
} }
return nil return nil
} }
func (b *basicMapStorage) GetIDs(id ID) IDList { func (b *basicMapStorage) GetIDs(id ID) IDList {
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
ids, found := b.ids[id] ids, found := b.ids[id]
if !found { if !found {
return nil return nil
@ -181,7 +193,7 @@ func (b *basicMapStorage) GetIDs(id ID) IDList {
func NewBasicMapStorage() (HashStorage, error) { func NewBasicMapStorage() (HashStorage, error) {
storage := &basicMapStorage{ storage := &basicMapStorage{
hashMutex: sync.RWMutex{}, hashMutex: &sync.RWMutex{},
ids: make(map[ID]*[]ID), ids: make(map[ID]*[]ID),
hashes: [3][]structHash{}, hashes: [3][]structHash{},
} }

2
map.go
View File

@ -106,7 +106,7 @@ func (m *MapStorage) printSizes() {
func NewMapStorage() (HashStorage, error) { func NewMapStorage() (HashStorage, error) {
storage := &MapStorage{ storage := &MapStorage{
basicMapStorage: basicMapStorage{ basicMapStorage: basicMapStorage{
hashMutex: sync.RWMutex{}, hashMutex: &sync.RWMutex{},
hashes: [3][]structHash{ hashes: [3][]structHash{
[]structHash{}, []structHash{},
[]structHash{}, []structHash{},