comic-hasher/map.go

151 lines
4.2 KiB
Go
Raw Permalink Normal View History

2024-09-01 18:13:47 -07:00
package ch
import (
2024-09-12 11:42:29 -07:00
"cmp"
"fmt"
2024-09-01 18:13:47 -07:00
"slices"
"sync"
)
type MapStorage struct {
basicMapStorage
partialHash [3][8]map[uint8][]uint64
2024-09-01 18:13:47 -07:00
}
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
2024-09-01 18:13:47 -07:00
var foundMatches []Result
m.hashMutex.RLock()
defer m.hashMutex.RUnlock()
resetTime()
2024-09-12 11:42:29 -07:00
defer logTime("Search Complete")
2024-09-01 18:13:47 -07:00
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes {
hashType := int(hash.Kind) - 1
2024-09-12 11:42:29 -07:00
index, hashFound := m.findHash(hashType, hash.Hash)
if hashFound {
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: hash,
2024-09-12 11:42:29 -07:00
IDs: ToIDList(*m.hashes[hashType][index].ids),
})
2024-09-01 18:13:47 -07:00
}
}
// If we have exact matches don't bother with other matches
2024-09-12 11:42:29 -07:00
logTime("Search Exact")
2024-09-01 18:13:47 -07:00
if len(foundMatches) > 0 && exactOnly {
return foundMatches, nil
}
}
totalPartialHashes := 0
for _, searchHash := range hashes {
foundHashes := make(map[uint64]struct{})
hashType := int(searchHash.Kind) - 1
for i, partialHash := range SplitHash(searchHash.Hash) {
partialHashes := m.partialHash[hashType][i][partialHash]
totalPartialHashes += len(partialHashes)
for _, match := range Atleast(max, searchHash.Hash, partialHashes) {
_, alreadyMatched := foundHashes[match.Hash]
2024-09-12 11:42:29 -07:00
if index, hashFound := m.findHash(hashType, match.Hash); hashFound && !alreadyMatched {
foundHashes[match.Hash] = struct{}{}
2024-09-12 11:42:29 -07:00
foundMatches = append(foundMatches, Result{IDs: ToIDList(*m.hashes[hashType][index].ids), Distance: match.Distance, Hash: Hash{Hash: match.Hash, Kind: searchHash.Kind}})
2024-09-01 18:13:47 -07:00
}
}
}
}
fmt.Println("Total partial hashes tested:", totalPartialHashes)
go m.printSizes()
2024-09-01 18:13:47 -07:00
return foundMatches, nil
}
func (m *MapStorage) MapHashes(hash ImageHash) {
m.basicMapStorage.MapHashes(hash)
2024-09-01 18:13:47 -07:00
for _, hash := range hash.Hashes {
hashType := int(hash.Kind) - 1
2024-09-01 18:13:47 -07:00
for i, partialHash := range SplitHash(hash.Hash) {
m.partialHash[hashType][i][partialHash] = Insert(m.partialHash[hashType][i][partialHash], hash.Hash)
2024-09-01 18:13:47 -07:00
}
}
}
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
for hashType, sourceHashes := range hashes.Hashes {
2024-09-12 11:42:29 -07:00
m.hashes[hashType] = make([]structHash, len(sourceHashes))
for savedHash, idlistLocation := range sourceHashes {
2024-09-12 11:42:29 -07:00
m.hashes[hashType] = append(m.hashes[hashType], structHash{savedHash, &hashes.IDs[idlistLocation]})
2024-09-01 18:13:47 -07:00
}
}
2024-09-12 11:42:29 -07:00
for hashType := range m.hashes {
slices.SortFunc(m.hashes[hashType], func(a, b structHash) int {
return cmp.Compare(a.hash, b.hash)
})
}
m.printSizes()
for _, partialHashes := range m.partialHash {
for _, partMap := range partialHashes {
for part, hashes := range partMap {
slices.Sort(hashes)
partMap[part] = slices.Compact(hashes)
2024-09-01 18:13:47 -07:00
}
}
}
m.printSizes()
return nil
2024-09-01 18:13:47 -07:00
}
func (m *MapStorage) printSizes() {
fmt.Println("Length of hashes:", len(m.hashes[0])+len(m.hashes[1])+len(m.hashes[2]))
// fmt.Println("Size of", "hashes:", size.Of(m.hashes)/1024/1024, "MB")
// fmt.Println("Size of", "ids:", size.Of(m.ids)/1024/1024, "MB")
// fmt.Println("Size of", "MapStorage:", size.Of(m)/1024/1024, "MB")
2024-09-01 18:13:47 -07:00
}
func NewMapStorage() (HashStorage, error) {
storage := &MapStorage{
basicMapStorage: basicMapStorage{
hashMutex: sync.RWMutex{},
2024-09-12 11:42:29 -07:00
hashes: [3][]structHash{
[]structHash{},
[]structHash{},
[]structHash{},
},
},
partialHash: [3][8]map[uint8][]uint64{
{
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
},
{
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
},
{
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
},
2024-09-01 18:13:47 -07:00
},
}
return storage, nil
}