Compare commits

..

1 Commits

Author SHA1 Message Date
22d59aa221 Move HashStorage to its own package 2025-05-31 19:00:40 -07:00
10 changed files with 266 additions and 245 deletions

View File

@ -35,6 +35,7 @@ import (
ch "gitea.narnian.us/lordwelch/comic-hasher" ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/comic-hasher/cv" "gitea.narnian.us/lordwelch/comic-hasher/cv"
"gitea.narnian.us/lordwelch/comic-hasher/storage"
) )
var bufPool = &sync.Pool{ var bufPool = &sync.Pool{
@ -215,15 +216,15 @@ func signalHandler(s *Server) {
func initializeStorage(opts Opts) (ch.HashStorage, error) { func initializeStorage(opts Opts) (ch.HashStorage, error) {
switch opts.storageType { switch opts.storageType {
case Map: case Map:
return ch.NewMapStorage() return storage.NewMapStorage()
case BasicMap: case BasicMap:
return ch.NewBasicMapStorage() return storage.NewBasicMapStorage()
case Sqlite: case Sqlite:
return ch.NewSqliteStorage("sqlite", opts.sqlitePath) return storage.NewSqliteStorage("sqlite", opts.sqlitePath)
case Sqlite3: case Sqlite3:
return ch.NewSqliteStorage("sqlite3", opts.sqlitePath) return storage.NewSqliteStorage("sqlite3", opts.sqlitePath)
case VPTree: case VPTree:
return ch.NewVPStorage() return storage.NewVPStorage()
} }
return nil, errors.New("Unknown storage type provided") return nil, errors.New("Unknown storage type provided")
} }

View File

@ -83,6 +83,19 @@ func (f *Format) Set(s string) error {
return nil return nil
} }
func (h *SavedHash) Clone() SavedHash {
return SavedHash{
Hash: Hash{
Hash: h.Hash.Hash,
Kind: h.Hash.Kind,
},
ID: ID{
Domain: NewSource(*h.ID.Domain),
ID: strings.Clone(h.ID.ID),
},
}
}
func (s *SavedHashes) InsertHash(hash SavedHash) { func (s *SavedHashes) InsertHash(hash SavedHash) {
index, itemFound := slices.BinarySearchFunc(s.Hashes, hash, func(existing SavedHash, target SavedHash) int { index, itemFound := slices.BinarySearchFunc(s.Hashes, hash, func(existing SavedHash, target SavedHash) int {
return cmp.Or( return cmp.Or(

View File

@ -1,4 +1,4 @@
package ch package storage
import ( import (
"cmp" "cmp"
@ -6,49 +6,34 @@ import (
"fmt" "fmt"
"math/bits" "math/bits"
"slices" "slices"
"strings"
"sync" "sync"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash" "gitea.narnian.us/lordwelch/goimagehash"
) )
type bmHash struct {
Hash Hash
ID ID
}
func NewbmHash(data SavedHash) bmHash {
return bmHash{
Hash: Hash{
Hash: data.Hash.Hash,
Kind: data.Hash.Kind,
},
ID: ID{
Domain: data.ID.Domain,
ID: strings.Clone(data.ID.ID),
},
}
}
type basicMapStorage struct { type basicMapStorage struct {
hashMutex *sync.RWMutex hashMutex *sync.RWMutex
ids IDMap ids IDMap
aHashes []bmHash aHashes []ch.SavedHash
dHashes []bmHash dHashes []ch.SavedHash
pHashes []bmHash pHashes []ch.SavedHash
} }
type IDs struct { type IDs struct {
id *ID id *ch.ID
idList *[]*ID idList *[]*ch.ID
} }
type IDMap struct { type IDMap struct {
ids []IDs ids []IDs
} }
func (m *IDMap) InsertID(id *ID) *ID { func (m *IDMap) InsertID(id *ch.ID) *ch.ID {
return m.insertID(id, &[]*ID{id}) return m.insertID(id, &[]*ch.ID{id})
} }
func (m *IDMap) insertID(id *ID, idList *[]*ID) *ID { func (m *IDMap) insertID(id *ch.ID, idList *[]*ch.ID) *ch.ID {
index, found := slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ID) int { index, found := slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ch.ID) int {
return id.id.Compare(*target) return id.id.Compare(*target)
}) })
if !found { if !found {
@ -66,40 +51,40 @@ func (m *IDMap) sort() {
}) })
} }
func (m *IDMap) FindID(id *ID) (int, bool) { func (m *IDMap) FindID(id *ch.ID) (int, bool) {
return slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ID) int { return slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ch.ID) int {
return id.id.Compare(*target) return id.id.Compare(*target)
}) })
} }
func (m *IDMap) GetIDs(id *ID) []ID { func (m *IDMap) GetIDs(id *ch.ID) []ch.ID {
index, found := m.FindID(id) index, found := m.FindID(id)
if !found { if !found {
return nil return nil
} }
ids := make([]ID, 0, len(*m.ids[index].idList)) ids := make([]ch.ID, 0, len(*m.ids[index].idList))
for _, id := range *m.ids[index].idList { for _, id := range *m.ids[index].idList {
ids = append(ids, *id) ids = append(ids, *id)
} }
return ids return ids
} }
func (m *IDMap) AssociateIDs(newids []NewIDs) error { func (m *IDMap) AssociateIDs(newids []ch.NewIDs) error {
for _, newid := range newids { for _, newid := range newids {
index, found := m.FindID(&newid.OldID) index, found := m.FindID(&newid.OldID)
if !found { if !found {
return ErrIDNotFound return ErrIDNotFound
} }
*(m.ids[index].idList) = InsertIDp(*(m.ids[index].idList), &newid.NewID) *(m.ids[index].idList) = ch.InsertIDp(*(m.ids[index].idList), &newid.NewID)
m.insertID(&newid.NewID, m.ids[index].idList) m.insertID(&newid.NewID, m.ids[index].idList)
} }
return nil return nil
} }
// func (m *IDMap) NewID(domain Source, id string) *ID { // func (m *IDMap) NewID(domain Source, id string) *ch.ID {
// newID := ID{domain, id} // newID := ch.ID{domain, id}
// index, found := slices.BinarySearchFunc(m.idList, newID, func(id *ID, target ID) int { // index, found := slices.BinarySearchFunc(m.idList, newID, func(id *ch.ID, target ch.ID) int {
// return id.Compare(*target) // return id.Compare(*target)
// }) // })
// if !found { // if !found {
@ -111,11 +96,11 @@ func (m *IDMap) AssociateIDs(newids []NewIDs) error {
var ErrIDNotFound = errors.New("ID not found on this server") var ErrIDNotFound = errors.New("ID not found on this server")
// atleast must have a read lock before using // atleast must have a read lock before using
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []Result { func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []ch.Result {
matchingHashes := make([]Result, 0, 20) // hope that we don't need more matchingHashes := make([]ch.Result, 0, 20) // hope that we don't need more
mappedIds := map[int]bool{} mappedIds := map[int]bool{}
storedHash := bmHash{} // reduces allocations and ensures queries are <1s storedHash := ch.SavedHash{} // reduces allocations and ensures queries are <1s
for _, storedHash = range *b.getCurrentHashes(kind) { for _, storedHash = range *b.getCurrentHashes(kind) {
distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash) distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash)
if distance <= maxDistance { if distance <= maxDistance {
@ -124,7 +109,7 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
continue continue
} }
mappedIds[index] = true mappedIds[index] = true
matchingHashes = append(matchingHashes, Result{ matchingHashes = append(matchingHashes, ch.Result{
Hash: storedHash.Hash, Hash: storedHash.Hash,
ID: storedHash.ID, ID: storedHash.ID,
Distance: distance, Distance: distance,
@ -135,8 +120,8 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
return matchingHashes return matchingHashes
} }
func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result { func (b *basicMapStorage) exactMatches(hashes []ch.Hash, max int) []ch.Result {
var foundMatches []Result var foundMatches []ch.Result
for _, hash := range hashes { for _, hash := range hashes {
mappedIds := map[int]bool{} mappedIds := map[int]bool{}
@ -149,7 +134,7 @@ func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
} }
mappedIds[index] = true mappedIds[index] = true
foundMatches = append(foundMatches, Result{ foundMatches = append(foundMatches, ch.Result{
Hash: storedHash.Hash, Hash: storedHash.Hash,
ID: storedHash.ID, ID: storedHash.ID,
Distance: 0, Distance: 0,
@ -162,20 +147,20 @@ func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
return foundMatches return foundMatches
} }
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) { func (b *basicMapStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var ( var (
foundMatches []Result foundMatches []ch.Result
tl timeLog tl ch.TimeLog
) )
tl.resetTime() tl.ResetTime()
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly)) defer tl.LogTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
b.hashMutex.RLock() b.hashMutex.RLock()
defer b.hashMutex.RUnlock() defer b.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = b.exactMatches(hashes, max) foundMatches = b.exactMatches(hashes, max)
tl.logTime("Search Exact") tl.LogTime("Search Exact")
if len(foundMatches) > 0 { if len(foundMatches) > 0 {
return foundMatches, nil return foundMatches, nil
} }
@ -193,7 +178,7 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
} }
// getCurrentHashes must have a read lock before using // getCurrentHashes must have a read lock before using
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]bmHash { func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]ch.SavedHash {
if kind == goimagehash.AHash { if kind == goimagehash.AHash {
return &b.aHashes return &b.aHashes
} }
@ -209,9 +194,9 @@ func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]bmHash {
// findHash must have a read lock before using // findHash must have a read lock before using
// return value is index, count // return value is index, count
// if count < 1 then no results were found // if count < 1 then no results were found
func (b *basicMapStorage) findHash(hash Hash) (int, int) { func (b *basicMapStorage) findHash(hash ch.Hash) (int, int) {
currentHashes := *b.getCurrentHashes(hash.Kind) currentHashes := *b.getCurrentHashes(hash.Kind)
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing bmHash, target Hash) int { index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing ch.SavedHash, target ch.Hash) int {
return cmp.Compare(existing.Hash.Hash, target.Hash) return cmp.Compare(existing.Hash.Hash, target.Hash)
}) })
if !found { if !found {
@ -225,7 +210,7 @@ func (b *basicMapStorage) findHash(hash Hash) (int, int) {
} }
// insertHash must already have a lock // insertHash must already have a lock
func (b *basicMapStorage) insertHash(hash Hash, id ID) { func (b *basicMapStorage) insertHash(hash ch.Hash, id ch.ID) {
currentHashes := b.getCurrentHashes(hash.Kind) currentHashes := b.getCurrentHashes(hash.Kind)
index, count := b.findHash(hash) index, count := b.findHash(hash)
max := index + count max := index + count
@ -235,12 +220,15 @@ func (b *basicMapStorage) insertHash(hash Hash, id ID) {
} }
} }
sh := bmHash{hash, id} sh := ch.SavedHash{
Hash: hash,
ID: id,
}
*currentHashes = slices.Insert(*currentHashes, index, sh) *currentHashes = slices.Insert(*currentHashes, index, sh)
b.ids.InsertID(&sh.ID) b.ids.InsertID(&sh.ID)
} }
func (b *basicMapStorage) MapHashes(hash ImageHash) { func (b *basicMapStorage) MapHashes(hash ch.ImageHash) {
b.hashMutex.Lock() b.hashMutex.Lock()
defer b.hashMutex.Unlock() defer b.hashMutex.Unlock()
for _, ih := range hash.Hashes { for _, ih := range hash.Hashes {
@ -249,7 +237,7 @@ func (b *basicMapStorage) MapHashes(hash ImageHash) {
} }
// DecodeHashes must already have a lock // DecodeHashes must already have a lock
func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error { func (b *basicMapStorage) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil { if hashes == nil {
return nil return nil
} }
@ -257,7 +245,7 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
// Initialize all the known equal IDs // Initialize all the known equal IDs
for _, ids := range hashes.IDs { for _, ids := range hashes.IDs {
new_ids := make([]*ID, 0, len(ids)) new_ids := make([]*ch.ID, 0, len(ids))
for _, id := range ids { for _, id := range ids {
new_ids = append(new_ids, &id) new_ids = append(new_ids, &id)
} }
@ -270,7 +258,7 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
} }
b.ids.sort() b.ids.sort()
slices.SortFunc(hashes.Hashes, func(existing, target SavedHash) int { slices.SortFunc(hashes.Hashes, func(existing, target ch.SavedHash) int {
return cmp.Or( return cmp.Or(
cmp.Compare(*existing.ID.Domain, *target.ID.Domain), // Sorted for id insertion efficiency cmp.Compare(*existing.ID.Domain, *target.ID.Domain), // Sorted for id insertion efficiency
cmp.Compare(existing.ID.ID, target.ID.ID), // Sorted for id insertion efficiency cmp.Compare(existing.ID.ID, target.ID.ID), // Sorted for id insertion efficiency
@ -295,31 +283,31 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
} }
// Assume they are probably fairly equally split between hash types // Assume they are probably fairly equally split between hash types
b.aHashes = make([]bmHash, 0, aHashCount) b.aHashes = make([]ch.SavedHash, 0, aHashCount)
b.dHashes = make([]bmHash, 0, dHashCount) b.dHashes = make([]ch.SavedHash, 0, dHashCount)
b.pHashes = make([]bmHash, 0, pHashCount) b.pHashes = make([]ch.SavedHash, 0, pHashCount)
for i := range hashes.Hashes { for i := range hashes.Hashes {
bmhash := NewbmHash(hashes.Hashes[i]) hash := hashes.Hashes[i].Clone() // Not cloning this will keep strings/slices loaded from json wasting memory
if hashes.Hashes[i].Hash.Kind == goimagehash.AHash { if hashes.Hashes[i].Hash.Kind == goimagehash.AHash {
b.aHashes = append(b.aHashes, bmhash) b.aHashes = append(b.aHashes, hash)
} }
if hashes.Hashes[i].Hash.Kind == goimagehash.DHash { if hashes.Hashes[i].Hash.Kind == goimagehash.DHash {
b.dHashes = append(b.dHashes, bmhash) b.dHashes = append(b.dHashes, hash)
} }
if hashes.Hashes[i].Hash.Kind == goimagehash.PHash { if hashes.Hashes[i].Hash.Kind == goimagehash.PHash {
b.pHashes = append(b.pHashes, bmhash) b.pHashes = append(b.pHashes, hash)
} }
if hashes.Hashes[i].ID == (ID{}) { if hashes.Hashes[i].ID == (ch.ID{}) {
fmt.Println("Empty ID detected") fmt.Println("Empty ID detected")
panic(hashes.Hashes[i]) panic(hashes.Hashes[i])
} }
// TODO: Make loading this more efficient // TODO: Make loading this more efficient
// All known equal IDs are already mapped we can add any missing ones from hashes // All known equal IDs are already mapped we can add any missing ones from hashes
b.ids.InsertID(&bmhash.ID) b.ids.InsertID(&hash.ID)
} }
hashCmp := func(existing, target bmHash) int { hashCmp := func(existing, target ch.SavedHash) int {
return cmp.Or( return cmp.Or(
cmp.Compare(existing.Hash.Hash, target.Hash.Hash), cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
cmp.Compare(*existing.ID.Domain, *target.ID.Domain), cmp.Compare(*existing.ID.Domain, *target.ID.Domain),
@ -334,9 +322,9 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
} }
// EncodeHashes should already have a lock // EncodeHashes should already have a lock
func (b *basicMapStorage) EncodeHashes() (*SavedHashes, error) { func (b *basicMapStorage) EncodeHashes() (*ch.SavedHashes, error) {
savedHashes := SavedHashes{ savedHashes := ch.SavedHashes{
Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)), Hashes: make([]ch.SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
} }
// savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...) // savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...)
// savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...) // savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...)
@ -357,28 +345,28 @@ func (b *basicMapStorage) EncodeHashes() (*SavedHashes, error) {
return &savedHashes, nil return &savedHashes, nil
} }
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error { func (b *basicMapStorage) AssociateIDs(newids []ch.NewIDs) error {
b.hashMutex.RLock() b.hashMutex.RLock()
defer b.hashMutex.RUnlock() defer b.hashMutex.RUnlock()
return b.ids.AssociateIDs(newids) return b.ids.AssociateIDs(newids)
} }
func (b *basicMapStorage) GetIDs(id ID) IDList { func (b *basicMapStorage) GetIDs(id ch.ID) ch.IDList {
b.hashMutex.RLock() b.hashMutex.RLock()
defer b.hashMutex.RUnlock() defer b.hashMutex.RUnlock()
ids := b.ids.GetIDs(&id) ids := b.ids.GetIDs(&id)
return ToIDList(ids) return ch.ToIDList(ids)
} }
func NewBasicMapStorage() (HashStorage, error) { func NewBasicMapStorage() (ch.HashStorage, error) {
storage := &basicMapStorage{ storage := &basicMapStorage{
hashMutex: &sync.RWMutex{}, hashMutex: &sync.RWMutex{},
ids: IDMap{ ids: IDMap{
ids: []IDs{}, ids: []IDs{},
}, },
aHashes: []bmHash{}, aHashes: []ch.SavedHash{},
dHashes: []bmHash{}, dHashes: []ch.SavedHash{},
pHashes: []bmHash{}, pHashes: []ch.SavedHash{},
} }
return storage, nil return storage, nil
} }

View File

@ -1,10 +1,11 @@
package ch package storage
import ( import (
"fmt" "fmt"
"slices" "slices"
"sync" "sync"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash" "gitea.narnian.us/lordwelch/goimagehash"
) )
@ -15,10 +16,10 @@ type MapStorage struct {
partialPHash [8]map[uint8][]uint64 partialPHash [8]map[uint8][]uint64
} }
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) { func (m *MapStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var ( var (
foundMatches []Result foundMatches []ch.Result
tl timeLog tl ch.TimeLog
) )
m.hashMutex.RLock() m.hashMutex.RLock()
defer m.hashMutex.RUnlock() defer m.hashMutex.RUnlock()
@ -26,13 +27,13 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = m.exactMatches(hashes, max) foundMatches = m.exactMatches(hashes, max)
tl.logTime("Search Exact") tl.LogTime("Search Exact")
if len(foundMatches) > 0 { if len(foundMatches) > 0 {
return foundMatches, nil return foundMatches, nil
} }
} }
tl.resetTime() tl.ResetTime()
defer tl.logTime("Search Complete") defer tl.LogTime("Search Complete")
totalPartialHashes := 0 totalPartialHashes := 0
@ -40,15 +41,18 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
currentHashes, currentPartialHashes := m.getCurrentHashes(searchHash.Kind) currentHashes, currentPartialHashes := m.getCurrentHashes(searchHash.Kind)
potentialMatches := []uint64{} potentialMatches := []uint64{}
for i, partialHash := range SplitHash(searchHash.Hash) { for i, partialHash := range ch.SplitHash(searchHash.Hash) {
potentialMatches = append(potentialMatches, currentPartialHashes[i][partialHash]...) potentialMatches = append(potentialMatches, currentPartialHashes[i][partialHash]...)
} }
totalPartialHashes += len(potentialMatches) totalPartialHashes += len(potentialMatches)
mappedIds := map[int]bool{} mappedIds := map[int]bool{}
for _, match := range Atleast(max, searchHash.Hash, potentialMatches) { for _, match := range ch.Atleast(max, searchHash.Hash, potentialMatches) {
matchedHash := Hash{match.Hash, searchHash.Kind} matchedHash := ch.Hash{
Hash: match.Hash,
Kind: searchHash.Kind,
}
index, count := m.findHash(matchedHash) index, count := m.findHash(matchedHash)
if count < 1 { if count < 1 {
continue continue
@ -60,7 +64,7 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
} }
mappedIds[idIndex] = true mappedIds[idIndex] = true
foundMatches = append(foundMatches, Result{ foundMatches = append(foundMatches, ch.Result{
Hash: storedHash.Hash, Hash: storedHash.Hash,
ID: storedHash.ID, ID: storedHash.ID,
Distance: 0, Distance: 0,
@ -75,7 +79,7 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
} }
// getCurrentHashes must have a read lock before using // getCurrentHashes must have a read lock before using
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]bmHash, [8]map[uint8][]uint64) { func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]ch.SavedHash, [8]map[uint8][]uint64) {
if kind == goimagehash.AHash { if kind == goimagehash.AHash {
return m.aHashes, m.partialAHash return m.aHashes, m.partialAHash
} }
@ -88,17 +92,17 @@ func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]bmHash, [8]map[u
panic("Unknown hash type: " + kind.String()) panic("Unknown hash type: " + kind.String())
} }
func (m *MapStorage) MapHashes(hash ImageHash) { func (m *MapStorage) MapHashes(hash ch.ImageHash) {
m.basicMapStorage.MapHashes(hash) m.basicMapStorage.MapHashes(hash)
for _, hash := range hash.Hashes { for _, hash := range hash.Hashes {
_, partialHashes := m.getCurrentHashes(hash.Kind) _, partialHashes := m.getCurrentHashes(hash.Kind)
for i, partialHash := range SplitHash(hash.Hash) { for i, partialHash := range ch.SplitHash(hash.Hash) {
partialHashes[i][partialHash] = Insert(partialHashes[i][partialHash], hash.Hash) partialHashes[i][partialHash] = ch.Insert(partialHashes[i][partialHash], hash.Hash)
} }
} }
} }
func (m *MapStorage) DecodeHashes(hashes *SavedHashes) error { func (m *MapStorage) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil { if hashes == nil {
return nil return nil
} }
@ -117,7 +121,7 @@ func (m *MapStorage) DecodeHashes(hashes *SavedHashes) error {
return nil return nil
} }
func NewMapStorage() (HashStorage, error) { func NewMapStorage() (ch.HashStorage, error) {
storage := &MapStorage{ storage := &MapStorage{
basicMapStorage: basicMapStorage{ basicMapStorage: basicMapStorage{
@ -125,9 +129,9 @@ func NewMapStorage() (HashStorage, error) {
ids: IDMap{ ids: IDMap{
ids: []IDs{}, ids: []IDs{},
}, },
aHashes: []bmHash{}, aHashes: []ch.SavedHash{},
dHashes: []bmHash{}, dHashes: []ch.SavedHash{},
pHashes: []bmHash{}, pHashes: []ch.SavedHash{},
}, },
partialAHash: newPartialHash(), partialAHash: newPartialHash(),
partialDHash: newPartialHash(), partialDHash: newPartialHash(),
@ -149,9 +153,9 @@ func newPartialHash() [8]map[uint8][]uint64 {
} }
} }
func mapPartialHashes(hashes []bmHash, partialHashMap [8]map[uint8][]uint64) { func mapPartialHashes(hashes []ch.SavedHash, partialHashMap [8]map[uint8][]uint64) {
for _, savedHash := range hashes { for _, savedHash := range hashes {
for i, partialHash := range SplitHash(savedHash.Hash.Hash) { for i, partialHash := range ch.SplitHash(savedHash.Hash.Hash) {
partialHashMap[i][partialHash] = append(partialHashMap[i][partialHash], savedHash.Hash.Hash) partialHashMap[i][partialHash] = append(partialHashMap[i][partialHash], savedHash.Hash.Hash)
} }
} }

View File

@ -1,4 +1,4 @@
package ch package storage
import ( import (
"context" "context"
@ -8,6 +8,7 @@ import (
"log" "log"
"math/bits" "math/bits"
ch "gitea.narnian.us/lordwelch/comic-hasher"
_ "modernc.org/sqlite" _ "modernc.org/sqlite"
) )
@ -26,19 +27,19 @@ type sqliteStorage struct {
idExists *sql.Stmt idExists *sql.Stmt
} }
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) (map[ID][]ID, error) { func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash ch.Hash) (map[ch.ID][]ch.ID, error) {
if statement == nil { if statement == nil {
statement = s.hashExactMatchStatement statement = s.hashExactMatchStatement
} }
hashes := map[ID][]ID{} hashes := map[ch.ID][]ch.ID{}
rows, err := statement.Query(hash.Kind, int64(hash.Hash)) rows, err := statement.Query(hash.Kind, int64(hash.Hash))
if err != nil { if err != nil {
return hashes, err return hashes, err
} }
for rows.Next() { for rows.Next() {
var ( var (
id ID id ch.ID
foundID ID foundID ch.ID
) )
err = rows.Scan(&foundID.Domain, &foundID.ID, &id.Domain, &id.ID) err = rows.Scan(&foundID.Domain, &foundID.ID, &id.Domain, &id.ID)
if err != nil { if err != nil {
@ -51,24 +52,24 @@ func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) (map[ID]
return hashes, nil return hashes, nil
} }
func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max int, hash Hash) ([]Result, error) { func (s *sqliteStorage) findPartialHashes(tl ch.TimeLog, statement *sql.Stmt, max int, hash ch.Hash) ([]ch.Result, error) {
if statement == nil { if statement == nil {
statement = s.hashPartialMatchStatement statement = s.hashPartialMatchStatement
} }
hashResults := []Result{} hashResults := []ch.Result{}
rows, err := statement.Query(hash.Kind, int64(hash.Hash)) rows, err := statement.Query(hash.Kind, int64(hash.Hash))
if err != nil { if err != nil {
return hashResults, err return hashResults, err
} }
results := map[SavedHash][]ID{} results := map[ch.SavedHash][]ch.ID{}
for rows.Next() { for rows.Next() {
var ( var (
tmpHash int64 tmpHash int64
sqlHash = SavedHash{ sqlHash = ch.SavedHash{
Hash: Hash{Kind: hash.Kind}, Hash: ch.Hash{Kind: hash.Kind},
} }
id ID id ch.ID
) )
err = rows.Scan(&sqlHash.ID.Domain, &sqlHash.ID.ID, &tmpHash, &id.Domain, &id.ID) err = rows.Scan(&sqlHash.ID.Domain, &sqlHash.ID.ID, &tmpHash, &id.Domain, &id.ID)
if err != nil { if err != nil {
@ -79,7 +80,7 @@ func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max i
results[sqlHash] = append(results[sqlHash], id) results[sqlHash] = append(results[sqlHash], id)
} }
for sqlHash, ids := range results { for sqlHash, ids := range results {
res := Result{ res := ch.Result{
Hash: sqlHash.Hash, Hash: sqlHash.Hash,
ID: sqlHash.ID, ID: sqlHash.ID,
Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash.Hash), Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash.Hash),
@ -94,18 +95,18 @@ func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max i
func (s *sqliteStorage) dropIndexes() error { func (s *sqliteStorage) dropIndexes() error {
_, err := s.db.Exec(` _, err := s.db.Exec(`
DROP INDEX IF EXISTS hash_index; DROP INDEX IF EXISTS hash_index;
DROP INDEX IF EXISTS hash_1_index; DROP INDEX IF EXISTS hash_1_index;
DROP INDEX IF EXISTS hash_2_index; DROP INDEX IF EXISTS hash_2_index;
DROP INDEX IF EXISTS hash_3_index; DROP INDEX IF EXISTS hash_3_index;
DROP INDEX IF EXISTS hash_4_index; DROP INDEX IF EXISTS hash_4_index;
DROP INDEX IF EXISTS hash_5_index; DROP INDEX IF EXISTS hash_5_index;
DROP INDEX IF EXISTS hash_6_index; DROP INDEX IF EXISTS hash_6_index;
DROP INDEX IF EXISTS hash_7_index; DROP INDEX IF EXISTS hash_7_index;
DROP INDEX IF EXISTS hash_8_index; DROP INDEX IF EXISTS hash_8_index;
DROP INDEX IF EXISTS id_domain; DROP INDEX IF EXISTS id_domain;
`) `)
if err != nil { if err != nil {
return err return err
} }
@ -114,32 +115,32 @@ func (s *sqliteStorage) dropIndexes() error {
func (s *sqliteStorage) createIndexes() error { func (s *sqliteStorage) createIndexes() error {
_, err := s.db.Exec(` _, err := s.db.Exec(`
CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash); CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF)); CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF)); CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF)); CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF)); CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF)); CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF)); CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF)); CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF)); CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, stringid); CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, stringid);
PRAGMA shrink_memory; PRAGMA shrink_memory;
ANALYZE; ANALYZE;
`) `)
if err != nil { if err != nil {
return err return err
} }
return nil return nil
} }
func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) { func (s *sqliteStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var ( var (
foundMatches []Result foundMatches []ch.Result
tl timeLog tl ch.TimeLog
) )
tl.resetTime() tl.ResetTime()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes { for _, hash := range hashes {
@ -148,7 +149,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
return foundMatches, err return foundMatches, err
} }
for id, equivalentIDs := range idlist { for id, equivalentIDs := range idlist {
foundMatches = append(foundMatches, Result{ foundMatches = append(foundMatches, ch.Result{
Hash: hash, Hash: hash,
ID: id, ID: id,
Distance: 0, Distance: 0,
@ -157,7 +158,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
} }
} }
tl.logTime("Search Exact") tl.LogTime("Search Exact")
if len(foundMatches) > 0 { if len(foundMatches) > 0 {
return foundMatches, nil return foundMatches, nil
} }
@ -170,7 +171,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
if err != nil { if err != nil {
return foundMatches, err return foundMatches, err
} }
tl.logTime(fmt.Sprintf("Search partial %v", hash.Kind)) tl.LogTime(fmt.Sprintf("Search partial %v", hash.Kind))
for _, hash := range results { for _, hash := range results {
if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched { if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched {
@ -185,7 +186,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
return foundMatches, nil return foundMatches, nil
} }
func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) { func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ch.ImageHash) {
var err error var err error
insertHash := tx.Stmt(s.insertHash) insertHash := tx.Stmt(s.insertHash)
insertID := tx.Stmt(s.insertID) insertID := tx.Stmt(s.insertID)
@ -234,7 +235,7 @@ func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) {
} }
} }
} }
func (s *sqliteStorage) MapHashes(hash ImageHash) { func (s *sqliteStorage) MapHashes(hash ch.ImageHash) {
tx, err := s.db.BeginTx(context.Background(), nil) tx, err := s.db.BeginTx(context.Background(), nil)
if err != nil { if err != nil {
panic(err) panic(err)
@ -246,7 +247,7 @@ func (s *sqliteStorage) MapHashes(hash ImageHash) {
} }
} }
func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error { func (s *sqliteStorage) DecodeHashes(hashes *ch.SavedHashes) error {
return nil return nil
err := s.dropIndexes() err := s.dropIndexes()
if err != nil { if err != nil {
@ -285,8 +286,8 @@ func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
} }
for _, savedHash := range hashes.Hashes { for _, savedHash := range hashes.Hashes {
s.mapHashes(tx, ImageHash{ s.mapHashes(tx, ch.ImageHash{
Hashes: []Hash{savedHash.Hash}, Hashes: []ch.Hash{savedHash.Hash},
ID: savedHash.ID, ID: savedHash.ID,
}) })
} }
@ -302,8 +303,8 @@ func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
return nil return nil
} }
func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) { func (s *sqliteStorage) EncodeHashes() (*ch.SavedHashes, error) {
hashes := SavedHashes{} hashes := ch.SavedHashes{}
tx, err := s.db.Begin() tx, err := s.db.Begin()
if err != nil { if err != nil {
return &hashes, err return &hashes, err
@ -315,7 +316,7 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
} }
for rows.Next() { for rows.Next() {
var ( var (
hash SavedHash hash ch.SavedHash
tmpHash int64 tmpHash int64
) )
err = rows.Scan(&hash.Hash.Kind, &tmpHash, &hash.ID.Domain, &hash.ID.ID) err = rows.Scan(&hash.Hash.Kind, &tmpHash, &hash.ID.Domain, &hash.ID.ID)
@ -331,11 +332,11 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
} }
var ( var (
previousEid int64 = -1 previousEid int64 = -1
ids []ID ids []ch.ID
) )
for rows.Next() { for rows.Next() {
var ( var (
id ID id ch.ID
newEid int64 newEid int64
) )
err = rows.Scan(&newEid, &id.Domain, &id.Domain) err = rows.Scan(&newEid, &id.Domain, &id.Domain)
@ -348,14 +349,14 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
if len(ids) > 1 { if len(ids) > 1 {
hashes.IDs = append(hashes.IDs, ids) hashes.IDs = append(hashes.IDs, ids)
} }
ids = make([]ID, 0) ids = make([]ch.ID, 0)
} }
ids = append(ids, id) ids = append(ids, id)
} }
return &hashes, nil return &hashes, nil
} }
func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error { func (s *sqliteStorage) AssociateIDs(newIDs []ch.NewIDs) error {
tx, err := s.db.BeginTx(context.Background(), nil) tx, err := s.db.BeginTx(context.Background(), nil)
if err != nil { if err != nil {
panic(err) panic(err)
@ -397,21 +398,21 @@ func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
return nil return nil
} }
func (s *sqliteStorage) GetIDs(id ID) IDList { func (s *sqliteStorage) GetIDs(id ch.ID) ch.IDList {
var ids []ID var ids []ch.ID
rows, err := s.idMatchStatement.Query(id.Domain, id.ID) rows, err := s.idMatchStatement.Query(id.Domain, id.ID)
if err != nil { if err != nil {
return nil return nil
} }
for rows.Next() { for rows.Next() {
var id ID var id ch.ID
err = rows.Scan(&id.Domain, &id.ID) err = rows.Scan(&id.Domain, &id.ID)
if err != nil { if err != nil {
return nil return nil
} }
ids = append(ids, id) ids = append(ids, id)
} }
return ToIDList(ids) return ch.ToIDList(ids)
} }
func (s *sqliteStorage) PrepareStatements() error { func (s *sqliteStorage) PrepareStatements() error {
@ -437,50 +438,50 @@ func (s *sqliteStorage) PrepareStatements() error {
return fmt.Errorf("failed to prepare database statements: %w", err) return fmt.Errorf("failed to prepare database statements: %w", err)
} }
s.hashExactMatchStatement, err = s.db.Prepare(` s.hashExactMatchStatement, err = s.db.Prepare(`
select QIDs.domain, QIDs.stringid, IDs.domain, IDs.stringid from IDs select QIDs.domain, QIDs.stringid, IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join ( join (
select QEIDs.id as id from EquivalentIDs as QEIDs select QEIDs.id as id from EquivalentIDs as QEIDs
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid join IDs as QIDs on QIDs.id=QIEIDs.idid
join Hashes on Hashes.id=QIDs.id join Hashes on Hashes.id=QIDs.id
where (Hashes.kind=? AND Hashes.hash=?) where (Hashes.kind=? AND Hashes.hash=?)
) as EIDs on EIDs.id=IEIDs.equivalentid; ) as EIDs on EIDs.id=IEIDs.equivalentid;
`) `)
if err != nil { if err != nil {
return fmt.Errorf("failed to prepare database statements: %w", err) return fmt.Errorf("failed to prepare database statements: %w", err)
} }
s.hashPartialMatchStatement, err = s.db.Prepare(` s.hashPartialMatchStatement, err = s.db.Prepare(`
select QIDs.domain, QIDs.stringid, EIDs.hash, IDs.domain, IDs.stringid from IDs select QIDs.domain, QIDs.stringid, EIDs.hash, IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join ( join (
select Hashes.hash as hash, QEIDs.id as id from EquivalentIDs as QEIDs select Hashes.hash as hash, QEIDs.id as id from EquivalentIDs as QEIDs
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid join IDs as QIDs on QIDs.id=QIEIDs.idid
join Hashes on Hashes.id=QIDs.id join Hashes on Hashes.id=QIDs.id
where (Hashes.kind=? AND (((Hashes.hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((Hashes.hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((Hashes.hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((Hashes.hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((Hashes.hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((Hashes.hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((Hashes.hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((Hashes.hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF)))) where (Hashes.kind=? AND (((Hashes.hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((Hashes.hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((Hashes.hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((Hashes.hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((Hashes.hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((Hashes.hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((Hashes.hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((Hashes.hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF))))
) as EIDs on EIDs.id=IEIDs.equivalentid; ) as EIDs on EIDs.id=IEIDs.equivalentid;
`) `)
if err != nil { if err != nil {
return fmt.Errorf("failed to prepare database statements: %w", err) return fmt.Errorf("failed to prepare database statements: %w", err)
} }
s.idMatchStatement, err = s.db.Prepare(` s.idMatchStatement, err = s.db.Prepare(`
select IDs.domain, IDs.stringid from IDs select IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join ( join (
select EIDs.* from EquivalentIDs as EIDs select EIDs.* from EquivalentIDs as EIDs
join IDsToEquivalantIDs as QIEIDs on EIDs.id=QIEIDs.equivalentid join IDsToEquivalantIDs as QIEIDs on EIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid join IDs as QIDs on QIDs.id=QIEIDs.idid
where (QIDs.domain=? AND QIDs.stringid=?) where (QIDs.domain=? AND QIDs.stringid=?)
) as EIDs on EIDs.id=IEIDs.equivalentid; ) as EIDs on EIDs.id=IEIDs.equivalentid;
`) `)
if err != nil { if err != nil {
return fmt.Errorf("failed to prepare database statements: %w", err) return fmt.Errorf("failed to prepare database statements: %w", err)
} }
return nil return nil
} }
func NewSqliteStorage(db, path string) (HashStorage, error) { func NewSqliteStorage(db, path string) (ch.HashStorage, error) {
sqlite := &sqliteStorage{} sqlite := &sqliteStorage{}
sqlDB, err := sql.Open(db, fmt.Sprintf("file://%s?_pragma=cache_size(-200000)&_pragma=busy_timeout(500)&_pragma=hard_heap_limit(1073741824)&_pragma=journal_mode(wal)&_pragma=soft_heap_limit(314572800)", path)) sqlDB, err := sql.Open(db, fmt.Sprintf("file://%s?_pragma=cache_size(-200000)&_pragma=busy_timeout(500)&_pragma=hard_heap_limit(1073741824)&_pragma=journal_mode(wal)&_pragma=soft_heap_limit(314572800)", path))
if err != nil { if err != nil {
@ -488,34 +489,34 @@ func NewSqliteStorage(db, path string) (HashStorage, error) {
} }
sqlite.db = sqlDB sqlite.db = sqlDB
_, err = sqlite.db.Exec(` _, err = sqlite.db.Exec(`
PRAGMA foreign_keys=ON; PRAGMA foreign_keys=ON;
CREATE TABLE IF NOT EXISTS IDs( CREATE TABLE IF NOT EXISTS IDs(
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
stringid TEXT NOT NULL, stringid TEXT NOT NULL,
domain TEXT NOT NULL domain TEXT NOT NULL
); );
CREATE TABLE IF NOT EXISTS Hashes( CREATE TABLE IF NOT EXISTS Hashes(
hash INTEGER NOT NULL, hash INTEGER NOT NULL,
kind INTEGER NOT NULL, kind INTEGER NOT NULL,
id INTEGER NOT NULL, id INTEGER NOT NULL,
FOREIGN KEY(id) REFERENCES IDs(id) FOREIGN KEY(id) REFERENCES IDs(id)
); );
CREATE TABLE IF NOT EXISTS EquivalentIDs( CREATE TABLE IF NOT EXISTS EquivalentIDs(
id integer primary key id integer primary key
); );
CREATE TABLE IF NOT EXISTS IDsToEquivalantIDs( CREATE TABLE IF NOT EXISTS IDsToEquivalantIDs(
idid INTEGER NOT NULL, idid INTEGER NOT NULL,
equivalentid INTEGER NOT NULL, equivalentid INTEGER NOT NULL,
PRIMARY KEY (idid, equivalentid), PRIMARY KEY (idid, equivalentid),
FOREIGN KEY(idid) REFERENCES IDs(id), FOREIGN KEY(idid) REFERENCES IDs(id),
FOREIGN KEY(equivalentid) REFERENCES EquivalentIDs(id) FOREIGN KEY(equivalentid) REFERENCES EquivalentIDs(id)
); );
`) `)
if err != nil { if err != nil {
panic(err) panic(err)
} }

View File

@ -1,6 +1,6 @@
//go:build cgo && !gokrazy //go:build cgo && !gokrazy
package ch package storage
import ( import (
_ "github.com/mattn/go-sqlite3" _ "github.com/mattn/go-sqlite3"

View File

@ -1,6 +1,6 @@
//go:build !cgo && !gokrazy //go:build !cgo && !gokrazy
package ch package storage
import ( import (
_ "github.com/ncruces/go-sqlite3/driver" _ "github.com/ncruces/go-sqlite3/driver"

View File

@ -1,12 +1,13 @@
//go:build !gokrazy //go:build !gokrazy
package ch package storage
import ( import (
"errors" "errors"
"fmt" "fmt"
"math/bits" "math/bits"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash" "gitea.narnian.us/lordwelch/goimagehash"
"gonum.org/v1/gonum/spatial/vptree" "gonum.org/v1/gonum/spatial/vptree"
) )
@ -15,14 +16,14 @@ type VPTree struct {
aTree *vptree.Tree aTree *vptree.Tree
dTree *vptree.Tree dTree *vptree.Tree
pTree *vptree.Tree pTree *vptree.Tree
ids map[ID]*[]ID ids map[ch.ID]*[]ch.ID
aHashes []vptree.Comparable // temporary, only used for vptree creation aHashes []vptree.Comparable // temporary, only used for vptree creation
dHashes []vptree.Comparable // temporary, only used for vptree creation dHashes []vptree.Comparable // temporary, only used for vptree creation
pHashes []vptree.Comparable // temporary, only used for vptree creation pHashes []vptree.Comparable // temporary, only used for vptree creation
} }
type VPHash struct { type VPHash struct {
SavedHash ch.SavedHash
} }
func (h *VPHash) Distance(c vptree.Comparable) float64 { func (h *VPHash) Distance(c vptree.Comparable) float64 {
@ -33,22 +34,22 @@ func (h *VPHash) Distance(c vptree.Comparable) float64 {
return float64(bits.OnesCount64(h.Hash.Hash ^ h2.Hash.Hash)) return float64(bits.OnesCount64(h.Hash.Hash ^ h2.Hash.Hash))
} }
func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) { func (v *VPTree) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var ( var (
matches []Result matches []ch.Result
exactMatches []Result exactMatches []ch.Result
tl timeLog tl ch.TimeLog
) )
tl.resetTime() tl.ResetTime()
defer tl.logTime("Search Complete") defer tl.LogTime("Search Complete")
for _, hash := range hashes { for _, hash := range hashes {
results := vptree.NewDistKeeper(float64(max)) results := vptree.NewDistKeeper(float64(max))
currentTree := v.getCurrentTree(hash.Kind) currentTree := v.getCurrentTree(hash.Kind)
currentTree.NearestSet(results, &VPHash{SavedHash{Hash: hash}}) currentTree.NearestSet(results, &VPHash{ch.SavedHash{Hash: hash}})
mappedIds := map[*[]ID]bool{} mappedIds := map[*[]ch.ID]bool{}
for _, result := range results.Heap { for _, result := range results.Heap {
storedHash := result.Comparable.(*VPHash) storedHash := result.Comparable.(*VPHash)
ids := v.ids[storedHash.ID] ids := v.ids[storedHash.ID]
@ -57,14 +58,14 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
} }
mappedIds[ids] = true mappedIds[ids] = true
if result.Dist == 0 { if result.Dist == 0 {
exactMatches = append(exactMatches, Result{ exactMatches = append(exactMatches, ch.Result{
Hash: storedHash.Hash, Hash: storedHash.Hash,
ID: storedHash.ID, ID: storedHash.ID,
Distance: 0, Distance: 0,
EquivalentIDs: *v.ids[storedHash.ID], EquivalentIDs: *v.ids[storedHash.ID],
}) })
} else { } else {
matches = append(matches, Result{ matches = append(matches, ch.Result{
Hash: storedHash.Hash, Hash: storedHash.Hash,
ID: storedHash.ID, ID: storedHash.ID,
Distance: 0, Distance: 0,
@ -93,11 +94,11 @@ func (v *VPTree) getCurrentTree(kind goimagehash.Kind) *vptree.Tree {
panic("Unknown hash type: " + kind.String()) panic("Unknown hash type: " + kind.String())
} }
func (v *VPTree) MapHashes(ImageHash) { func (v *VPTree) MapHashes(ch.ImageHash) {
panic("Not Implemented") panic("Not Implemented")
} }
func (v *VPTree) DecodeHashes(hashes *SavedHashes) error { func (v *VPTree) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil { if hashes == nil {
return nil return nil
} }
@ -120,13 +121,13 @@ func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
v.pHashes = append(v.pHashes, &VPHash{savedHash}) v.pHashes = append(v.pHashes, &VPHash{savedHash})
} }
if savedHash.ID == (ID{}) { if savedHash.ID == (ch.ID{}) {
fmt.Println("Empty ID detected") fmt.Println("Empty ID detected")
panic(savedHash) panic(savedHash)
} }
// All known equal IDs are already mapped we can add any missing ones from hashes // All known equal IDs are already mapped we can add any missing ones from hashes
if _, ok := v.ids[savedHash.ID]; !ok { if _, ok := v.ids[savedHash.ID]; !ok {
v.ids[savedHash.ID] = &[]ID{savedHash.ID} v.ids[savedHash.ID] = &[]ch.ID{savedHash.ID}
} }
} }
@ -144,23 +145,23 @@ func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
} }
return nil return nil
} }
func (v *VPTree) EncodeHashes() (*SavedHashes, error) { func (v *VPTree) EncodeHashes() (*ch.SavedHashes, error) {
return &SavedHashes{}, errors.New("Not Implemented") return &ch.SavedHashes{}, errors.New("Not Implemented")
} }
func (v *VPTree) AssociateIDs(newIDs []NewIDs) error { func (v *VPTree) AssociateIDs(newIDs []ch.NewIDs) error {
return errors.New("Not Implemented") return errors.New("Not Implemented")
} }
func (v *VPTree) GetIDs(id ID) IDList { func (v *VPTree) GetIDs(id ch.ID) ch.IDList {
ids, found := v.ids[id] ids, found := v.ids[id]
if !found { if !found {
return nil return nil
} }
return ToIDList(*ids) return ch.ToIDList(*ids)
} }
func NewVPStorage() (HashStorage, error) { func NewVPStorage() (ch.HashStorage, error) {
var err error var err error
v := &VPTree{ v := &VPTree{
aHashes: []vptree.Comparable{}, aHashes: []vptree.Comparable{},

View File

@ -0,0 +1,13 @@
//go:build gokrazy
package storage
import (
"errors"
ch "gitea.narnian.us/lordwelch/comic-hasher"
)
// NewVPStorage is the stand-in constructor compiled when the gokrazy build
// tag is set. It never creates a storage backend: every call yields a nil
// ch.HashStorage together with a "VPTree not available" error.
func NewVPStorage() (ch.HashStorage, error) {
	errUnavailable := errors.New("VPTree not available")
	return nil, errUnavailable
}

View File

@ -5,17 +5,17 @@ import (
"time" "time"
) )
type timeLog struct { type TimeLog struct {
total time.Duration total time.Duration
last time.Time last time.Time
} }
func (t *timeLog) resetTime() { func (t *TimeLog) ResetTime() {
t.total = 0 t.total = 0
t.last = time.Now() t.last = time.Now()
} }
func (t *timeLog) logTime(log string) { func (t *TimeLog) LogTime(log string) {
now := time.Now() now := time.Now()
diff := now.Sub(t.last) diff := now.Sub(t.last)
t.last = now t.last = now