Compare commits
1 Commits
ed0b5ba441
...
main
Author | SHA1 | Date | |
---|---|---|---|
22d59aa221 |
@ -35,6 +35,7 @@ import (
|
||||
|
||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||
"gitea.narnian.us/lordwelch/comic-hasher/cv"
|
||||
"gitea.narnian.us/lordwelch/comic-hasher/storage"
|
||||
)
|
||||
|
||||
var bufPool = &sync.Pool{
|
||||
@ -215,15 +216,15 @@ func signalHandler(s *Server) {
|
||||
func initializeStorage(opts Opts) (ch.HashStorage, error) {
|
||||
switch opts.storageType {
|
||||
case Map:
|
||||
return ch.NewMapStorage()
|
||||
return storage.NewMapStorage()
|
||||
case BasicMap:
|
||||
return ch.NewBasicMapStorage()
|
||||
return storage.NewBasicMapStorage()
|
||||
case Sqlite:
|
||||
return ch.NewSqliteStorage("sqlite", opts.sqlitePath)
|
||||
return storage.NewSqliteStorage("sqlite", opts.sqlitePath)
|
||||
case Sqlite3:
|
||||
return ch.NewSqliteStorage("sqlite3", opts.sqlitePath)
|
||||
return storage.NewSqliteStorage("sqlite3", opts.sqlitePath)
|
||||
case VPTree:
|
||||
return ch.NewVPStorage()
|
||||
return storage.NewVPStorage()
|
||||
}
|
||||
return nil, errors.New("Unknown storage type provided")
|
||||
}
|
||||
|
@ -83,6 +83,19 @@ func (f *Format) Set(s string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *SavedHash) Clone() SavedHash {
|
||||
return SavedHash{
|
||||
Hash: Hash{
|
||||
Hash: h.Hash.Hash,
|
||||
Kind: h.Hash.Kind,
|
||||
},
|
||||
ID: ID{
|
||||
Domain: NewSource(*h.ID.Domain),
|
||||
ID: strings.Clone(h.ID.ID),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SavedHashes) InsertHash(hash SavedHash) {
|
||||
index, itemFound := slices.BinarySearchFunc(s.Hashes, hash, func(existing SavedHash, target SavedHash) int {
|
||||
return cmp.Or(
|
||||
|
@ -1,4 +1,4 @@
|
||||
package ch
|
||||
package storage
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
@ -6,49 +6,34 @@ import (
|
||||
"fmt"
|
||||
"math/bits"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
)
|
||||
type bmHash struct {
|
||||
Hash Hash
|
||||
ID ID
|
||||
}
|
||||
func NewbmHash(data SavedHash) bmHash {
|
||||
return bmHash{
|
||||
Hash: Hash{
|
||||
Hash: data.Hash.Hash,
|
||||
Kind: data.Hash.Kind,
|
||||
},
|
||||
ID: ID{
|
||||
Domain: data.ID.Domain,
|
||||
ID: strings.Clone(data.ID.ID),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
type basicMapStorage struct {
|
||||
hashMutex *sync.RWMutex
|
||||
|
||||
ids IDMap
|
||||
aHashes []bmHash
|
||||
dHashes []bmHash
|
||||
pHashes []bmHash
|
||||
aHashes []ch.SavedHash
|
||||
dHashes []ch.SavedHash
|
||||
pHashes []ch.SavedHash
|
||||
}
|
||||
type IDs struct {
|
||||
id *ID
|
||||
idList *[]*ID
|
||||
id *ch.ID
|
||||
idList *[]*ch.ID
|
||||
}
|
||||
type IDMap struct {
|
||||
ids []IDs
|
||||
}
|
||||
|
||||
func (m *IDMap) InsertID(id *ID) *ID {
|
||||
return m.insertID(id, &[]*ID{id})
|
||||
func (m *IDMap) InsertID(id *ch.ID) *ch.ID {
|
||||
return m.insertID(id, &[]*ch.ID{id})
|
||||
}
|
||||
|
||||
func (m *IDMap) insertID(id *ID, idList *[]*ID) *ID {
|
||||
index, found := slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ID) int {
|
||||
func (m *IDMap) insertID(id *ch.ID, idList *[]*ch.ID) *ch.ID {
|
||||
index, found := slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ch.ID) int {
|
||||
return id.id.Compare(*target)
|
||||
})
|
||||
if !found {
|
||||
@ -66,40 +51,40 @@ func (m *IDMap) sort() {
|
||||
})
|
||||
}
|
||||
|
||||
func (m *IDMap) FindID(id *ID) (int, bool) {
|
||||
return slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ID) int {
|
||||
func (m *IDMap) FindID(id *ch.ID) (int, bool) {
|
||||
return slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ch.ID) int {
|
||||
return id.id.Compare(*target)
|
||||
})
|
||||
}
|
||||
|
||||
func (m *IDMap) GetIDs(id *ID) []ID {
|
||||
func (m *IDMap) GetIDs(id *ch.ID) []ch.ID {
|
||||
index, found := m.FindID(id)
|
||||
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
ids := make([]ID, 0, len(*m.ids[index].idList))
|
||||
ids := make([]ch.ID, 0, len(*m.ids[index].idList))
|
||||
for _, id := range *m.ids[index].idList {
|
||||
ids = append(ids, *id)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
func (m *IDMap) AssociateIDs(newids []NewIDs) error {
|
||||
func (m *IDMap) AssociateIDs(newids []ch.NewIDs) error {
|
||||
for _, newid := range newids {
|
||||
index, found := m.FindID(&newid.OldID)
|
||||
if !found {
|
||||
return ErrIDNotFound
|
||||
}
|
||||
*(m.ids[index].idList) = InsertIDp(*(m.ids[index].idList), &newid.NewID)
|
||||
*(m.ids[index].idList) = ch.InsertIDp(*(m.ids[index].idList), &newid.NewID)
|
||||
m.insertID(&newid.NewID, m.ids[index].idList)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// func (m *IDMap) NewID(domain Source, id string) *ID {
|
||||
// newID := ID{domain, id}
|
||||
// index, found := slices.BinarySearchFunc(m.idList, newID, func(id *ID, target ID) int {
|
||||
// func (m *IDMap) NewID(domain Source, id string) *ch.ID {
|
||||
// newID := ch.ID{domain, id}
|
||||
// index, found := slices.BinarySearchFunc(m.idList, newID, func(id *ch.ID, target ch.ID) int {
|
||||
// return id.Compare(*target)
|
||||
// })
|
||||
// if !found {
|
||||
@ -111,11 +96,11 @@ func (m *IDMap) AssociateIDs(newids []NewIDs) error {
|
||||
// ErrIDNotFound is returned by IDMap.AssociateIDs when the ID that a new ID
// should be associated with is not present in the map.
var ErrIDNotFound = errors.New("ID not found on this server")
|
||||
|
||||
// atleast must have a read lock before using
|
||||
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
|
||||
matchingHashes := make([]Result, 0, 20) // hope that we don't need more
|
||||
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []ch.Result {
|
||||
matchingHashes := make([]ch.Result, 0, 20) // hope that we don't need more
|
||||
|
||||
mappedIds := map[int]bool{}
|
||||
storedHash := bmHash{} // reduces allocations and ensures queries are <1s
|
||||
storedHash := ch.SavedHash{} // reduces allocations and ensures queries are <1s
|
||||
for _, storedHash = range *b.getCurrentHashes(kind) {
|
||||
distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash)
|
||||
if distance <= maxDistance {
|
||||
@ -124,7 +109,7 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
|
||||
continue
|
||||
}
|
||||
mappedIds[index] = true
|
||||
matchingHashes = append(matchingHashes, Result{
|
||||
matchingHashes = append(matchingHashes, ch.Result{
|
||||
Hash: storedHash.Hash,
|
||||
ID: storedHash.ID,
|
||||
Distance: distance,
|
||||
@ -135,8 +120,8 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
|
||||
return matchingHashes
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
|
||||
var foundMatches []Result
|
||||
func (b *basicMapStorage) exactMatches(hashes []ch.Hash, max int) []ch.Result {
|
||||
var foundMatches []ch.Result
|
||||
for _, hash := range hashes {
|
||||
mappedIds := map[int]bool{}
|
||||
|
||||
@ -149,7 +134,7 @@ func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
|
||||
}
|
||||
mappedIds[index] = true
|
||||
|
||||
foundMatches = append(foundMatches, Result{
|
||||
foundMatches = append(foundMatches, ch.Result{
|
||||
Hash: storedHash.Hash,
|
||||
ID: storedHash.ID,
|
||||
Distance: 0,
|
||||
@ -162,20 +147,20 @@ func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
|
||||
return foundMatches
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
func (b *basicMapStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
|
||||
var (
|
||||
foundMatches []Result
|
||||
tl timeLog
|
||||
foundMatches []ch.Result
|
||||
tl ch.TimeLog
|
||||
)
|
||||
tl.resetTime()
|
||||
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
|
||||
tl.ResetTime()
|
||||
defer tl.LogTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
|
||||
b.hashMutex.RLock()
|
||||
defer b.hashMutex.RUnlock()
|
||||
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
foundMatches = b.exactMatches(hashes, max)
|
||||
|
||||
tl.logTime("Search Exact")
|
||||
tl.LogTime("Search Exact")
|
||||
if len(foundMatches) > 0 {
|
||||
return foundMatches, nil
|
||||
}
|
||||
@ -193,7 +178,7 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
|
||||
}
|
||||
|
||||
// getCurrentHashes must have a read lock before using
|
||||
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]bmHash {
|
||||
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]ch.SavedHash {
|
||||
if kind == goimagehash.AHash {
|
||||
return &b.aHashes
|
||||
}
|
||||
@ -209,9 +194,9 @@ func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]bmHash {
|
||||
// findHash must have a read lock before using
|
||||
// return value is index, count
|
||||
// if count < 1 then no results were found
|
||||
func (b *basicMapStorage) findHash(hash Hash) (int, int) {
|
||||
func (b *basicMapStorage) findHash(hash ch.Hash) (int, int) {
|
||||
currentHashes := *b.getCurrentHashes(hash.Kind)
|
||||
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing bmHash, target Hash) int {
|
||||
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing ch.SavedHash, target ch.Hash) int {
|
||||
return cmp.Compare(existing.Hash.Hash, target.Hash)
|
||||
})
|
||||
if !found {
|
||||
@ -225,7 +210,7 @@ func (b *basicMapStorage) findHash(hash Hash) (int, int) {
|
||||
}
|
||||
|
||||
// insertHash must already have a lock
|
||||
func (b *basicMapStorage) insertHash(hash Hash, id ID) {
|
||||
func (b *basicMapStorage) insertHash(hash ch.Hash, id ch.ID) {
|
||||
currentHashes := b.getCurrentHashes(hash.Kind)
|
||||
index, count := b.findHash(hash)
|
||||
max := index + count
|
||||
@ -235,12 +220,15 @@ func (b *basicMapStorage) insertHash(hash Hash, id ID) {
|
||||
}
|
||||
}
|
||||
|
||||
sh := bmHash{hash, id}
|
||||
sh := ch.SavedHash{
|
||||
Hash: hash,
|
||||
ID: id,
|
||||
}
|
||||
*currentHashes = slices.Insert(*currentHashes, index, sh)
|
||||
b.ids.InsertID(&sh.ID)
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) MapHashes(hash ImageHash) {
|
||||
func (b *basicMapStorage) MapHashes(hash ch.ImageHash) {
|
||||
b.hashMutex.Lock()
|
||||
defer b.hashMutex.Unlock()
|
||||
for _, ih := range hash.Hashes {
|
||||
@ -249,7 +237,7 @@ func (b *basicMapStorage) MapHashes(hash ImageHash) {
|
||||
}
|
||||
|
||||
// DecodeHashes must already have a lock
|
||||
func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
func (b *basicMapStorage) DecodeHashes(hashes *ch.SavedHashes) error {
|
||||
if hashes == nil {
|
||||
return nil
|
||||
}
|
||||
@ -257,7 +245,7 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
|
||||
// Initialize all the known equal IDs
|
||||
for _, ids := range hashes.IDs {
|
||||
new_ids := make([]*ID, 0, len(ids))
|
||||
new_ids := make([]*ch.ID, 0, len(ids))
|
||||
for _, id := range ids {
|
||||
new_ids = append(new_ids, &id)
|
||||
}
|
||||
@ -270,7 +258,7 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
}
|
||||
b.ids.sort()
|
||||
|
||||
slices.SortFunc(hashes.Hashes, func(existing, target SavedHash) int {
|
||||
slices.SortFunc(hashes.Hashes, func(existing, target ch.SavedHash) int {
|
||||
return cmp.Or(
|
||||
cmp.Compare(*existing.ID.Domain, *target.ID.Domain), // Sorted for id insertion efficiency
|
||||
cmp.Compare(existing.ID.ID, target.ID.ID), // Sorted for id insertion efficiency
|
||||
@ -295,31 +283,31 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
}
|
||||
|
||||
// Assume they are probably fairly equally split between hash types
|
||||
b.aHashes = make([]bmHash, 0, aHashCount)
|
||||
b.dHashes = make([]bmHash, 0, dHashCount)
|
||||
b.pHashes = make([]bmHash, 0, pHashCount)
|
||||
b.aHashes = make([]ch.SavedHash, 0, aHashCount)
|
||||
b.dHashes = make([]ch.SavedHash, 0, dHashCount)
|
||||
b.pHashes = make([]ch.SavedHash, 0, pHashCount)
|
||||
for i := range hashes.Hashes {
|
||||
bmhash := NewbmHash(hashes.Hashes[i])
|
||||
hash := hashes.Hashes[i].Clone() // Not cloning this will keep strings/slices loaded from json wasting memory
|
||||
if hashes.Hashes[i].Hash.Kind == goimagehash.AHash {
|
||||
b.aHashes = append(b.aHashes, bmhash)
|
||||
b.aHashes = append(b.aHashes, hash)
|
||||
}
|
||||
if hashes.Hashes[i].Hash.Kind == goimagehash.DHash {
|
||||
b.dHashes = append(b.dHashes, bmhash)
|
||||
b.dHashes = append(b.dHashes, hash)
|
||||
}
|
||||
if hashes.Hashes[i].Hash.Kind == goimagehash.PHash {
|
||||
b.pHashes = append(b.pHashes, bmhash)
|
||||
b.pHashes = append(b.pHashes, hash)
|
||||
}
|
||||
|
||||
if hashes.Hashes[i].ID == (ID{}) {
|
||||
if hashes.Hashes[i].ID == (ch.ID{}) {
|
||||
fmt.Println("Empty ID detected")
|
||||
panic(hashes.Hashes[i])
|
||||
}
|
||||
// TODO: Make loading this more efficient
|
||||
// All known equal IDs are already mapped we can add any missing ones from hashes
|
||||
b.ids.InsertID(&bmhash.ID)
|
||||
b.ids.InsertID(&hash.ID)
|
||||
}
|
||||
|
||||
hashCmp := func(existing, target bmHash) int {
|
||||
hashCmp := func(existing, target ch.SavedHash) int {
|
||||
return cmp.Or(
|
||||
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
|
||||
cmp.Compare(*existing.ID.Domain, *target.ID.Domain),
|
||||
@ -334,9 +322,9 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
}
|
||||
|
||||
// EncodeHashes should already have a lock
|
||||
func (b *basicMapStorage) EncodeHashes() (*SavedHashes, error) {
|
||||
savedHashes := SavedHashes{
|
||||
Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
|
||||
func (b *basicMapStorage) EncodeHashes() (*ch.SavedHashes, error) {
|
||||
savedHashes := ch.SavedHashes{
|
||||
Hashes: make([]ch.SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
|
||||
}
|
||||
// savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...)
|
||||
// savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...)
|
||||
@ -357,28 +345,28 @@ func (b *basicMapStorage) EncodeHashes() (*SavedHashes, error) {
|
||||
return &savedHashes, nil
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
|
||||
func (b *basicMapStorage) AssociateIDs(newids []ch.NewIDs) error {
|
||||
b.hashMutex.RLock()
|
||||
defer b.hashMutex.RUnlock()
|
||||
return b.ids.AssociateIDs(newids)
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) GetIDs(id ID) IDList {
|
||||
func (b *basicMapStorage) GetIDs(id ch.ID) ch.IDList {
|
||||
b.hashMutex.RLock()
|
||||
defer b.hashMutex.RUnlock()
|
||||
ids := b.ids.GetIDs(&id)
|
||||
return ToIDList(ids)
|
||||
return ch.ToIDList(ids)
|
||||
}
|
||||
|
||||
func NewBasicMapStorage() (HashStorage, error) {
|
||||
func NewBasicMapStorage() (ch.HashStorage, error) {
|
||||
storage := &basicMapStorage{
|
||||
hashMutex: &sync.RWMutex{},
|
||||
ids: IDMap{
|
||||
ids: []IDs{},
|
||||
},
|
||||
aHashes: []bmHash{},
|
||||
dHashes: []bmHash{},
|
||||
pHashes: []bmHash{},
|
||||
aHashes: []ch.SavedHash{},
|
||||
dHashes: []ch.SavedHash{},
|
||||
pHashes: []ch.SavedHash{},
|
||||
}
|
||||
return storage, nil
|
||||
}
|
@ -1,10 +1,11 @@
|
||||
package ch
|
||||
package storage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
)
|
||||
|
||||
@ -15,10 +16,10 @@ type MapStorage struct {
|
||||
partialPHash [8]map[uint8][]uint64
|
||||
}
|
||||
|
||||
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
func (m *MapStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
|
||||
var (
|
||||
foundMatches []Result
|
||||
tl timeLog
|
||||
foundMatches []ch.Result
|
||||
tl ch.TimeLog
|
||||
)
|
||||
m.hashMutex.RLock()
|
||||
defer m.hashMutex.RUnlock()
|
||||
@ -26,13 +27,13 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
foundMatches = m.exactMatches(hashes, max)
|
||||
|
||||
tl.logTime("Search Exact")
|
||||
tl.LogTime("Search Exact")
|
||||
if len(foundMatches) > 0 {
|
||||
return foundMatches, nil
|
||||
}
|
||||
}
|
||||
tl.resetTime()
|
||||
defer tl.logTime("Search Complete")
|
||||
tl.ResetTime()
|
||||
defer tl.LogTime("Search Complete")
|
||||
|
||||
totalPartialHashes := 0
|
||||
|
||||
@ -40,15 +41,18 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
|
||||
currentHashes, currentPartialHashes := m.getCurrentHashes(searchHash.Kind)
|
||||
potentialMatches := []uint64{}
|
||||
|
||||
for i, partialHash := range SplitHash(searchHash.Hash) {
|
||||
for i, partialHash := range ch.SplitHash(searchHash.Hash) {
|
||||
potentialMatches = append(potentialMatches, currentPartialHashes[i][partialHash]...)
|
||||
}
|
||||
|
||||
totalPartialHashes += len(potentialMatches)
|
||||
mappedIds := map[int]bool{}
|
||||
|
||||
for _, match := range Atleast(max, searchHash.Hash, potentialMatches) {
|
||||
matchedHash := Hash{match.Hash, searchHash.Kind}
|
||||
for _, match := range ch.Atleast(max, searchHash.Hash, potentialMatches) {
|
||||
matchedHash := ch.Hash{
|
||||
Hash: match.Hash,
|
||||
Kind: searchHash.Kind,
|
||||
}
|
||||
index, count := m.findHash(matchedHash)
|
||||
if count < 1 {
|
||||
continue
|
||||
@ -60,7 +64,7 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
|
||||
}
|
||||
mappedIds[idIndex] = true
|
||||
|
||||
foundMatches = append(foundMatches, Result{
|
||||
foundMatches = append(foundMatches, ch.Result{
|
||||
Hash: storedHash.Hash,
|
||||
ID: storedHash.ID,
|
||||
Distance: 0,
|
||||
@ -75,7 +79,7 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
|
||||
}
|
||||
|
||||
// getCurrentHashes must have a read lock before using
|
||||
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]bmHash, [8]map[uint8][]uint64) {
|
||||
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]ch.SavedHash, [8]map[uint8][]uint64) {
|
||||
if kind == goimagehash.AHash {
|
||||
return m.aHashes, m.partialAHash
|
||||
}
|
||||
@ -88,17 +92,17 @@ func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]bmHash, [8]map[u
|
||||
panic("Unknown hash type: " + kind.String())
|
||||
}
|
||||
|
||||
func (m *MapStorage) MapHashes(hash ImageHash) {
|
||||
func (m *MapStorage) MapHashes(hash ch.ImageHash) {
|
||||
m.basicMapStorage.MapHashes(hash)
|
||||
for _, hash := range hash.Hashes {
|
||||
_, partialHashes := m.getCurrentHashes(hash.Kind)
|
||||
for i, partialHash := range SplitHash(hash.Hash) {
|
||||
partialHashes[i][partialHash] = Insert(partialHashes[i][partialHash], hash.Hash)
|
||||
for i, partialHash := range ch.SplitHash(hash.Hash) {
|
||||
partialHashes[i][partialHash] = ch.Insert(partialHashes[i][partialHash], hash.Hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MapStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
func (m *MapStorage) DecodeHashes(hashes *ch.SavedHashes) error {
|
||||
if hashes == nil {
|
||||
return nil
|
||||
}
|
||||
@ -117,7 +121,7 @@ func (m *MapStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewMapStorage() (HashStorage, error) {
|
||||
func NewMapStorage() (ch.HashStorage, error) {
|
||||
|
||||
storage := &MapStorage{
|
||||
basicMapStorage: basicMapStorage{
|
||||
@ -125,9 +129,9 @@ func NewMapStorage() (HashStorage, error) {
|
||||
ids: IDMap{
|
||||
ids: []IDs{},
|
||||
},
|
||||
aHashes: []bmHash{},
|
||||
dHashes: []bmHash{},
|
||||
pHashes: []bmHash{},
|
||||
aHashes: []ch.SavedHash{},
|
||||
dHashes: []ch.SavedHash{},
|
||||
pHashes: []ch.SavedHash{},
|
||||
},
|
||||
partialAHash: newPartialHash(),
|
||||
partialDHash: newPartialHash(),
|
||||
@ -149,9 +153,9 @@ func newPartialHash() [8]map[uint8][]uint64 {
|
||||
}
|
||||
}
|
||||
|
||||
func mapPartialHashes(hashes []bmHash, partialHashMap [8]map[uint8][]uint64) {
|
||||
func mapPartialHashes(hashes []ch.SavedHash, partialHashMap [8]map[uint8][]uint64) {
|
||||
for _, savedHash := range hashes {
|
||||
for i, partialHash := range SplitHash(savedHash.Hash.Hash) {
|
||||
for i, partialHash := range ch.SplitHash(savedHash.Hash.Hash) {
|
||||
partialHashMap[i][partialHash] = append(partialHashMap[i][partialHash], savedHash.Hash.Hash)
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package ch
|
||||
package storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
@ -8,6 +8,7 @@ import (
|
||||
"log"
|
||||
"math/bits"
|
||||
|
||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
@ -26,19 +27,19 @@ type sqliteStorage struct {
|
||||
idExists *sql.Stmt
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) (map[ID][]ID, error) {
|
||||
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash ch.Hash) (map[ch.ID][]ch.ID, error) {
|
||||
if statement == nil {
|
||||
statement = s.hashExactMatchStatement
|
||||
}
|
||||
hashes := map[ID][]ID{}
|
||||
hashes := map[ch.ID][]ch.ID{}
|
||||
rows, err := statement.Query(hash.Kind, int64(hash.Hash))
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
for rows.Next() {
|
||||
var (
|
||||
id ID
|
||||
foundID ID
|
||||
id ch.ID
|
||||
foundID ch.ID
|
||||
)
|
||||
err = rows.Scan(&foundID.Domain, &foundID.ID, &id.Domain, &id.ID)
|
||||
if err != nil {
|
||||
@ -51,24 +52,24 @@ func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) (map[ID]
|
||||
return hashes, nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max int, hash Hash) ([]Result, error) {
|
||||
func (s *sqliteStorage) findPartialHashes(tl ch.TimeLog, statement *sql.Stmt, max int, hash ch.Hash) ([]ch.Result, error) {
|
||||
if statement == nil {
|
||||
statement = s.hashPartialMatchStatement
|
||||
}
|
||||
hashResults := []Result{}
|
||||
hashResults := []ch.Result{}
|
||||
rows, err := statement.Query(hash.Kind, int64(hash.Hash))
|
||||
if err != nil {
|
||||
return hashResults, err
|
||||
}
|
||||
|
||||
results := map[SavedHash][]ID{}
|
||||
results := map[ch.SavedHash][]ch.ID{}
|
||||
for rows.Next() {
|
||||
var (
|
||||
tmpHash int64
|
||||
sqlHash = SavedHash{
|
||||
Hash: Hash{Kind: hash.Kind},
|
||||
sqlHash = ch.SavedHash{
|
||||
Hash: ch.Hash{Kind: hash.Kind},
|
||||
}
|
||||
id ID
|
||||
id ch.ID
|
||||
)
|
||||
err = rows.Scan(&sqlHash.ID.Domain, &sqlHash.ID.ID, &tmpHash, &id.Domain, &id.ID)
|
||||
if err != nil {
|
||||
@ -79,7 +80,7 @@ func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max i
|
||||
results[sqlHash] = append(results[sqlHash], id)
|
||||
}
|
||||
for sqlHash, ids := range results {
|
||||
res := Result{
|
||||
res := ch.Result{
|
||||
Hash: sqlHash.Hash,
|
||||
ID: sqlHash.ID,
|
||||
Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash.Hash),
|
||||
@ -94,18 +95,18 @@ func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max i
|
||||
|
||||
func (s *sqliteStorage) dropIndexes() error {
|
||||
_, err := s.db.Exec(`
|
||||
DROP INDEX IF EXISTS hash_index;
|
||||
DROP INDEX IF EXISTS hash_1_index;
|
||||
DROP INDEX IF EXISTS hash_2_index;
|
||||
DROP INDEX IF EXISTS hash_3_index;
|
||||
DROP INDEX IF EXISTS hash_4_index;
|
||||
DROP INDEX IF EXISTS hash_5_index;
|
||||
DROP INDEX IF EXISTS hash_6_index;
|
||||
DROP INDEX IF EXISTS hash_7_index;
|
||||
DROP INDEX IF EXISTS hash_8_index;
|
||||
DROP INDEX IF EXISTS hash_index;
|
||||
DROP INDEX IF EXISTS hash_1_index;
|
||||
DROP INDEX IF EXISTS hash_2_index;
|
||||
DROP INDEX IF EXISTS hash_3_index;
|
||||
DROP INDEX IF EXISTS hash_4_index;
|
||||
DROP INDEX IF EXISTS hash_5_index;
|
||||
DROP INDEX IF EXISTS hash_6_index;
|
||||
DROP INDEX IF EXISTS hash_7_index;
|
||||
DROP INDEX IF EXISTS hash_8_index;
|
||||
|
||||
DROP INDEX IF EXISTS id_domain;
|
||||
`)
|
||||
DROP INDEX IF EXISTS id_domain;
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -114,32 +115,32 @@ func (s *sqliteStorage) dropIndexes() error {
|
||||
|
||||
// createIndexes runs a single Exec that creates (if missing) the lookup
// indexes: one on Hashes (kind, hash), one expression index per byte of the
// hash value (hash_1_index through hash_8_index), and one on
// IDs (domain, stringid). The same batch then issues PRAGMA shrink_memory and
// ANALYZE. It returns the error from Exec, or nil on success.
func (s *sqliteStorage) createIndexes() error {
|
||||
_, err := s.db.Exec(`
|
||||
CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
|
||||
CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
|
||||
CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
|
||||
|
||||
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, stringid);
|
||||
PRAGMA shrink_memory;
|
||||
ANALYZE;
|
||||
`)
|
||||
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, stringid);
|
||||
PRAGMA shrink_memory;
|
||||
ANALYZE;
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
func (s *sqliteStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
|
||||
var (
|
||||
foundMatches []Result
|
||||
tl timeLog
|
||||
foundMatches []ch.Result
|
||||
tl ch.TimeLog
|
||||
)
|
||||
tl.resetTime()
|
||||
tl.ResetTime()
|
||||
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
for _, hash := range hashes {
|
||||
@ -148,7 +149,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
|
||||
return foundMatches, err
|
||||
}
|
||||
for id, equivalentIDs := range idlist {
|
||||
foundMatches = append(foundMatches, Result{
|
||||
foundMatches = append(foundMatches, ch.Result{
|
||||
Hash: hash,
|
||||
ID: id,
|
||||
Distance: 0,
|
||||
@ -157,7 +158,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
|
||||
}
|
||||
}
|
||||
|
||||
tl.logTime("Search Exact")
|
||||
tl.LogTime("Search Exact")
|
||||
if len(foundMatches) > 0 {
|
||||
return foundMatches, nil
|
||||
}
|
||||
@ -170,7 +171,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
|
||||
if err != nil {
|
||||
return foundMatches, err
|
||||
}
|
||||
tl.logTime(fmt.Sprintf("Search partial %v", hash.Kind))
|
||||
tl.LogTime(fmt.Sprintf("Search partial %v", hash.Kind))
|
||||
|
||||
for _, hash := range results {
|
||||
if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched {
|
||||
@ -185,7 +186,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
|
||||
return foundMatches, nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) {
|
||||
func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ch.ImageHash) {
|
||||
var err error
|
||||
insertHash := tx.Stmt(s.insertHash)
|
||||
insertID := tx.Stmt(s.insertID)
|
||||
@ -234,7 +235,7 @@ func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) {
|
||||
}
|
||||
}
|
||||
}
|
||||
func (s *sqliteStorage) MapHashes(hash ImageHash) {
|
||||
func (s *sqliteStorage) MapHashes(hash ch.ImageHash) {
|
||||
tx, err := s.db.BeginTx(context.Background(), nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -246,7 +247,7 @@ func (s *sqliteStorage) MapHashes(hash ImageHash) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
func (s *sqliteStorage) DecodeHashes(hashes *ch.SavedHashes) error {
|
||||
return nil
|
||||
err := s.dropIndexes()
|
||||
if err != nil {
|
||||
@ -285,8 +286,8 @@ func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
}
|
||||
|
||||
for _, savedHash := range hashes.Hashes {
|
||||
s.mapHashes(tx, ImageHash{
|
||||
Hashes: []Hash{savedHash.Hash},
|
||||
s.mapHashes(tx, ch.ImageHash{
|
||||
Hashes: []ch.Hash{savedHash.Hash},
|
||||
ID: savedHash.ID,
|
||||
})
|
||||
}
|
||||
@ -302,8 +303,8 @@ func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
|
||||
hashes := SavedHashes{}
|
||||
func (s *sqliteStorage) EncodeHashes() (*ch.SavedHashes, error) {
|
||||
hashes := ch.SavedHashes{}
|
||||
tx, err := s.db.Begin()
|
||||
if err != nil {
|
||||
return &hashes, err
|
||||
@ -315,7 +316,7 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
|
||||
}
|
||||
for rows.Next() {
|
||||
var (
|
||||
hash SavedHash
|
||||
hash ch.SavedHash
|
||||
tmpHash int64
|
||||
)
|
||||
err = rows.Scan(&hash.Hash.Kind, &tmpHash, &hash.ID.Domain, &hash.ID.ID)
|
||||
@ -331,11 +332,11 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
|
||||
}
|
||||
var (
|
||||
previousEid int64 = -1
|
||||
ids []ID
|
||||
ids []ch.ID
|
||||
)
|
||||
for rows.Next() {
|
||||
var (
|
||||
id ID
|
||||
id ch.ID
|
||||
newEid int64
|
||||
)
|
||||
err = rows.Scan(&newEid, &id.Domain, &id.Domain)
|
||||
@ -348,14 +349,14 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
|
||||
if len(ids) > 1 {
|
||||
hashes.IDs = append(hashes.IDs, ids)
|
||||
}
|
||||
ids = make([]ID, 0)
|
||||
ids = make([]ch.ID, 0)
|
||||
}
|
||||
ids = append(ids, id)
|
||||
}
|
||||
return &hashes, nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
|
||||
func (s *sqliteStorage) AssociateIDs(newIDs []ch.NewIDs) error {
|
||||
tx, err := s.db.BeginTx(context.Background(), nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -397,21 +398,21 @@ func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) GetIDs(id ID) IDList {
|
||||
var ids []ID
|
||||
func (s *sqliteStorage) GetIDs(id ch.ID) ch.IDList {
|
||||
var ids []ch.ID
|
||||
rows, err := s.idMatchStatement.Query(id.Domain, id.ID)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
for rows.Next() {
|
||||
var id ID
|
||||
var id ch.ID
|
||||
err = rows.Scan(&id.Domain, &id.ID)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
ids = append(ids, id)
|
||||
}
|
||||
return ToIDList(ids)
|
||||
return ch.ToIDList(ids)
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) PrepareStatements() error {
|
||||
@ -437,50 +438,50 @@ func (s *sqliteStorage) PrepareStatements() error {
|
||||
return fmt.Errorf("failed to prepare database statements: %w", err)
|
||||
}
|
||||
s.hashExactMatchStatement, err = s.db.Prepare(`
|
||||
select QIDs.domain, QIDs.stringid, IDs.domain, IDs.stringid from IDs
|
||||
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
|
||||
join (
|
||||
select QEIDs.id as id from EquivalentIDs as QEIDs
|
||||
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
|
||||
join IDs as QIDs on QIDs.id=QIEIDs.idid
|
||||
join Hashes on Hashes.id=QIDs.id
|
||||
where (Hashes.kind=? AND Hashes.hash=?)
|
||||
) as EIDs on EIDs.id=IEIDs.equivalentid;
|
||||
`)
|
||||
select QIDs.domain, QIDs.stringid, IDs.domain, IDs.stringid from IDs
|
||||
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
|
||||
join (
|
||||
select QEIDs.id as id from EquivalentIDs as QEIDs
|
||||
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
|
||||
join IDs as QIDs on QIDs.id=QIEIDs.idid
|
||||
join Hashes on Hashes.id=QIDs.id
|
||||
where (Hashes.kind=? AND Hashes.hash=?)
|
||||
) as EIDs on EIDs.id=IEIDs.equivalentid;
|
||||
`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to prepare database statements: %w", err)
|
||||
}
|
||||
s.hashPartialMatchStatement, err = s.db.Prepare(`
|
||||
select QIDs.domain, QIDs.stringid, EIDs.hash, IDs.domain, IDs.stringid from IDs
|
||||
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
|
||||
join (
|
||||
select Hashes.hash as hash, QEIDs.id as id from EquivalentIDs as QEIDs
|
||||
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
|
||||
join IDs as QIDs on QIDs.id=QIEIDs.idid
|
||||
join Hashes on Hashes.id=QIDs.id
|
||||
where (Hashes.kind=? AND (((Hashes.hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((Hashes.hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((Hashes.hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((Hashes.hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((Hashes.hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((Hashes.hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((Hashes.hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((Hashes.hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF))))
|
||||
) as EIDs on EIDs.id=IEIDs.equivalentid;
|
||||
`)
|
||||
select QIDs.domain, QIDs.stringid, EIDs.hash, IDs.domain, IDs.stringid from IDs
|
||||
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
|
||||
join (
|
||||
select Hashes.hash as hash, QEIDs.id as id from EquivalentIDs as QEIDs
|
||||
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
|
||||
join IDs as QIDs on QIDs.id=QIEIDs.idid
|
||||
join Hashes on Hashes.id=QIDs.id
|
||||
where (Hashes.kind=? AND (((Hashes.hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((Hashes.hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((Hashes.hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((Hashes.hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((Hashes.hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((Hashes.hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((Hashes.hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((Hashes.hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF))))
|
||||
) as EIDs on EIDs.id=IEIDs.equivalentid;
|
||||
`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to prepare database statements: %w", err)
|
||||
}
|
||||
s.idMatchStatement, err = s.db.Prepare(`
|
||||
select IDs.domain, IDs.stringid from IDs
|
||||
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
|
||||
join (
|
||||
select EIDs.* from EquivalentIDs as EIDs
|
||||
join IDsToEquivalantIDs as QIEIDs on EIDs.id=QIEIDs.equivalentid
|
||||
join IDs as QIDs on QIDs.id=QIEIDs.idid
|
||||
where (QIDs.domain=? AND QIDs.stringid=?)
|
||||
) as EIDs on EIDs.id=IEIDs.equivalentid;
|
||||
`)
|
||||
select IDs.domain, IDs.stringid from IDs
|
||||
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
|
||||
join (
|
||||
select EIDs.* from EquivalentIDs as EIDs
|
||||
join IDsToEquivalantIDs as QIEIDs on EIDs.id=QIEIDs.equivalentid
|
||||
join IDs as QIDs on QIDs.id=QIEIDs.idid
|
||||
where (QIDs.domain=? AND QIDs.stringid=?)
|
||||
) as EIDs on EIDs.id=IEIDs.equivalentid;
|
||||
`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to prepare database statements: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewSqliteStorage(db, path string) (HashStorage, error) {
|
||||
func NewSqliteStorage(db, path string) (ch.HashStorage, error) {
|
||||
sqlite := &sqliteStorage{}
|
||||
sqlDB, err := sql.Open(db, fmt.Sprintf("file://%s?_pragma=cache_size(-200000)&_pragma=busy_timeout(500)&_pragma=hard_heap_limit(1073741824)&_pragma=journal_mode(wal)&_pragma=soft_heap_limit(314572800)", path))
|
||||
if err != nil {
|
||||
@ -488,34 +489,34 @@ func NewSqliteStorage(db, path string) (HashStorage, error) {
|
||||
}
|
||||
sqlite.db = sqlDB
|
||||
_, err = sqlite.db.Exec(`
|
||||
PRAGMA foreign_keys=ON;
|
||||
CREATE TABLE IF NOT EXISTS IDs(
|
||||
id INTEGER PRIMARY KEY,
|
||||
stringid TEXT NOT NULL,
|
||||
domain TEXT NOT NULL
|
||||
);
|
||||
PRAGMA foreign_keys=ON;
|
||||
CREATE TABLE IF NOT EXISTS IDs(
|
||||
id INTEGER PRIMARY KEY,
|
||||
stringid TEXT NOT NULL,
|
||||
domain TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS Hashes(
|
||||
hash INTEGER NOT NULL,
|
||||
kind INTEGER NOT NULL,
|
||||
id INTEGER NOT NULL,
|
||||
CREATE TABLE IF NOT EXISTS Hashes(
|
||||
hash INTEGER NOT NULL,
|
||||
kind INTEGER NOT NULL,
|
||||
id INTEGER NOT NULL,
|
||||
|
||||
FOREIGN KEY(id) REFERENCES IDs(id)
|
||||
);
|
||||
FOREIGN KEY(id) REFERENCES IDs(id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS EquivalentIDs(
|
||||
id integer primary key
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS EquivalentIDs(
|
||||
id integer primary key
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS IDsToEquivalantIDs(
|
||||
idid INTEGER NOT NULL,
|
||||
equivalentid INTEGER NOT NULL,
|
||||
PRIMARY KEY (idid, equivalentid),
|
||||
CREATE TABLE IF NOT EXISTS IDsToEquivalantIDs(
|
||||
idid INTEGER NOT NULL,
|
||||
equivalentid INTEGER NOT NULL,
|
||||
PRIMARY KEY (idid, equivalentid),
|
||||
|
||||
FOREIGN KEY(idid) REFERENCES IDs(id),
|
||||
FOREIGN KEY(equivalentid) REFERENCES EquivalentIDs(id)
|
||||
);
|
||||
`)
|
||||
FOREIGN KEY(idid) REFERENCES IDs(id),
|
||||
FOREIGN KEY(equivalentid) REFERENCES EquivalentIDs(id)
|
||||
);
|
||||
`)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
//go:build cgo && !gokrazy
|
||||
|
||||
package ch
|
||||
package storage
|
||||
|
||||
import (
|
||||
_ "github.com/mattn/go-sqlite3"
|
@ -1,6 +1,6 @@
|
||||
//go:build !cgo && !gokrazy
|
||||
|
||||
package ch
|
||||
package storage
|
||||
|
||||
import (
|
||||
_ "github.com/ncruces/go-sqlite3/driver"
|
@ -1,12 +1,13 @@
|
||||
//go:build !gokrazy
|
||||
|
||||
package ch
|
||||
package storage
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/bits"
|
||||
|
||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
"gonum.org/v1/gonum/spatial/vptree"
|
||||
)
|
||||
@ -15,14 +16,14 @@ type VPTree struct {
|
||||
aTree *vptree.Tree
|
||||
dTree *vptree.Tree
|
||||
pTree *vptree.Tree
|
||||
ids map[ID]*[]ID
|
||||
ids map[ch.ID]*[]ch.ID
|
||||
|
||||
aHashes []vptree.Comparable // temporary, only used for vptree creation
|
||||
dHashes []vptree.Comparable // temporary, only used for vptree creation
|
||||
pHashes []vptree.Comparable // temporary, only used for vptree creation
|
||||
}
|
||||
type VPHash struct {
|
||||
SavedHash
|
||||
ch.SavedHash
|
||||
}
|
||||
|
||||
func (h *VPHash) Distance(c vptree.Comparable) float64 {
|
||||
@ -33,22 +34,22 @@ func (h *VPHash) Distance(c vptree.Comparable) float64 {
|
||||
return float64(bits.OnesCount64(h.Hash.Hash ^ h2.Hash.Hash))
|
||||
}
|
||||
|
||||
func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
func (v *VPTree) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
|
||||
var (
|
||||
matches []Result
|
||||
exactMatches []Result
|
||||
tl timeLog
|
||||
matches []ch.Result
|
||||
exactMatches []ch.Result
|
||||
tl ch.TimeLog
|
||||
)
|
||||
tl.resetTime()
|
||||
defer tl.logTime("Search Complete")
|
||||
tl.ResetTime()
|
||||
defer tl.LogTime("Search Complete")
|
||||
|
||||
for _, hash := range hashes {
|
||||
results := vptree.NewDistKeeper(float64(max))
|
||||
|
||||
currentTree := v.getCurrentTree(hash.Kind)
|
||||
currentTree.NearestSet(results, &VPHash{SavedHash{Hash: hash}})
|
||||
currentTree.NearestSet(results, &VPHash{ch.SavedHash{Hash: hash}})
|
||||
|
||||
mappedIds := map[*[]ID]bool{}
|
||||
mappedIds := map[*[]ch.ID]bool{}
|
||||
for _, result := range results.Heap {
|
||||
storedHash := result.Comparable.(*VPHash)
|
||||
ids := v.ids[storedHash.ID]
|
||||
@ -57,14 +58,14 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
|
||||
}
|
||||
mappedIds[ids] = true
|
||||
if result.Dist == 0 {
|
||||
exactMatches = append(exactMatches, Result{
|
||||
exactMatches = append(exactMatches, ch.Result{
|
||||
Hash: storedHash.Hash,
|
||||
ID: storedHash.ID,
|
||||
Distance: 0,
|
||||
EquivalentIDs: *v.ids[storedHash.ID],
|
||||
})
|
||||
} else {
|
||||
matches = append(matches, Result{
|
||||
matches = append(matches, ch.Result{
|
||||
Hash: storedHash.Hash,
|
||||
ID: storedHash.ID,
|
||||
Distance: 0,
|
||||
@ -93,11 +94,11 @@ func (v *VPTree) getCurrentTree(kind goimagehash.Kind) *vptree.Tree {
|
||||
panic("Unknown hash type: " + kind.String())
|
||||
}
|
||||
|
||||
func (v *VPTree) MapHashes(ImageHash) {
|
||||
func (v *VPTree) MapHashes(ch.ImageHash) {
|
||||
panic("Not Implemented")
|
||||
}
|
||||
|
||||
func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
|
||||
func (v *VPTree) DecodeHashes(hashes *ch.SavedHashes) error {
|
||||
if hashes == nil {
|
||||
return nil
|
||||
}
|
||||
@ -120,13 +121,13 @@ func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
|
||||
v.pHashes = append(v.pHashes, &VPHash{savedHash})
|
||||
}
|
||||
|
||||
if savedHash.ID == (ID{}) {
|
||||
if savedHash.ID == (ch.ID{}) {
|
||||
fmt.Println("Empty ID detected")
|
||||
panic(savedHash)
|
||||
}
|
||||
// All known equal IDs are already mapped we can add any missing ones from hashes
|
||||
if _, ok := v.ids[savedHash.ID]; !ok {
|
||||
v.ids[savedHash.ID] = &[]ID{savedHash.ID}
|
||||
v.ids[savedHash.ID] = &[]ch.ID{savedHash.ID}
|
||||
}
|
||||
}
|
||||
|
||||
@ -144,23 +145,23 @@ func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (v *VPTree) EncodeHashes() (*SavedHashes, error) {
|
||||
return &SavedHashes{}, errors.New("Not Implemented")
|
||||
func (v *VPTree) EncodeHashes() (*ch.SavedHashes, error) {
|
||||
return &ch.SavedHashes{}, errors.New("Not Implemented")
|
||||
}
|
||||
|
||||
func (v *VPTree) AssociateIDs(newIDs []NewIDs) error {
|
||||
func (v *VPTree) AssociateIDs(newIDs []ch.NewIDs) error {
|
||||
return errors.New("Not Implemented")
|
||||
}
|
||||
|
||||
func (v *VPTree) GetIDs(id ID) IDList {
|
||||
func (v *VPTree) GetIDs(id ch.ID) ch.IDList {
|
||||
ids, found := v.ids[id]
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
return ToIDList(*ids)
|
||||
return ch.ToIDList(*ids)
|
||||
}
|
||||
|
||||
func NewVPStorage() (HashStorage, error) {
|
||||
func NewVPStorage() (ch.HashStorage, error) {
|
||||
var err error
|
||||
v := &VPTree{
|
||||
aHashes: []vptree.Comparable{},
|
13
storage/vp-tree_gokrazy.go
Normal file
13
storage/vp-tree_gokrazy.go
Normal file
@ -0,0 +1,13 @@
|
||||
//go:build gokrazy
|
||||
|
||||
package storage
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||
)
|
||||
|
||||
func NewVPStorage() (ch.HashStorage, error) {
|
||||
return nil, errors.New("VPTree not available")
|
||||
}
|
@ -5,17 +5,17 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
type timeLog struct {
|
||||
type TimeLog struct {
|
||||
total time.Duration
|
||||
last time.Time
|
||||
}
|
||||
|
||||
func (t *timeLog) resetTime() {
|
||||
func (t *TimeLog) ResetTime() {
|
||||
t.total = 0
|
||||
t.last = time.Now()
|
||||
}
|
||||
|
||||
func (t *timeLog) logTime(log string) {
|
||||
func (t *TimeLog) LogTime(log string) {
|
||||
now := time.Now()
|
||||
diff := now.Sub(t.last)
|
||||
t.last = now
|
||||
|
Reference in New Issue
Block a user