Compare commits

..

1 Commits

Author SHA1 Message Date
22d59aa221 Move HashStorage to its own package 2025-05-31 19:00:40 -07:00
10 changed files with 266 additions and 245 deletions

View File

@ -35,6 +35,7 @@ import (
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/comic-hasher/cv"
"gitea.narnian.us/lordwelch/comic-hasher/storage"
)
var bufPool = &sync.Pool{
@ -215,15 +216,15 @@ func signalHandler(s *Server) {
// initializeStorage constructs the hash storage backend selected by
// opts.storageType. The Sqlite and Sqlite3 cases differ only in the driver
// name handed to NewSqliteStorage; both open the database at opts.sqlitePath.
// An unrecognised storage type yields a nil storage and an error.
func initializeStorage(opts Opts) (ch.HashStorage, error) {
switch opts.storageType {
case Map:
return ch.NewMapStorage()
return storage.NewMapStorage()
case BasicMap:
return ch.NewBasicMapStorage()
return storage.NewBasicMapStorage()
case Sqlite:
return ch.NewSqliteStorage("sqlite", opts.sqlitePath)
return storage.NewSqliteStorage("sqlite", opts.sqlitePath)
case Sqlite3:
return ch.NewSqliteStorage("sqlite3", opts.sqlitePath)
return storage.NewSqliteStorage("sqlite3", opts.sqlitePath)
case VPTree:
return ch.NewVPStorage()
return storage.NewVPStorage()
}
// No case matched: the configured storage type is not one this server knows.
return nil, errors.New("Unknown storage type provided")
}

View File

@ -83,6 +83,19 @@ func (f *Format) Set(s string) error {
return nil
}
// Clone returns a copy of h whose ID string is duplicated with strings.Clone
// and whose Domain is passed through NewSource, so the copy does not keep the
// original string's backing memory alive.
//
// A nil Domain is preserved as nil instead of being dereferenced. This lets a
// caller that validates the cloned value (for example by comparing the ID
// against the zero ID) report the bad entry itself rather than crashing with
// a nil-pointer panic inside Clone.
func (h *SavedHash) Clone() SavedHash {
	clone := SavedHash{
		Hash: Hash{
			Hash: h.Hash.Hash,
			Kind: h.Hash.Kind,
		},
		ID: ID{
			ID: strings.Clone(h.ID.ID),
		},
	}
	// Only dereference the domain when there is one to copy.
	if h.ID.Domain != nil {
		clone.ID.Domain = NewSource(*h.ID.Domain)
	}
	return clone
}
func (s *SavedHashes) InsertHash(hash SavedHash) {
index, itemFound := slices.BinarySearchFunc(s.Hashes, hash, func(existing SavedHash, target SavedHash) int {
return cmp.Or(

View File

@ -1,4 +1,4 @@
package ch
package storage
import (
"cmp"
@ -6,49 +6,34 @@ import (
"fmt"
"math/bits"
"slices"
"strings"
"sync"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
)
type bmHash struct {
Hash Hash
ID ID
}
func NewbmHash(data SavedHash) bmHash {
return bmHash{
Hash: Hash{
Hash: data.Hash.Hash,
Kind: data.Hash.Kind,
},
ID: ID{
Domain: data.ID.Domain,
ID: strings.Clone(data.ID.ID),
},
}
}
type basicMapStorage struct {
hashMutex *sync.RWMutex
ids IDMap
aHashes []bmHash
dHashes []bmHash
pHashes []bmHash
aHashes []ch.SavedHash
dHashes []ch.SavedHash
pHashes []ch.SavedHash
}
type IDs struct {
id *ID
idList *[]*ID
id *ch.ID
idList *[]*ch.ID
}
type IDMap struct {
ids []IDs
}
func (m *IDMap) InsertID(id *ID) *ID {
return m.insertID(id, &[]*ID{id})
// InsertID adds id to the map with a fresh equivalence list that contains
// only id itself, and returns whatever insertID returns for it.
func (m *IDMap) InsertID(id *ch.ID) *ch.ID {
	selfOnly := []*ch.ID{id}
	return m.insertID(id, &selfOnly)
}
func (m *IDMap) insertID(id *ID, idList *[]*ID) *ID {
index, found := slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ID) int {
func (m *IDMap) insertID(id *ch.ID, idList *[]*ch.ID) *ch.ID {
index, found := slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ch.ID) int {
return id.id.Compare(*target)
})
if !found {
@ -66,40 +51,40 @@ func (m *IDMap) sort() {
})
}
func (m *IDMap) FindID(id *ID) (int, bool) {
return slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ID) int {
// FindID binary-searches m.ids for the entry whose key compares equal to id.
// It returns the position reported by slices.BinarySearchFunc together with
// whether an exact match was found. The search relies on m.ids being sorted
// in ID.Compare order.
func (m *IDMap) FindID(id *ch.ID) (int, bool) {
	byKey := func(entry IDs, want *ch.ID) int {
		return entry.id.Compare(*want)
	}
	pos, ok := slices.BinarySearchFunc(m.ids, id, byKey)
	return pos, ok
}
func (m *IDMap) GetIDs(id *ID) []ID {
// GetIDs returns copies of every ID stored in the equivalence list of id, or
// nil when id is not present in the map.
func (m *IDMap) GetIDs(id *ch.ID) []ch.ID {
index, found := m.FindID(id)
if !found {
return nil
}
ids := make([]ID, 0, len(*m.ids[index].idList))
ids := make([]ch.ID, 0, len(*m.ids[index].idList))
// Dereference each entry so the caller receives values rather than the
// pointers held in the map's own list.
for _, id := range *m.ids[index].idList {
ids = append(ids, *id)
}
return ids
}
func (m *IDMap) AssociateIDs(newids []NewIDs) error {
// AssociateIDs links each NewID in newids to the equivalence list of its
// OldID. For every pair it inserts NewID into OldID's list and then passes
// NewID to insertID together with that same list pointer.
//
// Processing stops at the first OldID that is not in the map and
// ErrIDNotFound is returned; pairs handled before that point stay applied.
func (m *IDMap) AssociateIDs(newids []ch.NewIDs) error {
for _, newid := range newids {
index, found := m.FindID(&newid.OldID)
if !found {
return ErrIDNotFound
}
*(m.ids[index].idList) = InsertIDp(*(m.ids[index].idList), &newid.NewID)
*(m.ids[index].idList) = ch.InsertIDp(*(m.ids[index].idList), &newid.NewID)
m.insertID(&newid.NewID, m.ids[index].idList)
}
return nil
}
// func (m *IDMap) NewID(domain Source, id string) *ID {
// newID := ID{domain, id}
// index, found := slices.BinarySearchFunc(m.idList, newID, func(id *ID, target ID) int {
// func (m *IDMap) NewID(domain Source, id string) *ch.ID {
// newID := ch.ID{domain, id}
// index, found := slices.BinarySearchFunc(m.idList, newID, func(id *ch.ID, target ch.ID) int {
// return id.Compare(*target)
// })
// if !found {
@ -111,11 +96,11 @@ func (m *IDMap) AssociateIDs(newids []NewIDs) error {
var ErrIDNotFound = errors.New("ID not found on this server")
// atleast must have a read lock before using
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
matchingHashes := make([]Result, 0, 20) // hope that we don't need more
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []ch.Result {
matchingHashes := make([]ch.Result, 0, 20) // hope that we don't need more
mappedIds := map[int]bool{}
storedHash := bmHash{} // reduces allocations and ensures queries are <1s
storedHash := ch.SavedHash{} // reduces allocations and ensures queries are <1s
for _, storedHash = range *b.getCurrentHashes(kind) {
distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash)
if distance <= maxDistance {
@ -124,7 +109,7 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
continue
}
mappedIds[index] = true
matchingHashes = append(matchingHashes, Result{
matchingHashes = append(matchingHashes, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: distance,
@ -135,8 +120,8 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
return matchingHashes
}
func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
var foundMatches []Result
func (b *basicMapStorage) exactMatches(hashes []ch.Hash, max int) []ch.Result {
var foundMatches []ch.Result
for _, hash := range hashes {
mappedIds := map[int]bool{}
@ -149,7 +134,7 @@ func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
}
mappedIds[index] = true
foundMatches = append(foundMatches, Result{
foundMatches = append(foundMatches, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
@ -162,20 +147,20 @@ func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
return foundMatches
}
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
func (b *basicMapStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var (
foundMatches []Result
tl timeLog
foundMatches []ch.Result
tl ch.TimeLog
)
tl.resetTime()
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
tl.ResetTime()
defer tl.LogTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = b.exactMatches(hashes, max)
tl.logTime("Search Exact")
tl.LogTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
@ -193,7 +178,7 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
}
// getCurrentHashes must have a read lock before using
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]bmHash {
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]ch.SavedHash {
if kind == goimagehash.AHash {
return &b.aHashes
}
@ -209,9 +194,9 @@ func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]bmHash {
// findHash must have a read lock before using
// return value is index, count
// if count < 1 then no results were found
func (b *basicMapStorage) findHash(hash Hash) (int, int) {
func (b *basicMapStorage) findHash(hash ch.Hash) (int, int) {
currentHashes := *b.getCurrentHashes(hash.Kind)
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing bmHash, target Hash) int {
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing ch.SavedHash, target ch.Hash) int {
return cmp.Compare(existing.Hash.Hash, target.Hash)
})
if !found {
@ -225,7 +210,7 @@ func (b *basicMapStorage) findHash(hash Hash) (int, int) {
}
// insertHash must already have a lock
func (b *basicMapStorage) insertHash(hash Hash, id ID) {
func (b *basicMapStorage) insertHash(hash ch.Hash, id ch.ID) {
currentHashes := b.getCurrentHashes(hash.Kind)
index, count := b.findHash(hash)
max := index + count
@ -235,12 +220,15 @@ func (b *basicMapStorage) insertHash(hash Hash, id ID) {
}
}
sh := bmHash{hash, id}
sh := ch.SavedHash{
Hash: hash,
ID: id,
}
*currentHashes = slices.Insert(*currentHashes, index, sh)
b.ids.InsertID(&sh.ID)
}
func (b *basicMapStorage) MapHashes(hash ImageHash) {
func (b *basicMapStorage) MapHashes(hash ch.ImageHash) {
b.hashMutex.Lock()
defer b.hashMutex.Unlock()
for _, ih := range hash.Hashes {
@ -249,7 +237,7 @@ func (b *basicMapStorage) MapHashes(hash ImageHash) {
}
// DecodeHashes must already have a lock
func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
func (b *basicMapStorage) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil {
return nil
}
@ -257,7 +245,7 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
// Initialize all the known equal IDs
for _, ids := range hashes.IDs {
new_ids := make([]*ID, 0, len(ids))
new_ids := make([]*ch.ID, 0, len(ids))
for _, id := range ids {
new_ids = append(new_ids, &id)
}
@ -270,7 +258,7 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
}
b.ids.sort()
slices.SortFunc(hashes.Hashes, func(existing, target SavedHash) int {
slices.SortFunc(hashes.Hashes, func(existing, target ch.SavedHash) int {
return cmp.Or(
cmp.Compare(*existing.ID.Domain, *target.ID.Domain), // Sorted for id insertion efficiency
cmp.Compare(existing.ID.ID, target.ID.ID), // Sorted for id insertion efficiency
@ -295,31 +283,31 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
}
// Assume they are probably fairly equally split between hash types
b.aHashes = make([]bmHash, 0, aHashCount)
b.dHashes = make([]bmHash, 0, dHashCount)
b.pHashes = make([]bmHash, 0, pHashCount)
b.aHashes = make([]ch.SavedHash, 0, aHashCount)
b.dHashes = make([]ch.SavedHash, 0, dHashCount)
b.pHashes = make([]ch.SavedHash, 0, pHashCount)
for i := range hashes.Hashes {
bmhash := NewbmHash(hashes.Hashes[i])
hash := hashes.Hashes[i].Clone() // Not cloning this will keep strings/slices loaded from json wasting memory
if hashes.Hashes[i].Hash.Kind == goimagehash.AHash {
b.aHashes = append(b.aHashes, bmhash)
b.aHashes = append(b.aHashes, hash)
}
if hashes.Hashes[i].Hash.Kind == goimagehash.DHash {
b.dHashes = append(b.dHashes, bmhash)
b.dHashes = append(b.dHashes, hash)
}
if hashes.Hashes[i].Hash.Kind == goimagehash.PHash {
b.pHashes = append(b.pHashes, bmhash)
b.pHashes = append(b.pHashes, hash)
}
if hashes.Hashes[i].ID == (ID{}) {
if hashes.Hashes[i].ID == (ch.ID{}) {
fmt.Println("Empty ID detected")
panic(hashes.Hashes[i])
}
// TODO: Make loading this more efficient
// All known equal IDs are already mapped we can add any missing ones from hashes
b.ids.InsertID(&bmhash.ID)
b.ids.InsertID(&hash.ID)
}
hashCmp := func(existing, target bmHash) int {
hashCmp := func(existing, target ch.SavedHash) int {
return cmp.Or(
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
cmp.Compare(*existing.ID.Domain, *target.ID.Domain),
@ -334,9 +322,9 @@ func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
}
// EncodeHashes should already have a lock
func (b *basicMapStorage) EncodeHashes() (*SavedHashes, error) {
savedHashes := SavedHashes{
Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
func (b *basicMapStorage) EncodeHashes() (*ch.SavedHashes, error) {
savedHashes := ch.SavedHashes{
Hashes: make([]ch.SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
}
// savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...)
// savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...)
@ -357,28 +345,28 @@ func (b *basicMapStorage) EncodeHashes() (*SavedHashes, error) {
return &savedHashes, nil
}
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
// AssociateIDs records each NewID in newids as equivalent to its OldID by
// delegating to the underlying IDMap, and returns that call's error.
//
// The IDMap is modified by this call (equivalence lists are rewritten and new
// entries are inserted), so the exclusive write lock is taken. Holding only
// the read lock would let the mutation run concurrently with readers such as
// GetIDs and GetMatches, which is a data race.
func (b *basicMapStorage) AssociateIDs(newids []ch.NewIDs) error {
	b.hashMutex.Lock()
	defer b.hashMutex.Unlock()
	return b.ids.AssociateIDs(newids)
}
func (b *basicMapStorage) GetIDs(id ID) IDList {
// GetIDs looks up the IDs recorded as equivalent to id and returns them
// converted by ToIDList. The lookup runs while holding the read lock.
func (b *basicMapStorage) GetIDs(id ch.ID) ch.IDList {
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
ids := b.ids.GetIDs(&id)
return ToIDList(ids)
return ch.ToIDList(ids)
}
func NewBasicMapStorage() (HashStorage, error) {
// NewBasicMapStorage returns an empty basicMapStorage: a new RWMutex, an
// IDMap with no entries, and empty aHashes, dHashes and pHashes slices.
// The returned error is always nil.
func NewBasicMapStorage() (ch.HashStorage, error) {
storage := &basicMapStorage{
hashMutex: &sync.RWMutex{},
ids: IDMap{
ids: []IDs{},
},
aHashes: []bmHash{},
dHashes: []bmHash{},
pHashes: []bmHash{},
aHashes: []ch.SavedHash{},
dHashes: []ch.SavedHash{},
pHashes: []ch.SavedHash{},
}
return storage, nil
}

View File

@ -1,10 +1,11 @@
package ch
package storage
import (
"fmt"
"slices"
"sync"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
)
@ -15,10 +16,10 @@ type MapStorage struct {
partialPHash [8]map[uint8][]uint64
}
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
func (m *MapStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var (
foundMatches []Result
tl timeLog
foundMatches []ch.Result
tl ch.TimeLog
)
m.hashMutex.RLock()
defer m.hashMutex.RUnlock()
@ -26,13 +27,13 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = m.exactMatches(hashes, max)
tl.logTime("Search Exact")
tl.LogTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
}
tl.resetTime()
defer tl.logTime("Search Complete")
tl.ResetTime()
defer tl.LogTime("Search Complete")
totalPartialHashes := 0
@ -40,15 +41,18 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
currentHashes, currentPartialHashes := m.getCurrentHashes(searchHash.Kind)
potentialMatches := []uint64{}
for i, partialHash := range SplitHash(searchHash.Hash) {
for i, partialHash := range ch.SplitHash(searchHash.Hash) {
potentialMatches = append(potentialMatches, currentPartialHashes[i][partialHash]...)
}
totalPartialHashes += len(potentialMatches)
mappedIds := map[int]bool{}
for _, match := range Atleast(max, searchHash.Hash, potentialMatches) {
matchedHash := Hash{match.Hash, searchHash.Kind}
for _, match := range ch.Atleast(max, searchHash.Hash, potentialMatches) {
matchedHash := ch.Hash{
Hash: match.Hash,
Kind: searchHash.Kind,
}
index, count := m.findHash(matchedHash)
if count < 1 {
continue
@ -60,7 +64,7 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
}
mappedIds[idIndex] = true
foundMatches = append(foundMatches, Result{
foundMatches = append(foundMatches, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
@ -75,7 +79,7 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
}
// getCurrentHashes must have a read lock before using
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]bmHash, [8]map[uint8][]uint64) {
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]ch.SavedHash, [8]map[uint8][]uint64) {
if kind == goimagehash.AHash {
return m.aHashes, m.partialAHash
}
@ -88,17 +92,17 @@ func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]bmHash, [8]map[u
panic("Unknown hash type: " + kind.String())
}
func (m *MapStorage) MapHashes(hash ImageHash) {
// MapHashes stores hash through the embedded basicMapStorage and then indexes
// every contained hash value in the partial-hash maps: the value is split
// with SplitHash and inserted under each resulting part, at that part's
// position, in the map set matching the hash kind.
//
// NOTE(review): the partial-hash maps are updated after the embedded
// MapHashes call has returned, with no lock taken in this method — confirm
// whether callers serialise access.
func (m *MapStorage) MapHashes(hash ch.ImageHash) {
m.basicMapStorage.MapHashes(hash)
for _, hash := range hash.Hashes {
_, partialHashes := m.getCurrentHashes(hash.Kind)
for i, partialHash := range SplitHash(hash.Hash) {
partialHashes[i][partialHash] = Insert(partialHashes[i][partialHash], hash.Hash)
for i, partialHash := range ch.SplitHash(hash.Hash) {
partialHashes[i][partialHash] = ch.Insert(partialHashes[i][partialHash], hash.Hash)
}
}
}
func (m *MapStorage) DecodeHashes(hashes *SavedHashes) error {
func (m *MapStorage) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil {
return nil
}
@ -117,7 +121,7 @@ func (m *MapStorage) DecodeHashes(hashes *SavedHashes) error {
return nil
}
func NewMapStorage() (HashStorage, error) {
func NewMapStorage() (ch.HashStorage, error) {
storage := &MapStorage{
basicMapStorage: basicMapStorage{
@ -125,9 +129,9 @@ func NewMapStorage() (HashStorage, error) {
ids: IDMap{
ids: []IDs{},
},
aHashes: []bmHash{},
dHashes: []bmHash{},
pHashes: []bmHash{},
aHashes: []ch.SavedHash{},
dHashes: []ch.SavedHash{},
pHashes: []ch.SavedHash{},
},
partialAHash: newPartialHash(),
partialDHash: newPartialHash(),
@ -149,9 +153,9 @@ func newPartialHash() [8]map[uint8][]uint64 {
}
}
func mapPartialHashes(hashes []bmHash, partialHashMap [8]map[uint8][]uint64) {
func mapPartialHashes(hashes []ch.SavedHash, partialHashMap [8]map[uint8][]uint64) {
for _, savedHash := range hashes {
for i, partialHash := range SplitHash(savedHash.Hash.Hash) {
for i, partialHash := range ch.SplitHash(savedHash.Hash.Hash) {
partialHashMap[i][partialHash] = append(partialHashMap[i][partialHash], savedHash.Hash.Hash)
}
}

View File

@ -1,4 +1,4 @@
package ch
package storage
import (
"context"
@ -8,6 +8,7 @@ import (
"log"
"math/bits"
ch "gitea.narnian.us/lordwelch/comic-hasher"
_ "modernc.org/sqlite"
)
@ -26,19 +27,19 @@ type sqliteStorage struct {
idExists *sql.Stmt
}
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) (map[ID][]ID, error) {
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash ch.Hash) (map[ch.ID][]ch.ID, error) {
if statement == nil {
statement = s.hashExactMatchStatement
}
hashes := map[ID][]ID{}
hashes := map[ch.ID][]ch.ID{}
rows, err := statement.Query(hash.Kind, int64(hash.Hash))
if err != nil {
return hashes, err
}
for rows.Next() {
var (
id ID
foundID ID
id ch.ID
foundID ch.ID
)
err = rows.Scan(&foundID.Domain, &foundID.ID, &id.Domain, &id.ID)
if err != nil {
@ -51,24 +52,24 @@ func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) (map[ID]
return hashes, nil
}
func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max int, hash Hash) ([]Result, error) {
func (s *sqliteStorage) findPartialHashes(tl ch.TimeLog, statement *sql.Stmt, max int, hash ch.Hash) ([]ch.Result, error) {
if statement == nil {
statement = s.hashPartialMatchStatement
}
hashResults := []Result{}
hashResults := []ch.Result{}
rows, err := statement.Query(hash.Kind, int64(hash.Hash))
if err != nil {
return hashResults, err
}
results := map[SavedHash][]ID{}
results := map[ch.SavedHash][]ch.ID{}
for rows.Next() {
var (
tmpHash int64
sqlHash = SavedHash{
Hash: Hash{Kind: hash.Kind},
sqlHash = ch.SavedHash{
Hash: ch.Hash{Kind: hash.Kind},
}
id ID
id ch.ID
)
err = rows.Scan(&sqlHash.ID.Domain, &sqlHash.ID.ID, &tmpHash, &id.Domain, &id.ID)
if err != nil {
@ -79,7 +80,7 @@ func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max i
results[sqlHash] = append(results[sqlHash], id)
}
for sqlHash, ids := range results {
res := Result{
res := ch.Result{
Hash: sqlHash.Hash,
ID: sqlHash.ID,
Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash.Hash),
@ -134,12 +135,12 @@ func (s *sqliteStorage) createIndexes() error {
return nil
}
func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
func (s *sqliteStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var (
foundMatches []Result
tl timeLog
foundMatches []ch.Result
tl ch.TimeLog
)
tl.resetTime()
tl.ResetTime()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes {
@ -148,7 +149,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
return foundMatches, err
}
for id, equivalentIDs := range idlist {
foundMatches = append(foundMatches, Result{
foundMatches = append(foundMatches, ch.Result{
Hash: hash,
ID: id,
Distance: 0,
@ -157,7 +158,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
}
}
tl.logTime("Search Exact")
tl.LogTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
@ -170,7 +171,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
if err != nil {
return foundMatches, err
}
tl.logTime(fmt.Sprintf("Search partial %v", hash.Kind))
tl.LogTime(fmt.Sprintf("Search partial %v", hash.Kind))
for _, hash := range results {
if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched {
@ -185,7 +186,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
return foundMatches, nil
}
func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) {
func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ch.ImageHash) {
var err error
insertHash := tx.Stmt(s.insertHash)
insertID := tx.Stmt(s.insertID)
@ -234,7 +235,7 @@ func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) {
}
}
}
func (s *sqliteStorage) MapHashes(hash ImageHash) {
func (s *sqliteStorage) MapHashes(hash ch.ImageHash) {
tx, err := s.db.BeginTx(context.Background(), nil)
if err != nil {
panic(err)
@ -246,7 +247,7 @@ func (s *sqliteStorage) MapHashes(hash ImageHash) {
}
}
func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
func (s *sqliteStorage) DecodeHashes(hashes *ch.SavedHashes) error {
return nil
err := s.dropIndexes()
if err != nil {
@ -285,8 +286,8 @@ func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
}
for _, savedHash := range hashes.Hashes {
s.mapHashes(tx, ImageHash{
Hashes: []Hash{savedHash.Hash},
s.mapHashes(tx, ch.ImageHash{
Hashes: []ch.Hash{savedHash.Hash},
ID: savedHash.ID,
})
}
@ -302,8 +303,8 @@ func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
return nil
}
func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
hashes := SavedHashes{}
func (s *sqliteStorage) EncodeHashes() (*ch.SavedHashes, error) {
hashes := ch.SavedHashes{}
tx, err := s.db.Begin()
if err != nil {
return &hashes, err
@ -315,7 +316,7 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
}
for rows.Next() {
var (
hash SavedHash
hash ch.SavedHash
tmpHash int64
)
err = rows.Scan(&hash.Hash.Kind, &tmpHash, &hash.ID.Domain, &hash.ID.ID)
@ -331,11 +332,11 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
}
var (
previousEid int64 = -1
ids []ID
ids []ch.ID
)
for rows.Next() {
var (
id ID
id ch.ID
newEid int64
)
err = rows.Scan(&newEid, &id.Domain, &id.Domain)
@ -348,14 +349,14 @@ func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
if len(ids) > 1 {
hashes.IDs = append(hashes.IDs, ids)
}
ids = make([]ID, 0)
ids = make([]ch.ID, 0)
}
ids = append(ids, id)
}
return &hashes, nil
}
func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
func (s *sqliteStorage) AssociateIDs(newIDs []ch.NewIDs) error {
tx, err := s.db.BeginTx(context.Background(), nil)
if err != nil {
panic(err)
@ -397,21 +398,21 @@ func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
return nil
}
func (s *sqliteStorage) GetIDs(id ID) IDList {
var ids []ID
// GetIDs runs the prepared ID-match statement for id and returns every row it
// yields, converted with ch.ToIDList.
//
// Any failure (running the query, scanning a row, or an error reported by the
// row iterator) results in a nil return, matching how query and scan errors
// were already handled. The result set is always closed before returning so
// that an early exit does not leave the underlying connection in use.
func (s *sqliteStorage) GetIDs(id ch.ID) ch.IDList {
	rows, err := s.idMatchStatement.Query(id.Domain, id.ID)
	if err != nil {
		return nil
	}
	defer rows.Close()

	var ids []ch.ID
	for rows.Next() {
		// Use a distinct name so the looked-up id parameter is not shadowed.
		var match ch.ID
		if err = rows.Scan(&match.Domain, &match.ID); err != nil {
			return nil
		}
		ids = append(ids, match)
	}
	// Next returning false can mean an iteration error, not just end of data.
	if rows.Err() != nil {
		return nil
	}
	return ch.ToIDList(ids)
}
func (s *sqliteStorage) PrepareStatements() error {
@ -480,7 +481,7 @@ func (s *sqliteStorage) PrepareStatements() error {
return nil
}
func NewSqliteStorage(db, path string) (HashStorage, error) {
func NewSqliteStorage(db, path string) (ch.HashStorage, error) {
sqlite := &sqliteStorage{}
sqlDB, err := sql.Open(db, fmt.Sprintf("file://%s?_pragma=cache_size(-200000)&_pragma=busy_timeout(500)&_pragma=hard_heap_limit(1073741824)&_pragma=journal_mode(wal)&_pragma=soft_heap_limit(314572800)", path))
if err != nil {

View File

@ -1,6 +1,6 @@
//go:build cgo && !gokrazy
package ch
package storage
import (
_ "github.com/mattn/go-sqlite3"

View File

@ -1,6 +1,6 @@
//go:build !cgo && !gokrazy
package ch
package storage
import (
_ "github.com/ncruces/go-sqlite3/driver"

View File

@ -1,12 +1,13 @@
//go:build !gokrazy
package ch
package storage
import (
"errors"
"fmt"
"math/bits"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
"gonum.org/v1/gonum/spatial/vptree"
)
@ -15,14 +16,14 @@ type VPTree struct {
aTree *vptree.Tree
dTree *vptree.Tree
pTree *vptree.Tree
ids map[ID]*[]ID
ids map[ch.ID]*[]ch.ID
aHashes []vptree.Comparable // temporary, only used for vptree creation
dHashes []vptree.Comparable // temporary, only used for vptree creation
pHashes []vptree.Comparable // temporary, only used for vptree creation
}
type VPHash struct {
SavedHash
ch.SavedHash
}
func (h *VPHash) Distance(c vptree.Comparable) float64 {
@ -33,22 +34,22 @@ func (h *VPHash) Distance(c vptree.Comparable) float64 {
return float64(bits.OnesCount64(h.Hash.Hash ^ h2.Hash.Hash))
}
func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
func (v *VPTree) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var (
matches []Result
exactMatches []Result
tl timeLog
matches []ch.Result
exactMatches []ch.Result
tl ch.TimeLog
)
tl.resetTime()
defer tl.logTime("Search Complete")
tl.ResetTime()
defer tl.LogTime("Search Complete")
for _, hash := range hashes {
results := vptree.NewDistKeeper(float64(max))
currentTree := v.getCurrentTree(hash.Kind)
currentTree.NearestSet(results, &VPHash{SavedHash{Hash: hash}})
currentTree.NearestSet(results, &VPHash{ch.SavedHash{Hash: hash}})
mappedIds := map[*[]ID]bool{}
mappedIds := map[*[]ch.ID]bool{}
for _, result := range results.Heap {
storedHash := result.Comparable.(*VPHash)
ids := v.ids[storedHash.ID]
@ -57,14 +58,14 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
}
mappedIds[ids] = true
if result.Dist == 0 {
exactMatches = append(exactMatches, Result{
exactMatches = append(exactMatches, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
EquivalentIDs: *v.ids[storedHash.ID],
})
} else {
matches = append(matches, Result{
matches = append(matches, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
@ -93,11 +94,11 @@ func (v *VPTree) getCurrentTree(kind goimagehash.Kind) *vptree.Tree {
panic("Unknown hash type: " + kind.String())
}
func (v *VPTree) MapHashes(ImageHash) {
// MapHashes is not supported by the VPTree storage. The argument is ignored
// and the call always panics with "Not Implemented".
func (v *VPTree) MapHashes(ch.ImageHash) {
panic("Not Implemented")
}
func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
func (v *VPTree) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil {
return nil
}
@ -120,13 +121,13 @@ func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
v.pHashes = append(v.pHashes, &VPHash{savedHash})
}
if savedHash.ID == (ID{}) {
if savedHash.ID == (ch.ID{}) {
fmt.Println("Empty ID detected")
panic(savedHash)
}
// All known equal IDs are already mapped we can add any missing ones from hashes
if _, ok := v.ids[savedHash.ID]; !ok {
v.ids[savedHash.ID] = &[]ID{savedHash.ID}
v.ids[savedHash.ID] = &[]ch.ID{savedHash.ID}
}
}
@ -144,23 +145,23 @@ func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
}
return nil
}
func (v *VPTree) EncodeHashes() (*SavedHashes, error) {
return &SavedHashes{}, errors.New("Not Implemented")
// EncodeHashes is unsupported for the VPTree storage: it always returns a
// pointer to an empty SavedHashes together with a "Not Implemented" error.
func (v *VPTree) EncodeHashes() (*ch.SavedHashes, error) {
	empty := new(ch.SavedHashes)
	return empty, errors.New("Not Implemented")
}
func (v *VPTree) AssociateIDs(newIDs []NewIDs) error {
// AssociateIDs is unsupported for the VPTree storage. newIDs is ignored and a
// "Not Implemented" error is always returned.
func (v *VPTree) AssociateIDs(newIDs []ch.NewIDs) error {
return errors.New("Not Implemented")
}
func (v *VPTree) GetIDs(id ID) IDList {
// GetIDs returns the list stored in v.ids for id, converted by ToIDList, or
// nil when id has no entry.
func (v *VPTree) GetIDs(id ch.ID) ch.IDList {
ids, found := v.ids[id]
if !found {
return nil
}
return ToIDList(*ids)
return ch.ToIDList(*ids)
}
func NewVPStorage() (HashStorage, error) {
func NewVPStorage() (ch.HashStorage, error) {
var err error
v := &VPTree{
aHashes: []vptree.Comparable{},

View File

@ -0,0 +1,13 @@
//go:build gokrazy
package storage
import (
"errors"
ch "gitea.narnian.us/lordwelch/comic-hasher"
)
// NewVPStorage is the stand-in used when building with the gokrazy tag. It
// never creates a storage: the result is always nil together with a
// "VPTree not available" error.
func NewVPStorage() (ch.HashStorage, error) {
	var none ch.HashStorage
	return none, errors.New("VPTree not available")
}

View File

@ -5,17 +5,17 @@ import (
"time"
)
type timeLog struct {
// TimeLog holds the timing state used by ResetTime and LogTime to report how
// long successive steps take.
type TimeLog struct {
total time.Duration // cleared by ResetTime; presumably the running sum of logged intervals — confirm against LogTime
last time.Time // time of the most recent ResetTime or LogTime call
}
func (t *timeLog) resetTime() {
// ResetTime starts a fresh measurement: the stored total is cleared and the
// reference time is set to the current time.
func (t *TimeLog) ResetTime() {
	t.last, t.total = time.Now(), 0
}
func (t *timeLog) logTime(log string) {
func (t *TimeLog) LogTime(log string) {
now := time.Now()
diff := now.Sub(t.last)
t.last = now