Update internal hash storage
This commit is contained in:
parent
6452f2e50d
commit
130b7dec4a
262
BasicMap.go
262
BasicMap.go
@ -14,155 +14,215 @@ import (
|
||||
type basicMapStorage struct {
|
||||
hashMutex *sync.RWMutex
|
||||
|
||||
ids map[ID]*[]ID
|
||||
hashes [3][]structHash
|
||||
ids map[ID]*[]ID
|
||||
aHashes []SavedHash
|
||||
dHashes []SavedHash
|
||||
pHashes []SavedHash
|
||||
}
|
||||
|
||||
type structHash struct {
|
||||
hash uint64
|
||||
ids *[]ID
|
||||
}
|
||||
// atleast must have a read lock before using
|
||||
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
|
||||
matchingHashes := make([]Result, 0, 20) // hope that we don't need more
|
||||
|
||||
func (b *basicMapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
|
||||
hashType := int(hashKind) - 1
|
||||
matchingHashes := make([]Result, 0, 100) // hope that we don't need all of them
|
||||
b.hashMutex.RLock()
|
||||
defer b.hashMutex.RUnlock()
|
||||
for _, storedHash := range b.hashes[hashType] {
|
||||
distance := bits.OnesCount64(searchHash ^ storedHash.hash)
|
||||
mappedIds := map[*[]ID]bool{}
|
||||
for _, storedHash := range *b.getCurrentHashes(kind) {
|
||||
distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash)
|
||||
if distance <= maxDistance {
|
||||
matchingHashes = append(matchingHashes, Result{ToIDList(*storedHash.ids), distance, Hash{storedHash.hash, hashKind}})
|
||||
ids := b.ids[storedHash.ID]
|
||||
if mappedIds[ids] {
|
||||
continue
|
||||
}
|
||||
mappedIds[ids] = true
|
||||
matchingHashes = append(matchingHashes, Result{ToIDList(*b.ids[storedHash.ID]), distance, storedHash.Hash})
|
||||
}
|
||||
}
|
||||
return matchingHashes
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
var foundMatches []Result
|
||||
resetTime()
|
||||
defer logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
|
||||
var (
|
||||
foundMatches []Result
|
||||
tl timeLog
|
||||
)
|
||||
tl.resetTime()
|
||||
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
|
||||
b.hashMutex.RLock()
|
||||
defer b.hashMutex.RUnlock()
|
||||
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
for _, hash := range hashes {
|
||||
hashType := int(hash.Kind) - 1
|
||||
b.hashMutex.RLock()
|
||||
index, hashFound := b.findHash(hashType, hash.Hash)
|
||||
if hashFound {
|
||||
foundMatches = append(foundMatches, Result{
|
||||
Distance: 0,
|
||||
Hash: hash,
|
||||
IDs: ToIDList(*b.hashes[hashType][index].ids),
|
||||
})
|
||||
mappedIds := map[*[]ID]bool{}
|
||||
|
||||
index, count := b.findHash(hash)
|
||||
if count > 0 {
|
||||
for _, storedHash := range (*b.getCurrentHashes(hash.Kind))[index : index+count] {
|
||||
ids := b.ids[storedHash.ID]
|
||||
if mappedIds[ids] {
|
||||
continue
|
||||
}
|
||||
mappedIds[ids] = true
|
||||
|
||||
foundMatches = append(foundMatches, Result{
|
||||
Distance: 0,
|
||||
Hash: storedHash.Hash,
|
||||
IDs: ToIDList(*b.ids[storedHash.ID]),
|
||||
})
|
||||
}
|
||||
}
|
||||
b.hashMutex.RUnlock()
|
||||
|
||||
}
|
||||
|
||||
logTime("Search Exact")
|
||||
// If we have exact matches don't bother with other matches
|
||||
if len(foundMatches) > 0 && exactOnly {
|
||||
return foundMatches, nil
|
||||
}
|
||||
tl.logTime("Search Exact")
|
||||
|
||||
return foundMatches, nil
|
||||
}
|
||||
|
||||
foundHashes := make(map[uint64]struct{})
|
||||
totalPartialHashes := 0
|
||||
|
||||
for _, hash := range hashes {
|
||||
for _, match := range b.Atleast(hash.Kind, max, hash.Hash) {
|
||||
_, alreadyMatched := foundHashes[match.Hash.Hash]
|
||||
if alreadyMatched {
|
||||
continue
|
||||
}
|
||||
foundHashes[match.Hash.Hash] = struct{}{}
|
||||
foundMatches = append(foundMatches, match)
|
||||
}
|
||||
foundMatches = append(foundMatches, b.atleast(hash.Kind, max, hash.Hash)...)
|
||||
|
||||
}
|
||||
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
|
||||
return foundMatches, nil
|
||||
}
|
||||
|
||||
// findHash must have a read lock before using
|
||||
func (b *basicMapStorage) findHash(hashType int, hash uint64) (int, bool) {
|
||||
return slices.BinarySearchFunc(b.hashes[hashType], hash, func(e structHash, t uint64) int {
|
||||
return cmp.Compare(e.hash, t)
|
||||
})
|
||||
// getCurrentHashes must have a read lock before using
|
||||
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]SavedHash {
|
||||
if kind == goimagehash.AHash {
|
||||
return &b.aHashes
|
||||
}
|
||||
if kind == goimagehash.DHash {
|
||||
return &b.dHashes
|
||||
}
|
||||
if kind == goimagehash.PHash {
|
||||
return &b.pHashes
|
||||
}
|
||||
panic("Unknown hash type: " + kind.String())
|
||||
}
|
||||
|
||||
// insertHash will take a write lock if the hash is not found
|
||||
func (b *basicMapStorage) insertHash(hashType int, hash uint64, ids *[]ID) {
|
||||
b.hashMutex.RLock()
|
||||
index, hashFound := b.findHash(hashType, hash)
|
||||
b.hashMutex.RUnlock()
|
||||
if hashFound {
|
||||
return
|
||||
// findHash must have a read lock before using
|
||||
// return value is index, count
|
||||
// if count < 1 then no results were found
|
||||
func (b *basicMapStorage) findHash(hash Hash) (int, int) {
|
||||
currentHashes := *b.getCurrentHashes(hash.Kind)
|
||||
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing SavedHash, target Hash) int {
|
||||
return cmp.Compare(existing.Hash.Hash, target.Hash)
|
||||
})
|
||||
if !found {
|
||||
return index, 0
|
||||
}
|
||||
count := 0
|
||||
for i := index + 1; i < len(currentHashes) && currentHashes[i].Hash.Hash == hash.Hash; i++ {
|
||||
count++
|
||||
}
|
||||
return index, count
|
||||
}
|
||||
|
||||
// insertHash must already have a lock
|
||||
func (b *basicMapStorage) insertHash(hash Hash, id ID) {
|
||||
currentHashes := b.getCurrentHashes(hash.Kind)
|
||||
index, count := b.findHash(hash)
|
||||
max := index + count
|
||||
for ; index < max; index++ {
|
||||
if (*currentHashes)[index].ID == id {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
*currentHashes = slices.Insert(*currentHashes, index, SavedHash{hash, id})
|
||||
if _, mapped := b.ids[id]; !mapped {
|
||||
b.ids[id] = &[]ID{id}
|
||||
}
|
||||
b.hashMutex.Lock()
|
||||
b.hashes[hashType] = slices.Insert(b.hashes[hashType], index, structHash{hash, ids})
|
||||
b.hashMutex.Unlock()
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) MapHashes(hash ImageHash) {
|
||||
b.hashMutex.Lock()
|
||||
defer b.hashMutex.Unlock()
|
||||
for _, ih := range hash.Hashes {
|
||||
var (
|
||||
hashType = int(ih.Kind) - 1
|
||||
)
|
||||
b.hashMutex.RLock()
|
||||
ids, ok := b.ids[hash.ID]
|
||||
b.hashMutex.RUnlock()
|
||||
if !ok {
|
||||
b.hashMutex.Lock()
|
||||
ids = &[]ID{hash.ID}
|
||||
b.ids[hash.ID] = ids
|
||||
b.hashMutex.Unlock()
|
||||
}
|
||||
|
||||
b.insertHash(hashType, ih.Hash, ids)
|
||||
b.insertHash(ih, hash.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// DecodeHashes should already have a lock
|
||||
// DecodeHashes must already have a lock
|
||||
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
|
||||
for hashType, sourceHashes := range hashes.Hashes {
|
||||
b.hashes[hashType] = make([]structHash, len(sourceHashes))
|
||||
for savedHash, idlistLocation := range sourceHashes {
|
||||
b.hashes[hashType] = append(b.hashes[hashType], structHash{savedHash, &hashes.IDs[idlistLocation]})
|
||||
for _, id := range hashes.IDs[idlistLocation] {
|
||||
b.ids[id] = &hashes.IDs[idlistLocation]
|
||||
}
|
||||
b.ids = make(map[ID]*[]ID, len(hashes.Hashes))
|
||||
|
||||
// Initialize all the known equal IDs
|
||||
for _, ids := range hashes.IDs {
|
||||
for _, id := range ids {
|
||||
b.ids[id] = &ids
|
||||
}
|
||||
}
|
||||
for hashType := range b.hashes {
|
||||
slices.SortFunc(b.hashes[hashType], func(a, b structHash) int {
|
||||
return cmp.Compare(a.hash, b.hash)
|
||||
})
|
||||
|
||||
slices.SortFunc(hashes.Hashes, func(existing, target SavedHash) int {
|
||||
return cmp.Or(
|
||||
cmp.Compare(existing.Hash.Kind, target.Hash.Kind),
|
||||
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
|
||||
cmp.Compare(existing.ID.Domain, target.ID.Domain),
|
||||
cmp.Compare(existing.ID.ID, target.ID.ID),
|
||||
)
|
||||
})
|
||||
|
||||
// Assume they are probably fairly equally split between hash types
|
||||
b.aHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
|
||||
b.dHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
|
||||
b.pHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
|
||||
for _, savedHash := range hashes.Hashes {
|
||||
|
||||
if savedHash.Hash.Kind == goimagehash.AHash {
|
||||
b.aHashes = append(b.aHashes, savedHash)
|
||||
}
|
||||
if savedHash.Hash.Kind == goimagehash.DHash {
|
||||
b.dHashes = append(b.dHashes, savedHash)
|
||||
}
|
||||
if savedHash.Hash.Kind == goimagehash.PHash {
|
||||
b.pHashes = append(b.pHashes, savedHash)
|
||||
}
|
||||
|
||||
if savedHash.ID == (ID{}) {
|
||||
fmt.Println("Empty ID detected")
|
||||
panic(savedHash)
|
||||
}
|
||||
// All known equal IDs are already mapped we can add any missing ones from hashes
|
||||
if _, ok := b.ids[savedHash.ID]; !ok {
|
||||
b.ids[savedHash.ID] = &[]ID{savedHash.ID}
|
||||
}
|
||||
}
|
||||
|
||||
hashCmp := func(existing, target SavedHash) int {
|
||||
return cmp.Or(
|
||||
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
|
||||
cmp.Compare(existing.ID.Domain, target.ID.Domain),
|
||||
cmp.Compare(existing.ID.ID, target.ID.ID),
|
||||
)
|
||||
}
|
||||
slices.SortFunc(b.aHashes, hashCmp)
|
||||
slices.SortFunc(b.dHashes, hashCmp)
|
||||
slices.SortFunc(b.pHashes, hashCmp)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// EncodeHashes should already have a lock
|
||||
func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
|
||||
hashes := SavedHashes{
|
||||
Hashes: [3]map[uint64]int{
|
||||
make(map[uint64]int),
|
||||
make(map[uint64]int),
|
||||
make(map[uint64]int),
|
||||
},
|
||||
savedHashes := SavedHashes{
|
||||
Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
|
||||
}
|
||||
idmap := map[*[]ID]int{}
|
||||
|
||||
// Only keep groups >1 as they will be mapped in SavedHashes.Hashes
|
||||
for _, ids := range b.ids {
|
||||
if _, ok := idmap[ids]; ok {
|
||||
continue
|
||||
if len(*ids) > 1 {
|
||||
savedHashes.IDs = append(savedHashes.IDs, *ids)
|
||||
}
|
||||
idmap[ids] = len(hashes.IDs)
|
||||
hashes.IDs = append(hashes.IDs, *ids)
|
||||
}
|
||||
|
||||
for hashType, hashToID := range b.hashes {
|
||||
for _, hash := range hashToID {
|
||||
hashes.Hashes[hashType][hash.hash] = idmap[hash.ids]
|
||||
}
|
||||
}
|
||||
return hashes, nil
|
||||
savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...)
|
||||
savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...)
|
||||
savedHashes.Hashes = append(savedHashes.Hashes, b.pHashes...)
|
||||
|
||||
return savedHashes, nil
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
|
||||
@ -171,7 +231,7 @@ func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
|
||||
ids, found := b.ids[newid.OldID]
|
||||
b.hashMutex.RUnlock()
|
||||
if !found {
|
||||
msg := "No IDs belonging to " + string(newid.OldID.Domain) + " exist on this server"
|
||||
msg := "ID not found on this server"
|
||||
return errors.New(msg)
|
||||
}
|
||||
b.hashMutex.Lock()
|
||||
@ -195,7 +255,9 @@ func NewBasicMapStorage() (HashStorage, error) {
|
||||
storage := &basicMapStorage{
|
||||
hashMutex: &sync.RWMutex{},
|
||||
ids: make(map[ID]*[]ID),
|
||||
hashes: [3][]structHash{},
|
||||
aHashes: []SavedHash{},
|
||||
dHashes: []SavedHash{},
|
||||
pHashes: []SavedHash{},
|
||||
}
|
||||
return storage, nil
|
||||
}
|
||||
|
113
CHDB.go
113
CHDB.go
@ -1,108 +1,11 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
type CHDB struct {
|
||||
comicvinePath string
|
||||
sql *sql.DB
|
||||
deleteExisting bool
|
||||
}
|
||||
|
||||
func OpenCHDB(path string, comicvinePath string, deleteExisting bool) (CHDB, error) {
|
||||
path, _ = filepath.Abs(path)
|
||||
err := os.MkdirAll(filepath.Dir(path), 0o755)
|
||||
if err != nil {
|
||||
panic("Unable to create directory " + filepath.Dir(path))
|
||||
}
|
||||
println(fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path))
|
||||
sql, err := sql.Open("sqlite", fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path))
|
||||
if err != nil {
|
||||
return CHDB{comicvinePath, sql, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
|
||||
}
|
||||
err = sql.Ping()
|
||||
if err != nil {
|
||||
return CHDB{comicvinePath, sql, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
|
||||
}
|
||||
_, err = sql.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS paths(
|
||||
path STRING PRIMARY KEY
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS bad_urls(
|
||||
url STRING PRIMARY KEY
|
||||
);
|
||||
`)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Failed to create table: %w", err)
|
||||
}
|
||||
return CHDB{comicvinePath, sql, deleteExisting}, err
|
||||
}
|
||||
|
||||
func (s CHDB) PathHashed(path string) bool {
|
||||
path, _ = filepath.Rel(s.comicvinePath, path)
|
||||
dbPath := ""
|
||||
|
||||
if s.deleteExisting {
|
||||
_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath)
|
||||
|
||||
if dbPath == path {
|
||||
os.Remove(filepath.Join(s.comicvinePath, path))
|
||||
}
|
||||
return dbPath == path
|
||||
}
|
||||
count := 0
|
||||
_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", path).Scan(&count)
|
||||
return count > 0
|
||||
}
|
||||
|
||||
func (s CHDB) PathDownloaded(path string) bool {
|
||||
relPath, _ := filepath.Rel(s.comicvinePath, path)
|
||||
|
||||
count := 0
|
||||
_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", relPath).Scan(&count)
|
||||
if count != 1 {
|
||||
f, err := os.Open(path)
|
||||
if err == nil {
|
||||
defer f.Close()
|
||||
}
|
||||
return !os.IsNotExist(err)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (s CHDB) AddPath(path string) {
|
||||
relPath, _ := filepath.Rel(s.comicvinePath, path)
|
||||
_, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", relPath)
|
||||
if err != nil {
|
||||
log.Println(fmt.Errorf("Failed to insert %v into paths: %w", relPath, err))
|
||||
}
|
||||
|
||||
if s.deleteExisting {
|
||||
_ = os.Remove(path)
|
||||
_ = RmdirP(filepath.Dir(path))
|
||||
}
|
||||
}
|
||||
|
||||
func (s CHDB) CheckURL(url string) bool {
|
||||
count := 0
|
||||
_ = s.sql.QueryRow("SELECT count(url) FROM bad_urls where url=?", url).Scan(&count)
|
||||
return count > 0
|
||||
}
|
||||
|
||||
func (s CHDB) AddURL(url string) {
|
||||
_, err := s.sql.Exec("INSERT INTO bad_urls VALUES(?) ON CONFLICT DO NOTHING", url)
|
||||
if err != nil {
|
||||
log.Println(fmt.Errorf("Failed to insert %v into bad_urls: %w", url, err))
|
||||
}
|
||||
}
|
||||
|
||||
func (s CHDB) Close() error {
|
||||
return s.sql.Close()
|
||||
type CHDB interface {
|
||||
// OpenCHDB(path string, comicvinePath string, deleteExisting bool) (CHDB, error)
|
||||
PathHashed(path string) bool
|
||||
PathDownloaded(path string) bool
|
||||
AddPath(path string)
|
||||
CheckURL(url string) bool
|
||||
AddURL(url string)
|
||||
Close() error
|
||||
}
|
||||
|
177
CHDB_bolt.go
Normal file
177
CHDB_bolt.go
Normal file
@ -0,0 +1,177 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
type CHDBBolt struct {
|
||||
comicvinePath string
|
||||
db *bolt.DB
|
||||
deleteExisting bool
|
||||
}
|
||||
|
||||
func OpenCHDBBolt(path string, comicvinePath string, deleteExisting bool) (CHDBBolt, error) {
|
||||
path, _ = filepath.Abs(path)
|
||||
err := os.MkdirAll(filepath.Dir(path), 0o755)
|
||||
if err != nil {
|
||||
panic("Unable to create directory " + filepath.Dir(path))
|
||||
}
|
||||
db, err := bolt.Open(path, 0o644, nil)
|
||||
if err != nil {
|
||||
return CHDBBolt{comicvinePath, db, deleteExisting}, fmt.Errorf("failed to open database: %w", err)
|
||||
}
|
||||
err = db.Update(func(tx *bolt.Tx) error {
|
||||
|
||||
_, err = tx.CreateBucketIfNotExists([]byte("paths"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create bucket %v: %w", "paths", err)
|
||||
}
|
||||
_, err = tx.CreateBucketIfNotExists([]byte("bad_urls"))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create bucket %v: %w", "paths", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
db.Close()
|
||||
return CHDBBolt{comicvinePath, db, deleteExisting}, fmt.Errorf("failed to init database: %w", err)
|
||||
}
|
||||
|
||||
return CHDBBolt{comicvinePath, db, deleteExisting}, nil
|
||||
}
|
||||
|
||||
func (c CHDBBolt) Import(paths []string, bad_urls []string) {
|
||||
slices.Sort(paths)
|
||||
slices.Sort(bad_urls)
|
||||
c.db.Update(func(tx *bolt.Tx) error {
|
||||
p := tx.Bucket([]byte("paths"))
|
||||
b := tx.Bucket([]byte("bad_urls"))
|
||||
|
||||
for _, path := range paths {
|
||||
p.Put([]byte(path), []byte{})
|
||||
}
|
||||
for _, url := range bad_urls {
|
||||
b.Put([]byte(url), []byte{})
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
func (c CHDBBolt) Dump() (paths []string, bad_urls []string) {
|
||||
|
||||
c.db.View(func(tx *bolt.Tx) error {
|
||||
p := tx.Bucket([]byte("paths"))
|
||||
b := tx.Bucket([]byte("bad_urls"))
|
||||
paths = make([]string, 0, p.Inspect().KeyN)
|
||||
bad_urls = make([]string, 0, b.Inspect().KeyN)
|
||||
b.ForEach(func(k, v []byte) error {
|
||||
bad_urls = append(bad_urls, string(k)+"")
|
||||
return nil
|
||||
})
|
||||
p.ForEach(func(k, v []byte) error {
|
||||
paths = append(paths, string(k)+"")
|
||||
return nil
|
||||
})
|
||||
return nil
|
||||
})
|
||||
return paths, bad_urls
|
||||
}
|
||||
|
||||
func (c CHDBBolt) PathHashed(path string) bool {
|
||||
path, _ = filepath.Rel(c.comicvinePath, path)
|
||||
|
||||
tx, err := c.db.Begin(false)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer tx.Rollback()
|
||||
b := tx.Bucket([]byte("paths"))
|
||||
dbRes := b.Get([]byte(path))
|
||||
if dbRes != nil {
|
||||
if c.deleteExisting {
|
||||
os.Remove(filepath.Join(c.comicvinePath, path))
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (c CHDBBolt) PathDownloaded(path string) bool {
|
||||
relPath, _ := filepath.Rel(c.comicvinePath, path)
|
||||
|
||||
tx, err := c.db.Begin(false)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer tx.Rollback()
|
||||
b := tx.Bucket([]byte("paths"))
|
||||
dbRes := b.Get([]byte(relPath))
|
||||
if dbRes == nil {
|
||||
|
||||
f, err := os.Open(path)
|
||||
if err == nil {
|
||||
defer f.Close()
|
||||
}
|
||||
return !os.IsNotExist(err)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (c CHDBBolt) AddPath(path string) {
|
||||
relPath, _ := filepath.Rel(c.comicvinePath, path)
|
||||
|
||||
tx, err := c.db.Begin(true)
|
||||
if err != nil {
|
||||
c.db.Logger().Errorf("Failed to open transaction: %v", err)
|
||||
}
|
||||
defer tx.Rollback()
|
||||
b := tx.Bucket([]byte("paths"))
|
||||
|
||||
err = b.Put([]byte(relPath), []byte{})
|
||||
if err != nil {
|
||||
log.Println(fmt.Errorf("Failed to insert %v (%v) into paths: %w", path, relPath, err))
|
||||
}
|
||||
tx.Commit()
|
||||
if c.deleteExisting {
|
||||
_ = os.Remove(path)
|
||||
_ = RmdirP(filepath.Dir(path))
|
||||
}
|
||||
}
|
||||
|
||||
func (c CHDBBolt) CheckURL(url string) bool {
|
||||
|
||||
tx, err := c.db.Begin(true)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer tx.Rollback()
|
||||
b := tx.Bucket([]byte("bad_urls"))
|
||||
return b.Get([]byte(url)) != nil
|
||||
}
|
||||
|
||||
func (c CHDBBolt) AddURL(url string) {
|
||||
|
||||
tx, err := c.db.Begin(true)
|
||||
if err != nil {
|
||||
c.db.Logger().Errorf("Failed to open transaction: %v", err)
|
||||
}
|
||||
defer tx.Rollback()
|
||||
b := tx.Bucket([]byte("bad_urls"))
|
||||
|
||||
err = b.Put([]byte(url), []byte{})
|
||||
if err != nil {
|
||||
log.Println(fmt.Errorf("Failed to insert %v into bad_urls: %w", url, err))
|
||||
}
|
||||
tx.Commit()
|
||||
}
|
||||
|
||||
func (c CHDBBolt) Close() error {
|
||||
return c.db.Close()
|
||||
}
|
142
CHDB_sqlite.go
Normal file
142
CHDB_sqlite.go
Normal file
@ -0,0 +1,142 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
type CHDBSqlite struct {
|
||||
comicvinePath string
|
||||
sql *sql.DB
|
||||
deleteExisting bool
|
||||
}
|
||||
|
||||
func OpenCHDBSqlite(path string, comicvinePath string, deleteExisting bool) (CHDBSqlite, error) {
|
||||
path, _ = filepath.Abs(path)
|
||||
err := os.MkdirAll(filepath.Dir(path), 0o755)
|
||||
if err != nil {
|
||||
panic("Unable to create directory " + filepath.Dir(path))
|
||||
}
|
||||
println(fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path))
|
||||
sql, err := sql.Open("sqlite", fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path))
|
||||
if err != nil {
|
||||
return CHDBSqlite{comicvinePath, sql, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
|
||||
}
|
||||
err = sql.Ping()
|
||||
if err != nil {
|
||||
return CHDBSqlite{comicvinePath, sql, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
|
||||
}
|
||||
_, err = sql.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS paths(
|
||||
path STRING PRIMARY KEY
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS bad_urls(
|
||||
url STRING PRIMARY KEY
|
||||
);
|
||||
`)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Failed to create table: %w", err)
|
||||
}
|
||||
return CHDBSqlite{comicvinePath, sql, deleteExisting}, err
|
||||
}
|
||||
|
||||
func (s CHDBSqlite) Dump() (paths []string, bad_urls []string) {
|
||||
|
||||
rows, err := s.sql.Query("SELECT path from paths")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var value string
|
||||
err = rows.Scan(&value)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
paths = append(paths, value)
|
||||
}
|
||||
rows.Close()
|
||||
|
||||
rows, err = s.sql.Query("SELECT url from bad_urls")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var value string
|
||||
err = rows.Scan(&value)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
bad_urls = append(bad_urls, value)
|
||||
}
|
||||
rows.Close()
|
||||
return paths, bad_urls
|
||||
}
|
||||
|
||||
func (s CHDBSqlite) PathHashed(path string) bool {
|
||||
path, _ = filepath.Rel(s.comicvinePath, path)
|
||||
dbPath := ""
|
||||
|
||||
if s.deleteExisting {
|
||||
_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath)
|
||||
|
||||
if dbPath == path {
|
||||
os.Remove(filepath.Join(s.comicvinePath, path))
|
||||
}
|
||||
return dbPath == path
|
||||
}
|
||||
count := 0
|
||||
_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", path).Scan(&count)
|
||||
return count > 0
|
||||
}
|
||||
|
||||
func (s CHDBSqlite) PathDownloaded(path string) bool {
|
||||
relPath, _ := filepath.Rel(s.comicvinePath, path)
|
||||
|
||||
count := 0
|
||||
_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", relPath).Scan(&count)
|
||||
if count != 1 {
|
||||
f, err := os.Open(path)
|
||||
if err == nil {
|
||||
defer f.Close()
|
||||
}
|
||||
return !os.IsNotExist(err)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (s CHDBSqlite) AddPath(path string) {
|
||||
relPath, _ := filepath.Rel(s.comicvinePath, path)
|
||||
_, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", relPath)
|
||||
if err != nil {
|
||||
log.Println(fmt.Errorf("Failed to insert %v into paths: %w", relPath, err))
|
||||
}
|
||||
|
||||
if s.deleteExisting {
|
||||
_ = os.Remove(path)
|
||||
_ = RmdirP(filepath.Dir(path))
|
||||
}
|
||||
}
|
||||
|
||||
func (s CHDBSqlite) CheckURL(url string) bool {
|
||||
count := 0
|
||||
_ = s.sql.QueryRow("SELECT count(url) FROM bad_urls where url=?", url).Scan(&count)
|
||||
return count > 0
|
||||
}
|
||||
|
||||
func (s CHDBSqlite) AddURL(url string) {
|
||||
_, err := s.sql.Exec("INSERT INTO bad_urls VALUES(?) ON CONFLICT DO NOTHING", url)
|
||||
if err != nil {
|
||||
log.Println(fmt.Errorf("Failed to insert %v into bad_urls: %w", url, err))
|
||||
}
|
||||
}
|
||||
|
||||
func (s CHDBSqlite) Close() error {
|
||||
return s.sql.Close()
|
||||
}
|
31
cmd/bolt-migrate/main.go
Normal file
31
cmd/bolt-migrate/main.go
Normal file
@ -0,0 +1,31 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("cv path: %s Sqlite path: %s Bolt path: %s\n", os.Args[1], os.Args[2], os.Args[3])
|
||||
sql, err := ch.OpenCHDBSqlite(os.Args[2], os.Args[1], false)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
db, err := ch.OpenCHDBBolt(os.Args[3], os.Args[1], false)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
paths, bad_urls := sql.Dump()
|
||||
fmt.Printf("Dumped %d %d", len(paths), len(bad_urls))
|
||||
db.Import(paths, bad_urls)
|
||||
// for _, path := range paths {
|
||||
// db.AddPath(filepath.Join(os.Args[1], path))
|
||||
// }
|
||||
// for _, url := range bad_urls {
|
||||
// db.AddURL(url)
|
||||
// }
|
||||
sql.Close()
|
||||
db.Close()
|
||||
}
|
@ -29,10 +29,9 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/disintegration/imaging"
|
||||
"github.com/kr/pretty"
|
||||
|
||||
"github.com/vmihailenco/msgpack/v5"
|
||||
|
||||
_ "golang.org/x/image/tiff"
|
||||
_ "golang.org/x/image/vp8"
|
||||
_ "golang.org/x/image/vp8l"
|
||||
@ -57,23 +56,6 @@ type Server struct {
|
||||
onlyHashNewIDs bool
|
||||
}
|
||||
|
||||
type Format int
|
||||
|
||||
const (
|
||||
Msgpack = iota + 1
|
||||
JSON
|
||||
)
|
||||
|
||||
var formatNames = map[Format]string{
|
||||
JSON: "json",
|
||||
Msgpack: "msgpack",
|
||||
}
|
||||
|
||||
var formatValues = map[string]Format{
|
||||
"json": JSON,
|
||||
"msgpack": Msgpack,
|
||||
}
|
||||
|
||||
var bufPool = &sync.Pool{
|
||||
New: func() any {
|
||||
// The Pool's New function should generally only return pointer
|
||||
@ -83,22 +65,6 @@ var bufPool = &sync.Pool{
|
||||
},
|
||||
}
|
||||
|
||||
func (f Format) String() string {
|
||||
if name, known := formatNames[f]; known {
|
||||
return name
|
||||
}
|
||||
return "Unknown"
|
||||
}
|
||||
|
||||
func (f *Format) Set(s string) error {
|
||||
if format, known := formatValues[strings.ToLower(s)]; known {
|
||||
*f = format
|
||||
} else {
|
||||
return fmt.Errorf("Unknown format: %d", f)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type Storage int
|
||||
|
||||
const (
|
||||
@ -141,8 +107,6 @@ func (f *Storage) Set(s string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type Encoder func(any) ([]byte, error)
|
||||
type Decoder func([]byte, interface{}) error
|
||||
type CVOpts struct {
|
||||
downloadCovers bool
|
||||
APIKey string
|
||||
@ -158,7 +122,7 @@ type Opts struct {
|
||||
sqlitePath string
|
||||
loadEmbeddedHashes bool
|
||||
saveEmbeddedHashes bool
|
||||
format Format
|
||||
format ch.Format
|
||||
hashesPath string
|
||||
storageType Storage
|
||||
onlyHashNewIDs bool
|
||||
@ -169,7 +133,7 @@ type Opts struct {
|
||||
}
|
||||
|
||||
func main() {
|
||||
opts := Opts{format: Msgpack, storageType: BasicMap} // flag is weird
|
||||
opts := Opts{format: ch.Msgpack, storageType: BasicMap} // flag is weird
|
||||
wd, err := os.Getwd()
|
||||
fmt.Println(err)
|
||||
if err != nil {
|
||||
@ -208,13 +172,11 @@ func main() {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
// opts.onlyHashNewIDs = opts.onlyHashNewIDs || opts.deleteHashedImages
|
||||
if opts.cv.downloadCovers {
|
||||
if opts.cv.APIKey == "" {
|
||||
log.Fatal("No ComicVine API Key provided")
|
||||
}
|
||||
}
|
||||
opts.cv.thumbOnly = opts.cv.thumbOnly || (opts.onlyHashNewIDs && (opts.deleteHashedImages || !opts.cv.keepDownloaded))
|
||||
opts.path, _ = filepath.Abs(opts.path)
|
||||
if opts.hashesPath == "" {
|
||||
opts.hashesPath = filepath.Join(opts.path, "hashes.gz")
|
||||
@ -230,9 +192,7 @@ func main() {
|
||||
opts.cv.path, _ = filepath.Abs(opts.cv.path)
|
||||
pretty.Log(opts)
|
||||
|
||||
if !opts.cv.keepDownloaded && opts.onlyHashNewIDs {
|
||||
panic("You need to fix your -cv-keep-downloaded and -only-hash-new-ids flags")
|
||||
}
|
||||
// TODO: Fix options
|
||||
|
||||
startServer(opts)
|
||||
}
|
||||
@ -553,9 +513,7 @@ func (s *Server) hasher(workerID int, done func(int)) {
|
||||
}
|
||||
|
||||
select {
|
||||
case <-s.Context.Done():
|
||||
log.Println("Recieved quit")
|
||||
return
|
||||
// TODO: Check channel pipelines
|
||||
case s.mappingQueue <- hash:
|
||||
default:
|
||||
}
|
||||
@ -589,59 +547,12 @@ func (s *Server) reader(workerID int, done func(i int)) {
|
||||
NewOnly: s.onlyHashNewIDs,
|
||||
}
|
||||
select {
|
||||
case <-s.Context.Done():
|
||||
log.Println("Recieved quit")
|
||||
return
|
||||
case s.hashingQueue <- im:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// EncodeHashes must have a lock to s.hashMutex
|
||||
func (s *Server) EncodeHashes(format Format) ([]byte, error) {
|
||||
var encoder Encoder
|
||||
switch format {
|
||||
case Msgpack:
|
||||
encoder = msgpack.Marshal
|
||||
case JSON:
|
||||
encoder = json.Marshal
|
||||
default:
|
||||
return nil, fmt.Errorf("Unknown format: %v", format)
|
||||
}
|
||||
hashes, err := s.hashes.EncodeHashes()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return encoder(hashes)
|
||||
}
|
||||
|
||||
// DecodeHashes must have a lock to s.hashMutex
|
||||
func (s *Server) DecodeHashes(format Format, hashes []byte) error {
|
||||
var decoder Decoder
|
||||
switch format {
|
||||
case Msgpack:
|
||||
decoder = msgpack.Unmarshal
|
||||
case JSON:
|
||||
decoder = json.Unmarshal
|
||||
|
||||
default:
|
||||
return fmt.Errorf("Unknown format: %v", format)
|
||||
}
|
||||
loadedHashes := ch.SavedHashes{}
|
||||
err := decoder(hashes, &loadedHashes)
|
||||
if err != nil || len(loadedHashes.Hashes[0]) == 0 {
|
||||
fmt.Println("Failed to load hashes, checking if they are old hashes", format, ":", err)
|
||||
oldHashes := make(ch.OldSavedHashes)
|
||||
if err = decoder(hashes, &oldHashes); err != nil {
|
||||
return err
|
||||
}
|
||||
loadedHashes = ch.ConvertSavedHashes(oldHashes)
|
||||
}
|
||||
|
||||
return s.hashes.DecodeHashes(loadedHashes)
|
||||
}
|
||||
|
||||
func (s *Server) HashLocalImages(opts Opts) {
|
||||
if opts.coverPath == "" {
|
||||
return
|
||||
@ -700,28 +611,17 @@ func initializeStorage(opts Opts) (ch.HashStorage, error) {
|
||||
return nil, errors.New("Unknown storage type provided")
|
||||
}
|
||||
|
||||
func loadHashes(opts Opts, decodeHashes func(format Format, hashes []byte) error) {
|
||||
func loadHashes(opts Opts) *ch.SavedHashes {
|
||||
var hashes []byte
|
||||
if opts.loadEmbeddedHashes && len(ch.Hashes) != 0 {
|
||||
fmt.Println("Loading embedded hashes")
|
||||
var err error
|
||||
hashes := ch.Hashes
|
||||
hashes = ch.Hashes
|
||||
if gr, err := gzip.NewReader(bytes.NewReader(ch.Hashes)); err == nil {
|
||||
hashes, err = io.ReadAll(gr)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to read embedded hashes: %s", err))
|
||||
}
|
||||
}
|
||||
|
||||
var format Format
|
||||
for _, format = range []Format{Msgpack, JSON} {
|
||||
if err = decodeHashes(format, hashes); err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to decode embedded hashes: %s", err))
|
||||
}
|
||||
fmt.Printf("Loaded embedded %s hashes\n", format)
|
||||
} else {
|
||||
fmt.Println("Loading saved hashes")
|
||||
if f, err := os.Open(opts.hashesPath); err == nil {
|
||||
@ -731,64 +631,67 @@ func loadHashes(opts Opts, decodeHashes func(format Format, hashes []byte) error
|
||||
} else {
|
||||
_, _ = f.Seek(0, io.SeekStart)
|
||||
}
|
||||
hashes, err := io.ReadAll(buf)
|
||||
hashes, err = io.ReadAll(buf)
|
||||
f.Close()
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to load hashes from disk: %s", err))
|
||||
}
|
||||
|
||||
var format Format
|
||||
for _, format = range []Format{Msgpack, JSON} {
|
||||
if err = decodeHashes(format, hashes); err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to decode hashes from disk: %s", err))
|
||||
}
|
||||
fmt.Printf("Loaded %s hashes from %q\n", format, opts.hashesPath)
|
||||
} else {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
log.Println("No saved hashes to load")
|
||||
} else {
|
||||
log.Println("Unable to load saved hashes", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
format ch.Format
|
||||
loadedHashes *ch.SavedHashes
|
||||
err error
|
||||
)
|
||||
for _, format = range []ch.Format{ch.Msgpack, ch.JSON} {
|
||||
if loadedHashes, err = ch.DecodeHashes(format, hashes); errors.Is(err, ch.DecodeError) {
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to decode hashes: %s", err))
|
||||
}
|
||||
fmt.Printf("Loaded %s hashes\n", format)
|
||||
return loadedHashes
|
||||
}
|
||||
func saveHashes(opts Opts, encodeHashes func(format Format) ([]byte, error)) {
|
||||
if !opts.loadEmbeddedHashes || opts.saveEmbeddedHashes {
|
||||
encodedHashes, err := encodeHashes(opts.format)
|
||||
if err == nil {
|
||||
if f, err := os.Create(opts.hashesPath); err == nil {
|
||||
failed := false
|
||||
gzw := gzip.NewWriter(f)
|
||||
_, err := gzw.Write(encodedHashes)
|
||||
if err != nil {
|
||||
log.Println("Failed to write hashes", err)
|
||||
failed = true
|
||||
}
|
||||
err = gzw.Close()
|
||||
if err != nil {
|
||||
log.Println("Failed to write hashes", err)
|
||||
failed = true
|
||||
}
|
||||
err = f.Close()
|
||||
if err != nil {
|
||||
log.Println("Failed to write hashes", err)
|
||||
failed = true
|
||||
}
|
||||
if !failed {
|
||||
log.Println("Successfully saved hashes")
|
||||
}
|
||||
} else {
|
||||
log.Println("Unabled to save hashes", err)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("Unable to encode hashes as %v: %v", opts.format, err)
|
||||
}
|
||||
func saveHashes(opts Opts, hashes ch.SavedHashes) error {
|
||||
if opts.loadEmbeddedHashes && !opts.saveEmbeddedHashes {
|
||||
return errors.New("refusing to save embedded hashes")
|
||||
}
|
||||
|
||||
encodedHashes, err := ch.EncodeHashes(hashes, opts.format)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to encode hashes as %v: %w", opts.format, err)
|
||||
}
|
||||
f, err := os.Create(opts.hashesPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unabled to save hashes: %w", err)
|
||||
}
|
||||
|
||||
gzw := gzip.NewWriter(f)
|
||||
|
||||
if _, err = gzw.Write(encodedHashes); err != nil {
|
||||
return fmt.Errorf("failed to write hashes: %w", err)
|
||||
}
|
||||
|
||||
if err = gzw.Close(); err != nil {
|
||||
return fmt.Errorf("failed to write hashes: %w", err)
|
||||
}
|
||||
|
||||
if err = f.Close(); err != nil {
|
||||
return fmt.Errorf("failed to write hashes: %w", err)
|
||||
}
|
||||
log.Println("Successfully saved hashes")
|
||||
return nil
|
||||
}
|
||||
|
||||
func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, server Server) {
|
||||
@ -803,7 +706,6 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
|
||||
}
|
||||
|
||||
if chdb.PathHashed(path.Dest) {
|
||||
// log.Println(path.Dest, "File has already been hashed, it may not be saved in the hashes file because we currently don't save any hashes if we've crashed")
|
||||
continue
|
||||
}
|
||||
var (
|
||||
@ -832,7 +734,7 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
|
||||
}
|
||||
continue // skip this image
|
||||
}
|
||||
chdb.AddPath(path.Dest) // Add to sqlite db and remove file if opts.deleteHashedImages is true
|
||||
chdb.AddPath(path.Dest) // Add to db and remove file if opts.deleteHashedImages is true
|
||||
|
||||
im := ch.Im{
|
||||
Im: i,
|
||||
@ -845,6 +747,7 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
|
||||
}
|
||||
|
||||
func startServer(opts Opts) {
|
||||
imaging.SetMaxProcs(2)
|
||||
if opts.cpuprofile != "" {
|
||||
f, err := os.Create(opts.cpuprofile)
|
||||
if err != nil {
|
||||
@ -904,31 +807,37 @@ func startServer(opts Opts) {
|
||||
mwg.Add(1)
|
||||
go server.mapper(func() { log.Println("Mapper 0 completed"); mwg.Done() })
|
||||
|
||||
// server.DecodeHashes would normally need a write lock
|
||||
// DecodeHashes would normally need a write lock
|
||||
// nothing else has been started yet so we don't need one
|
||||
loadHashes(opts, server.DecodeHashes)
|
||||
if err := server.hashes.DecodeHashes(*loadHashes(opts)); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
server.HashLocalImages(opts)
|
||||
chdb, err := ch.OpenCHDB(filepath.Join(opts.path, "ch.sqlite"), opts.cv.path, opts.deleteHashedImages)
|
||||
chdb, err := ch.OpenCHDBBolt(filepath.Join(opts.path, "chdb.bolt"), opts.cv.path, opts.deleteHashedImages)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
log.Println("Init downloaders")
|
||||
dwg := sync.WaitGroup{}
|
||||
dcwg := sync.WaitGroup{}
|
||||
finishedDownloadQueue := make(chan cv.Download, 1)
|
||||
go downloadProcessor(chdb, opts, finishedDownloadQueue, server)
|
||||
dcwg.Add(1)
|
||||
go func() {
|
||||
defer dcwg.Done()
|
||||
downloadProcessor(chdb, opts, finishedDownloadQueue, server)
|
||||
}()
|
||||
|
||||
if opts.cv.downloadCovers {
|
||||
dwg.Add(1)
|
||||
imageTypes := []string{}
|
||||
if opts.cv.thumbOnly {
|
||||
imageTypes = append(imageTypes, "thumb_url")
|
||||
}
|
||||
if opts.cv.originalOnly {
|
||||
} else if opts.cv.originalOnly {
|
||||
imageTypes = append(imageTypes, "original_url")
|
||||
}
|
||||
cvdownloader := cv.NewCVDownloader(server.Context, bufPool, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.keepDownloaded, opts.cv.hashDownloaded, finishedDownloadQueue)
|
||||
cvdownloader := cv.NewCVDownloader(server.Context, bufPool, opts.onlyHashNewIDs, server.hashes.GetIDs, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.keepDownloaded, opts.cv.hashDownloaded, finishedDownloadQueue)
|
||||
go func() {
|
||||
defer dwg.Done()
|
||||
cv.DownloadCovers(cvdownloader)
|
||||
@ -954,7 +863,8 @@ func startServer(opts Opts) {
|
||||
close(server.readerQueue)
|
||||
log.Println("waiting on readers")
|
||||
rwg.Wait()
|
||||
for range server.readerQueue {
|
||||
for dw := range server.readerQueue {
|
||||
fmt.Println("Skipping read", dw)
|
||||
}
|
||||
|
||||
log.Println("waiting on downloaders")
|
||||
@ -962,28 +872,39 @@ func startServer(opts Opts) {
|
||||
|
||||
log.Println("waiting on downloader")
|
||||
close(finishedDownloadQueue)
|
||||
for range finishedDownloadQueue {
|
||||
dcwg.Wait() // Wait for the download processor to finish
|
||||
for dw := range finishedDownloadQueue {
|
||||
fmt.Println("Skipping download", dw.IssueID)
|
||||
}
|
||||
|
||||
// close(server.hashingQueue) // Closed by downloadProcessor
|
||||
log.Println("waiting on hashers")
|
||||
hwg.Wait()
|
||||
for range server.hashingQueue {
|
||||
for dw := range server.hashingQueue {
|
||||
fmt.Println("Skipping hashing", dw.ID)
|
||||
}
|
||||
|
||||
close(server.mappingQueue)
|
||||
log.Println("waiting on mapper")
|
||||
mwg.Wait()
|
||||
for range server.mappingQueue {
|
||||
for dw := range server.mappingQueue {
|
||||
fmt.Println("Skipping mapping", dw.ID)
|
||||
}
|
||||
|
||||
close(server.signalQueue)
|
||||
for range server.signalQueue {
|
||||
for dw := range server.signalQueue {
|
||||
fmt.Println("Skipping", dw)
|
||||
}
|
||||
|
||||
_ = chdb.Close()
|
||||
|
||||
// server.EncodeHashes would normally need a read lock
|
||||
// the server has been stopped so it's not needed here
|
||||
saveHashes(opts, server.EncodeHashes)
|
||||
hashes, err := server.hashes.EncodeHashes()
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Failed to save hashes: %w", err))
|
||||
}
|
||||
if err = saveHashes(opts, hashes); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
98
cv/cv.go
98
cv/cv.go
@ -73,14 +73,16 @@ type CVDownloader struct {
|
||||
Context context.Context
|
||||
FinishedDownloadQueue chan Download
|
||||
|
||||
fileList []string
|
||||
totalResults int
|
||||
imageWG sync.WaitGroup
|
||||
downloadQueue chan *CVResult
|
||||
imageDownloads chan download
|
||||
notFound chan download
|
||||
chdb ch.CHDB
|
||||
bufPool *sync.Pool
|
||||
fileList []string
|
||||
totalResults int
|
||||
imageWG sync.WaitGroup
|
||||
downloadQueue chan *CVResult
|
||||
imageDownloads chan download
|
||||
notFound chan download
|
||||
chdb ch.CHDB
|
||||
bufPool *sync.Pool
|
||||
get_id func(id ch.ID) ch.IDList
|
||||
only_hash_new_ids bool
|
||||
}
|
||||
|
||||
var (
|
||||
@ -128,8 +130,8 @@ func (c *CVDownloader) loadIssues(filename string) (*CVResult, error) {
|
||||
return tmp, nil
|
||||
}
|
||||
|
||||
func Get(ctx context.Context, url string) (*http.Response, error, func()) {
|
||||
ctx, cancel := context.WithTimeout(ctx, time.Second*20)
|
||||
func Get(url string) (*http.Response, error, func()) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*20)
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, err, cancel
|
||||
@ -144,7 +146,7 @@ func getOffset(name string) int {
|
||||
}
|
||||
|
||||
// updateIssues c.downloadQueue must not be closed before this function has returned
|
||||
func (c *CVDownloader) updateIssues() {
|
||||
func (c *CVDownloader) updateIssues() int {
|
||||
base_url, err := url.Parse("https://comicvine.gamespot.com/api/issues/?sort=date_added,id:asc&format=json&field_list=id,image,volume")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
@ -183,7 +185,7 @@ func (c *CVDownloader) updateIssues() {
|
||||
for offset = 0; offset <= c.totalResults; offset += 100 {
|
||||
index := offset / 100
|
||||
if c.hasQuit() {
|
||||
return
|
||||
return offset - 100
|
||||
}
|
||||
if index < len(c.fileList) {
|
||||
if getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
|
||||
@ -195,7 +197,7 @@ func (c *CVDownloader) updateIssues() {
|
||||
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
|
||||
if index != len(c.fileList)-1 {
|
||||
log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
|
||||
return
|
||||
return offset - 100
|
||||
}
|
||||
log.Println("Deleting the last page to detect new comics")
|
||||
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
|
||||
@ -231,7 +233,7 @@ func (c *CVDownloader) updateIssues() {
|
||||
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
|
||||
if index != len(c.fileList)-1 {
|
||||
log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
|
||||
return
|
||||
return offset - 100
|
||||
}
|
||||
log.Println("Deleting the last page to detect new comics")
|
||||
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
|
||||
@ -255,17 +257,17 @@ func (c *CVDownloader) updateIssues() {
|
||||
|
||||
select {
|
||||
case <-c.Context.Done(): // allows us to return immediately even during a timeout
|
||||
return
|
||||
return offset - 100
|
||||
case <-time.After(10 * time.Second):
|
||||
}
|
||||
resp, err, cancelDownloadCTX := Get(c.Context, URI.String())
|
||||
resp, err, cancelDownloadCTX := Get(URI.String())
|
||||
if err != nil {
|
||||
cancelDownloadCTX()
|
||||
if retry(URI.String(), err) {
|
||||
continue
|
||||
}
|
||||
// Fail and let comic-hasher try the whole thing again later
|
||||
return
|
||||
return offset - 100
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
cancelDownloadCTX()
|
||||
@ -277,7 +279,7 @@ func (c *CVDownloader) updateIssues() {
|
||||
select {
|
||||
case <-c.Context.Done(): // allows us to return immediately even during a timeout
|
||||
_ = resp.Body.Close()
|
||||
return
|
||||
return offset - 100
|
||||
case <-time.After(1 * time.Hour):
|
||||
}
|
||||
}
|
||||
@ -295,7 +297,7 @@ func (c *CVDownloader) updateIssues() {
|
||||
if retry(URI.String(), err) {
|
||||
continue
|
||||
}
|
||||
return
|
||||
return offset - 100
|
||||
}
|
||||
cancelDownloadCTX()
|
||||
if issue.NumberOfTotalResults > c.totalResults {
|
||||
@ -303,15 +305,13 @@ func (c *CVDownloader) updateIssues() {
|
||||
}
|
||||
prev = -1
|
||||
failCount = 0
|
||||
// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
|
||||
select {
|
||||
case <-c.Context.Done():
|
||||
return
|
||||
case c.downloadQueue <- issue:
|
||||
}
|
||||
c.fileList = ch.Insert(c.fileList, fmt.Sprintf("cv-%v.json", offset))
|
||||
log.Printf("Downloaded %s/cv-%v.json", c.JSONPath, offset)
|
||||
}
|
||||
return offset
|
||||
}
|
||||
|
||||
type download struct {
|
||||
@ -328,16 +328,9 @@ func (c *CVDownloader) start_downloader() {
|
||||
go func() {
|
||||
log.Println("starting downloader", i)
|
||||
for dl := range c.imageDownloads {
|
||||
if c.hasQuit() {
|
||||
c.imageWG.Done()
|
||||
continue // We must continue so that c.imageWG will complete otherwise it will hang forever
|
||||
}
|
||||
if dl.finished {
|
||||
|
||||
select {
|
||||
case <-c.Context.Done():
|
||||
c.imageWG.Done()
|
||||
continue
|
||||
case c.FinishedDownloadQueue <- Download{
|
||||
URL: dl.url,
|
||||
Dest: dl.dest,
|
||||
@ -348,7 +341,7 @@ func (c *CVDownloader) start_downloader() {
|
||||
continue
|
||||
}
|
||||
dir := filepath.Dir(dl.dest)
|
||||
resp, err, cancelDownload := Get(c.Context, dl.url)
|
||||
resp, err, cancelDownload := Get(dl.url)
|
||||
if err != nil {
|
||||
cancelDownload()
|
||||
log.Println("Failed to download", dl.volumeID, "/", dl.issueID, dl.url, err)
|
||||
@ -449,9 +442,16 @@ func (c *CVDownloader) downloadImages() {
|
||||
}
|
||||
imageURLs := []i{{issue.Image.IconURL, "icon_url"}, {issue.Image.MediumURL, "medium_url"}, {issue.Image.ScreenURL, "screen_url"}, {issue.Image.ScreenLargeURL, "screen_large_url"}, {issue.Image.SmallURL, "small_url"}, {issue.Image.SuperURL, "super_url"}, {issue.Image.ThumbURL, "thumb_url"}, {issue.Image.TinyURL, "tiny_url"}, {issue.Image.OriginalURL, "original_url"}}
|
||||
for _, image := range imageURLs {
|
||||
if c.hasQuit() {
|
||||
return
|
||||
if strings.HasSuffix(image.url, "6373148-blank.png") {
|
||||
c.notFound <- download{
|
||||
url: image.url,
|
||||
offset: list.Offset,
|
||||
volumeID: issue.Volume.ID,
|
||||
issueID: issue.ID,
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
|
||||
continue
|
||||
}
|
||||
@ -469,6 +469,7 @@ func (c *CVDownloader) downloadImages() {
|
||||
issueID: issue.ID,
|
||||
finished: true,
|
||||
}
|
||||
continue
|
||||
}
|
||||
ext := strings.TrimSuffix(strings.ToLower(path.Ext(uri.Path)), "~original")
|
||||
if ext == "" || (len(ext) > 4 && !slices.Contains([]string{".avif", ".webp", ".tiff", ".heif"}, ext)) {
|
||||
@ -477,7 +478,11 @@ func (c *CVDownloader) downloadImages() {
|
||||
dir := filepath.Join(c.ImagePath, strconv.Itoa(issue.Volume.ID), strconv.Itoa(issue.ID))
|
||||
path := filepath.Join(dir, image.name+ext)
|
||||
|
||||
if c.chdb.PathDownloaded(path) {
|
||||
ids := c.get_id(ch.ID{
|
||||
Domain: ch.ComicVine,
|
||||
ID: strconv.Itoa(issue.ID),
|
||||
})
|
||||
if c.chdb.PathDownloaded(path) || c.only_hash_new_ids && len(ids) > 0 {
|
||||
if _, err = os.Stat(path); c.SendExistingImages && err == nil {
|
||||
// We don't add to the count of added as these should be processed immediately
|
||||
log.Printf("Sending Existing image %v/%v %v", issue.Volume.ID, issue.ID, path)
|
||||
@ -516,8 +521,6 @@ func (c *CVDownloader) downloadImages() {
|
||||
t := 10 * time.Second
|
||||
log.Println("Waiting for", t, "at offset", list.Offset, "had to wait for", waited)
|
||||
select {
|
||||
case <-c.Context.Done(): // allows us to return immediately even during a timeout
|
||||
return
|
||||
case <-time.After(t):
|
||||
}
|
||||
} else {
|
||||
@ -543,9 +546,6 @@ list:
|
||||
}
|
||||
for _, issue := range list.Results {
|
||||
for _, url := range []string{issue.Image.IconURL, issue.Image.MediumURL, issue.Image.ScreenURL, issue.Image.ScreenLargeURL, issue.Image.SmallURL, issue.Image.SuperURL, issue.Image.ThumbURL, issue.Image.TinyURL, issue.Image.OriginalURL} {
|
||||
if c.hasQuit() {
|
||||
return ErrQuit
|
||||
}
|
||||
if c.chdb.CheckURL(url) {
|
||||
indexesToRemove = append(indexesToRemove, i)
|
||||
if err := os.Remove(filepath.Join(c.JSONPath, jsonFile)); err != nil {
|
||||
@ -590,7 +590,7 @@ func (c *CVDownloader) cleanDirs() {
|
||||
})
|
||||
}
|
||||
|
||||
func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
|
||||
func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, only_hash_new_ids bool, get_id func(id ch.ID) ch.IDList, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
|
||||
return &CVDownloader{
|
||||
Context: ctx,
|
||||
JSONPath: filepath.Join(workPath, "_json"),
|
||||
@ -602,6 +602,8 @@ func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, chdb ch.CHDB, work
|
||||
KeepDownloadedImages: keepDownloadedImages,
|
||||
ImageTypes: imageTypes,
|
||||
chdb: chdb,
|
||||
get_id: get_id,
|
||||
only_hash_new_ids: only_hash_new_ids,
|
||||
}
|
||||
}
|
||||
|
||||
@ -609,9 +611,9 @@ func DownloadCovers(c *CVDownloader) {
|
||||
var (
|
||||
err error
|
||||
)
|
||||
c.downloadQueue = make(chan *CVResult, 100) // This is just json it shouldn't take up much more than 122 MB
|
||||
c.imageDownloads = make(chan download, 1) // These are just URLs should only take a few MB
|
||||
c.notFound = make(chan download, 1) // Same here
|
||||
c.downloadQueue = make(chan *CVResult) // This is just json it shouldn't take up much more than 122 MB
|
||||
c.imageDownloads = make(chan download, 1) // These are just URLs should only take a few MB
|
||||
c.notFound = make(chan download, 1) // Same here
|
||||
os.MkdirAll(c.JSONPath, 0o777)
|
||||
f, _ := os.Create(filepath.Join(c.ImagePath, ".keep"))
|
||||
f.Close()
|
||||
@ -643,7 +645,7 @@ func DownloadCovers(c *CVDownloader) {
|
||||
dwg.Done()
|
||||
}()
|
||||
|
||||
c.updateIssues()
|
||||
offset := c.updateIssues()
|
||||
issueCount := len(c.fileList) * 100
|
||||
|
||||
log.Println("Number of issues", issueCount, " expected:", c.totalResults)
|
||||
@ -654,15 +656,19 @@ func DownloadCovers(c *CVDownloader) {
|
||||
log.Println("Waiting for downloaders")
|
||||
dwg.Wait()
|
||||
close(c.imageDownloads)
|
||||
for range c.imageDownloads {
|
||||
for dw := range c.imageDownloads {
|
||||
fmt.Println("Skipping cv download", dw.issueID)
|
||||
}
|
||||
close(c.notFound)
|
||||
for range c.notFound {
|
||||
for dw := range c.notFound {
|
||||
fmt.Println("Skipping not found", dw.issueID)
|
||||
}
|
||||
|
||||
// We drain this at the end because we need to wait for the images to download
|
||||
for range c.downloadQueue {
|
||||
for dw := range c.downloadQueue {
|
||||
fmt.Println("Skipping page download", dw.Offset)
|
||||
}
|
||||
|
||||
log.Println("Completed downloading images")
|
||||
log.Println("Last offset", offset)
|
||||
}
|
||||
|
38
go.mod
38
go.mod
@ -1,25 +1,23 @@
|
||||
module gitea.narnian.us/lordwelch/comic-hasher
|
||||
|
||||
go 1.22.0
|
||||
go 1.23.0
|
||||
|
||||
toolchain go1.22.5
|
||||
toolchain go1.24.0
|
||||
|
||||
require (
|
||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250113012632-72c18ebad3c6
|
||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d
|
||||
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09
|
||||
github.com/fmartingr/go-comicinfo/v2 v2.0.2
|
||||
github.com/kr/pretty v0.1.0
|
||||
github.com/mattn/go-sqlite3 v1.14.24
|
||||
github.com/mholt/archiver/v4 v4.0.0-alpha.8
|
||||
github.com/ncruces/go-sqlite3 v0.22.0
|
||||
golang.org/x/image v0.23.0
|
||||
golang.org/x/text v0.21.0
|
||||
github.com/ncruces/go-sqlite3 v0.23.1
|
||||
github.com/vmihailenco/msgpack v4.0.4+incompatible
|
||||
go.etcd.io/bbolt v1.4.0
|
||||
golang.org/x/image v0.24.0
|
||||
golang.org/x/text v0.22.0
|
||||
gonum.org/v1/gonum v0.15.1
|
||||
modernc.org/sqlite v1.34.5
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/vmihailenco/msgpack/v5 v5.4.1
|
||||
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
|
||||
modernc.org/sqlite v1.35.0
|
||||
)
|
||||
|
||||
require (
|
||||
@ -28,10 +26,10 @@ require (
|
||||
github.com/bodgit/sevenzip v1.3.0 // indirect
|
||||
github.com/bodgit/windows v1.0.0 // indirect
|
||||
github.com/connesc/cipherio v0.2.1 // indirect
|
||||
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09 // indirect
|
||||
github.com/dsnet/compress v0.0.1 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/golang/mock v1.6.0 // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
@ -45,15 +43,17 @@ require (
|
||||
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.15 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/tetratelabs/wazero v1.8.2 // indirect
|
||||
github.com/tetratelabs/wazero v1.9.0 // indirect
|
||||
github.com/therootcompany/xz v1.0.1 // indirect
|
||||
github.com/ulikunitz/xz v0.5.10 // indirect
|
||||
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
|
||||
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect
|
||||
golang.org/x/sys v0.29.0 // indirect
|
||||
modernc.org/libc v1.55.3 // indirect
|
||||
modernc.org/mathutil v1.6.0 // indirect
|
||||
modernc.org/memory v1.8.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
|
||||
golang.org/x/sys v0.30.0 // indirect
|
||||
google.golang.org/appengine v1.6.8 // indirect
|
||||
google.golang.org/protobuf v1.36.5 // indirect
|
||||
modernc.org/libc v1.61.13 // indirect
|
||||
modernc.org/mathutil v1.7.1 // indirect
|
||||
modernc.org/memory v1.8.2 // indirect
|
||||
)
|
||||
|
||||
replace golang.org/x/text v0.17.0 => github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f
|
||||
|
119
go.sum
119
go.sum
@ -15,8 +15,8 @@ cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+
|
||||
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
|
||||
cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
|
||||
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
|
||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250113012632-72c18ebad3c6 h1:DqwlGXgaLjXVEio1+podh25e7q/phY02aTMsYkfryqQ=
|
||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250113012632-72c18ebad3c6/go.mod h1:q+HjeXYjflX3nk3qt74Gho8z+6MGe5lZO/Po+kiUK7E=
|
||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d h1:mFnVC/tEHk6woq6FBulwzGcuNdYn+zNhXNBILuetQJs=
|
||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d/go.mod h1:UDwa7njhbB5nzxIjHbT9Mjlve9GYn3wzxAcQax1XRvE=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
|
||||
@ -65,6 +65,10 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||
@ -73,6 +77,9 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a
|
||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
|
||||
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
|
||||
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
|
||||
@ -111,8 +118,8 @@ github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBW
|
||||
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mholt/archiver/v4 v4.0.0-alpha.8 h1:tRGQuDVPh66WCOelqe6LIGh0gwmfwxUrSSDunscGsRM=
|
||||
github.com/mholt/archiver/v4 v4.0.0-alpha.8/go.mod h1:5f7FUYGXdJWUjESffJaYR4R60VhnHxb2X3T1teMyv5A=
|
||||
github.com/ncruces/go-sqlite3 v0.22.0 h1:FkGSBhd0TY6e66k1LVhyEpA+RnG/8QkQNed5pjIk4cs=
|
||||
github.com/ncruces/go-sqlite3 v0.22.0/go.mod h1:ueXOZXYZS2OFQirCU3mHneDwJm5fGKHrtccYBeGEV7M=
|
||||
github.com/ncruces/go-sqlite3 v0.23.1 h1:zGAd76q+Tr18z/xKGatUlzBQdjR3J+rexfANUcjAgkY=
|
||||
github.com/ncruces/go-sqlite3 v0.23.1/go.mod h1:Xg3FyAZl25HcBSFmcbymdfoTqD7jRnBUmv1jSrbIjdE=
|
||||
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
|
||||
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||
github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M=
|
||||
@ -131,20 +138,21 @@ github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/tetratelabs/wazero v1.8.2 h1:yIgLR/b2bN31bjxwXHD8a3d+BogigR952csSDdLYEv4=
|
||||
github.com/tetratelabs/wazero v1.8.2/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
|
||||
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
|
||||
github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=
|
||||
github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=
|
||||
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
||||
github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8=
|
||||
github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||
github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8=
|
||||
github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
|
||||
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
|
||||
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
|
||||
github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI=
|
||||
github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
|
||||
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk=
|
||||
go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk=
|
||||
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
|
||||
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
|
||||
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
|
||||
@ -155,6 +163,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
|
||||
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
|
||||
@ -163,13 +172,13 @@ golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE
|
||||
golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
|
||||
golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
|
||||
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
|
||||
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa h1:ELnwvuAXPNtPk1TJRuGkI9fDTwym6AYBu0qzT8AcHdI=
|
||||
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
|
||||
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa h1:t2QcU6V556bFjYgu4L6C+6VrCPyJZ+eyRsABUPs1mz4=
|
||||
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa/go.mod h1:BHOTPb3L19zxehTsLoJXVaTktb06DFgmdW6Wb9s8jqk=
|
||||
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
|
||||
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68=
|
||||
golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY=
|
||||
golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
|
||||
golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
|
||||
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
||||
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||
@ -186,8 +195,9 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
|
||||
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
|
||||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM=
|
||||
golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
@ -202,7 +212,9 @@ golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLL
|
||||
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||
@ -215,8 +227,9 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
|
||||
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
|
||||
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@ -233,17 +246,23 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
|
||||
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
||||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
|
||||
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
|
||||
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
||||
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
@ -271,8 +290,9 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
|
||||
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
|
||||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY=
|
||||
golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
@ -292,6 +312,8 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
|
||||
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
|
||||
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||
google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
|
||||
google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
|
||||
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
|
||||
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
|
||||
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
|
||||
@ -312,7 +334,12 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac
|
||||
google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
||||
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
||||
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
|
||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
|
||||
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
@ -324,28 +351,28 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh
|
||||
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
|
||||
modernc.org/cc/v4 v4.21.4 h1:3Be/Rdo1fpr8GrQ7IVw9OHtplU4gWbb+wNgeoBMmGLQ=
|
||||
modernc.org/cc/v4 v4.21.4/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ=
|
||||
modernc.org/ccgo/v4 v4.19.2 h1:lwQZgvboKD0jBwdaeVCTouxhxAyN6iawF3STraAal8Y=
|
||||
modernc.org/ccgo/v4 v4.19.2/go.mod h1:ysS3mxiMV38XGRTTcgo0DQTeTmAO4oCmJl1nX9VFI3s=
|
||||
modernc.org/cc/v4 v4.24.4 h1:TFkx1s6dCkQpd6dKurBNmpo+G8Zl4Sq/ztJ+2+DEsh0=
|
||||
modernc.org/cc/v4 v4.24.4/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
|
||||
modernc.org/ccgo/v4 v4.23.16 h1:Z2N+kk38b7SfySC1ZkpGLN2vthNJP1+ZzGZIlH7uBxo=
|
||||
modernc.org/ccgo/v4 v4.23.16/go.mod h1:nNma8goMTY7aQZQNTyN9AIoJfxav4nvTnvKThAeMDdo=
|
||||
modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE=
|
||||
modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ=
|
||||
modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw=
|
||||
modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU=
|
||||
modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=
|
||||
modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=
|
||||
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
|
||||
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
|
||||
modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=
|
||||
modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=
|
||||
modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4=
|
||||
modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0=
|
||||
modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc=
|
||||
modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss=
|
||||
modernc.org/sqlite v1.34.5 h1:Bb6SR13/fjp15jt70CL4f18JIN7p7dnMExd+UFnF15g=
|
||||
modernc.org/sqlite v1.34.5/go.mod h1:YLuNmX9NKs8wRNK2ko1LW1NGYcc9FkBO69JOt1AR9JE=
|
||||
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
|
||||
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
|
||||
modernc.org/gc/v2 v2.6.3 h1:aJVhcqAte49LF+mGveZ5KPlsp4tdGdAOT4sipJXADjw=
|
||||
modernc.org/gc/v2 v2.6.3/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
|
||||
modernc.org/libc v1.61.13 h1:3LRd6ZO1ezsFiX1y+bHd1ipyEHIJKvuprv0sLTBwLW8=
|
||||
modernc.org/libc v1.61.13/go.mod h1:8F/uJWL/3nNil0Lgt1Dpz+GgkApWh04N3el3hxJcA6E=
|
||||
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
||||
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
||||
modernc.org/memory v1.8.2 h1:cL9L4bcoAObu4NkxOlKWBWtNHIsnnACGF/TbqQ6sbcI=
|
||||
modernc.org/memory v1.8.2/go.mod h1:ZbjSvMO5NQ1A2i3bWeDiVMxIorXwdClKE/0SZ+BMotU=
|
||||
modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
|
||||
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
|
||||
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
|
||||
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
|
||||
modernc.org/sqlite v1.35.0 h1:yQps4fegMnZFdphtzlfQTCNBWtS0CZv48pRpW3RFHRw=
|
||||
modernc.org/sqlite v1.35.0/go.mod h1:9cr2sicr7jIaWTBKQmAxQLfBv9LL0su4ZTEV+utt3ic=
|
||||
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
|
||||
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
|
||||
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
|
||||
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
|
||||
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
|
||||
|
66
hashing.go
66
hashing.go
@ -39,7 +39,8 @@ const (
|
||||
)
|
||||
|
||||
const (
|
||||
ComicVine Source = "comicvine.gamespot.com"
|
||||
ComicVine Source = "comicvine.gamespot.com"
|
||||
SavedHashVersion int = 2
|
||||
)
|
||||
|
||||
type Source string
|
||||
@ -78,16 +79,9 @@ type Hash struct {
|
||||
}
|
||||
|
||||
// IDList is a map of domain to ID eg IDs["comicvine.gamespot.com"] = []string{"1235"}
|
||||
// Maps are extremely expensive in go for small maps this should only be used to return info to a user no internal code should use this
|
||||
// Maps are extremely expensive in go for small maps this should only be used to return info to a user or as a map containing all IDs for a source
|
||||
type IDList map[Source][]string
|
||||
|
||||
type OldSavedHashes map[Source]map[string][3]uint64
|
||||
|
||||
type SavedHashes struct {
|
||||
IDs [][]ID
|
||||
Hashes [3]map[uint64]int
|
||||
}
|
||||
|
||||
func ToIDList(ids []ID) IDList {
|
||||
idlist := IDList{}
|
||||
for _, id := range ids {
|
||||
@ -96,10 +90,10 @@ func ToIDList(ids []ID) IDList {
|
||||
return idlist
|
||||
}
|
||||
func InsertID(ids []ID, id ID) []ID {
|
||||
index, itemFound := slices.BinarySearchFunc(ids, id, func(e ID, t ID) int {
|
||||
index, itemFound := slices.BinarySearchFunc(ids, id, func(existing ID, target ID) int {
|
||||
return cmp.Or(
|
||||
cmp.Compare(e.Domain, t.Domain),
|
||||
cmp.Compare(e.ID, t.ID),
|
||||
cmp.Compare(existing.Domain, target.Domain),
|
||||
cmp.Compare(existing.ID, target.ID),
|
||||
)
|
||||
})
|
||||
if itemFound {
|
||||
@ -107,52 +101,6 @@ func InsertID(ids []ID, id ID) []ID {
|
||||
}
|
||||
return slices.Insert(ids, index, id)
|
||||
}
|
||||
func (s *SavedHashes) InsertHash(hash Hash, id ID) {
|
||||
for i, h := range s.Hashes {
|
||||
if h == nil {
|
||||
s.Hashes[i] = make(map[uint64]int)
|
||||
}
|
||||
}
|
||||
|
||||
hashType := int(hash.Kind) - 1
|
||||
idx, hashFound := s.Hashes[hashType][hash.Hash]
|
||||
if !hashFound {
|
||||
idx = len(s.IDs)
|
||||
s.IDs = append(s.IDs, make([]ID, 0, 3))
|
||||
}
|
||||
s.IDs[idx] = InsertID(s.IDs[idx], id)
|
||||
s.Hashes[hashType][hash.Hash] = idx
|
||||
}
|
||||
|
||||
func ConvertSavedHashes(oldHashes OldSavedHashes) SavedHashes {
|
||||
t := SavedHashes{}
|
||||
idcount := 0
|
||||
for _, ids := range oldHashes {
|
||||
idcount += len(ids)
|
||||
}
|
||||
t.IDs = make([][]ID, 0, idcount)
|
||||
t.Hashes[0] = make(map[uint64]int, idcount)
|
||||
t.Hashes[1] = make(map[uint64]int, idcount)
|
||||
t.Hashes[2] = make(map[uint64]int, idcount)
|
||||
for domain, sourceHashes := range oldHashes {
|
||||
for id, hashes := range sourceHashes {
|
||||
idx := len(t.IDs)
|
||||
t.IDs = append(t.IDs, []ID{{domain, id}})
|
||||
for hashType, hash := range hashes {
|
||||
t.Hashes[hashType][hash] = idx
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("Expected number of IDs", idcount)
|
||||
idcount = 0
|
||||
for _, ids := range t.IDs {
|
||||
idcount += len(ids)
|
||||
}
|
||||
fmt.Println("length of hashes", len(t.Hashes[0])+len(t.Hashes[1])+len(t.Hashes[2]))
|
||||
fmt.Println("Length of ID lists", len(t.IDs))
|
||||
fmt.Println("Total number of IDs", idcount)
|
||||
return t
|
||||
}
|
||||
|
||||
type NewIDs struct {
|
||||
OldID ID
|
||||
@ -169,7 +117,7 @@ type HashStorage interface {
|
||||
}
|
||||
|
||||
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
|
||||
matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
|
||||
matchingHashes := make([]Match, 0, 20) // hope that we don't need all of them
|
||||
for _, storedHash := range hashes {
|
||||
distance := bits.OnesCount64(searchHash ^ storedHash)
|
||||
if distance <= maxDistance {
|
||||
|
206
map.go
206
map.go
@ -1,150 +1,156 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"fmt"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
)
|
||||
|
||||
type MapStorage struct {
|
||||
basicMapStorage
|
||||
partialHash [3][8]map[uint8][]uint64
|
||||
partialAHash [8]map[uint8][]uint64
|
||||
partialDHash [8]map[uint8][]uint64
|
||||
partialPHash [8]map[uint8][]uint64
|
||||
}
|
||||
|
||||
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
var foundMatches []Result
|
||||
var (
|
||||
foundMatches []Result
|
||||
tl timeLog
|
||||
)
|
||||
m.hashMutex.RLock()
|
||||
defer m.hashMutex.RUnlock()
|
||||
resetTime()
|
||||
defer logTime("Search Complete")
|
||||
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
for _, hash := range hashes {
|
||||
hashType := int(hash.Kind) - 1
|
||||
index, hashFound := m.findHash(hashType, hash.Hash)
|
||||
if hashFound {
|
||||
foundMatches = append(foundMatches, Result{
|
||||
Distance: 0,
|
||||
Hash: hash,
|
||||
IDs: ToIDList(*m.hashes[hashType][index].ids),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// If we have exact matches don't bother with other matches
|
||||
logTime("Search Exact")
|
||||
if len(foundMatches) > 0 && exactOnly {
|
||||
return foundMatches, nil
|
||||
}
|
||||
if exactOnly {
|
||||
return m.basicMapStorage.GetMatches(hashes, max, exactOnly)
|
||||
}
|
||||
tl.resetTime()
|
||||
defer tl.logTime("Search Complete")
|
||||
|
||||
totalPartialHashes := 0
|
||||
|
||||
for _, searchHash := range hashes {
|
||||
foundHashes := make(map[uint64]struct{})
|
||||
hashType := int(searchHash.Kind) - 1
|
||||
currentHashes, currentPartialHashes := m.getCurrentHashes(searchHash.Kind)
|
||||
potentialMatches := []uint64{}
|
||||
|
||||
for i, partialHash := range SplitHash(searchHash.Hash) {
|
||||
partialHashes := m.partialHash[hashType][i][partialHash]
|
||||
totalPartialHashes += len(partialHashes)
|
||||
for _, match := range Atleast(max, searchHash.Hash, partialHashes) {
|
||||
_, alreadyMatched := foundHashes[match.Hash]
|
||||
if index, hashFound := m.findHash(hashType, match.Hash); hashFound && !alreadyMatched {
|
||||
foundHashes[match.Hash] = struct{}{}
|
||||
foundMatches = append(foundMatches, Result{IDs: ToIDList(*m.hashes[hashType][index].ids), Distance: match.Distance, Hash: Hash{Hash: match.Hash, Kind: searchHash.Kind}})
|
||||
potentialMatches = append(potentialMatches, currentPartialHashes[i][partialHash]...)
|
||||
}
|
||||
|
||||
totalPartialHashes += len(potentialMatches)
|
||||
mappedIds := map[*[]ID]bool{}
|
||||
|
||||
for _, match := range Atleast(max, searchHash.Hash, potentialMatches) {
|
||||
matchedHash := Hash{match.Hash, searchHash.Kind}
|
||||
index, count := m.findHash(matchedHash)
|
||||
if count < 1 {
|
||||
continue
|
||||
}
|
||||
for _, storedHash := range currentHashes[index : index+count] {
|
||||
ids := m.ids[storedHash.ID]
|
||||
if mappedIds[ids] {
|
||||
continue
|
||||
}
|
||||
mappedIds[ids] = true
|
||||
|
||||
foundMatches = append(foundMatches, Result{
|
||||
Distance: 0,
|
||||
Hash: storedHash.Hash,
|
||||
IDs: ToIDList(*m.ids[storedHash.ID]),
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("Total partial hashes tested:", totalPartialHashes)
|
||||
go m.printSizes()
|
||||
return foundMatches, nil
|
||||
}
|
||||
|
||||
// getCurrentHashes must have a read lock before using
|
||||
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]SavedHash, [8]map[uint8][]uint64) {
|
||||
if kind == goimagehash.AHash {
|
||||
return m.aHashes, m.partialAHash
|
||||
}
|
||||
if kind == goimagehash.DHash {
|
||||
return m.dHashes, m.partialDHash
|
||||
}
|
||||
if kind == goimagehash.PHash {
|
||||
return m.pHashes, m.partialPHash
|
||||
}
|
||||
panic("Unknown hash type: " + kind.String())
|
||||
}
|
||||
|
||||
func (m *MapStorage) MapHashes(hash ImageHash) {
|
||||
m.basicMapStorage.MapHashes(hash)
|
||||
for _, hash := range hash.Hashes {
|
||||
hashType := int(hash.Kind) - 1
|
||||
_, partialHashes := m.getCurrentHashes(hash.Kind)
|
||||
for i, partialHash := range SplitHash(hash.Hash) {
|
||||
m.partialHash[hashType][i][partialHash] = Insert(m.partialHash[hashType][i][partialHash], hash.Hash)
|
||||
partialHashes[i][partialHash] = Insert(partialHashes[i][partialHash], hash.Hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
|
||||
for hashType, sourceHashes := range hashes.Hashes {
|
||||
m.hashes[hashType] = make([]structHash, len(sourceHashes))
|
||||
for savedHash, idlistLocation := range sourceHashes {
|
||||
m.hashes[hashType] = append(m.hashes[hashType], structHash{savedHash, &hashes.IDs[idlistLocation]})
|
||||
}
|
||||
if err := m.basicMapStorage.DecodeHashes(hashes); err != nil {
|
||||
return err
|
||||
}
|
||||
for hashType := range m.hashes {
|
||||
slices.SortFunc(m.hashes[hashType], func(a, b structHash) int {
|
||||
return cmp.Compare(a.hash, b.hash)
|
||||
})
|
||||
}
|
||||
m.printSizes()
|
||||
for _, partialHashes := range m.partialHash {
|
||||
for _, partMap := range partialHashes {
|
||||
for part, hashes := range partMap {
|
||||
slices.Sort(hashes)
|
||||
partMap[part] = slices.Compact(hashes)
|
||||
}
|
||||
}
|
||||
}
|
||||
m.printSizes()
|
||||
|
||||
mapPartialHashes(m.aHashes, m.partialAHash)
|
||||
mapPartialHashes(m.dHashes, m.partialDHash)
|
||||
mapPartialHashes(m.pHashes, m.partialPHash)
|
||||
|
||||
compactPartialHashes(m.partialAHash)
|
||||
compactPartialHashes(m.partialDHash)
|
||||
compactPartialHashes(m.partialPHash)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MapStorage) printSizes() {
|
||||
fmt.Println("Length of hashes:", len(m.hashes[0])+len(m.hashes[1])+len(m.hashes[2]))
|
||||
// fmt.Println("Size of", "hashes:", size.Of(m.hashes)/1024/1024, "MB")
|
||||
// fmt.Println("Size of", "ids:", size.Of(m.ids)/1024/1024, "MB")
|
||||
// fmt.Println("Size of", "MapStorage:", size.Of(m)/1024/1024, "MB")
|
||||
|
||||
}
|
||||
|
||||
func NewMapStorage() (HashStorage, error) {
|
||||
|
||||
storage := &MapStorage{
|
||||
basicMapStorage: basicMapStorage{
|
||||
hashMutex: &sync.RWMutex{},
|
||||
hashes: [3][]structHash{
|
||||
[]structHash{},
|
||||
[]structHash{},
|
||||
[]structHash{},
|
||||
},
|
||||
},
|
||||
partialHash: [3][8]map[uint8][]uint64{
|
||||
{
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
},
|
||||
{
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
},
|
||||
{
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
},
|
||||
ids: make(map[ID]*[]ID),
|
||||
aHashes: []SavedHash{},
|
||||
dHashes: []SavedHash{},
|
||||
pHashes: []SavedHash{},
|
||||
},
|
||||
partialAHash: newPartialHash(),
|
||||
partialDHash: newPartialHash(),
|
||||
partialPHash: newPartialHash(),
|
||||
}
|
||||
return storage, nil
|
||||
}
|
||||
|
||||
func newPartialHash() [8]map[uint8][]uint64 {
|
||||
return [8]map[uint8][]uint64{
|
||||
map[uint8][]uint64{},
|
||||
map[uint8][]uint64{},
|
||||
map[uint8][]uint64{},
|
||||
map[uint8][]uint64{},
|
||||
map[uint8][]uint64{},
|
||||
map[uint8][]uint64{},
|
||||
map[uint8][]uint64{},
|
||||
map[uint8][]uint64{},
|
||||
}
|
||||
}
|
||||
|
||||
func mapPartialHashes(hashes []SavedHash, partialHashMap [8]map[uint8][]uint64) {
|
||||
for _, savedHash := range hashes {
|
||||
for i, partialHash := range SplitHash(savedHash.Hash.Hash) {
|
||||
partialHashMap[i][partialHash] = append(partialHashMap[i][partialHash], savedHash.Hash.Hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func compactPartialHashes(partialHashMap [8]map[uint8][]uint64) {
|
||||
for _, partMap := range partialHashMap {
|
||||
for part, hashes := range partMap {
|
||||
slices.Sort(hashes)
|
||||
partMap[part] = slices.Compact(hashes)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
254
savedHashes.go
Normal file
254
savedHashes.go
Normal file
@ -0,0 +1,254 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
"github.com/vmihailenco/msgpack"
|
||||
)
|
||||
|
||||
type Format int
|
||||
|
||||
const (
|
||||
Msgpack Format = iota + 1
|
||||
JSON
|
||||
|
||||
CurrentSavedHashesVersion int = 2
|
||||
)
|
||||
|
||||
var versionMap map[int]versionDecoder
|
||||
|
||||
var formatNames = map[Format]string{
|
||||
JSON: "json",
|
||||
Msgpack: "msgpack",
|
||||
}
|
||||
|
||||
var formatValues = map[string]Format{
|
||||
"json": JSON,
|
||||
"msgpack": Msgpack,
|
||||
}
|
||||
|
||||
type OldSavedHashes map[Source]map[string][3]uint64
|
||||
type SavedHashesv1 struct {
|
||||
IDs [][]ID
|
||||
Hashes [3]map[uint64]int
|
||||
}
|
||||
|
||||
// SavedHashes The IDs and Hashes fields have no direct correlation
|
||||
// It is perfectly valid to have an empty IDs or an empty Hashes field
|
||||
// If two covers have identical hashes then they should be two entries in Hashes not a set in IDs with two IDs from the same source
|
||||
type SavedHashes struct {
|
||||
Version int
|
||||
IDs [][]ID // List of sets of IDs that are the same across Sources, should generally only have one Source per set
|
||||
Hashes []SavedHash // List of all known hashes, hashes will be duplicated for each source
|
||||
}
|
||||
|
||||
type SavedHash struct {
|
||||
Hash Hash
|
||||
ID ID
|
||||
}
|
||||
type Encoder func(any) ([]byte, error)
|
||||
type Decoder func([]byte, interface{}) error
|
||||
type versionDecoder func(Decoder, []byte) (*SavedHashes, error)
|
||||
|
||||
var NoHashes = errors.New("no hashes")
|
||||
var DecodeError = errors.New("decoder failure")
|
||||
|
||||
func (f Format) String() string {
|
||||
if name, known := formatNames[f]; known {
|
||||
return name
|
||||
}
|
||||
return "Unknown"
|
||||
}
|
||||
|
||||
func (f *Format) Set(s string) error {
|
||||
if format, known := formatValues[strings.ToLower(s)]; known {
|
||||
*f = format
|
||||
} else {
|
||||
return fmt.Errorf("Unknown format: %d", f)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SavedHashes) InsertHash(hash Hash, id ID) {
|
||||
h := SavedHash{
|
||||
hash,
|
||||
id,
|
||||
}
|
||||
index, itemFound := slices.BinarySearchFunc(s.Hashes, h, func(existing SavedHash, target SavedHash) int {
|
||||
return cmp.Or(
|
||||
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
|
||||
cmp.Compare(existing.Hash.Kind, target.Hash.Kind),
|
||||
cmp.Compare(existing.ID.Domain, target.ID.Domain),
|
||||
cmp.Compare(existing.ID.ID, target.ID.ID),
|
||||
)
|
||||
})
|
||||
if !itemFound {
|
||||
s.Hashes = slices.Insert(s.Hashes, index, h)
|
||||
}
|
||||
}
|
||||
|
||||
func ConvertHashesV0(oldHashes OldSavedHashes) *SavedHashes {
|
||||
t := SavedHashes{}
|
||||
idcount := 0
|
||||
for _, ids := range oldHashes {
|
||||
idcount += len(ids)
|
||||
}
|
||||
t.IDs = make([][]ID, 0, idcount)
|
||||
t.Hashes = make([]SavedHash, 0, idcount)
|
||||
for domain, sourceHashes := range oldHashes {
|
||||
for id, hashes := range sourceHashes {
|
||||
t.IDs = append(t.IDs, []ID{{domain, id}})
|
||||
for hashType, hash := range hashes {
|
||||
t.Hashes = append(t.Hashes, SavedHash{
|
||||
Hash: Hash{
|
||||
Kind: goimagehash.Kind(hashType + 1),
|
||||
Hash: hash,
|
||||
},
|
||||
ID: ID{domain, id},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("length of hashes", len(t.Hashes))
|
||||
fmt.Println("Length of ID lists", len(t.IDs))
|
||||
return &t
|
||||
}
|
||||
|
||||
func ConvertHashesV1(oldHashes SavedHashesv1) *SavedHashes {
|
||||
t := SavedHashes{}
|
||||
hashCount := 0
|
||||
for _, hashes := range oldHashes.Hashes {
|
||||
hashCount += len(hashes)
|
||||
}
|
||||
t.IDs = oldHashes.IDs
|
||||
t.Hashes = make([]SavedHash, 0, hashCount)
|
||||
for hashType, sourceHashes := range oldHashes.Hashes {
|
||||
for hash, index := range sourceHashes {
|
||||
for _, id := range oldHashes.IDs[index] {
|
||||
t.Hashes = append(t.Hashes, SavedHash{
|
||||
ID: id,
|
||||
Hash: Hash{
|
||||
Kind: goimagehash.Kind(hashType + 1),
|
||||
Hash: hash,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("length of hashes", len(t.Hashes))
|
||||
fmt.Println("Length of ID lists", len(t.IDs))
|
||||
return &t
|
||||
}
|
||||
|
||||
func DecodeHashesV0(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
||||
loadedHashes := OldSavedHashes{}
|
||||
err := decode(hashes, &loadedHashes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", DecodeError, err)
|
||||
}
|
||||
if len(loadedHashes) == 0 {
|
||||
return nil, NoHashes
|
||||
}
|
||||
return ConvertHashesV0(loadedHashes), nil
|
||||
}
|
||||
|
||||
func DecodeHashesV1(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
||||
loadedHashes := SavedHashesv1{}
|
||||
err := decode(hashes, &loadedHashes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", DecodeError, err)
|
||||
}
|
||||
hashesCount := 0
|
||||
for _, hashes := range loadedHashes.Hashes {
|
||||
hashesCount += len(hashes)
|
||||
}
|
||||
if hashesCount < 1 {
|
||||
return nil, NoHashes
|
||||
}
|
||||
return ConvertHashesV1(loadedHashes), nil
|
||||
}
|
||||
|
||||
func DecodeHashesV2(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
||||
loadedHashes := SavedHashes{}
|
||||
err := decode(hashes, &loadedHashes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %w", DecodeError, err)
|
||||
}
|
||||
if len(loadedHashes.Hashes) < 1 && len(loadedHashes.IDs) < 1 {
|
||||
return nil, NoHashes
|
||||
}
|
||||
|
||||
return &loadedHashes, nil
|
||||
}
|
||||
|
||||
func getSavedHashesVersion(decode Decoder, hashes []byte) (int, error) {
|
||||
type version struct {
|
||||
Version int
|
||||
}
|
||||
var savedVersion version
|
||||
err := decode(hashes, &savedVersion)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("%w: %w", DecodeError, err)
|
||||
}
|
||||
if savedVersion.Version > 1 {
|
||||
return savedVersion.Version, nil
|
||||
}
|
||||
return -1, nil
|
||||
}
|
||||
func DecodeHashes(format Format, hashes []byte) (*SavedHashes, error) {
|
||||
var decode Decoder
|
||||
switch format {
|
||||
case Msgpack:
|
||||
decode = msgpack.Unmarshal
|
||||
case JSON:
|
||||
decode = json.Unmarshal
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("Unknown format: %v", format)
|
||||
}
|
||||
version, err := getSavedHashesVersion(decode, hashes)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if decodeVersion, knownVersion := versionMap[version]; knownVersion {
|
||||
return decodeVersion(decode, hashes)
|
||||
}
|
||||
|
||||
for _, decodeVersion := range []versionDecoder{
|
||||
DecodeHashesV0,
|
||||
DecodeHashesV1,
|
||||
DecodeHashesV2,
|
||||
} {
|
||||
loadedHashes, err := decodeVersion(decode, hashes)
|
||||
if err == nil {
|
||||
return loadedHashes, nil
|
||||
}
|
||||
if !errors.Is(err, NoHashes) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return nil, NoHashes
|
||||
}
|
||||
|
||||
func EncodeHashes(hashes SavedHashes, format Format) ([]byte, error) {
|
||||
var encoder Encoder
|
||||
switch format {
|
||||
case Msgpack:
|
||||
encoder = msgpack.Marshal
|
||||
case JSON:
|
||||
encoder = json.Marshal
|
||||
default:
|
||||
return nil, fmt.Errorf("Unknown format: %v", format)
|
||||
}
|
||||
|
||||
hashes.Version = CurrentSavedHashesVersion
|
||||
return encoder(hashes)
|
||||
}
|
96
sqlite.go
96
sqlite.go
@ -8,7 +8,6 @@ import (
|
||||
"log"
|
||||
"math/bits"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
_ "modernc.org/sqlite"
|
||||
@ -66,7 +65,7 @@ func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, items ...interface{
|
||||
return hashes, nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) findPartialHashes(max int, search_hash int64, kind goimagehash.Kind) ([]sqliteHash, error) { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
func (s *sqliteStorage) findPartialHashes(tl timeLog, max int, search_hash int64, kind goimagehash.Kind) ([]sqliteHash, error) { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
hashes := []sqliteHash{}
|
||||
statement, err := s.db.PrepareContext(context.Background(), `SELECT rowid,hash,kind FROM Hashes WHERE (kind=?) AND (((hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF)));`)
|
||||
if err != nil {
|
||||
@ -94,7 +93,7 @@ func (s *sqliteStorage) findPartialHashes(max int, search_hash int64, kind goima
|
||||
}
|
||||
}
|
||||
rows.Close()
|
||||
logTime("Filter partial " + kind.String())
|
||||
tl.logTime("Filter partial " + kind.String())
|
||||
|
||||
statement, err = s.db.PrepareContext(context.Background(), `SELECT DISTINCT IDS.domain, IDs.id, id_hash.hashid FROM IDs JOIN id_hash ON IDs.rowid = id_hash.idid WHERE (id_hash.hashid in (`+strings.TrimRight(strings.Repeat("?,", len(hashes)), ",")+`)) ORDER BY IDs.domain, IDs.ID;`)
|
||||
if err != nil {
|
||||
@ -171,35 +170,18 @@ ANALYZE;
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
total time.Duration
|
||||
t = time.Now()
|
||||
)
|
||||
|
||||
func resetTime() {
|
||||
total = 0
|
||||
t = time.Now()
|
||||
}
|
||||
|
||||
func logTime(log string) {
|
||||
n := time.Now()
|
||||
s := n.Sub(t)
|
||||
t = n
|
||||
total += s
|
||||
fmt.Printf("total: %v, %s: %v\n", total, log, s)
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
var (
|
||||
foundMatches []Result
|
||||
tl timeLog
|
||||
)
|
||||
resetTime()
|
||||
tl.resetTime()
|
||||
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
|
||||
statement, err := s.db.Prepare(`SELECT rowid,hash,kind FROM Hashes WHERE ` + strings.TrimSuffix(strings.Repeat("(hash=? AND kind=?) OR", len(hashes)), "OR") + `ORDER BY kind,hash;`)
|
||||
if err != nil {
|
||||
logTime("Fail exact")
|
||||
tl.logTime("Fail exact")
|
||||
return foundMatches, err
|
||||
}
|
||||
|
||||
@ -221,17 +203,17 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
|
||||
if len(foundMatches) > 0 && exactOnly {
|
||||
return foundMatches, nil
|
||||
}
|
||||
logTime("Search Exact")
|
||||
tl.logTime("Search Exact")
|
||||
}
|
||||
|
||||
foundHashes := make(map[uint64]struct{})
|
||||
|
||||
for _, hash := range hashes {
|
||||
hashes, err := s.findPartialHashes(max, int64(hash.Hash), hash.Kind)
|
||||
hashes, err := s.findPartialHashes(tl, max, int64(hash.Hash), hash.Kind)
|
||||
if err != nil {
|
||||
return foundMatches, err
|
||||
}
|
||||
logTime("Search partial " + hash.Kind.String())
|
||||
tl.logTime("Search partial " + hash.Kind.String())
|
||||
|
||||
for _, hash := range hashes {
|
||||
if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched {
|
||||
@ -251,27 +233,26 @@ func (s *sqliteStorage) MapHashes(hash ImageHash) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
insertHashes, err := tx.Prepare(`
|
||||
INSERT INTO Hashes (hash,kind) VALUES (?,?) ON CONFLICT DO UPDATE SET hash=?1 RETURNING hashid
|
||||
`)
|
||||
insertHashes, err := tx.Prepare(`INSERT INTO Hashes (hash,kind) VALUES (?,?) ON CONFLICT DO UPDATE SET hash=?1 RETURNING hashid`)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
rows, err := tx.Query(`
|
||||
INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO UPDATE SET domain=?1 RETURNING idid
|
||||
`, hash.ID.Domain, hash.ID.ID)
|
||||
|
||||
rows, err := tx.Query(`INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO UPDATE SET domain=?1 RETURNING idid`, hash.ID.Domain, hash.ID.ID)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if !rows.Next() {
|
||||
panic("Unable to insert IDs")
|
||||
panic("Unable to insert ID")
|
||||
}
|
||||
|
||||
var id_id int64
|
||||
err = rows.Scan(&id_id)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
rows.Close()
|
||||
|
||||
hash_ids := []int64{}
|
||||
for _, hash := range hash.Hashes {
|
||||
rows, err := insertHashes.Query(int64(hash.Hash), hash.Kind)
|
||||
@ -280,21 +261,24 @@ INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO UPDATE SET domain=?1 RET
|
||||
}
|
||||
|
||||
if !rows.Next() {
|
||||
panic("Unable to insert IDs")
|
||||
panic("Unable to insert Hash")
|
||||
}
|
||||
|
||||
var id int64
|
||||
err = rows.Scan(&id)
|
||||
rows.Close()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
hash_ids = append(hash_ids, id)
|
||||
}
|
||||
var ids []any
|
||||
var ids []any = make([]any, 0, len(hash_ids)+1)
|
||||
ids = append(ids, id_id)
|
||||
for _, hash_id := range hash_ids {
|
||||
ids = append(ids, hash_id, id_id)
|
||||
ids = append(ids, hash_id)
|
||||
}
|
||||
_, err = tx.Exec(`INSERT INTO id_hash (hashid,idid) VALUES `+strings.TrimSuffix(strings.Repeat("(?, ?),", len(hash_ids)), ",")+` ON CONFLICT DO NOTHING;`, ids...)
|
||||
_, err = tx.Exec(`INSERT INTO id_hash (idid, hashid) VALUES `+strings.TrimSuffix(strings.Repeat("(?1, ?),", len(hash_ids)), ",")+` ON CONFLICT DO NOTHING;`, ids...)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Failed inserting: %v,%v: %w", hash.ID.Domain, hash.ID.ID, err))
|
||||
}
|
||||
@ -311,16 +295,11 @@ func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
|
||||
return err
|
||||
}
|
||||
|
||||
for hashType, sourceHashes := range hashes.Hashes {
|
||||
hashKind := goimagehash.Kind(hashType + 1)
|
||||
for hash, idsLocations := range sourceHashes {
|
||||
for _, id := range hashes.IDs[idsLocations] {
|
||||
s.MapHashes(ImageHash{
|
||||
Hashes: []Hash{{hash, hashKind}},
|
||||
ID: id,
|
||||
})
|
||||
}
|
||||
}
|
||||
for _, savedHash := range hashes.Hashes {
|
||||
s.MapHashes(ImageHash{
|
||||
Hashes: []Hash{savedHash.Hash},
|
||||
ID: savedHash.ID,
|
||||
})
|
||||
}
|
||||
err = s.createIndexes()
|
||||
if err != nil {
|
||||
@ -434,28 +413,27 @@ func NewSqliteStorage(db, path string) (HashStorage, error) {
|
||||
_, err = sqlite.db.Exec(`
|
||||
PRAGMA foreign_keys=ON;
|
||||
CREATE TABLE IF NOT EXISTS Hashes(
|
||||
hashid INTEGER PRIMARY KEY,
|
||||
hash INT NOT NULL,
|
||||
kind int NOT NULL,
|
||||
hashid INTEGER PRIMARY KEY,
|
||||
hash INTEGER NOT NULL,
|
||||
kind INTEGER NOT NULL,
|
||||
id INTEGER NOT NULL,
|
||||
FOREIGN KEY(id) REFERENCES IDs(idid),
|
||||
UNIQUE(kind, hash)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS IDs(
|
||||
id TEXT NOT NULL,
|
||||
domain TEXT NOT NULL,
|
||||
idid INTEGER PRIMARY KEY,
|
||||
idid INTEGER PRIMARY KEY,
|
||||
UNIQUE (domain, id)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, id);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS id_hash(
|
||||
hashid INTEGER,
|
||||
idid INTEGER,
|
||||
FOREIGN KEY(hashid) REFERENCES Hashes(hashid),
|
||||
FOREIGN KEY(idid) REFERENCES IDs(idid)
|
||||
UNIQUE (hashid, idid)
|
||||
CREATE TABLE IF NOT EXISTS EquivalentIDs(
|
||||
id INTEGER
|
||||
groupid INTEGER,
|
||||
FOREIGN KEY(idid) REFERENCES IDs(idid)
|
||||
UNIQUE (groupid, id)
|
||||
);
|
||||
|
||||
`)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
24
timing.go
Normal file
24
timing.go
Normal file
@ -0,0 +1,24 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
type timeLog struct {
|
||||
total time.Duration
|
||||
last time.Time
|
||||
}
|
||||
|
||||
func (t *timeLog) resetTime() {
|
||||
t.total = 0
|
||||
t.last = time.Now()
|
||||
}
|
||||
|
||||
func (t *timeLog) logTime(log string) {
|
||||
now := time.Now()
|
||||
diff := now.Sub(t.last)
|
||||
t.last = now
|
||||
t.total += diff
|
||||
fmt.Printf("total: %v, %s: %v\n", t.total, log, diff)
|
||||
}
|
145
vp-tree.go
145
vp-tree.go
@ -10,12 +10,17 @@ import (
|
||||
)
|
||||
|
||||
type VPTree struct {
|
||||
trees [3]*vptree.Tree
|
||||
hashes [3][]vptree.Comparable
|
||||
aTree *vptree.Tree
|
||||
dTree *vptree.Tree
|
||||
pTree *vptree.Tree
|
||||
ids map[ID]*[]ID
|
||||
|
||||
aHashes []vptree.Comparable // temporary, only used for vptree creation
|
||||
dHashes []vptree.Comparable // temporary, only used for vptree creation
|
||||
pHashes []vptree.Comparable // temporary, only used for vptree creation
|
||||
}
|
||||
type VPHash struct {
|
||||
Hash Hash
|
||||
IDs []ID
|
||||
SavedHash
|
||||
}
|
||||
|
||||
func (h *VPHash) Distance(c vptree.Comparable) float64 {
|
||||
@ -27,57 +32,108 @@ func (h *VPHash) Distance(c vptree.Comparable) float64 {
|
||||
}
|
||||
|
||||
func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
var matches []Result
|
||||
var exactMatches []Result
|
||||
fmt.Println(hashes)
|
||||
var (
|
||||
matches []Result
|
||||
exactMatches []Result
|
||||
tl timeLog
|
||||
)
|
||||
tl.resetTime()
|
||||
defer tl.logTime("Search Complete")
|
||||
|
||||
for _, hash := range hashes {
|
||||
results := vptree.NewDistKeeper(float64(max))
|
||||
hashType := int(hash.Kind) - 1
|
||||
v.trees[hashType].NearestSet(results, &VPHash{Hash: hash})
|
||||
|
||||
currentTree := v.getCurrentTree(hash.Kind)
|
||||
currentTree.NearestSet(results, &VPHash{SavedHash{Hash: hash}})
|
||||
|
||||
mappedIds := map[*[]ID]bool{}
|
||||
for _, result := range results.Heap {
|
||||
vphash := result.Comparable.(*VPHash)
|
||||
storedHash := result.Comparable.(*VPHash)
|
||||
ids := v.ids[storedHash.ID]
|
||||
if mappedIds[ids] {
|
||||
continue
|
||||
}
|
||||
mappedIds[ids] = true
|
||||
if result.Dist == 0 {
|
||||
exactMatches = append(exactMatches, Result{
|
||||
IDs: ToIDList(vphash.IDs),
|
||||
IDs: ToIDList(*v.ids[storedHash.ID]),
|
||||
Distance: int(result.Dist),
|
||||
Hash: vphash.Hash,
|
||||
Hash: storedHash.Hash,
|
||||
})
|
||||
} else {
|
||||
matches = append(matches, Result{
|
||||
IDs: ToIDList(vphash.IDs),
|
||||
IDs: ToIDList(*v.ids[storedHash.ID]),
|
||||
Distance: int(result.Dist),
|
||||
Hash: vphash.Hash,
|
||||
Hash: storedHash.Hash,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(exactMatches) > 0 && exactOnly {
|
||||
if exactOnly {
|
||||
return exactMatches, nil
|
||||
}
|
||||
matches = append(exactMatches[:len(exactMatches):len(exactMatches)], matches...)
|
||||
exactMatches = append(exactMatches, matches...)
|
||||
return matches, nil
|
||||
}
|
||||
|
||||
func (v *VPTree) getCurrentTree(kind goimagehash.Kind) *vptree.Tree {
|
||||
if kind == goimagehash.AHash {
|
||||
return v.aTree
|
||||
}
|
||||
if kind == goimagehash.DHash {
|
||||
return v.dTree
|
||||
}
|
||||
if kind == goimagehash.PHash {
|
||||
return v.pTree
|
||||
}
|
||||
panic("Unknown hash type: " + kind.String())
|
||||
}
|
||||
|
||||
// MapHashes would insert a single image's hashes, but this backend builds
// its vptrees in one batch during DecodeHashes, so incremental insertion
// is not supported and always panics.
func (v *VPTree) MapHashes(ImageHash) {
	panic("Not Implemented")
}
|
||||
|
||||
func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
|
||||
var err error
|
||||
for hashType, sourceHashes := range hashes.Hashes {
|
||||
for hash, idsLocation := range sourceHashes {
|
||||
var (
|
||||
hashKind = goimagehash.Kind(hashType + 1)
|
||||
)
|
||||
hash := &VPHash{Hash{hash, hashKind}, hashes.IDs[idsLocation]}
|
||||
v.hashes[hashType] = append(v.hashes[hashType], hash)
|
||||
|
||||
// Initialize all the known equal IDs
|
||||
for _, ids := range hashes.IDs {
|
||||
for _, id := range ids {
|
||||
v.ids[id] = &ids
|
||||
}
|
||||
}
|
||||
for hashType := range 3 {
|
||||
v.trees[hashType], err = vptree.New(v.hashes[hashType], 3, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
var err error
|
||||
for _, savedHash := range hashes.Hashes {
|
||||
if savedHash.Hash.Kind == goimagehash.AHash {
|
||||
v.aHashes = append(v.aHashes, &VPHash{savedHash})
|
||||
}
|
||||
if savedHash.Hash.Kind == goimagehash.DHash {
|
||||
v.dHashes = append(v.dHashes, &VPHash{savedHash})
|
||||
}
|
||||
if savedHash.Hash.Kind == goimagehash.PHash {
|
||||
v.pHashes = append(v.pHashes, &VPHash{savedHash})
|
||||
}
|
||||
|
||||
if savedHash.ID == (ID{}) {
|
||||
fmt.Println("Empty ID detected")
|
||||
panic(savedHash)
|
||||
}
|
||||
// All known equal IDs are already mapped we can add any missing ones from hashes
|
||||
if _, ok := v.ids[savedHash.ID]; !ok {
|
||||
v.ids[savedHash.ID] = &[]ID{savedHash.ID}
|
||||
}
|
||||
}
|
||||
|
||||
v.aTree, err = vptree.New(v.aHashes, 3, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
v.dTree, err = vptree.New(v.dHashes, 3, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
v.pTree, err = vptree.New(v.pHashes, 3, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -90,16 +146,31 @@ func (v *VPTree) AssociateIDs(newIDs []NewIDs) error {
|
||||
}
|
||||
|
||||
func (v *VPTree) GetIDs(id ID) IDList {
|
||||
return nil
|
||||
ids, found := v.ids[id]
|
||||
if !found {
|
||||
return nil
|
||||
}
|
||||
return ToIDList(*ids)
|
||||
}
|
||||
|
||||
func NewVPStorage() (HashStorage, error) {
|
||||
|
||||
return &VPTree{
|
||||
hashes: [3][]vptree.Comparable{
|
||||
make([]vptree.Comparable, 0, 1_000_000),
|
||||
make([]vptree.Comparable, 0, 1_000_000),
|
||||
make([]vptree.Comparable, 0, 1_000_000),
|
||||
},
|
||||
}, nil
|
||||
var err error
|
||||
v := &VPTree{
|
||||
aHashes: []vptree.Comparable{},
|
||||
dHashes: []vptree.Comparable{},
|
||||
pHashes: []vptree.Comparable{},
|
||||
}
|
||||
v.aTree, err = vptree.New(v.aHashes, 3, nil)
|
||||
if err != nil {
|
||||
return v, err
|
||||
}
|
||||
v.dTree, err = vptree.New(v.dHashes, 3, nil)
|
||||
if err != nil {
|
||||
return v, err
|
||||
}
|
||||
v.pTree, err = vptree.New(v.pHashes, 3, nil)
|
||||
if err != nil {
|
||||
return v, err
|
||||
}
|
||||
return v, nil
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user