440 lines
13 KiB
Go
440 lines
13 KiB
Go
package ch
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"log"
|
|
"math/bits"
|
|
"strings"
|
|
|
|
"gitea.narnian.us/lordwelch/goimagehash"
|
|
_ "modernc.org/sqlite"
|
|
)
|
|
|
|
type sqliteStorage struct {
|
|
db *sql.DB
|
|
}
|
|
type sqliteHash struct {
|
|
hashid int
|
|
Result
|
|
}
|
|
|
|
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, items ...interface{}) ([]sqliteHash, error) { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
|
hashes := []sqliteHash{}
|
|
rows, err := statement.Query(items...)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
|
|
for rows.Next() {
|
|
var (
|
|
r = sqliteHash{Result: Result{IDs: make(IDList)}}
|
|
h int64
|
|
)
|
|
err = rows.Scan(&r.hashid, &h, &r.Hash.Kind)
|
|
if err != nil {
|
|
rows.Close()
|
|
return hashes, err
|
|
}
|
|
r.Hash.Hash = uint64(h)
|
|
hashes = append(hashes, r)
|
|
}
|
|
rows.Close()
|
|
statement, err = s.db.PrepareContext(context.Background(), `SELECT IDS.domain, IDs.id FROM IDs JOIN id_hash ON IDs.rowid = id_hash.idid WHERE (id_hash.hashid=?) ORDER BY IDs.domain, IDs.ID;`)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
for _, hash := range hashes {
|
|
rows, err := statement.Query(hash.hashid)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
for rows.Next() {
|
|
var source Source
|
|
var id string
|
|
err := rows.Scan(&source, &id)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
hash.IDs[source] = append(hash.IDs[source], id)
|
|
}
|
|
rows.Close()
|
|
}
|
|
return hashes, nil
|
|
}
|
|
|
|
func (s *sqliteStorage) findPartialHashes(max int, search_hash int64, kind goimagehash.Kind) ([]sqliteHash, error) { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
|
hashes := []sqliteHash{}
|
|
statement, err := s.db.PrepareContext(context.Background(), `SELECT rowid,hash,kind FROM Hashes WHERE (kind=?) AND (((hash >> (0 * 8) & 0xFF)=(? >> (0 * 8) & 0xFF)) OR ((hash >> (1 * 8) & 0xFF)=(? >> (1 * 8) & 0xFF)) OR ((hash >> (2 * 8) & 0xFF)=(? >> (2 * 8) & 0xFF)) OR ((hash >> (3 * 8) & 0xFF)=(? >> (3 * 8) & 0xFF)) OR ((hash >> (4 * 8) & 0xFF)=(? >> (4 * 8) & 0xFF)) OR ((hash >> (5 * 8) & 0xFF)=(? >> (5 * 8) & 0xFF)) OR ((hash >> (6 * 8) & 0xFF)=(? >> (6 * 8) & 0xFF)) OR ((hash >> (7 * 8) & 0xFF)=(? >> (7 * 8) & 0xFF))) ORDER BY kind,hash;`)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
rows, err := statement.Query(kind, int64(search_hash), int64(search_hash), int64(search_hash), int64(search_hash), int64(search_hash), int64(search_hash), int64(search_hash), int64(search_hash))
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
|
|
for rows.Next() {
|
|
var (
|
|
r = sqliteHash{Result: Result{IDs: make(IDList)}}
|
|
h int64
|
|
)
|
|
err = rows.Scan(&r.hashid, &h, &r.Hash.Kind)
|
|
if err != nil {
|
|
rows.Close()
|
|
return hashes, err
|
|
}
|
|
r.Hash.Hash = uint64(h)
|
|
r.Distance = bits.OnesCount64(uint64(search_hash) ^ r.Hash.Hash)
|
|
if r.Distance <= max {
|
|
hashes = append(hashes, r)
|
|
}
|
|
}
|
|
rows.Close()
|
|
|
|
statement, err = s.db.PrepareContext(context.Background(), `SELECT DISTINCT IDS.domain, IDs.id, id_hash.hashid FROM IDs JOIN id_hash ON IDs.rowid = id_hash.idid WHERE (id_hash.hashid in (`+strings.TrimRight(strings.Repeat("?,", len(hashes)), ",")+`)) ORDER BY IDs.domain, IDs.ID;`)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
|
|
var ids []any
|
|
for _, hash := range hashes {
|
|
ids = append(ids, hash.hashid)
|
|
}
|
|
rows, err = statement.Query(ids...)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
for rows.Next() {
|
|
var source Source
|
|
var id string
|
|
var hashid int
|
|
err := rows.Scan(&source, &id, &hashid)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
for _, hash := range hashes {
|
|
if hash.hashid == hashid {
|
|
hash.IDs[source] = append(hash.IDs[source], id)
|
|
}
|
|
}
|
|
}
|
|
rows.Close()
|
|
return hashes, nil
|
|
}
|
|
|
|
func (s *sqliteStorage) dropIndexes() error {
|
|
_, err := s.db.Exec(`
|
|
|
|
DROP INDEX IF EXISTS hash_index;
|
|
DROP INDEX IF EXISTS hash_1_index;
|
|
DROP INDEX IF EXISTS hash_2_index;
|
|
DROP INDEX IF EXISTS hash_3_index;
|
|
DROP INDEX IF EXISTS hash_4_index;
|
|
DROP INDEX IF EXISTS hash_5_index;
|
|
DROP INDEX IF EXISTS hash_6_index;
|
|
DROP INDEX IF EXISTS hash_7_index;
|
|
DROP INDEX IF EXISTS hash_8_index;
|
|
|
|
DROP INDEX IF EXISTS id_domain;
|
|
`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *sqliteStorage) createIndexes() error {
|
|
_, err := s.db.Exec(`
|
|
|
|
CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
|
|
CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
|
|
|
|
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, id);
|
|
PRAGMA shrink_memory;
|
|
`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
|
var foundMatches []Result
|
|
|
|
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
|
|
|
statement, err := s.db.Prepare(`SELECT rowid,hash,kind FROM Hashes WHERE ` + strings.TrimSuffix(strings.Repeat("(hash=? AND kind=?) OR", len(hashes)), "OR") + `ORDER BY kind,hash;`)
|
|
if err != nil {
|
|
return foundMatches, err
|
|
}
|
|
args := make([]interface{}, 0, len(hashes)*2)
|
|
for _, hash := range hashes {
|
|
if hash.Hash != 0 {
|
|
args = append(args, int64(hash.Hash), hash.Kind)
|
|
}
|
|
}
|
|
hashes, err := s.findExactHashes(statement, args...)
|
|
if err != nil {
|
|
return foundMatches, err
|
|
}
|
|
for _, hash := range hashes {
|
|
foundMatches = append(foundMatches, hash.Result)
|
|
}
|
|
|
|
// If we have exact matches don't bother with other matches
|
|
if len(foundMatches) > 0 && exactOnly {
|
|
return foundMatches, nil
|
|
}
|
|
}
|
|
|
|
foundHashes := make(map[uint64]struct{})
|
|
|
|
for _, hash := range hashes {
|
|
hashes, err := s.findPartialHashes(max, int64(hash.Hash), hash.Kind)
|
|
if err != nil {
|
|
return foundMatches, err
|
|
}
|
|
|
|
for _, hash := range hashes {
|
|
if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched {
|
|
foundHashes[hash.Hash.Hash] = struct{}{}
|
|
foundMatches = append(foundMatches, hash.Result)
|
|
} else {
|
|
log.Println("Hash already found", hash)
|
|
}
|
|
}
|
|
}
|
|
|
|
return foundMatches, nil
|
|
}
|
|
|
|
func (s *sqliteStorage) MapHashes(hash ImageHash) {
|
|
insertHashes, err := s.db.Prepare(`
|
|
INSERT INTO Hashes (hash,kind) VALUES (?,?) ON CONFLICT DO NOTHING;
|
|
`)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
IDInsertResult, err := s.db.Exec(`
|
|
INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO NOTHING;
|
|
`, hash.ID.Domain, hash.ID.Domain)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
id_id, err := IDInsertResult.LastInsertId()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
hash_ids := []int64{}
|
|
for _, hash := range hash.Hashes {
|
|
hashInsertResult, err := insertHashes.Exec(int64(hash.Hash), hash.Kind)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
id, err := hashInsertResult.LastInsertId()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
hash_ids = append(hash_ids, id)
|
|
}
|
|
for _, hash_id := range hash_ids {
|
|
_, err = s.db.Exec(`INSERT INTO id_hash VALUES (?, ?) ON CONFLICT DO NOTHING;`, hash_id, id_id)
|
|
if err != nil {
|
|
panic(fmt.Errorf("Failed inserting: %v,%v: %w", hash.ID.Domain, hash.ID.ID, err))
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
|
|
err := s.dropIndexes()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for domain, sourceHashes := range hashes {
|
|
for id, h := range sourceHashes {
|
|
s.MapHashes(ImageHash{[]Hash{{h[0], goimagehash.AHash}, {h[1], goimagehash.DHash}, {h[2], goimagehash.PHash}}, ID{domain, id}})
|
|
}
|
|
}
|
|
err = s.createIndexes()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
|
|
hashes := make(SavedHashes)
|
|
conn, err := s.db.Conn(context.Background())
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
defer conn.Close()
|
|
rows, err := conn.QueryContext(context.Background(), "SELECT DISTINCT (domain) FROM IDs ORDER BY domain;")
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
sources := make([]string, 0, 10)
|
|
for rows.Next() {
|
|
var source string
|
|
if err = rows.Scan(&source); err != nil {
|
|
rows.Close()
|
|
return hashes, err
|
|
}
|
|
sources = append(sources, source)
|
|
}
|
|
for _, source := range sources {
|
|
rows, err = conn.QueryContext(context.Background(), "SELECT IDs.id,Hashes.hash,Hashes.kind FROM Hashes JOIN id_hash ON id_hash.hashid = hashes.rowid JOIN IDs ON IDs.rowid = id_hash.idid WHERE IDs.domain = ? ORDER BY IDs.ID,Hashes.kind,Hashes.hash;", source)
|
|
if err != nil {
|
|
rows.Close()
|
|
return hashes, err
|
|
}
|
|
var (
|
|
id string
|
|
hash int64
|
|
typ goimagehash.Kind
|
|
)
|
|
err = rows.Scan(&id, &hash, &typ)
|
|
if err != nil {
|
|
return hashes, err
|
|
}
|
|
_, ok := hashes[Source(source)]
|
|
if !ok {
|
|
hashes[Source(source)] = make(map[string][3]uint64)
|
|
}
|
|
h := hashes[Source(source)][id]
|
|
h[typ-1] = uint64(hash)
|
|
hashes[Source(source)][id] = h
|
|
}
|
|
return hashes, nil
|
|
}
|
|
|
|
func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) {
|
|
for _, ids := range newIDs {
|
|
var oldIDID, newIDID int
|
|
_, err := s.db.Exec(`INSERT INTO IDs domain,id VALUES (?,?)`, ids.NewID.Domain, ids.NewID.ID)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
rows, err := s.db.Query(`SELECT idid FROM IDs WHERE domain=? AND id=?`, ids.NewID.Domain, ids.NewID.ID)
|
|
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
|
panic(err)
|
|
}
|
|
if rows.Next() {
|
|
rows.Scan(&newIDID)
|
|
} else {
|
|
panic("Unable to insert New ID into database")
|
|
}
|
|
rows.Close()
|
|
rows, err = s.db.Query(`SELECT idid FROM IDs WHERE domain=? AND id=?`, ids.OldID.Domain, ids.OldID.ID)
|
|
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
|
panic(err)
|
|
}
|
|
if rows.Next() {
|
|
rows.Scan(&oldIDID)
|
|
} else {
|
|
continue
|
|
}
|
|
_, err = s.db.Exec(`INSERT INTO id_hash (hashid, id_id) SELECT hashid,? FROM id_hash where id_id=?`, newIDID, oldIDID)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *sqliteStorage) GetIDs(id ID) IDList {
|
|
var idid int
|
|
rows, err := s.db.Query(`SELECT idid FROM IDs WHERE domain=? AND id=?`, id.Domain, id.ID)
|
|
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
|
panic(err)
|
|
}
|
|
if rows.Next() {
|
|
rows.Scan(&idid)
|
|
} else {
|
|
return nil
|
|
}
|
|
rows, err = s.db.Query(`SELECT id_hash FROM id_hash WHERE id_id=?`, idid)
|
|
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
|
panic(err)
|
|
}
|
|
var hashIDs []interface{}
|
|
for rows.Next() {
|
|
var hashID int
|
|
rows.Scan(&hashID)
|
|
hashIDs = append(hashIDs, hashID)
|
|
}
|
|
rows.Close()
|
|
|
|
IDs := make(IDList)
|
|
rows, err = s.db.Query(`SELECT IDs.domain,IDs.id FROM id_hash JOIN IDs ON id_hash.idid==IDs.idid WHERE hash_id in (`+strings.TrimRight(strings.Repeat("?,", len(hashIDs)), ",")+`)`, hashIDs...)
|
|
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
|
panic(err)
|
|
}
|
|
for rows.Next() {
|
|
var id ID
|
|
rows.Scan(&id.Domain, id.ID)
|
|
IDs[id.Domain] = append(IDs[id.Domain], id.ID)
|
|
}
|
|
return IDs
|
|
}
|
|
|
|
func NewSqliteStorage(db, path string) (HashStorage, error) {
|
|
sqlite := &sqliteStorage{}
|
|
sqlDB, err := sql.Open(db, fmt.Sprintf("file://%s?_pragma=cache_size(-200000)&_pragma=busy_timeout(500)&_pragma=hard_heap_limit(1073741824)&_pragma=journal_mode(wal)&_pragma=soft_heap_limit(314572800)", path))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
sqlite.db = sqlDB
|
|
_, err = sqlite.db.Exec(`
|
|
PRAGMA foreign_keys=ON;
|
|
CREATE TABLE IF NOT EXISTS Hashes(
|
|
hashid INTEGER PRIMARY KEY,
|
|
hash INT NOT NULL,
|
|
kind int NOT NULL,
|
|
UNIQUE(kind, hash)
|
|
);
|
|
|
|
CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
|
|
CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
|
|
CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
|
|
|
|
CREATE TABLE IF NOT EXISTS IDs(
|
|
id TEXT NOT NULL,
|
|
domain TEXT NOT NULL,
|
|
idid INTEGER PRIMARY KEY,
|
|
UNIQUE (domain, id)
|
|
);
|
|
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, id);
|
|
|
|
CREATE TABLE IF NOT EXISTS id_hash(
|
|
hashid INTEGER,
|
|
idid INTEGER,
|
|
FOREIGN KEY(hashid) REFERENCES Hashes(hashid),
|
|
FOREIGN KEY(idid) REFERENCES IDs(idid)
|
|
UNIQUE (hashid, idid)
|
|
);
|
|
|
|
`)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
sqlite.db.SetMaxOpenConns(1)
|
|
return sqlite, nil
|
|
}
|