Update internal hash storage

This commit is contained in:
Timmy Welch 2025-02-22 13:45:41 -08:00
parent 6452f2e50d
commit 130b7dec4a
15 changed files with 1288 additions and 738 deletions

View File

@ -14,155 +14,215 @@ import (
type basicMapStorage struct {
hashMutex *sync.RWMutex
ids map[ID]*[]ID
hashes [3][]structHash
ids map[ID]*[]ID
aHashes []SavedHash
dHashes []SavedHash
pHashes []SavedHash
}
type structHash struct {
hash uint64
ids *[]ID
}
// atleast must have a read lock before using
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
matchingHashes := make([]Result, 0, 20) // hope that we don't need more
func (b *basicMapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
hashType := int(hashKind) - 1
matchingHashes := make([]Result, 0, 100) // hope that we don't need all of them
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
for _, storedHash := range b.hashes[hashType] {
distance := bits.OnesCount64(searchHash ^ storedHash.hash)
mappedIds := map[*[]ID]bool{}
for _, storedHash := range *b.getCurrentHashes(kind) {
distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash)
if distance <= maxDistance {
matchingHashes = append(matchingHashes, Result{ToIDList(*storedHash.ids), distance, Hash{storedHash.hash, hashKind}})
ids := b.ids[storedHash.ID]
if mappedIds[ids] {
continue
}
mappedIds[ids] = true
matchingHashes = append(matchingHashes, Result{ToIDList(*b.ids[storedHash.ID]), distance, storedHash.Hash})
}
}
return matchingHashes
}
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var foundMatches []Result
resetTime()
defer logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
var (
foundMatches []Result
tl timeLog
)
tl.resetTime()
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes {
hashType := int(hash.Kind) - 1
b.hashMutex.RLock()
index, hashFound := b.findHash(hashType, hash.Hash)
if hashFound {
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: hash,
IDs: ToIDList(*b.hashes[hashType][index].ids),
})
mappedIds := map[*[]ID]bool{}
index, count := b.findHash(hash)
if count > 0 {
for _, storedHash := range (*b.getCurrentHashes(hash.Kind))[index : index+count] {
ids := b.ids[storedHash.ID]
if mappedIds[ids] {
continue
}
mappedIds[ids] = true
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: storedHash.Hash,
IDs: ToIDList(*b.ids[storedHash.ID]),
})
}
}
b.hashMutex.RUnlock()
}
logTime("Search Exact")
// If we have exact matches don't bother with other matches
if len(foundMatches) > 0 && exactOnly {
return foundMatches, nil
}
tl.logTime("Search Exact")
return foundMatches, nil
}
foundHashes := make(map[uint64]struct{})
totalPartialHashes := 0
for _, hash := range hashes {
for _, match := range b.Atleast(hash.Kind, max, hash.Hash) {
_, alreadyMatched := foundHashes[match.Hash.Hash]
if alreadyMatched {
continue
}
foundHashes[match.Hash.Hash] = struct{}{}
foundMatches = append(foundMatches, match)
}
foundMatches = append(foundMatches, b.atleast(hash.Kind, max, hash.Hash)...)
}
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
return foundMatches, nil
}
// findHash must have a read lock before using
func (b *basicMapStorage) findHash(hashType int, hash uint64) (int, bool) {
return slices.BinarySearchFunc(b.hashes[hashType], hash, func(e structHash, t uint64) int {
return cmp.Compare(e.hash, t)
})
// getCurrentHashes returns a pointer to the sorted slice backing the given
// hash kind. Callers must hold at least a read lock on hashMutex.
// Panics on an unrecognized kind, as that indicates a programming error.
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]SavedHash {
	switch kind {
	case goimagehash.AHash:
		return &b.aHashes
	case goimagehash.DHash:
		return &b.dHashes
	case goimagehash.PHash:
		return &b.pHashes
	}
	panic("Unknown hash type: " + kind.String())
}
// insertHash will take a write lock if the hash is not found
func (b *basicMapStorage) insertHash(hashType int, hash uint64, ids *[]ID) {
b.hashMutex.RLock()
index, hashFound := b.findHash(hashType, hash)
b.hashMutex.RUnlock()
if hashFound {
return
// findHash must have a read lock before using.
// Returns (index, count): index is the position of the first stored hash equal
// to hash.Hash, count is the number of consecutive equal entries starting there.
// If count < 1 then no results were found.
func (b *basicMapStorage) findHash(hash Hash) (int, int) {
	currentHashes := *b.getCurrentHashes(hash.Kind)
	index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing SavedHash, target Hash) int {
		return cmp.Compare(existing.Hash.Hash, target.Hash)
	})
	if !found {
		return index, 0
	}
	// Count the matched entry itself plus any duplicates that follow it.
	// Starting at 0 here would make callers that slice [index:index+count]
	// (and guard with count > 0) silently drop a lone exact match.
	count := 1
	for i := index + 1; i < len(currentHashes) && currentHashes[i].Hash.Hash == hash.Hash; i++ {
		count++
	}
	return index, count
}
// insertHash must already have a lock
func (b *basicMapStorage) insertHash(hash Hash, id ID) {
currentHashes := b.getCurrentHashes(hash.Kind)
index, count := b.findHash(hash)
max := index + count
for ; index < max; index++ {
if (*currentHashes)[index].ID == id {
return
}
}
*currentHashes = slices.Insert(*currentHashes, index, SavedHash{hash, id})
if _, mapped := b.ids[id]; !mapped {
b.ids[id] = &[]ID{id}
}
b.hashMutex.Lock()
b.hashes[hashType] = slices.Insert(b.hashes[hashType], index, structHash{hash, ids})
b.hashMutex.Unlock()
}
func (b *basicMapStorage) MapHashes(hash ImageHash) {
b.hashMutex.Lock()
defer b.hashMutex.Unlock()
for _, ih := range hash.Hashes {
var (
hashType = int(ih.Kind) - 1
)
b.hashMutex.RLock()
ids, ok := b.ids[hash.ID]
b.hashMutex.RUnlock()
if !ok {
b.hashMutex.Lock()
ids = &[]ID{hash.ID}
b.ids[hash.ID] = ids
b.hashMutex.Unlock()
}
b.insertHash(hashType, ih.Hash, ids)
b.insertHash(ih, hash.ID)
}
}
// DecodeHashes should already have a lock
// DecodeHashes must already have a lock
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
for hashType, sourceHashes := range hashes.Hashes {
b.hashes[hashType] = make([]structHash, len(sourceHashes))
for savedHash, idlistLocation := range sourceHashes {
b.hashes[hashType] = append(b.hashes[hashType], structHash{savedHash, &hashes.IDs[idlistLocation]})
for _, id := range hashes.IDs[idlistLocation] {
b.ids[id] = &hashes.IDs[idlistLocation]
}
b.ids = make(map[ID]*[]ID, len(hashes.Hashes))
// Initialize all the known equal IDs
for _, ids := range hashes.IDs {
for _, id := range ids {
b.ids[id] = &ids
}
}
for hashType := range b.hashes {
slices.SortFunc(b.hashes[hashType], func(a, b structHash) int {
return cmp.Compare(a.hash, b.hash)
})
slices.SortFunc(hashes.Hashes, func(existing, target SavedHash) int {
return cmp.Or(
cmp.Compare(existing.Hash.Kind, target.Hash.Kind),
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
cmp.Compare(existing.ID.Domain, target.ID.Domain),
cmp.Compare(existing.ID.ID, target.ID.ID),
)
})
// Assume they are probably fairly equally split between hash types
b.aHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
b.dHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
b.pHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
for _, savedHash := range hashes.Hashes {
if savedHash.Hash.Kind == goimagehash.AHash {
b.aHashes = append(b.aHashes, savedHash)
}
if savedHash.Hash.Kind == goimagehash.DHash {
b.dHashes = append(b.dHashes, savedHash)
}
if savedHash.Hash.Kind == goimagehash.PHash {
b.pHashes = append(b.pHashes, savedHash)
}
if savedHash.ID == (ID{}) {
fmt.Println("Empty ID detected")
panic(savedHash)
}
// All known equal IDs are already mapped we can add any missing ones from hashes
if _, ok := b.ids[savedHash.ID]; !ok {
b.ids[savedHash.ID] = &[]ID{savedHash.ID}
}
}
hashCmp := func(existing, target SavedHash) int {
return cmp.Or(
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
cmp.Compare(existing.ID.Domain, target.ID.Domain),
cmp.Compare(existing.ID.ID, target.ID.ID),
)
}
slices.SortFunc(b.aHashes, hashCmp)
slices.SortFunc(b.dHashes, hashCmp)
slices.SortFunc(b.pHashes, hashCmp)
return nil
}
// EncodeHashes should already have a lock
func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
hashes := SavedHashes{
Hashes: [3]map[uint64]int{
make(map[uint64]int),
make(map[uint64]int),
make(map[uint64]int),
},
savedHashes := SavedHashes{
Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
}
idmap := map[*[]ID]int{}
// Only keep groups >1 as they will be mapped in SavedHashes.Hashes
for _, ids := range b.ids {
if _, ok := idmap[ids]; ok {
continue
if len(*ids) > 1 {
savedHashes.IDs = append(savedHashes.IDs, *ids)
}
idmap[ids] = len(hashes.IDs)
hashes.IDs = append(hashes.IDs, *ids)
}
for hashType, hashToID := range b.hashes {
for _, hash := range hashToID {
hashes.Hashes[hashType][hash.hash] = idmap[hash.ids]
}
}
return hashes, nil
savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...)
savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...)
savedHashes.Hashes = append(savedHashes.Hashes, b.pHashes...)
return savedHashes, nil
}
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
@ -171,7 +231,7 @@ func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
ids, found := b.ids[newid.OldID]
b.hashMutex.RUnlock()
if !found {
msg := "No IDs belonging to " + string(newid.OldID.Domain) + " exist on this server"
msg := "ID not found on this server"
return errors.New(msg)
}
b.hashMutex.Lock()
@ -195,7 +255,9 @@ func NewBasicMapStorage() (HashStorage, error) {
storage := &basicMapStorage{
hashMutex: &sync.RWMutex{},
ids: make(map[ID]*[]ID),
hashes: [3][]structHash{},
aHashes: []SavedHash{},
dHashes: []SavedHash{},
pHashes: []SavedHash{},
}
return storage, nil
}

113
CHDB.go
View File

@ -1,108 +1,11 @@
package ch
import (
"database/sql"
"fmt"
"log"
"os"
"path/filepath"
_ "modernc.org/sqlite"
)
type CHDB struct {
comicvinePath string
sql *sql.DB
deleteExisting bool
}
func OpenCHDB(path string, comicvinePath string, deleteExisting bool) (CHDB, error) {
path, _ = filepath.Abs(path)
err := os.MkdirAll(filepath.Dir(path), 0o755)
if err != nil {
panic("Unable to create directory " + filepath.Dir(path))
}
println(fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path))
sql, err := sql.Open("sqlite", fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path))
if err != nil {
return CHDB{comicvinePath, sql, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
}
err = sql.Ping()
if err != nil {
return CHDB{comicvinePath, sql, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
}
_, err = sql.Exec(`
CREATE TABLE IF NOT EXISTS paths(
path STRING PRIMARY KEY
);
CREATE TABLE IF NOT EXISTS bad_urls(
url STRING PRIMARY KEY
);
`)
if err != nil {
err = fmt.Errorf("Failed to create table: %w", err)
}
return CHDB{comicvinePath, sql, deleteExisting}, err
}
func (s CHDB) PathHashed(path string) bool {
path, _ = filepath.Rel(s.comicvinePath, path)
dbPath := ""
if s.deleteExisting {
_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath)
if dbPath == path {
os.Remove(filepath.Join(s.comicvinePath, path))
}
return dbPath == path
}
count := 0
_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", path).Scan(&count)
return count > 0
}
func (s CHDB) PathDownloaded(path string) bool {
relPath, _ := filepath.Rel(s.comicvinePath, path)
count := 0
_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", relPath).Scan(&count)
if count != 1 {
f, err := os.Open(path)
if err == nil {
defer f.Close()
}
return !os.IsNotExist(err)
}
return true
}
func (s CHDB) AddPath(path string) {
relPath, _ := filepath.Rel(s.comicvinePath, path)
_, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", relPath)
if err != nil {
log.Println(fmt.Errorf("Failed to insert %v into paths: %w", relPath, err))
}
if s.deleteExisting {
_ = os.Remove(path)
_ = RmdirP(filepath.Dir(path))
}
}
func (s CHDB) CheckURL(url string) bool {
count := 0
_ = s.sql.QueryRow("SELECT count(url) FROM bad_urls where url=?", url).Scan(&count)
return count > 0
}
func (s CHDB) AddURL(url string) {
_, err := s.sql.Exec("INSERT INTO bad_urls VALUES(?) ON CONFLICT DO NOTHING", url)
if err != nil {
log.Println(fmt.Errorf("Failed to insert %v into bad_urls: %w", url, err))
}
}
func (s CHDB) Close() error {
return s.sql.Close()
// CHDB abstracts the storage used to track which cover paths have already been
// hashed and which URLs are known to be bad. Satisfied by both the SQLite and
// bolt backed implementations in this package.
type CHDB interface {
// OpenCHDB(path string, comicvinePath string, deleteExisting bool) (CHDB, error)
// PathHashed reports whether path has already been recorded as hashed.
PathHashed(path string) bool
// PathDownloaded reports whether path is recorded or present on disk.
PathDownloaded(path string) bool
// AddPath records path as hashed.
AddPath(path string)
// CheckURL reports whether url has been recorded as bad.
CheckURL(url string) bool
// AddURL records url as a known-bad URL.
AddURL(url string)
// Close releases the underlying storage.
Close() error
}

177
CHDB_bolt.go Normal file
View File

@ -0,0 +1,177 @@
package ch
import (
"fmt"
"log"
"os"
"path/filepath"
"slices"
bolt "go.etcd.io/bbolt"
)
// CHDBBolt tracks hashed cover paths and known-bad URLs in a bbolt database.
type CHDBBolt struct {
comicvinePath string // root directory stored paths are made relative to
db *bolt.DB
deleteExisting bool // when true, files already recorded as hashed are removed from disk
}
// OpenCHDBBolt opens (creating if necessary) the bolt database at path and
// ensures the "paths" and "bad_urls" buckets exist. comicvinePath is the root
// stored paths are made relative to; deleteExisting causes already-hashed
// files to be removed from disk when encountered later.
func OpenCHDBBolt(path string, comicvinePath string, deleteExisting bool) (CHDBBolt, error) {
	path, _ = filepath.Abs(path)
	err := os.MkdirAll(filepath.Dir(path), 0o755)
	if err != nil {
		panic("Unable to create directory " + filepath.Dir(path))
	}
	db, err := bolt.Open(path, 0o644, nil)
	if err != nil {
		return CHDBBolt{comicvinePath, db, deleteExisting}, fmt.Errorf("failed to open database: %w", err)
	}
	err = db.Update(func(tx *bolt.Tx) error {
		_, err = tx.CreateBucketIfNotExists([]byte("paths"))
		if err != nil {
			return fmt.Errorf("failed to create bucket %v: %w", "paths", err)
		}
		_, err = tx.CreateBucketIfNotExists([]byte("bad_urls"))
		if err != nil {
			// fix: this error previously misreported the bucket as "paths"
			return fmt.Errorf("failed to create bucket %v: %w", "bad_urls", err)
		}
		return nil
	})
	if err != nil {
		db.Close()
		return CHDBBolt{comicvinePath, db, deleteExisting}, fmt.Errorf("failed to init database: %w", err)
	}
	return CHDBBolt{comicvinePath, db, deleteExisting}, nil
}
// Import bulk-inserts the given paths and bad URLs in a single transaction.
// Inputs are sorted first so keys arrive in bbolt's preferred insert order.
// Errors abort the transaction and are logged rather than returned, keeping
// the existing call signature.
func (c CHDBBolt) Import(paths []string, bad_urls []string) {
	slices.Sort(paths)
	slices.Sort(bad_urls)
	err := c.db.Update(func(tx *bolt.Tx) error {
		p := tx.Bucket([]byte("paths"))
		b := tx.Bucket([]byte("bad_urls"))
		for _, path := range paths {
			// Previously Put errors were silently dropped; propagate so the
			// transaction rolls back instead of committing a partial import.
			if err := p.Put([]byte(path), []byte{}); err != nil {
				return fmt.Errorf("failed to insert path %v: %w", path, err)
			}
		}
		for _, url := range bad_urls {
			if err := b.Put([]byte(url), []byte{}); err != nil {
				return fmt.Errorf("failed to insert bad url %v: %w", url, err)
			}
		}
		return nil
	})
	if err != nil {
		log.Println(fmt.Errorf("import failed: %w", err))
	}
}
// Dump returns every recorded path and bad URL, pre-sizing the results from
// the bucket stats. Intended for offline migration/export use.
func (c CHDBBolt) Dump() (paths []string, bad_urls []string) {
	c.db.View(func(tx *bolt.Tx) error {
		p := tx.Bucket([]byte("paths"))
		b := tx.Bucket([]byte("bad_urls"))
		paths = make([]string, 0, p.Inspect().KeyN)
		bad_urls = make([]string, 0, b.Inspect().KeyN)
		b.ForEach(func(k, v []byte) error {
			// string(k) already copies the key out of the mmap'd page; the
			// previous `+ ""` was redundant.
			bad_urls = append(bad_urls, string(k))
			return nil
		})
		p.ForEach(func(k, v []byte) error {
			paths = append(paths, string(k))
			return nil
		})
		return nil
	})
	return paths, bad_urls
}
// PathHashed reports whether path (made relative to the comicvine directory)
// has already been recorded as hashed. When deleteExisting is set, a known
// file is also removed from disk.
func (c CHDBBolt) PathHashed(path string) bool {
	relPath, _ := filepath.Rel(c.comicvinePath, path)
	tx, err := c.db.Begin(false)
	if err != nil {
		return false
	}
	defer tx.Rollback()
	if tx.Bucket([]byte("paths")).Get([]byte(relPath)) == nil {
		return false
	}
	if c.deleteExisting {
		os.Remove(filepath.Join(c.comicvinePath, relPath))
	}
	return true
}
// PathDownloaded reports whether path is either recorded in the database or
// present on disk.
func (c CHDBBolt) PathDownloaded(path string) bool {
	relPath, _ := filepath.Rel(c.comicvinePath, path)
	tx, err := c.db.Begin(false)
	if err != nil {
		return false
	}
	defer tx.Rollback()
	if tx.Bucket([]byte("paths")).Get([]byte(relPath)) != nil {
		return true
	}
	// Not recorded; fall back to probing the filesystem.
	f, err := os.Open(path)
	if err == nil {
		defer f.Close()
	}
	return !os.IsNotExist(err)
}
// AddPath records path (relative to the comicvine directory) as hashed and,
// when deleteExisting is set, removes the file and any now-empty parents.
// Errors are logged, not returned.
func (c CHDBBolt) AddPath(path string) {
	relPath, _ := filepath.Rel(c.comicvinePath, path)
	tx, err := c.db.Begin(true)
	if err != nil {
		c.db.Logger().Errorf("Failed to open transaction: %v", err)
		// fix: tx is nil here; falling through would panic in tx.Rollback()
		return
	}
	defer tx.Rollback()
	b := tx.Bucket([]byte("paths"))
	if err = b.Put([]byte(relPath), []byte{}); err != nil {
		log.Println(fmt.Errorf("Failed to insert %v (%v) into paths: %w", path, relPath, err))
	}
	tx.Commit()
	if c.deleteExisting {
		_ = os.Remove(path)
		_ = RmdirP(filepath.Dir(path))
	}
}
// CheckURL reports whether url has been recorded as bad.
func (c CHDBBolt) CheckURL(url string) bool {
	// fix: this is a read-only lookup, so open a read transaction. Begin(true)
	// would serialize every caller behind bolt's single writer lock.
	tx, err := c.db.Begin(false)
	if err != nil {
		return false
	}
	defer tx.Rollback()
	return tx.Bucket([]byte("bad_urls")).Get([]byte(url)) != nil
}
// AddURL records url as a known-bad URL. Errors are logged, not returned.
func (c CHDBBolt) AddURL(url string) {
	tx, err := c.db.Begin(true)
	if err != nil {
		c.db.Logger().Errorf("Failed to open transaction: %v", err)
		// fix: tx is nil here; falling through would panic in tx.Rollback()
		return
	}
	defer tx.Rollback()
	b := tx.Bucket([]byte("bad_urls"))
	if err = b.Put([]byte(url), []byte{}); err != nil {
		log.Println(fmt.Errorf("Failed to insert %v into bad_urls: %w", url, err))
	}
	tx.Commit()
}
// Close closes the underlying bolt database.
func (c CHDBBolt) Close() error {
return c.db.Close()
}

142
CHDB_sqlite.go Normal file
View File

@ -0,0 +1,142 @@
package ch
import (
"database/sql"
"fmt"
"log"
"os"
"path/filepath"
_ "modernc.org/sqlite"
)
// CHDBSqlite tracks hashed cover paths and known-bad URLs in a SQLite database.
type CHDBSqlite struct {
comicvinePath string // root directory stored paths are made relative to
sql *sql.DB
deleteExisting bool // when true, files already recorded as hashed are removed from disk
}
// OpenCHDBSqlite opens (creating if necessary) the SQLite database at path and
// ensures the paths and bad_urls tables exist. comicvinePath is the root that
// stored paths are made relative to; deleteExisting causes already-hashed
// files to be removed from disk when encountered later.
func OpenCHDBSqlite(path string, comicvinePath string, deleteExisting bool) (CHDBSqlite, error) {
	path, _ = filepath.Abs(path)
	err := os.MkdirAll(filepath.Dir(path), 0o755)
	if err != nil {
		panic("Unable to create directory " + filepath.Dir(path))
	}
	// Build the DSN once (it was previously formatted twice). busy_timeout
	// avoids immediate lock failures; WAL allows concurrent readers.
	dsn := fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path)
	println(dsn)
	db, err := sql.Open("sqlite", dsn)
	if err != nil {
		return CHDBSqlite{comicvinePath, db, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
	}
	err = db.Ping()
	if err != nil {
		return CHDBSqlite{comicvinePath, db, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
	}
	_, err = db.Exec(`
CREATE TABLE IF NOT EXISTS paths(
path STRING PRIMARY KEY
);
CREATE TABLE IF NOT EXISTS bad_urls(
url STRING PRIMARY KEY
);
`)
	if err != nil {
		err = fmt.Errorf("Failed to create table: %w", err)
	}
	return CHDBSqlite{comicvinePath, db, deleteExisting}, err
}
// Dump returns all recorded paths and bad URLs. It panics on query errors, as
// it is only used by the offline migration tool.
func (s CHDBSqlite) Dump() (paths []string, bad_urls []string) {
	paths = s.dumpColumn("SELECT path from paths")
	bad_urls = s.dumpColumn("SELECT url from bad_urls")
	return paths, bad_urls
}

// dumpColumn collects every value of a single-column query, panicking on error.
func (s CHDBSqlite) dumpColumn(query string) []string {
	rows, err := s.sql.Query(query)
	if err != nil {
		panic(err)
	}
	defer rows.Close()
	var values []string
	for rows.Next() {
		var value string
		if err := rows.Scan(&value); err != nil {
			panic(err)
		}
		values = append(values, value)
	}
	// fix: the iteration error was previously never checked, so a failure
	// mid-scan would silently truncate the dump.
	if err := rows.Err(); err != nil {
		panic(err)
	}
	return values
}
// PathHashed reports whether path (made relative to the comicvine directory)
// has already been recorded as hashed. When deleteExisting is set, a known
// file is also removed from disk.
func (s CHDBSqlite) PathHashed(path string) bool {
	relPath, _ := filepath.Rel(s.comicvinePath, path)
	if s.deleteExisting {
		var dbPath string
		_ = s.sql.QueryRow("SELECT path FROM paths where path=?", relPath).Scan(&dbPath)
		found := dbPath == relPath
		if found {
			os.Remove(filepath.Join(s.comicvinePath, relPath))
		}
		return found
	}
	var count int
	_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", relPath).Scan(&count)
	return count > 0
}
// PathDownloaded reports whether path is either recorded in the database or
// present on disk.
func (s CHDBSqlite) PathDownloaded(path string) bool {
	relPath, _ := filepath.Rel(s.comicvinePath, path)
	var count int
	_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", relPath).Scan(&count)
	if count == 1 {
		return true
	}
	// Not recorded; fall back to probing the filesystem.
	f, err := os.Open(path)
	if err == nil {
		defer f.Close()
	}
	return !os.IsNotExist(err)
}
// AddPath records path (relative to the comicvine directory) as hashed and,
// when deleteExisting is set, removes the file and any now-empty parents.
// Errors are logged, not returned.
func (s CHDBSqlite) AddPath(path string) {
	relPath, _ := filepath.Rel(s.comicvinePath, path)
	if _, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", relPath); err != nil {
		log.Println(fmt.Errorf("Failed to insert %v into paths: %w", relPath, err))
	}
	if !s.deleteExisting {
		return
	}
	_ = os.Remove(path)
	_ = RmdirP(filepath.Dir(path))
}
// CheckURL reports whether url has been recorded as bad.
func (s CHDBSqlite) CheckURL(url string) bool {
	var count int
	// Query errors are deliberately ignored; count stays 0 and we report false.
	_ = s.sql.QueryRow("SELECT count(url) FROM bad_urls where url=?", url).Scan(&count)
	return count > 0
}
// AddURL records url as a known-bad URL. Errors are logged, not returned.
func (s CHDBSqlite) AddURL(url string) {
	if _, err := s.sql.Exec("INSERT INTO bad_urls VALUES(?) ON CONFLICT DO NOTHING", url); err != nil {
		log.Println(fmt.Errorf("Failed to insert %v into bad_urls: %w", url, err))
	}
}
// Close closes the underlying SQLite database.
func (s CHDBSqlite) Close() error {
return s.sql.Close()
}

31
cmd/bolt-migrate/main.go Normal file
View File

@ -0,0 +1,31 @@
package main
import (
"fmt"
"os"
ch "gitea.narnian.us/lordwelch/comic-hasher"
)
// main migrates the comic-hasher path/bad-url database from SQLite to bbolt.
// Usage: bolt-migrate <comicvine path> <sqlite db path> <bolt db path>
func main() {
	// fix: os.Args was indexed unconditionally, panicking with a confusing
	// index-out-of-range error when arguments were missing.
	if len(os.Args) < 4 {
		fmt.Fprintf(os.Stderr, "usage: %s <cv path> <sqlite path> <bolt path>\n", os.Args[0])
		os.Exit(1)
	}
	fmt.Printf("cv path: %s Sqlite path: %s Bolt path: %s\n", os.Args[1], os.Args[2], os.Args[3])
	sql, err := ch.OpenCHDBSqlite(os.Args[2], os.Args[1], false)
	if err != nil {
		panic(err)
	}
	db, err := ch.OpenCHDBBolt(os.Args[3], os.Args[1], false)
	if err != nil {
		panic(err)
	}
	paths, bad_urls := sql.Dump()
	fmt.Printf("Dumped %d %d", len(paths), len(bad_urls))
	db.Import(paths, bad_urls)
	sql.Close()
	db.Close()
}

View File

@ -29,10 +29,9 @@ import (
"sync"
"time"
"github.com/disintegration/imaging"
"github.com/kr/pretty"
"github.com/vmihailenco/msgpack/v5"
_ "golang.org/x/image/tiff"
_ "golang.org/x/image/vp8"
_ "golang.org/x/image/vp8l"
@ -57,23 +56,6 @@ type Server struct {
onlyHashNewIDs bool
}
type Format int
const (
Msgpack = iota + 1
JSON
)
var formatNames = map[Format]string{
JSON: "json",
Msgpack: "msgpack",
}
var formatValues = map[string]Format{
"json": JSON,
"msgpack": Msgpack,
}
var bufPool = &sync.Pool{
New: func() any {
// The Pool's New function should generally only return pointer
@ -83,22 +65,6 @@ var bufPool = &sync.Pool{
},
}
func (f Format) String() string {
if name, known := formatNames[f]; known {
return name
}
return "Unknown"
}
func (f *Format) Set(s string) error {
if format, known := formatValues[strings.ToLower(s)]; known {
*f = format
} else {
return fmt.Errorf("Unknown format: %d", f)
}
return nil
}
type Storage int
const (
@ -141,8 +107,6 @@ func (f *Storage) Set(s string) error {
return nil
}
type Encoder func(any) ([]byte, error)
type Decoder func([]byte, interface{}) error
type CVOpts struct {
downloadCovers bool
APIKey string
@ -158,7 +122,7 @@ type Opts struct {
sqlitePath string
loadEmbeddedHashes bool
saveEmbeddedHashes bool
format Format
format ch.Format
hashesPath string
storageType Storage
onlyHashNewIDs bool
@ -169,7 +133,7 @@ type Opts struct {
}
func main() {
opts := Opts{format: Msgpack, storageType: BasicMap} // flag is weird
opts := Opts{format: ch.Msgpack, storageType: BasicMap} // flag is weird
wd, err := os.Getwd()
fmt.Println(err)
if err != nil {
@ -208,13 +172,11 @@ func main() {
panic(err)
}
}
// opts.onlyHashNewIDs = opts.onlyHashNewIDs || opts.deleteHashedImages
if opts.cv.downloadCovers {
if opts.cv.APIKey == "" {
log.Fatal("No ComicVine API Key provided")
}
}
opts.cv.thumbOnly = opts.cv.thumbOnly || (opts.onlyHashNewIDs && (opts.deleteHashedImages || !opts.cv.keepDownloaded))
opts.path, _ = filepath.Abs(opts.path)
if opts.hashesPath == "" {
opts.hashesPath = filepath.Join(opts.path, "hashes.gz")
@ -230,9 +192,7 @@ func main() {
opts.cv.path, _ = filepath.Abs(opts.cv.path)
pretty.Log(opts)
if !opts.cv.keepDownloaded && opts.onlyHashNewIDs {
panic("You need to fix your -cv-keep-downloaded and -only-hash-new-ids flags")
}
// TODO: Fix options
startServer(opts)
}
@ -553,9 +513,7 @@ func (s *Server) hasher(workerID int, done func(int)) {
}
select {
case <-s.Context.Done():
log.Println("Recieved quit")
return
// TODO: Check channel pipelines
case s.mappingQueue <- hash:
default:
}
@ -589,59 +547,12 @@ func (s *Server) reader(workerID int, done func(i int)) {
NewOnly: s.onlyHashNewIDs,
}
select {
case <-s.Context.Done():
log.Println("Recieved quit")
return
case s.hashingQueue <- im:
default:
}
}
}
// EncodeHashes must have a lock to s.hashMutex
func (s *Server) EncodeHashes(format Format) ([]byte, error) {
var encoder Encoder
switch format {
case Msgpack:
encoder = msgpack.Marshal
case JSON:
encoder = json.Marshal
default:
return nil, fmt.Errorf("Unknown format: %v", format)
}
hashes, err := s.hashes.EncodeHashes()
if err != nil {
return nil, err
}
return encoder(hashes)
}
// DecodeHashes must have a lock to s.hashMutex
func (s *Server) DecodeHashes(format Format, hashes []byte) error {
var decoder Decoder
switch format {
case Msgpack:
decoder = msgpack.Unmarshal
case JSON:
decoder = json.Unmarshal
default:
return fmt.Errorf("Unknown format: %v", format)
}
loadedHashes := ch.SavedHashes{}
err := decoder(hashes, &loadedHashes)
if err != nil || len(loadedHashes.Hashes[0]) == 0 {
fmt.Println("Failed to load hashes, checking if they are old hashes", format, ":", err)
oldHashes := make(ch.OldSavedHashes)
if err = decoder(hashes, &oldHashes); err != nil {
return err
}
loadedHashes = ch.ConvertSavedHashes(oldHashes)
}
return s.hashes.DecodeHashes(loadedHashes)
}
func (s *Server) HashLocalImages(opts Opts) {
if opts.coverPath == "" {
return
@ -700,28 +611,17 @@ func initializeStorage(opts Opts) (ch.HashStorage, error) {
return nil, errors.New("Unknown storage type provided")
}
func loadHashes(opts Opts, decodeHashes func(format Format, hashes []byte) error) {
func loadHashes(opts Opts) *ch.SavedHashes {
var hashes []byte
if opts.loadEmbeddedHashes && len(ch.Hashes) != 0 {
fmt.Println("Loading embedded hashes")
var err error
hashes := ch.Hashes
hashes = ch.Hashes
if gr, err := gzip.NewReader(bytes.NewReader(ch.Hashes)); err == nil {
hashes, err = io.ReadAll(gr)
if err != nil {
panic(fmt.Sprintf("Failed to read embedded hashes: %s", err))
}
}
var format Format
for _, format = range []Format{Msgpack, JSON} {
if err = decodeHashes(format, hashes); err == nil {
break
}
}
if err != nil {
panic(fmt.Sprintf("Failed to decode embedded hashes: %s", err))
}
fmt.Printf("Loaded embedded %s hashes\n", format)
} else {
fmt.Println("Loading saved hashes")
if f, err := os.Open(opts.hashesPath); err == nil {
@ -731,64 +631,67 @@ func loadHashes(opts Opts, decodeHashes func(format Format, hashes []byte) error
} else {
_, _ = f.Seek(0, io.SeekStart)
}
hashes, err := io.ReadAll(buf)
hashes, err = io.ReadAll(buf)
f.Close()
if err != nil {
panic(fmt.Sprintf("Failed to load hashes from disk: %s", err))
}
var format Format
for _, format = range []Format{Msgpack, JSON} {
if err = decodeHashes(format, hashes); err == nil {
break
}
}
if err != nil {
panic(fmt.Sprintf("Failed to decode hashes from disk: %s", err))
}
fmt.Printf("Loaded %s hashes from %q\n", format, opts.hashesPath)
} else {
if errors.Is(err, os.ErrNotExist) {
log.Println("No saved hashes to load")
} else {
log.Println("Unable to load saved hashes", err)
}
return nil
}
}
var (
format ch.Format
loadedHashes *ch.SavedHashes
err error
)
for _, format = range []ch.Format{ch.Msgpack, ch.JSON} {
if loadedHashes, err = ch.DecodeHashes(format, hashes); errors.Is(err, ch.DecodeError) {
continue
}
break
}
if err != nil {
panic(fmt.Sprintf("Failed to decode hashes: %s", err))
}
fmt.Printf("Loaded %s hashes\n", format)
return loadedHashes
}
func saveHashes(opts Opts, encodeHashes func(format Format) ([]byte, error)) {
if !opts.loadEmbeddedHashes || opts.saveEmbeddedHashes {
encodedHashes, err := encodeHashes(opts.format)
if err == nil {
if f, err := os.Create(opts.hashesPath); err == nil {
failed := false
gzw := gzip.NewWriter(f)
_, err := gzw.Write(encodedHashes)
if err != nil {
log.Println("Failed to write hashes", err)
failed = true
}
err = gzw.Close()
if err != nil {
log.Println("Failed to write hashes", err)
failed = true
}
err = f.Close()
if err != nil {
log.Println("Failed to write hashes", err)
failed = true
}
if !failed {
log.Println("Successfully saved hashes")
}
} else {
log.Println("Unabled to save hashes", err)
}
} else {
fmt.Printf("Unable to encode hashes as %v: %v", opts.format, err)
}
// saveHashes gzips the encoded hashes and writes them to opts.hashesPath.
// It refuses to overwrite on-disk hashes when the embedded copy was loaded,
// unless saving embedded hashes was explicitly requested.
func saveHashes(opts Opts, hashes ch.SavedHashes) error {
	if opts.loadEmbeddedHashes && !opts.saveEmbeddedHashes {
		return errors.New("refusing to save embedded hashes")
	}
	encodedHashes, err := ch.EncodeHashes(hashes, opts.format)
	if err != nil {
		return fmt.Errorf("unable to encode hashes as %v: %w", opts.format, err)
	}
	f, err := os.Create(opts.hashesPath)
	if err != nil {
		return fmt.Errorf("unable to save hashes: %w", err)
	}
	gzw := gzip.NewWriter(f)
	if _, err = gzw.Write(encodedHashes); err != nil {
		f.Close() // fix: don't leak the file handle on a failed write
		return fmt.Errorf("failed to write hashes: %w", err)
	}
	if err = gzw.Close(); err != nil {
		f.Close() // fix: don't leak the file handle on a failed flush
		return fmt.Errorf("failed to write hashes: %w", err)
	}
	if err = f.Close(); err != nil {
		return fmt.Errorf("failed to write hashes: %w", err)
	}
	log.Println("Successfully saved hashes")
	return nil
}
func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, server Server) {
@ -803,7 +706,6 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
}
if chdb.PathHashed(path.Dest) {
// log.Println(path.Dest, "File has already been hashed, it may not be saved in the hashes file because we currently don't save any hashes if we've crashed")
continue
}
var (
@ -832,7 +734,7 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
}
continue // skip this image
}
chdb.AddPath(path.Dest) // Add to sqlite db and remove file if opts.deleteHashedImages is true
chdb.AddPath(path.Dest) // Add to db and remove file if opts.deleteHashedImages is true
im := ch.Im{
Im: i,
@ -845,6 +747,7 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
}
func startServer(opts Opts) {
imaging.SetMaxProcs(2)
if opts.cpuprofile != "" {
f, err := os.Create(opts.cpuprofile)
if err != nil {
@ -904,31 +807,37 @@ func startServer(opts Opts) {
mwg.Add(1)
go server.mapper(func() { log.Println("Mapper 0 completed"); mwg.Done() })
// server.DecodeHashes would normally need a write lock
// DecodeHashes would normally need a write lock
// nothing else has been started yet so we don't need one
loadHashes(opts, server.DecodeHashes)
if err := server.hashes.DecodeHashes(*loadHashes(opts)); err != nil {
panic(err)
}
server.HashLocalImages(opts)
chdb, err := ch.OpenCHDB(filepath.Join(opts.path, "ch.sqlite"), opts.cv.path, opts.deleteHashedImages)
chdb, err := ch.OpenCHDBBolt(filepath.Join(opts.path, "chdb.bolt"), opts.cv.path, opts.deleteHashedImages)
if err != nil {
panic(err)
}
log.Println("Init downloaders")
dwg := sync.WaitGroup{}
dcwg := sync.WaitGroup{}
finishedDownloadQueue := make(chan cv.Download, 1)
go downloadProcessor(chdb, opts, finishedDownloadQueue, server)
dcwg.Add(1)
go func() {
defer dcwg.Done()
downloadProcessor(chdb, opts, finishedDownloadQueue, server)
}()
if opts.cv.downloadCovers {
dwg.Add(1)
imageTypes := []string{}
if opts.cv.thumbOnly {
imageTypes = append(imageTypes, "thumb_url")
}
if opts.cv.originalOnly {
} else if opts.cv.originalOnly {
imageTypes = append(imageTypes, "original_url")
}
cvdownloader := cv.NewCVDownloader(server.Context, bufPool, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.keepDownloaded, opts.cv.hashDownloaded, finishedDownloadQueue)
cvdownloader := cv.NewCVDownloader(server.Context, bufPool, opts.onlyHashNewIDs, server.hashes.GetIDs, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.keepDownloaded, opts.cv.hashDownloaded, finishedDownloadQueue)
go func() {
defer dwg.Done()
cv.DownloadCovers(cvdownloader)
@ -954,7 +863,8 @@ func startServer(opts Opts) {
close(server.readerQueue)
log.Println("waiting on readers")
rwg.Wait()
for range server.readerQueue {
for dw := range server.readerQueue {
fmt.Println("Skipping read", dw)
}
log.Println("waiting on downloaders")
@ -962,28 +872,39 @@ func startServer(opts Opts) {
log.Println("waiting on downloader")
close(finishedDownloadQueue)
for range finishedDownloadQueue {
dcwg.Wait() // Wait for the download processor to finish
for dw := range finishedDownloadQueue {
fmt.Println("Skipping download", dw.IssueID)
}
// close(server.hashingQueue) // Closed by downloadProcessor
log.Println("waiting on hashers")
hwg.Wait()
for range server.hashingQueue {
for dw := range server.hashingQueue {
fmt.Println("Skipping hashing", dw.ID)
}
close(server.mappingQueue)
log.Println("waiting on mapper")
mwg.Wait()
for range server.mappingQueue {
for dw := range server.mappingQueue {
fmt.Println("Skipping mapping", dw.ID)
}
close(server.signalQueue)
for range server.signalQueue {
for dw := range server.signalQueue {
fmt.Println("Skipping", dw)
}
_ = chdb.Close()
// server.EncodeHashes would normally need a read lock
// the server has been stopped so it's not needed here
saveHashes(opts, server.EncodeHashes)
hashes, err := server.hashes.EncodeHashes()
if err != nil {
panic(fmt.Errorf("Failed to save hashes: %w", err))
}
if err = saveHashes(opts, hashes); err != nil {
panic(err)
}
}

View File

@ -73,14 +73,16 @@ type CVDownloader struct {
Context context.Context
FinishedDownloadQueue chan Download
fileList []string
totalResults int
imageWG sync.WaitGroup
downloadQueue chan *CVResult
imageDownloads chan download
notFound chan download
chdb ch.CHDB
bufPool *sync.Pool
fileList []string
totalResults int
imageWG sync.WaitGroup
downloadQueue chan *CVResult
imageDownloads chan download
notFound chan download
chdb ch.CHDB
bufPool *sync.Pool
get_id func(id ch.ID) ch.IDList
only_hash_new_ids bool
}
var (
@ -128,8 +130,8 @@ func (c *CVDownloader) loadIssues(filename string) (*CVResult, error) {
return tmp, nil
}
func Get(ctx context.Context, url string) (*http.Response, error, func()) {
ctx, cancel := context.WithTimeout(ctx, time.Second*20)
func Get(url string) (*http.Response, error, func()) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*20)
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err, cancel
@ -144,7 +146,7 @@ func getOffset(name string) int {
}
// updateIssues c.downloadQueue must not be closed before this function has returned
func (c *CVDownloader) updateIssues() {
func (c *CVDownloader) updateIssues() int {
base_url, err := url.Parse("https://comicvine.gamespot.com/api/issues/?sort=date_added,id:asc&format=json&field_list=id,image,volume")
if err != nil {
log.Fatal(err)
@ -183,7 +185,7 @@ func (c *CVDownloader) updateIssues() {
for offset = 0; offset <= c.totalResults; offset += 100 {
index := offset / 100
if c.hasQuit() {
return
return offset - 100
}
if index < len(c.fileList) {
if getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
@ -195,7 +197,7 @@ func (c *CVDownloader) updateIssues() {
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
if index != len(c.fileList)-1 {
log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
return
return offset - 100
}
log.Println("Deleting the last page to detect new comics")
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
@ -231,7 +233,7 @@ func (c *CVDownloader) updateIssues() {
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
if index != len(c.fileList)-1 {
log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
return
return offset - 100
}
log.Println("Deleting the last page to detect new comics")
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
@ -255,17 +257,17 @@ func (c *CVDownloader) updateIssues() {
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
return
return offset - 100
case <-time.After(10 * time.Second):
}
resp, err, cancelDownloadCTX := Get(c.Context, URI.String())
resp, err, cancelDownloadCTX := Get(URI.String())
if err != nil {
cancelDownloadCTX()
if retry(URI.String(), err) {
continue
}
// Fail and let comic-hasher try the whole thing again later
return
return offset - 100
}
if resp.StatusCode != 200 {
cancelDownloadCTX()
@ -277,7 +279,7 @@ func (c *CVDownloader) updateIssues() {
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
_ = resp.Body.Close()
return
return offset - 100
case <-time.After(1 * time.Hour):
}
}
@ -295,7 +297,7 @@ func (c *CVDownloader) updateIssues() {
if retry(URI.String(), err) {
continue
}
return
return offset - 100
}
cancelDownloadCTX()
if issue.NumberOfTotalResults > c.totalResults {
@ -303,15 +305,13 @@ func (c *CVDownloader) updateIssues() {
}
prev = -1
failCount = 0
// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
select {
case <-c.Context.Done():
return
case c.downloadQueue <- issue:
}
c.fileList = ch.Insert(c.fileList, fmt.Sprintf("cv-%v.json", offset))
log.Printf("Downloaded %s/cv-%v.json", c.JSONPath, offset)
}
return offset
}
type download struct {
@ -328,16 +328,9 @@ func (c *CVDownloader) start_downloader() {
go func() {
log.Println("starting downloader", i)
for dl := range c.imageDownloads {
if c.hasQuit() {
c.imageWG.Done()
continue // We must continue so that c.imageWG will complete otherwise it will hang forever
}
if dl.finished {
select {
case <-c.Context.Done():
c.imageWG.Done()
continue
case c.FinishedDownloadQueue <- Download{
URL: dl.url,
Dest: dl.dest,
@ -348,7 +341,7 @@ func (c *CVDownloader) start_downloader() {
continue
}
dir := filepath.Dir(dl.dest)
resp, err, cancelDownload := Get(c.Context, dl.url)
resp, err, cancelDownload := Get(dl.url)
if err != nil {
cancelDownload()
log.Println("Failed to download", dl.volumeID, "/", dl.issueID, dl.url, err)
@ -449,9 +442,16 @@ func (c *CVDownloader) downloadImages() {
}
imageURLs := []i{{issue.Image.IconURL, "icon_url"}, {issue.Image.MediumURL, "medium_url"}, {issue.Image.ScreenURL, "screen_url"}, {issue.Image.ScreenLargeURL, "screen_large_url"}, {issue.Image.SmallURL, "small_url"}, {issue.Image.SuperURL, "super_url"}, {issue.Image.ThumbURL, "thumb_url"}, {issue.Image.TinyURL, "tiny_url"}, {issue.Image.OriginalURL, "original_url"}}
for _, image := range imageURLs {
if c.hasQuit() {
return
if strings.HasSuffix(image.url, "6373148-blank.png") {
c.notFound <- download{
url: image.url,
offset: list.Offset,
volumeID: issue.Volume.ID,
issueID: issue.ID,
}
continue
}
if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
continue
}
@ -469,6 +469,7 @@ func (c *CVDownloader) downloadImages() {
issueID: issue.ID,
finished: true,
}
continue
}
ext := strings.TrimSuffix(strings.ToLower(path.Ext(uri.Path)), "~original")
if ext == "" || (len(ext) > 4 && !slices.Contains([]string{".avif", ".webp", ".tiff", ".heif"}, ext)) {
@ -477,7 +478,11 @@ func (c *CVDownloader) downloadImages() {
dir := filepath.Join(c.ImagePath, strconv.Itoa(issue.Volume.ID), strconv.Itoa(issue.ID))
path := filepath.Join(dir, image.name+ext)
if c.chdb.PathDownloaded(path) {
ids := c.get_id(ch.ID{
Domain: ch.ComicVine,
ID: strconv.Itoa(issue.ID),
})
if c.chdb.PathDownloaded(path) || c.only_hash_new_ids && len(ids) > 0 {
if _, err = os.Stat(path); c.SendExistingImages && err == nil {
// We don't add to the count of added as these should be processed immediately
log.Printf("Sending Existing image %v/%v %v", issue.Volume.ID, issue.ID, path)
@ -516,8 +521,6 @@ func (c *CVDownloader) downloadImages() {
t := 10 * time.Second
log.Println("Waiting for", t, "at offset", list.Offset, "had to wait for", waited)
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
return
case <-time.After(t):
}
} else {
@ -543,9 +546,6 @@ list:
}
for _, issue := range list.Results {
for _, url := range []string{issue.Image.IconURL, issue.Image.MediumURL, issue.Image.ScreenURL, issue.Image.ScreenLargeURL, issue.Image.SmallURL, issue.Image.SuperURL, issue.Image.ThumbURL, issue.Image.TinyURL, issue.Image.OriginalURL} {
if c.hasQuit() {
return ErrQuit
}
if c.chdb.CheckURL(url) {
indexesToRemove = append(indexesToRemove, i)
if err := os.Remove(filepath.Join(c.JSONPath, jsonFile)); err != nil {
@ -590,7 +590,7 @@ func (c *CVDownloader) cleanDirs() {
})
}
func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, only_hash_new_ids bool, get_id func(id ch.ID) ch.IDList, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
return &CVDownloader{
Context: ctx,
JSONPath: filepath.Join(workPath, "_json"),
@ -602,6 +602,8 @@ func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, chdb ch.CHDB, work
KeepDownloadedImages: keepDownloadedImages,
ImageTypes: imageTypes,
chdb: chdb,
get_id: get_id,
only_hash_new_ids: only_hash_new_ids,
}
}
@ -609,9 +611,9 @@ func DownloadCovers(c *CVDownloader) {
var (
err error
)
c.downloadQueue = make(chan *CVResult, 100) // This is just json it shouldn't take up much more than 122 MB
c.imageDownloads = make(chan download, 1) // These are just URLs should only take a few MB
c.notFound = make(chan download, 1) // Same here
c.downloadQueue = make(chan *CVResult) // This is just json it shouldn't take up much more than 122 MB
c.imageDownloads = make(chan download, 1) // These are just URLs should only take a few MB
c.notFound = make(chan download, 1) // Same here
os.MkdirAll(c.JSONPath, 0o777)
f, _ := os.Create(filepath.Join(c.ImagePath, ".keep"))
f.Close()
@ -643,7 +645,7 @@ func DownloadCovers(c *CVDownloader) {
dwg.Done()
}()
c.updateIssues()
offset := c.updateIssues()
issueCount := len(c.fileList) * 100
log.Println("Number of issues", issueCount, " expected:", c.totalResults)
@ -654,15 +656,19 @@ func DownloadCovers(c *CVDownloader) {
log.Println("Waiting for downloaders")
dwg.Wait()
close(c.imageDownloads)
for range c.imageDownloads {
for dw := range c.imageDownloads {
fmt.Println("Skipping cv download", dw.issueID)
}
close(c.notFound)
for range c.notFound {
for dw := range c.notFound {
fmt.Println("Skipping not found", dw.issueID)
}
// We drain this at the end because we need to wait for the images to download
for range c.downloadQueue {
for dw := range c.downloadQueue {
fmt.Println("Skipping page download", dw.Offset)
}
log.Println("Completed downloading images")
log.Println("Last offset", offset)
}

38
go.mod
View File

@ -1,25 +1,23 @@
module gitea.narnian.us/lordwelch/comic-hasher
go 1.22.0
go 1.23.0
toolchain go1.22.5
toolchain go1.24.0
require (
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250113012632-72c18ebad3c6
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09
github.com/fmartingr/go-comicinfo/v2 v2.0.2
github.com/kr/pretty v0.1.0
github.com/mattn/go-sqlite3 v1.14.24
github.com/mholt/archiver/v4 v4.0.0-alpha.8
github.com/ncruces/go-sqlite3 v0.22.0
golang.org/x/image v0.23.0
golang.org/x/text v0.21.0
github.com/ncruces/go-sqlite3 v0.23.1
github.com/vmihailenco/msgpack v4.0.4+incompatible
go.etcd.io/bbolt v1.4.0
golang.org/x/image v0.24.0
golang.org/x/text v0.22.0
gonum.org/v1/gonum v0.15.1
modernc.org/sqlite v1.34.5
)
require (
github.com/vmihailenco/msgpack/v5 v5.4.1
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
modernc.org/sqlite v1.35.0
)
require (
@ -28,10 +26,10 @@ require (
github.com/bodgit/sevenzip v1.3.0 // indirect
github.com/bodgit/windows v1.0.0 // indirect
github.com/connesc/cipherio v0.2.1 // indirect
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09 // indirect
github.com/dsnet/compress v0.0.1 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/golang/mock v1.6.0 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
@ -45,15 +43,17 @@ require (
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect
github.com/pierrec/lz4/v4 v4.1.15 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/tetratelabs/wazero v1.8.2 // indirect
github.com/tetratelabs/wazero v1.9.0 // indirect
github.com/therootcompany/xz v1.0.1 // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect
golang.org/x/sys v0.29.0 // indirect
modernc.org/libc v1.55.3 // indirect
modernc.org/mathutil v1.6.0 // indirect
modernc.org/memory v1.8.0 // indirect
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
golang.org/x/sys v0.30.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.36.5 // indirect
modernc.org/libc v1.61.13 // indirect
modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.8.2 // indirect
)
replace golang.org/x/text v0.17.0 => github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f

119
go.sum
View File

@ -15,8 +15,8 @@ cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250113012632-72c18ebad3c6 h1:DqwlGXgaLjXVEio1+podh25e7q/phY02aTMsYkfryqQ=
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250113012632-72c18ebad3c6/go.mod h1:q+HjeXYjflX3nk3qt74Gho8z+6MGe5lZO/Po+kiUK7E=
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d h1:mFnVC/tEHk6woq6FBulwzGcuNdYn+zNhXNBILuetQJs=
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d/go.mod h1:UDwa7njhbB5nzxIjHbT9Mjlve9GYn3wzxAcQax1XRvE=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
@ -65,6 +65,10 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@ -73,6 +77,9 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
@ -111,8 +118,8 @@ github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBW
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mholt/archiver/v4 v4.0.0-alpha.8 h1:tRGQuDVPh66WCOelqe6LIGh0gwmfwxUrSSDunscGsRM=
github.com/mholt/archiver/v4 v4.0.0-alpha.8/go.mod h1:5f7FUYGXdJWUjESffJaYR4R60VhnHxb2X3T1teMyv5A=
github.com/ncruces/go-sqlite3 v0.22.0 h1:FkGSBhd0TY6e66k1LVhyEpA+RnG/8QkQNed5pjIk4cs=
github.com/ncruces/go-sqlite3 v0.22.0/go.mod h1:ueXOZXYZS2OFQirCU3mHneDwJm5fGKHrtccYBeGEV7M=
github.com/ncruces/go-sqlite3 v0.23.1 h1:zGAd76q+Tr18z/xKGatUlzBQdjR3J+rexfANUcjAgkY=
github.com/ncruces/go-sqlite3 v0.23.1/go.mod h1:Xg3FyAZl25HcBSFmcbymdfoTqD7jRnBUmv1jSrbIjdE=
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M=
@ -131,20 +138,21 @@ github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tetratelabs/wazero v1.8.2 h1:yIgLR/b2bN31bjxwXHD8a3d+BogigR952csSDdLYEv4=
github.com/tetratelabs/wazero v1.8.2/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=
github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8=
github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8=
github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI=
github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk=
go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
@ -155,6 +163,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -163,13 +172,13 @@ golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE
golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa h1:ELnwvuAXPNtPk1TJRuGkI9fDTwym6AYBu0qzT8AcHdI=
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ=
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa h1:t2QcU6V556bFjYgu4L6C+6VrCPyJZ+eyRsABUPs1mz4=
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa/go.mod h1:BHOTPb3L19zxehTsLoJXVaTktb06DFgmdW6Wb9s8jqk=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68=
golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY=
golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@ -186,8 +195,9 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM=
golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -202,7 +212,9 @@ golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -215,8 +227,9 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -233,17 +246,23 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@ -271,8 +290,9 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY=
golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@ -292,6 +312,8 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
@ -312,7 +334,12 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac
google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
@ -324,28 +351,28 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
modernc.org/cc/v4 v4.21.4 h1:3Be/Rdo1fpr8GrQ7IVw9OHtplU4gWbb+wNgeoBMmGLQ=
modernc.org/cc/v4 v4.21.4/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ=
modernc.org/ccgo/v4 v4.19.2 h1:lwQZgvboKD0jBwdaeVCTouxhxAyN6iawF3STraAal8Y=
modernc.org/ccgo/v4 v4.19.2/go.mod h1:ysS3mxiMV38XGRTTcgo0DQTeTmAO4oCmJl1nX9VFI3s=
modernc.org/cc/v4 v4.24.4 h1:TFkx1s6dCkQpd6dKurBNmpo+G8Zl4Sq/ztJ+2+DEsh0=
modernc.org/cc/v4 v4.24.4/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
modernc.org/ccgo/v4 v4.23.16 h1:Z2N+kk38b7SfySC1ZkpGLN2vthNJP1+ZzGZIlH7uBxo=
modernc.org/ccgo/v4 v4.23.16/go.mod h1:nNma8goMTY7aQZQNTyN9AIoJfxav4nvTnvKThAeMDdo=
modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE=
modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ=
modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw=
modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU=
modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=
modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=
modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=
modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4=
modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0=
modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc=
modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss=
modernc.org/sqlite v1.34.5 h1:Bb6SR13/fjp15jt70CL4f18JIN7p7dnMExd+UFnF15g=
modernc.org/sqlite v1.34.5/go.mod h1:YLuNmX9NKs8wRNK2ko1LW1NGYcc9FkBO69JOt1AR9JE=
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
modernc.org/gc/v2 v2.6.3 h1:aJVhcqAte49LF+mGveZ5KPlsp4tdGdAOT4sipJXADjw=
modernc.org/gc/v2 v2.6.3/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
modernc.org/libc v1.61.13 h1:3LRd6ZO1ezsFiX1y+bHd1ipyEHIJKvuprv0sLTBwLW8=
modernc.org/libc v1.61.13/go.mod h1:8F/uJWL/3nNil0Lgt1Dpz+GgkApWh04N3el3hxJcA6E=
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
modernc.org/memory v1.8.2 h1:cL9L4bcoAObu4NkxOlKWBWtNHIsnnACGF/TbqQ6sbcI=
modernc.org/memory v1.8.2/go.mod h1:ZbjSvMO5NQ1A2i3bWeDiVMxIorXwdClKE/0SZ+BMotU=
modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
modernc.org/sqlite v1.35.0 h1:yQps4fegMnZFdphtzlfQTCNBWtS0CZv48pRpW3RFHRw=
modernc.org/sqlite v1.35.0/go.mod h1:9cr2sicr7jIaWTBKQmAxQLfBv9LL0su4ZTEV+utt3ic=
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=

View File

@ -39,7 +39,8 @@ const (
)
const (
ComicVine Source = "comicvine.gamespot.com"
ComicVine Source = "comicvine.gamespot.com"
SavedHashVersion int = 2
)
type Source string
@ -78,16 +79,9 @@ type Hash struct {
}
// IDList is a map of domain to ID eg IDs["comicvine.gamespot.com"] = []string{"1235"}
// Maps are extremely expensive in go for small maps this should only be used to return info to a user no internal code should use this
// Maps are extremely expensive in go for small maps this should only be used to return info to a user or as a map containing all IDs for a source
type IDList map[Source][]string
type OldSavedHashes map[Source]map[string][3]uint64
type SavedHashes struct {
IDs [][]ID
Hashes [3]map[uint64]int
}
func ToIDList(ids []ID) IDList {
idlist := IDList{}
for _, id := range ids {
@ -96,10 +90,10 @@ func ToIDList(ids []ID) IDList {
return idlist
}
func InsertID(ids []ID, id ID) []ID {
index, itemFound := slices.BinarySearchFunc(ids, id, func(e ID, t ID) int {
index, itemFound := slices.BinarySearchFunc(ids, id, func(existing ID, target ID) int {
return cmp.Or(
cmp.Compare(e.Domain, t.Domain),
cmp.Compare(e.ID, t.ID),
cmp.Compare(existing.Domain, target.Domain),
cmp.Compare(existing.ID, target.ID),
)
})
if itemFound {
@ -107,52 +101,6 @@ func InsertID(ids []ID, id ID) []ID {
}
return slices.Insert(ids, index, id)
}
func (s *SavedHashes) InsertHash(hash Hash, id ID) {
for i, h := range s.Hashes {
if h == nil {
s.Hashes[i] = make(map[uint64]int)
}
}
hashType := int(hash.Kind) - 1
idx, hashFound := s.Hashes[hashType][hash.Hash]
if !hashFound {
idx = len(s.IDs)
s.IDs = append(s.IDs, make([]ID, 0, 3))
}
s.IDs[idx] = InsertID(s.IDs[idx], id)
s.Hashes[hashType][hash.Hash] = idx
}
func ConvertSavedHashes(oldHashes OldSavedHashes) SavedHashes {
t := SavedHashes{}
idcount := 0
for _, ids := range oldHashes {
idcount += len(ids)
}
t.IDs = make([][]ID, 0, idcount)
t.Hashes[0] = make(map[uint64]int, idcount)
t.Hashes[1] = make(map[uint64]int, idcount)
t.Hashes[2] = make(map[uint64]int, idcount)
for domain, sourceHashes := range oldHashes {
for id, hashes := range sourceHashes {
idx := len(t.IDs)
t.IDs = append(t.IDs, []ID{{domain, id}})
for hashType, hash := range hashes {
t.Hashes[hashType][hash] = idx
}
}
}
fmt.Println("Expected number of IDs", idcount)
idcount = 0
for _, ids := range t.IDs {
idcount += len(ids)
}
fmt.Println("length of hashes", len(t.Hashes[0])+len(t.Hashes[1])+len(t.Hashes[2]))
fmt.Println("Length of ID lists", len(t.IDs))
fmt.Println("Total number of IDs", idcount)
return t
}
type NewIDs struct {
OldID ID
@ -169,7 +117,7 @@ type HashStorage interface {
}
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
matchingHashes := make([]Match, 0, 20) // hope that we don't need all of them
for _, storedHash := range hashes {
distance := bits.OnesCount64(searchHash ^ storedHash)
if distance <= maxDistance {

206
map.go
View File

@ -1,150 +1,156 @@
package ch
import (
"cmp"
"fmt"
"slices"
"sync"
"gitea.narnian.us/lordwelch/goimagehash"
)
type MapStorage struct {
basicMapStorage
partialHash [3][8]map[uint8][]uint64
partialAHash [8]map[uint8][]uint64
partialDHash [8]map[uint8][]uint64
partialPHash [8]map[uint8][]uint64
}
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var foundMatches []Result
var (
foundMatches []Result
tl timeLog
)
m.hashMutex.RLock()
defer m.hashMutex.RUnlock()
resetTime()
defer logTime("Search Complete")
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes {
hashType := int(hash.Kind) - 1
index, hashFound := m.findHash(hashType, hash.Hash)
if hashFound {
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: hash,
IDs: ToIDList(*m.hashes[hashType][index].ids),
})
}
}
// If we have exact matches don't bother with other matches
logTime("Search Exact")
if len(foundMatches) > 0 && exactOnly {
return foundMatches, nil
}
if exactOnly {
return m.basicMapStorage.GetMatches(hashes, max, exactOnly)
}
tl.resetTime()
defer tl.logTime("Search Complete")
totalPartialHashes := 0
for _, searchHash := range hashes {
foundHashes := make(map[uint64]struct{})
hashType := int(searchHash.Kind) - 1
currentHashes, currentPartialHashes := m.getCurrentHashes(searchHash.Kind)
potentialMatches := []uint64{}
for i, partialHash := range SplitHash(searchHash.Hash) {
partialHashes := m.partialHash[hashType][i][partialHash]
totalPartialHashes += len(partialHashes)
for _, match := range Atleast(max, searchHash.Hash, partialHashes) {
_, alreadyMatched := foundHashes[match.Hash]
if index, hashFound := m.findHash(hashType, match.Hash); hashFound && !alreadyMatched {
foundHashes[match.Hash] = struct{}{}
foundMatches = append(foundMatches, Result{IDs: ToIDList(*m.hashes[hashType][index].ids), Distance: match.Distance, Hash: Hash{Hash: match.Hash, Kind: searchHash.Kind}})
potentialMatches = append(potentialMatches, currentPartialHashes[i][partialHash]...)
}
totalPartialHashes += len(potentialMatches)
mappedIds := map[*[]ID]bool{}
for _, match := range Atleast(max, searchHash.Hash, potentialMatches) {
matchedHash := Hash{match.Hash, searchHash.Kind}
index, count := m.findHash(matchedHash)
if count < 1 {
continue
}
for _, storedHash := range currentHashes[index : index+count] {
ids := m.ids[storedHash.ID]
if mappedIds[ids] {
continue
}
mappedIds[ids] = true
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: storedHash.Hash,
IDs: ToIDList(*m.ids[storedHash.ID]),
})
}
}
}
fmt.Println("Total partial hashes tested:", totalPartialHashes)
go m.printSizes()
return foundMatches, nil
}
// getCurrentHashes returns the sorted hash slice and the partial-hash
// lookup maps for the requested hash kind; it panics on an unknown kind.
// Callers must hold at least a read lock on hashMutex before using it.
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]SavedHash, [8]map[uint8][]uint64) {
	switch kind {
	case goimagehash.AHash:
		return m.aHashes, m.partialAHash
	case goimagehash.DHash:
		return m.dHashes, m.partialDHash
	case goimagehash.PHash:
		return m.pHashes, m.partialPHash
	}
	panic("Unknown hash type: " + kind.String())
}
func (m *MapStorage) MapHashes(hash ImageHash) {
m.basicMapStorage.MapHashes(hash)
for _, hash := range hash.Hashes {
hashType := int(hash.Kind) - 1
_, partialHashes := m.getCurrentHashes(hash.Kind)
for i, partialHash := range SplitHash(hash.Hash) {
m.partialHash[hashType][i][partialHash] = Insert(m.partialHash[hashType][i][partialHash], hash.Hash)
partialHashes[i][partialHash] = Insert(partialHashes[i][partialHash], hash.Hash)
}
}
}
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
for hashType, sourceHashes := range hashes.Hashes {
m.hashes[hashType] = make([]structHash, len(sourceHashes))
for savedHash, idlistLocation := range sourceHashes {
m.hashes[hashType] = append(m.hashes[hashType], structHash{savedHash, &hashes.IDs[idlistLocation]})
}
if err := m.basicMapStorage.DecodeHashes(hashes); err != nil {
return err
}
for hashType := range m.hashes {
slices.SortFunc(m.hashes[hashType], func(a, b structHash) int {
return cmp.Compare(a.hash, b.hash)
})
}
m.printSizes()
for _, partialHashes := range m.partialHash {
for _, partMap := range partialHashes {
for part, hashes := range partMap {
slices.Sort(hashes)
partMap[part] = slices.Compact(hashes)
}
}
}
m.printSizes()
mapPartialHashes(m.aHashes, m.partialAHash)
mapPartialHashes(m.dHashes, m.partialDHash)
mapPartialHashes(m.pHashes, m.partialPHash)
compactPartialHashes(m.partialAHash)
compactPartialHashes(m.partialDHash)
compactPartialHashes(m.partialPHash)
return nil
}
func (m *MapStorage) printSizes() {
fmt.Println("Length of hashes:", len(m.hashes[0])+len(m.hashes[1])+len(m.hashes[2]))
// fmt.Println("Size of", "hashes:", size.Of(m.hashes)/1024/1024, "MB")
// fmt.Println("Size of", "ids:", size.Of(m.ids)/1024/1024, "MB")
// fmt.Println("Size of", "MapStorage:", size.Of(m)/1024/1024, "MB")
}
func NewMapStorage() (HashStorage, error) {
storage := &MapStorage{
basicMapStorage: basicMapStorage{
hashMutex: &sync.RWMutex{},
hashes: [3][]structHash{
[]structHash{},
[]structHash{},
[]structHash{},
},
},
partialHash: [3][8]map[uint8][]uint64{
{
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
},
{
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
},
{
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
make(map[uint8][]uint64),
},
ids: make(map[ID]*[]ID),
aHashes: []SavedHash{},
dHashes: []SavedHash{},
pHashes: []SavedHash{},
},
partialAHash: newPartialHash(),
partialDHash: newPartialHash(),
partialPHash: newPartialHash(),
}
return storage, nil
}
// newPartialHash allocates the eight empty byte-position maps used to
// index full hashes by each of their eight constituent bytes.
func newPartialHash() [8]map[uint8][]uint64 {
	var partial [8]map[uint8][]uint64
	for i := range partial {
		partial[i] = make(map[uint8][]uint64)
	}
	return partial
}
// mapPartialHashes indexes every saved hash under each of its eight
// bytes, so candidates sharing at least one byte with a search hash can
// be found without scanning the full hash list.
func mapPartialHashes(hashes []SavedHash, partialHashMap [8]map[uint8][]uint64) {
	for _, saved := range hashes {
		fullHash := saved.Hash.Hash
		for byteIndex, hashByte := range SplitHash(fullHash) {
			partialHashMap[byteIndex][hashByte] = append(partialHashMap[byteIndex][hashByte], fullHash)
		}
	}
}
// compactPartialHashes sorts each byte-indexed bucket in place and strips
// adjacent duplicate full hashes, keeping the buckets small and ordered.
func compactPartialHashes(partialHashMap [8]map[uint8][]uint64) {
	for i := range partialHashMap {
		for key, bucket := range partialHashMap[i] {
			slices.Sort(bucket)
			partialHashMap[i][key] = slices.Compact(bucket)
		}
	}
}

254
savedHashes.go Normal file
View File

@ -0,0 +1,254 @@
package ch
import (
"cmp"
"encoding/json"
"errors"
"fmt"
"slices"
"strings"
"gitea.narnian.us/lordwelch/goimagehash"
"github.com/vmihailenco/msgpack"
)
// Format identifies the on-disk serialization used for saved hashes.
type Format int

const (
	Msgpack Format = iota + 1
	JSON

	// CurrentSavedHashesVersion is the version stamped by EncodeHashes.
	CurrentSavedHashesVersion int = 2
)

// versionMap maps a saved-hashes version number to its decoder.
// NOTE(review): never populated in this file, so the lookup in
// DecodeHashes always misses and the probe loop runs — confirm intent.
var versionMap map[int]versionDecoder

// formatNames maps each Format to its canonical lowercase name.
var formatNames = map[Format]string{
	JSON:    "json",
	Msgpack: "msgpack",
}

// formatValues is the inverse of formatNames, used by Format.Set.
var formatValues = map[string]Format{
	"json":    JSON,
	"msgpack": Msgpack,
}
// OldSavedHashes is the version-0 layout: domain -> id -> one hash value
// per hash kind (three kinds total).
type OldSavedHashes map[Source]map[string][3]uint64

// SavedHashesv1 is the version-1 layout: Hashes[kind] maps a hash value
// to an index into IDs.
type SavedHashesv1 struct {
	IDs    [][]ID
	Hashes [3]map[uint64]int
}

// SavedHashes The IDs and Hashes fields have no direct correlation
// It is perfectly valid to have an empty IDs or an empty Hashes field
// If two covers have identical hashes then they should be two entries in Hashes not a set in IDs with two IDs from the same source
type SavedHashes struct {
	Version int
	IDs     [][]ID      // sets of IDs known to refer to the same item across sources
	Hashes  []SavedHash // all known hashes; a hash is duplicated per source that has it
}

// SavedHash pairs a single hash with the ID of the item it was computed for.
type SavedHash struct {
	Hash Hash
	ID   ID
}

// Encoder serializes a value to bytes (e.g. json.Marshal, msgpack.Marshal).
type Encoder func(any) ([]byte, error)

// Decoder deserializes bytes into the pointed-to value.
type Decoder func([]byte, interface{}) error

// versionDecoder decodes one specific saved-hashes version, returning
// NoHashes when the payload does not match that version.
type versionDecoder func(Decoder, []byte) (*SavedHashes, error)

// NoHashes reports that the payload contained no hashes in the tried format.
var NoHashes = errors.New("no hashes")

// DecodeError wraps an underlying decoder failure.
var DecodeError = errors.New("decoder failure")
// String returns the canonical lowercase name of the format, or
// "Unknown" for an unrecognized value. It implements fmt.Stringer.
func (f Format) String() string {
	name, known := formatNames[f]
	if !known {
		return "Unknown"
	}
	return name
}
// Set parses s (case-insensitively) into f; it implements flag.Value.
// It returns an error naming the unrecognized input when s is not a
// known format name.
func (f *Format) Set(s string) error {
	format, known := formatValues[strings.ToLower(s)]
	if !known {
		// Report the string the user supplied; the old message printed
		// the receiver pointer with %d, which is useless to the caller.
		return fmt.Errorf("Unknown format: %s", s)
	}
	*f = format
	return nil
}
// InsertHash adds the hash/id pair to s.Hashes, keeping the slice sorted
// by (hash value, kind, domain, id) and skipping exact duplicates.
func (s *SavedHashes) InsertHash(hash Hash, id ID) {
	newHash := SavedHash{
		hash,
		id,
	}
	compareSaved := func(a SavedHash, b SavedHash) int {
		return cmp.Or(
			cmp.Compare(a.Hash.Hash, b.Hash.Hash),
			cmp.Compare(a.Hash.Kind, b.Hash.Kind),
			cmp.Compare(a.ID.Domain, b.ID.Domain),
			cmp.Compare(a.ID.ID, b.ID.ID),
		)
	}
	index, alreadyPresent := slices.BinarySearchFunc(s.Hashes, newHash, compareSaved)
	if alreadyPresent {
		return
	}
	s.Hashes = slices.Insert(s.Hashes, index, newHash)
}
// ConvertHashesV0 upgrades the version-0 map layout to the current
// SavedHashes layout, emitting one SavedHash per (id, hash kind) pair
// and one single-element ID set per id.
func ConvertHashesV0(oldHashes OldSavedHashes) *SavedHashes {
	totalIDs := 0
	for _, ids := range oldHashes {
		totalIDs += len(ids)
	}
	converted := SavedHashes{
		IDs:    make([][]ID, 0, totalIDs),
		Hashes: make([]SavedHash, 0, totalIDs),
	}
	for domain, sourceHashes := range oldHashes {
		for id, hashes := range sourceHashes {
			converted.IDs = append(converted.IDs, []ID{{domain, id}})
			// hashType is 0-based in the array; Kind values start at 1.
			for hashType, hash := range hashes {
				converted.Hashes = append(converted.Hashes, SavedHash{
					Hash: Hash{
						Kind: goimagehash.Kind(hashType + 1),
						Hash: hash,
					},
					ID: ID{domain, id},
				})
			}
		}
	}
	fmt.Println("length of hashes", len(converted.Hashes))
	fmt.Println("Length of ID lists", len(converted.IDs))
	return &converted
}
// ConvertHashesV1 upgrades the version-1 layout (per-kind hash->index
// maps) to the current flat SavedHash list, duplicating each hash for
// every ID in its referenced ID set. The ID sets carry over unchanged.
func ConvertHashesV1(oldHashes SavedHashesv1) *SavedHashes {
	totalHashes := 0
	for _, kindHashes := range oldHashes.Hashes {
		totalHashes += len(kindHashes)
	}
	converted := SavedHashes{
		IDs:    oldHashes.IDs,
		Hashes: make([]SavedHash, 0, totalHashes),
	}
	for hashType, kindHashes := range oldHashes.Hashes {
		// hashType is 0-based in the array; Kind values start at 1.
		kind := goimagehash.Kind(hashType + 1)
		for hashValue, idIndex := range kindHashes {
			for _, id := range oldHashes.IDs[idIndex] {
				converted.Hashes = append(converted.Hashes, SavedHash{
					ID: id,
					Hash: Hash{
						Kind: kind,
						Hash: hashValue,
					},
				})
			}
		}
	}
	fmt.Println("length of hashes", len(converted.Hashes))
	fmt.Println("Length of ID lists", len(converted.IDs))
	return &converted
}
// DecodeHashesV0 tries to decode the payload as the version-0 format and
// converts it on success. It returns NoHashes for an empty payload and
// wraps any decoder error in DecodeError.
func DecodeHashesV0(decode Decoder, hashes []byte) (*SavedHashes, error) {
	loaded := OldSavedHashes{}
	if err := decode(hashes, &loaded); err != nil {
		return nil, fmt.Errorf("%w: %w", DecodeError, err)
	}
	if len(loaded) < 1 {
		return nil, NoHashes
	}
	return ConvertHashesV0(loaded), nil
}
// DecodeHashesV1 tries to decode the payload as the version-1 format and
// converts it on success. It returns NoHashes when no hashes of any kind
// are present and wraps any decoder error in DecodeError.
func DecodeHashesV1(decode Decoder, hashes []byte) (*SavedHashes, error) {
	loaded := SavedHashesv1{}
	if err := decode(hashes, &loaded); err != nil {
		return nil, fmt.Errorf("%w: %w", DecodeError, err)
	}
	total := 0
	for _, kindHashes := range loaded.Hashes {
		total += len(kindHashes)
	}
	if total < 1 {
		return nil, NoHashes
	}
	return ConvertHashesV1(loaded), nil
}
// DecodeHashesV2 decodes a payload already stored in the current format.
// It returns NoHashes when both the hash list and the ID sets are empty
// and wraps any decoder error in DecodeError.
func DecodeHashesV2(decode Decoder, hashes []byte) (*SavedHashes, error) {
	loaded := SavedHashes{}
	if err := decode(hashes, &loaded); err != nil {
		return nil, fmt.Errorf("%w: %w", DecodeError, err)
	}
	if len(loaded.Hashes) < 1 && len(loaded.IDs) < 1 {
		return nil, NoHashes
	}
	return &loaded, nil
}
// getSavedHashesVersion extracts the Version field from the payload.
// Versions 0 and 1 predate the field, so anything <= 1 is reported as -1
// (unknown) and the caller must probe each legacy decoder instead.
func getSavedHashesVersion(decode Decoder, hashes []byte) (int, error) {
	var probe struct {
		Version int
	}
	if err := decode(hashes, &probe); err != nil {
		return -1, fmt.Errorf("%w: %w", DecodeError, err)
	}
	if probe.Version > 1 {
		return probe.Version, nil
	}
	return -1, nil
}
// DecodeHashes deserializes a saved-hashes payload in the given format.
// It dispatches on the embedded version number when a decoder is
// registered for it, otherwise it probes every known version decoder
// from oldest to newest until one accepts the payload.
func DecodeHashes(format Format, hashes []byte) (*SavedHashes, error) {
	var decode Decoder
	switch format {
	case Msgpack:
		decode = msgpack.Unmarshal
	case JSON:
		decode = json.Unmarshal
	default:
		return nil, fmt.Errorf("Unknown format: %v", format)
	}
	version, err := getSavedHashesVersion(decode, hashes)
	if err != nil {
		return nil, err
	}
	// NOTE(review): versionMap is never populated in this file, so this
	// lookup currently always misses and the probe loop below runs —
	// confirm whether it should be seeded with the known decoders.
	if decodeVersion, knownVersion := versionMap[version]; knownVersion {
		return decodeVersion(decode, hashes)
	}
	fallbacks := []versionDecoder{
		DecodeHashesV0,
		DecodeHashesV1,
		DecodeHashesV2,
	}
	for _, tryDecode := range fallbacks {
		loaded, err := tryDecode(decode, hashes)
		if err == nil {
			return loaded, nil
		}
		if !errors.Is(err, NoHashes) {
			return nil, err
		}
	}
	return nil, NoHashes
}
// EncodeHashes serializes hashes in the given format, stamping it with
// CurrentSavedHashesVersion first so DecodeHashes can dispatch on it.
func EncodeHashes(hashes SavedHashes, format Format) ([]byte, error) {
	var encode Encoder
	switch format {
	case Msgpack:
		encode = msgpack.Marshal
	case JSON:
		encode = json.Marshal
	default:
		return nil, fmt.Errorf("Unknown format: %v", format)
	}
	hashes.Version = CurrentSavedHashesVersion
	return encode(hashes)
}

View File

@ -8,7 +8,6 @@ import (
"log"
"math/bits"
"strings"
"time"
"gitea.narnian.us/lordwelch/goimagehash"
_ "modernc.org/sqlite"
@ -66,7 +65,7 @@ func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, items ...interface{
return hashes, nil
}
func (s *sqliteStorage) findPartialHashes(max int, search_hash int64, kind goimagehash.Kind) ([]sqliteHash, error) { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
func (s *sqliteStorage) findPartialHashes(tl timeLog, max int, search_hash int64, kind goimagehash.Kind) ([]sqliteHash, error) { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
hashes := []sqliteHash{}
statement, err := s.db.PrepareContext(context.Background(), `SELECT rowid,hash,kind FROM Hashes WHERE (kind=?) AND (((hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF)));`)
if err != nil {
@ -94,7 +93,7 @@ func (s *sqliteStorage) findPartialHashes(max int, search_hash int64, kind goima
}
}
rows.Close()
logTime("Filter partial " + kind.String())
tl.logTime("Filter partial " + kind.String())
statement, err = s.db.PrepareContext(context.Background(), `SELECT DISTINCT IDS.domain, IDs.id, id_hash.hashid FROM IDs JOIN id_hash ON IDs.rowid = id_hash.idid WHERE (id_hash.hashid in (`+strings.TrimRight(strings.Repeat("?,", len(hashes)), ",")+`)) ORDER BY IDs.domain, IDs.ID;`)
if err != nil {
@ -171,35 +170,18 @@ ANALYZE;
return nil
}
var (
total time.Duration
t = time.Now()
)
func resetTime() {
total = 0
t = time.Now()
}
func logTime(log string) {
n := time.Now()
s := n.Sub(t)
t = n
total += s
fmt.Printf("total: %v, %s: %v\n", total, log, s)
}
func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var (
foundMatches []Result
tl timeLog
)
resetTime()
tl.resetTime()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
statement, err := s.db.Prepare(`SELECT rowid,hash,kind FROM Hashes WHERE ` + strings.TrimSuffix(strings.Repeat("(hash=? AND kind=?) OR", len(hashes)), "OR") + `ORDER BY kind,hash;`)
if err != nil {
logTime("Fail exact")
tl.logTime("Fail exact")
return foundMatches, err
}
@ -221,17 +203,17 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
if len(foundMatches) > 0 && exactOnly {
return foundMatches, nil
}
logTime("Search Exact")
tl.logTime("Search Exact")
}
foundHashes := make(map[uint64]struct{})
for _, hash := range hashes {
hashes, err := s.findPartialHashes(max, int64(hash.Hash), hash.Kind)
hashes, err := s.findPartialHashes(tl, max, int64(hash.Hash), hash.Kind)
if err != nil {
return foundMatches, err
}
logTime("Search partial " + hash.Kind.String())
tl.logTime("Search partial " + hash.Kind.String())
for _, hash := range hashes {
if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched {
@ -251,27 +233,26 @@ func (s *sqliteStorage) MapHashes(hash ImageHash) {
if err != nil {
panic(err)
}
insertHashes, err := tx.Prepare(`
INSERT INTO Hashes (hash,kind) VALUES (?,?) ON CONFLICT DO UPDATE SET hash=?1 RETURNING hashid
`)
insertHashes, err := tx.Prepare(`INSERT INTO Hashes (hash,kind) VALUES (?,?) ON CONFLICT DO UPDATE SET hash=?1 RETURNING hashid`)
if err != nil {
panic(err)
}
rows, err := tx.Query(`
INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO UPDATE SET domain=?1 RETURNING idid
`, hash.ID.Domain, hash.ID.ID)
rows, err := tx.Query(`INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO UPDATE SET domain=?1 RETURNING idid`, hash.ID.Domain, hash.ID.ID)
if err != nil {
panic(err)
}
if !rows.Next() {
panic("Unable to insert IDs")
panic("Unable to insert ID")
}
var id_id int64
err = rows.Scan(&id_id)
if err != nil {
panic(err)
}
rows.Close()
hash_ids := []int64{}
for _, hash := range hash.Hashes {
rows, err := insertHashes.Query(int64(hash.Hash), hash.Kind)
@ -280,21 +261,24 @@ INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO UPDATE SET domain=?1 RET
}
if !rows.Next() {
panic("Unable to insert IDs")
panic("Unable to insert Hash")
}
var id int64
err = rows.Scan(&id)
rows.Close()
if err != nil {
panic(err)
}
hash_ids = append(hash_ids, id)
}
var ids []any
var ids []any = make([]any, 0, len(hash_ids)+1)
ids = append(ids, id_id)
for _, hash_id := range hash_ids {
ids = append(ids, hash_id, id_id)
ids = append(ids, hash_id)
}
_, err = tx.Exec(`INSERT INTO id_hash (hashid,idid) VALUES `+strings.TrimSuffix(strings.Repeat("(?, ?),", len(hash_ids)), ",")+` ON CONFLICT DO NOTHING;`, ids...)
_, err = tx.Exec(`INSERT INTO id_hash (idid, hashid) VALUES `+strings.TrimSuffix(strings.Repeat("(?1, ?),", len(hash_ids)), ",")+` ON CONFLICT DO NOTHING;`, ids...)
if err != nil {
panic(fmt.Errorf("Failed inserting: %v,%v: %w", hash.ID.Domain, hash.ID.ID, err))
}
@ -311,16 +295,11 @@ func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
return err
}
for hashType, sourceHashes := range hashes.Hashes {
hashKind := goimagehash.Kind(hashType + 1)
for hash, idsLocations := range sourceHashes {
for _, id := range hashes.IDs[idsLocations] {
s.MapHashes(ImageHash{
Hashes: []Hash{{hash, hashKind}},
ID: id,
})
}
}
for _, savedHash := range hashes.Hashes {
s.MapHashes(ImageHash{
Hashes: []Hash{savedHash.Hash},
ID: savedHash.ID,
})
}
err = s.createIndexes()
if err != nil {
@ -434,28 +413,27 @@ func NewSqliteStorage(db, path string) (HashStorage, error) {
_, err = sqlite.db.Exec(`
PRAGMA foreign_keys=ON;
CREATE TABLE IF NOT EXISTS Hashes(
hashid INTEGER PRIMARY KEY,
hash INT NOT NULL,
kind int NOT NULL,
hashid INTEGER PRIMARY KEY,
hash INTEGER NOT NULL,
kind INTEGER NOT NULL,
id INTEGER NOT NULL,
FOREIGN KEY(id) REFERENCES IDs(idid),
UNIQUE(kind, hash)
);
CREATE TABLE IF NOT EXISTS IDs(
id TEXT NOT NULL,
domain TEXT NOT NULL,
idid INTEGER PRIMARY KEY,
idid INTEGER PRIMARY KEY,
UNIQUE (domain, id)
);
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, id);
CREATE TABLE IF NOT EXISTS id_hash(
hashid INTEGER,
idid INTEGER,
FOREIGN KEY(hashid) REFERENCES Hashes(hashid),
FOREIGN KEY(idid) REFERENCES IDs(idid)
UNIQUE (hashid, idid)
CREATE TABLE IF NOT EXISTS EquivalentIDs(
id INTEGER
groupid INTEGER,
FOREIGN KEY(idid) REFERENCES IDs(idid)
UNIQUE (groupid, id)
);
`)
if err != nil {
panic(err)

24
timing.go Normal file
View File

@ -0,0 +1,24 @@
package ch
import (
"fmt"
"time"
)
// timeLog accumulates wall-clock durations between checkpoints so a
// multi-step operation can report per-step and cumulative timing.
type timeLog struct {
	total time.Duration // sum of all intervals logged since the last reset
	last  time.Time     // timestamp of the most recent checkpoint
}

// resetTime clears the accumulated total and starts a new interval.
func (t *timeLog) resetTime() {
	t.last = time.Now()
	t.total = 0
}

// logTime records the time elapsed since the previous checkpoint, folds
// it into the running total, and prints both under the given label.
func (t *timeLog) logTime(label string) {
	checkpoint := time.Now()
	elapsed := checkpoint.Sub(t.last)
	t.total += elapsed
	t.last = checkpoint
	fmt.Printf("total: %v, %s: %v\n", t.total, label, elapsed)
}

View File

@ -10,12 +10,17 @@ import (
)
type VPTree struct {
trees [3]*vptree.Tree
hashes [3][]vptree.Comparable
aTree *vptree.Tree
dTree *vptree.Tree
pTree *vptree.Tree
ids map[ID]*[]ID
aHashes []vptree.Comparable // temporary, only used for vptree creation
dHashes []vptree.Comparable // temporary, only used for vptree creation
pHashes []vptree.Comparable // temporary, only used for vptree creation
}
type VPHash struct {
Hash Hash
IDs []ID
SavedHash
}
func (h *VPHash) Distance(c vptree.Comparable) float64 {
@ -27,57 +32,108 @@ func (h *VPHash) Distance(c vptree.Comparable) float64 {
}
func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var matches []Result
var exactMatches []Result
fmt.Println(hashes)
var (
matches []Result
exactMatches []Result
tl timeLog
)
tl.resetTime()
defer tl.logTime("Search Complete")
for _, hash := range hashes {
results := vptree.NewDistKeeper(float64(max))
hashType := int(hash.Kind) - 1
v.trees[hashType].NearestSet(results, &VPHash{Hash: hash})
currentTree := v.getCurrentTree(hash.Kind)
currentTree.NearestSet(results, &VPHash{SavedHash{Hash: hash}})
mappedIds := map[*[]ID]bool{}
for _, result := range results.Heap {
vphash := result.Comparable.(*VPHash)
storedHash := result.Comparable.(*VPHash)
ids := v.ids[storedHash.ID]
if mappedIds[ids] {
continue
}
mappedIds[ids] = true
if result.Dist == 0 {
exactMatches = append(exactMatches, Result{
IDs: ToIDList(vphash.IDs),
IDs: ToIDList(*v.ids[storedHash.ID]),
Distance: int(result.Dist),
Hash: vphash.Hash,
Hash: storedHash.Hash,
})
} else {
matches = append(matches, Result{
IDs: ToIDList(vphash.IDs),
IDs: ToIDList(*v.ids[storedHash.ID]),
Distance: int(result.Dist),
Hash: vphash.Hash,
Hash: storedHash.Hash,
})
}
}
}
if len(exactMatches) > 0 && exactOnly {
if exactOnly {
return exactMatches, nil
}
matches = append(exactMatches[:len(exactMatches):len(exactMatches)], matches...)
exactMatches = append(exactMatches, matches...)
return matches, nil
}
// getCurrentTree returns the VP-tree indexing hashes of the given kind,
// panicking on an unrecognized kind.
func (v *VPTree) getCurrentTree(kind goimagehash.Kind) *vptree.Tree {
	switch kind {
	case goimagehash.AHash:
		return v.aTree
	case goimagehash.DHash:
		return v.dTree
	case goimagehash.PHash:
		return v.pTree
	}
	panic("Unknown hash type: " + kind.String())
}
// MapHashes is not implemented for the VP-tree backend; hashes are only
// loaded in bulk via DecodeHashes. NOTE(review): presumably because the
// trees are rebuilt from the full hash lists there — confirm.
func (v *VPTree) MapHashes(ImageHash) {
	panic("Not Implemented")
}
func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
var err error
for hashType, sourceHashes := range hashes.Hashes {
for hash, idsLocation := range sourceHashes {
var (
hashKind = goimagehash.Kind(hashType + 1)
)
hash := &VPHash{Hash{hash, hashKind}, hashes.IDs[idsLocation]}
v.hashes[hashType] = append(v.hashes[hashType], hash)
// Initialize all the known equal IDs
for _, ids := range hashes.IDs {
for _, id := range ids {
v.ids[id] = &ids
}
}
for hashType := range 3 {
v.trees[hashType], err = vptree.New(v.hashes[hashType], 3, nil)
if err != nil {
return err
var err error
for _, savedHash := range hashes.Hashes {
if savedHash.Hash.Kind == goimagehash.AHash {
v.aHashes = append(v.aHashes, &VPHash{savedHash})
}
if savedHash.Hash.Kind == goimagehash.DHash {
v.dHashes = append(v.dHashes, &VPHash{savedHash})
}
if savedHash.Hash.Kind == goimagehash.PHash {
v.pHashes = append(v.pHashes, &VPHash{savedHash})
}
if savedHash.ID == (ID{}) {
fmt.Println("Empty ID detected")
panic(savedHash)
}
// All known equal IDs are already mapped we can add any missing ones from hashes
if _, ok := v.ids[savedHash.ID]; !ok {
v.ids[savedHash.ID] = &[]ID{savedHash.ID}
}
}
v.aTree, err = vptree.New(v.aHashes, 3, nil)
if err != nil {
return err
}
v.dTree, err = vptree.New(v.dHashes, 3, nil)
if err != nil {
return err
}
v.pTree, err = vptree.New(v.pHashes, 3, nil)
if err != nil {
return err
}
return nil
}
@ -90,16 +146,31 @@ func (v *VPTree) AssociateIDs(newIDs []NewIDs) error {
}
func (v *VPTree) GetIDs(id ID) IDList {
return nil
ids, found := v.ids[id]
if !found {
return nil
}
return ToIDList(*ids)
}
func NewVPStorage() (HashStorage, error) {
return &VPTree{
hashes: [3][]vptree.Comparable{
make([]vptree.Comparable, 0, 1_000_000),
make([]vptree.Comparable, 0, 1_000_000),
make([]vptree.Comparable, 0, 1_000_000),
},
}, nil
var err error
v := &VPTree{
aHashes: []vptree.Comparable{},
dHashes: []vptree.Comparable{},
pHashes: []vptree.Comparable{},
}
v.aTree, err = vptree.New(v.aHashes, 3, nil)
if err != nil {
return v, err
}
v.dTree, err = vptree.New(v.dHashes, 3, nil)
if err != nil {
return v, err
}
v.pTree, err = vptree.New(v.pHashes, 3, nil)
if err != nil {
return v, err
}
return v, nil
}