Compare commits

..

12 Commits

Author SHA1 Message Date
22d59aa221 Move HashStorage to its own package 2025-05-31 19:00:40 -07:00
ed0b5ba441 Don't include sqlite/vptree in gokrazy build 2025-05-31 18:35:18 -07:00
d240bd953b Remove references to loaded json object 2025-05-31 17:59:19 -07:00
bb64d8449c Improve memory efficiency 2025-05-31 15:00:05 -07:00
b0a4c2939c Map to slice 2025-05-27 22:31:11 -07:00
3e364b1858 Filter out non-json files 2025-04-24 12:54:40 -07:00
0da76f7fb5 Fix inserting downloaded pages 2025-04-24 12:53:55 -07:00
d7c42f5c1d Fix issues with loading saved hashes 2025-04-24 12:46:04 -07:00
374c46bc48 Fix url handling order 2025-04-24 12:46:04 -07:00
aca658e32d Typo 2025-04-24 12:46:04 -07:00
acd71df302 Add the issue number to the downloaded CV pages
Update existing pages 10 at a time if they are missing data
Simplify checking for previously downloaded pages
2025-04-24 12:46:04 -07:00
0fd431f6f7 Update result type 2025-04-19 15:41:04 -07:00
17 changed files with 1381 additions and 1123 deletions

View File

@ -1,270 +0,0 @@
package ch
import (
"cmp"
"errors"
"fmt"
"math/bits"
"slices"
"sync"
"gitea.narnian.us/lordwelch/goimagehash"
)
type basicMapStorage struct {
hashMutex *sync.RWMutex
ids map[ID]*[]ID
aHashes []SavedHash
dHashes []SavedHash
pHashes []SavedHash
}
var ErrIDNotFound = errors.New("ID not found on this server")
// atleast must have a read lock before using
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
matchingHashes := make([]Result, 0, 20) // hope that we don't need more
mappedIds := map[*[]ID]bool{}
for _, storedHash := range *b.getCurrentHashes(kind) {
distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash)
if distance <= maxDistance {
ids := b.ids[storedHash.ID]
if mappedIds[ids] {
continue
}
mappedIds[ids] = true
matchingHashes = append(matchingHashes, Result{ToIDList(*b.ids[storedHash.ID]), distance, storedHash.Hash})
}
}
return matchingHashes
}
func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
var foundMatches []Result
for _, hash := range hashes {
mappedIds := map[*[]ID]bool{}
index, count := b.findHash(hash)
if count > 0 {
for _, storedHash := range (*b.getCurrentHashes(hash.Kind))[index : index+count] {
ids := b.ids[storedHash.ID]
if mappedIds[ids] {
continue
}
mappedIds[ids] = true
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: storedHash.Hash,
IDs: ToIDList(*b.ids[storedHash.ID]),
})
}
}
}
return foundMatches
}
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var (
foundMatches []Result
tl timeLog
)
tl.resetTime()
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = b.exactMatches(hashes, max)
tl.logTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
}
foundHashes := make(map[uint64]struct{})
totalPartialHashes := 0
for _, hash := range hashes {
foundMatches = append(foundMatches, b.atleast(hash.Kind, max, hash.Hash)...)
}
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
return foundMatches, nil
}
// getCurrentHashes must have a read lock before using
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]SavedHash {
if kind == goimagehash.AHash {
return &b.aHashes
}
if kind == goimagehash.DHash {
return &b.dHashes
}
if kind == goimagehash.PHash {
return &b.pHashes
}
panic("Unknown hash type: " + kind.String())
}
// findHash must have a read lock before using
// return value is index, count
// if count < 1 then no results were found
func (b *basicMapStorage) findHash(hash Hash) (int, int) {
currentHashes := *b.getCurrentHashes(hash.Kind)
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing SavedHash, target Hash) int {
return cmp.Compare(existing.Hash.Hash, target.Hash)
})
if !found {
return index, 0
}
count := 0
for i := index + 1; i < len(currentHashes) && currentHashes[i].Hash.Hash == hash.Hash; i++ {
count++
}
return index, count
}
// insertHash must already have a lock
func (b *basicMapStorage) insertHash(hash Hash, id ID) {
currentHashes := b.getCurrentHashes(hash.Kind)
index, count := b.findHash(hash)
max := index + count
for ; index < max; index++ {
if (*currentHashes)[index].ID == id {
return
}
}
*currentHashes = slices.Insert(*currentHashes, index, SavedHash{hash, id})
if _, mapped := b.ids[id]; !mapped {
b.ids[id] = &[]ID{id}
}
}
func (b *basicMapStorage) MapHashes(hash ImageHash) {
b.hashMutex.Lock()
defer b.hashMutex.Unlock()
for _, ih := range hash.Hashes {
b.insertHash(ih, hash.ID)
}
}
// DecodeHashes must already have a lock
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
b.ids = make(map[ID]*[]ID, len(hashes.Hashes))
// Initialize all the known equal IDs
for _, ids := range hashes.IDs {
for _, id := range ids {
b.ids[id] = &ids
}
}
slices.SortFunc(hashes.Hashes, func(existing, target SavedHash) int {
return cmp.Or(
cmp.Compare(existing.Hash.Kind, target.Hash.Kind),
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
cmp.Compare(existing.ID.Domain, target.ID.Domain),
cmp.Compare(existing.ID.ID, target.ID.ID),
)
})
// Assume they are probably fairly equally split between hash types
b.aHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
b.dHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
b.pHashes = make([]SavedHash, 0, len(hashes.Hashes)/3)
for _, savedHash := range hashes.Hashes {
if savedHash.Hash.Kind == goimagehash.AHash {
b.aHashes = append(b.aHashes, savedHash)
}
if savedHash.Hash.Kind == goimagehash.DHash {
b.dHashes = append(b.dHashes, savedHash)
}
if savedHash.Hash.Kind == goimagehash.PHash {
b.pHashes = append(b.pHashes, savedHash)
}
if savedHash.ID == (ID{}) {
fmt.Println("Empty ID detected")
panic(savedHash)
}
// All known equal IDs are already mapped we can add any missing ones from hashes
if _, ok := b.ids[savedHash.ID]; !ok {
b.ids[savedHash.ID] = &[]ID{savedHash.ID}
}
}
hashCmp := func(existing, target SavedHash) int {
return cmp.Or(
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
cmp.Compare(existing.ID.Domain, target.ID.Domain),
cmp.Compare(existing.ID.ID, target.ID.ID),
)
}
slices.SortFunc(b.aHashes, hashCmp)
slices.SortFunc(b.dHashes, hashCmp)
slices.SortFunc(b.pHashes, hashCmp)
return nil
}
// EncodeHashes should already have a lock
func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
savedHashes := SavedHashes{
Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
}
savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...)
savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...)
savedHashes.Hashes = append(savedHashes.Hashes, b.pHashes...)
// Only keep groups len>1 as they are mapped in SavedHashes.Hashes
for _, ids := range b.ids {
if len(*ids) > 1 {
savedHashes.IDs = append(savedHashes.IDs, *ids)
}
}
return savedHashes, nil
}
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
for _, newid := range newids {
b.hashMutex.RLock()
ids, found := b.ids[newid.OldID]
b.hashMutex.RUnlock()
if !found {
return ErrIDNotFound
}
b.hashMutex.Lock()
*ids = InsertID(*ids, newid.NewID)
b.hashMutex.Unlock()
}
return nil
}
func (b *basicMapStorage) GetIDs(id ID) IDList {
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
ids, found := b.ids[id]
if !found {
return nil
}
return ToIDList(*ids)
}
func NewBasicMapStorage() (HashStorage, error) {
storage := &basicMapStorage{
hashMutex: &sync.RWMutex{},
ids: make(map[ID]*[]ID),
aHashes: []SavedHash{},
dHashes: []SavedHash{},
pHashes: []SavedHash{},
}
return storage, nil
}

View File

@ -3,10 +3,8 @@ package main
import (
"bufio"
"bytes"
"cmp"
"compress/gzip"
"context"
"encoding/json"
"errors"
"flag"
"fmt"
@ -15,17 +13,14 @@ import (
_ "image/jpeg"
_ "image/png"
"io"
"io/fs"
"log"
"net/http"
_ "net/http/pprof"
"net/url"
"os"
"path/filepath"
"runtime"
"runtime/debug"
"runtime/pprof"
"slices"
"strconv"
"strings"
"sync"
"time"
@ -40,24 +35,9 @@ import (
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/comic-hasher/cv"
"gitea.narnian.us/lordwelch/goimagehash"
"gitea.narnian.us/lordwelch/comic-hasher/storage"
)
type Server struct {
httpServer *http.Server
mux *CHMux
BaseURL *url.URL
hashes ch.HashStorage
Context context.Context
cancel func()
signalQueue chan os.Signal
readerQueue chan string
hashingQueue chan ch.Im
mappingQueue chan ch.ImageHash
onlyHashNewIDs bool
version string
}
var bufPool = &sync.Pool{
New: func() any {
// The Pool's New function should generally only return pointer
@ -120,6 +100,7 @@ type CVOpts struct {
}
type Opts struct {
cpuprofile string
memprofile string
coverPath string
sqlitePath string
loadEmbeddedHashes bool
@ -160,6 +141,7 @@ func main() {
wd = filepath.Join(wd, "comic-hasher")
}
flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file")
flag.StringVar(&opts.memprofile, "memprofile", "", "Write mem profile to file after loading hashes")
flag.StringVar(&opts.addr, "listen", ":8080", "Address to listen on")
flag.StringVar(&opts.debugPort, "debug-port", "", "Port to listen to for debug info")
@ -219,411 +201,6 @@ func main() {
startServer(opts)
}
func (s *Server) authenticated(w http.ResponseWriter, r *http.Request) (string, bool) {
return strings.TrimSpace("lordwelch"), true
}
func (s *Server) setupAppHandlers() {
// s.mux.HandleFunc("/get_cover", s.getCover)
s.mux.HandleFunc("/add_cover", s.addCover)
s.mux.HandleFunc("/match_cover_hash", s.matchCoverHash)
s.mux.HandleFunc("/associate_ids", s.associateIDs)
}
func (s *Server) getCover(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
)
if ID == "" {
log.Println("No ID Provided")
http.Error(w, "No ID Provided", http.StatusBadRequest)
return
}
if domain == "" {
log.Println("No domain Provided")
http.Error(w, "No domain Provided", http.StatusBadRequest)
return
}
// if index, ok := s.IDToCover[domain+":"+ID]; ok {
// covers, err := json.Marshal(s.covers[index])
// if err == nil {
// w.Header().Add("Content-Type", "application/json")
// w.Write(covers)
// return
// }
// }
fmt.Fprintln(w, "Not implemented")
}
func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
newDomain = strings.TrimSpace(values.Get("newDomain"))
newID = strings.TrimSpace(values.Get("newID"))
)
if ID == "" {
msg := "No ID Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if domain == "" {
msg := "No domain Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newID == "" {
msg := "No newID Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newDomain == "" {
msg := "No newDomain Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newDomain == domain {
msg := "newDomain cannot be the same as the existing domain"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
// if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
// msg := "No IDs belonging to " + domain + "exist on this server"
// log.Println(msg)
// writeJson(w, http.StatusBadRequest, result{Msg: msg})
// }
log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
found := false
// for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
// for i, idlist := range hash {
// if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
// found = true
// hash[i] = ch.Insert(idlist, newDomain+":"+newID)
// if _, ok := s.ids[ch.Source(newDomain)]; !ok {
// s.ids[ch.Source(newDomain)] = make(map[string]struct{})
// }
// s.ids[ch.Source(newDomain)][newID] = struct{}{}
// }
// }
// }
if found {
writeJson(w, http.StatusOK, result{Msg: "New ID added"})
} else {
writeJson(w, http.StatusOK, result{Msg: "Old ID not found"})
}
}
type SimpleResult struct {
Distance int
IDList ch.IDList
}
func getSimpleResults(fullResults []ch.Result) []SimpleResult {
simpleResult := make([]SimpleResult, 0, len(fullResults))
slices.SortFunc(fullResults, func(a, b ch.Result) int {
return cmp.Compare(a.Distance, b.Distance) * -1 // Reverses sort
})
// Deduplicate IDs
idToDistance := make(map[ch.ID]int)
for _, fullResult := range fullResults {
for domain, idlist := range fullResult.IDs {
for _, idStr := range idlist {
id := ch.ID{
Domain: domain,
ID: idStr,
}
if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
idToDistance[id] = fullResult.Distance
}
}
}
}
// Group by distance
distanceMap := make(map[int]SimpleResult)
for id, distance := range idToDistance {
var (
sr SimpleResult
ok bool
)
if sr, ok = distanceMap[distance]; !ok {
sr.IDList = make(ch.IDList)
}
sr.Distance = distance
sr.IDList[id.Domain] = append(sr.IDList[id.Domain], id.ID)
distanceMap[distance] = sr
}
// turn into array
for _, sr := range distanceMap {
simpleResult = append(simpleResult, sr)
}
slices.SortFunc(simpleResult, func(a, b SimpleResult) int {
return cmp.Compare(a.Distance, b.Distance)
})
return simpleResult
}
type result struct {
Results any `json:"results,omitempty"`
Msg string `json:"msg,omitempty"`
}
func writeJson(w http.ResponseWriter, status int, res result) {
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.Header().Set("X-Content-Type-Options", "nosniff")
var (
bytes []byte
err error
)
if bytes, err = json.Marshal(res); err != nil {
bytes, _ = json.Marshal(result{Msg: fmt.Sprintf("Failed to create json: %s", err)})
}
w.WriteHeader(status)
_, _ = w.Write(bytes)
_, _ = w.Write([]byte("\n"))
}
func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
ahashStr = strings.TrimSpace(values.Get("ahash"))
dhashStr = strings.TrimSpace(values.Get("dhash"))
phashStr = strings.TrimSpace(values.Get("phash"))
maxStr = strings.TrimSpace(values.Get("max"))
exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
ahash uint64
dhash uint64
phash uint64
max int = 8
max_tmp int
err error
hashes []ch.Hash
)
if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
log.Printf("could not parse ahash: %s", ahashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if ahash > 0 {
hashes = append(hashes, ch.Hash{ahash, goimagehash.AHash})
}
if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" {
log.Printf("could not parse dhash: %s", dhashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if dhash > 0 {
hashes = append(hashes, ch.Hash{dhash, goimagehash.DHash})
}
if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" {
log.Printf("could not parse phash: %s", phashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if phash > 0 {
hashes = append(hashes, ch.Hash{phash, goimagehash.PHash})
}
if max_tmp, err = strconv.Atoi(maxStr); err != nil && maxStr != "" {
log.Printf("Invalid Max: %s", maxStr)
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Invalid Max: %s", maxStr)})
return
}
if maxStr != "" {
max = max_tmp
}
if max > 8 {
log.Printf("Max must be less than 9: %d", max)
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
return
}
matches, err := s.hashes.GetMatches(hashes, max, exactOnly)
slices.SortFunc(matches, func(a ch.Result, b ch.Result) int {
return cmp.Compare(a.Distance, b.Distance)
})
log.Println(err)
if len(matches) > 0 {
var msg string = ""
if err != nil {
msg = err.Error()
}
if simple {
writeJson(w, http.StatusOK, result{
Results: getSimpleResults(matches),
Msg: msg,
})
return
}
writeJson(w, http.StatusOK, result{
Results: matches,
Msg: msg,
})
return
}
writeJson(w, http.StatusNotFound, result{Msg: "No hashes found"})
}
func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
w.WriteHeader(http.StatusNotImplemented)
return
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
)
if ID == "" {
log.Println("No ID Provided")
writeJson(w, http.StatusBadRequest, result{Msg: "No ID Provided"})
return
}
if domain == "" {
log.Println("No domain Provided")
writeJson(w, http.StatusBadRequest, result{Msg: "No Domain Provided"})
return
}
i, format, err := image.Decode(r.Body)
if err != nil {
msg := fmt.Sprintf("Failed to decode Image: %s", err)
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
log.Printf("Decoded %s image from %s", format, user)
select {
case <-s.Context.Done():
log.Println("Recieved quit")
return
default:
}
s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.Source(domain), ID: ID}}
writeJson(w, http.StatusOK, result{Msg: "Success"})
}
func (s *Server) mapper(done func()) {
defer done()
for hash := range s.mappingQueue {
s.hashes.MapHashes(hash)
}
}
func (s *Server) hasher(workerID int, done func(int)) {
defer done(workerID)
for image := range s.hashingQueue {
start := time.Now()
if image.NewOnly && len(s.hashes.GetIDs(image.ID)) > 0 {
log.Printf("Skipping existing hash with ID: %s found", image.ID)
continue
}
hash := ch.HashImage(image)
if hash.ID.Domain == "" || hash.ID.ID == "" {
continue
}
select {
// TODO: Check channel pipelines
case s.mappingQueue <- hash:
default:
}
elapsed := time.Since(start)
log.Printf("Hashing took %v: worker: %v. %s: %064b id: %s\n", elapsed, workerID, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
}
}
func (s *Server) reader(workerID int, done func(i int)) {
defer done(workerID)
for path := range s.readerQueue {
id := ch.ID{Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path))}
if len(s.hashes.GetIDs(id)) > 0 {
continue
}
file, err := os.Open(path)
if err != nil {
panic(err)
}
i, format, err := image.Decode(bufio.NewReader(file))
file.Close()
if err != nil {
continue // skip this image
}
im := ch.Im{
Im: i,
Format: format,
ID: id,
NewOnly: s.onlyHashNewIDs,
}
select {
case s.hashingQueue <- im:
default:
}
}
}
func (s *Server) HashLocalImages(opts Opts) {
if opts.coverPath == "" {
return
}
go func() {
log.Println("Hashing covers at ", opts.coverPath)
start := time.Now()
err := filepath.WalkDir(opts.coverPath, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
select {
case <-s.Context.Done():
log.Println("Recieved quit")
err = s.httpServer.Shutdown(context.TODO())
return fmt.Errorf("Recieved quit: %w", err)
default:
}
if d.IsDir() {
return nil
}
s.readerQueue <- path
return nil
})
elapsed := time.Since(start)
log.Println("Err:", err, "local hashing took", elapsed)
}()
}
func signalHandler(s *Server) {
select {
case sig := <-s.signalQueue:
@ -639,15 +216,15 @@ func signalHandler(s *Server) {
func initializeStorage(opts Opts) (ch.HashStorage, error) {
switch opts.storageType {
case Map:
return ch.NewMapStorage()
return storage.NewMapStorage()
case BasicMap:
return ch.NewBasicMapStorage()
return storage.NewBasicMapStorage()
case Sqlite:
return ch.NewSqliteStorage("sqlite", opts.sqlitePath)
return storage.NewSqliteStorage("sqlite", opts.sqlitePath)
case Sqlite3:
return ch.NewSqliteStorage("sqlite3", opts.sqlitePath)
return storage.NewSqliteStorage("sqlite3", opts.sqlitePath)
case VPTree:
return ch.NewVPStorage()
return storage.NewVPStorage()
}
return nil, errors.New("Unknown storage type provided")
}
@ -662,17 +239,19 @@ func loadHashes(opts Opts) *ch.SavedHashes {
if err != nil {
panic(fmt.Sprintf("Failed to read embedded hashes: %s", err))
}
gr.Close()
}
} else {
fmt.Println("Loading saved hashes")
if f, err := os.Open(opts.hashesPath); err == nil {
var buf io.Reader = f
if gr, err := gzip.NewReader(buf); err == nil {
buf = bufio.NewReader(gr)
var r io.ReadCloser = f
if gr, err := gzip.NewReader(f); err == nil {
r = gr
} else {
_, _ = f.Seek(0, io.SeekStart)
}
hashes, err = io.ReadAll(buf)
hashes, err = io.ReadAll(r)
r.Close()
f.Close()
if err != nil {
panic(fmt.Sprintf("Failed to load hashes from disk: %s", err))
@ -698,13 +277,17 @@ func loadHashes(opts Opts) *ch.SavedHashes {
}
break
}
if errors.Is(err, ch.NoHashes) {
log.Println("No saved hashes to load", loadedHashes, err)
return loadedHashes
}
if err != nil {
panic(fmt.Sprintf("Failed to decode hashes: %s", err))
}
fmt.Printf("Loaded %s hashes\n", format)
return loadedHashes
}
func saveHashes(opts Opts, hashes ch.SavedHashes) error {
func saveHashes(opts Opts, hashes *ch.SavedHashes) error {
if opts.loadEmbeddedHashes && !opts.saveEmbeddedHashes {
return errors.New("refusing to save embedded hashes")
}
@ -741,7 +324,7 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
close(server.hashingQueue)
}()
for path := range imagePaths {
id := ch.ID{Domain: ch.ComicVine, ID: path.IssueID}
id := ch.ID{Domain: ch.NewSource(ch.ComicVine), ID: path.IssueID}
if opts.onlyHashNewIDs && len(server.hashes.GetIDs(id)) > 0 {
continue
}
@ -786,16 +369,17 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
server.hashingQueue <- im
}
}
type CHMux struct {
version string
*http.ServeMux
func printMemStats(m runtime.MemStats) {
fmt.Printf("Alloc = %v MiB\n", bToKb(m.Alloc))
fmt.Printf("TotalAlloc = %v MiB\n", bToKb(m.TotalAlloc))
fmt.Printf("Sys = %v MiB\n", bToKb(m.Sys))
fmt.Printf("NumGC = %v\n", m.NumGC)
}
func (CHM *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Server", "Comic-Hasher "+CHM.version)
CHM.ServeMux.ServeHTTP(w, r)
func bToKb(b uint64) uint64 {
return b / 1024 / 1024
}
func startServer(opts Opts) {
imaging.SetMaxProcs(2)
if opts.cpuprofile != "" {
@ -860,7 +444,7 @@ func startServer(opts Opts) {
// DecodeHashes would normally need a write lock
// nothing else has been started yet so we don't need one
if err := server.hashes.DecodeHashes(*loadHashes(opts)); err != nil {
if err := server.hashes.DecodeHashes(loadHashes(opts)); err != nil {
panic(err)
}
@ -905,11 +489,30 @@ func startServer(opts Opts) {
}
go signalHandler(&server)
log.Println("Listening on ", server.httpServer.Addr)
if opts.memprofile != "" {
f, err := os.Create(opts.memprofile)
if err != nil {
log.Fatal("could not create memory profile: ", err)
}
defer f.Close() // error handling omitted for example
runtime.GC() // get up-to-date statistics
runtime.GC() // get up-to-date statistics
// Lookup("allocs") creates a profile similar to go test -memprofile.
// Alternatively, use Lookup("heap") for a profile
// that has inuse_space as the default index.
m := runtime.MemStats{}
runtime.ReadMemStats(&m)
printMemStats(m)
if err := pprof.Lookup("heap").WriteTo(f, 0); err != nil {
log.Fatal("could not write memory profile: ", err)
}
}
err = server.httpServer.ListenAndServe()
if err != nil {
log.Println(err)
}
log.Println("Listening on ", server.httpServer.Addr)
close(server.readerQueue)
log.Println("waiting on readers")

366
cmd/comic-hasher/server.go Normal file
View File

@ -0,0 +1,366 @@
package main
import (
"bufio"
"cmp"
"context"
"encoding/json"
"fmt"
"image"
"io/fs"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"time"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
"golang.org/x/exp/slices"
)
type Server struct {
httpServer *http.Server
mux *CHMux
BaseURL *url.URL
hashes ch.HashStorage
Context context.Context
cancel func()
signalQueue chan os.Signal
readerQueue chan string
hashingQueue chan ch.Im
mappingQueue chan ch.ImageHash
onlyHashNewIDs bool
version string
}
type CHMux struct {
version string
*http.ServeMux
}
func (CHM *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Server", "Comic-Hasher "+CHM.version)
CHM.ServeMux.ServeHTTP(w, r)
}
func (s *Server) authenticated(w http.ResponseWriter, r *http.Request) (string, bool) {
return strings.TrimSpace("lordwelch"), true
}
func (s *Server) setupAppHandlers() {
s.mux.HandleFunc("/add_cover", s.addCover)
s.mux.HandleFunc("/match_cover_hash", s.matchCoverHash)
s.mux.HandleFunc("/associate_ids", s.associateIDs)
}
func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
domain = ch.Source(strings.ToLower(strings.TrimSpace(values.Get("domain"))))
ID = strings.ToLower(strings.TrimSpace(values.Get("id")))
newDomain = ch.Source(strings.ToLower(strings.TrimSpace(values.Get("newDomain"))))
newID = strings.ToLower(strings.TrimSpace(values.Get("newID")))
)
if ID == "" {
msg := "No ID Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if domain == "" {
msg := "No domain Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newID == "" {
msg := "No newID Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newDomain == "" {
msg := "No newDomain Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newDomain == domain {
msg := "newDomain cannot be the same as the existing domain"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
err := s.hashes.AssociateIDs([]ch.NewIDs{{
OldID: ch.ID{
Domain: &domain,
ID: ID,
},
NewID: ch.ID{
Domain: &newDomain,
ID: newID,
},
}})
if err == nil {
writeJson(w, http.StatusOK, result{Msg: "New ID added"})
} else {
writeJson(w, http.StatusOK, result{Msg: err.Error()})
}
}
type result struct {
Results []ch.Result `json:"results,omitempty"`
Msg string `json:"msg,omitempty"`
}
func writeJson(w http.ResponseWriter, status int, res result) {
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.Header().Set("X-Content-Type-Options", "nosniff")
var (
bytes []byte
err error
)
if bytes, err = json.Marshal(res); err != nil {
bytes, _ = json.Marshal(result{Msg: fmt.Sprintf("Failed to create json: %s", err)})
}
w.WriteHeader(status)
_, _ = w.Write(bytes)
_, _ = w.Write([]byte("\n"))
}
func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
ahashStr = strings.TrimSpace(values.Get("ahash"))
dhashStr = strings.TrimSpace(values.Get("dhash"))
phashStr = strings.TrimSpace(values.Get("phash"))
maxStr = strings.TrimSpace(values.Get("max"))
exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
ahash uint64
dhash uint64
phash uint64
max int = 8
max_tmp int
err error
hashes []ch.Hash
)
if simple {
writeJson(w, http.StatusBadRequest, result{Msg: "Simple results are no longer Supported"})
return
}
if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
log.Printf("could not parse ahash: %s", ahashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if ahash > 0 {
hashes = append(hashes, ch.Hash{Hash: ahash, Kind: goimagehash.AHash})
}
if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" {
log.Printf("could not parse dhash: %s", dhashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if dhash > 0 {
hashes = append(hashes, ch.Hash{Hash: dhash, Kind: goimagehash.DHash})
}
if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" {
log.Printf("could not parse phash: %s", phashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if phash > 0 {
hashes = append(hashes, ch.Hash{Hash: phash, Kind: goimagehash.PHash})
}
if max_tmp, err = strconv.Atoi(maxStr); err != nil && maxStr != "" {
log.Printf("Invalid Max: %s", maxStr)
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Invalid Max: %s", maxStr)})
return
}
if maxStr != "" {
max = max_tmp
}
if max > 8 {
log.Printf("Max must be less than 9: %d", max)
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
return
}
matches, err := s.hashes.GetMatches(hashes, max, exactOnly)
slices.SortFunc(matches, func(a ch.Result, b ch.Result) int {
return cmp.Compare(a.Distance, b.Distance)
})
log.Println(err)
if len(matches) > 0 {
var msg string = ""
if err != nil {
msg = err.Error()
}
writeJson(w, http.StatusOK, result{
Results: matches,
Msg: msg,
})
return
}
writeJson(w, http.StatusNotFound, result{Msg: "No hashes found"})
}
func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
if true {
w.WriteHeader(http.StatusNotImplemented)
return
}
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
)
if ID == "" {
log.Println("No ID Provided")
writeJson(w, http.StatusBadRequest, result{Msg: "No ID Provided"})
return
}
if domain == "" {
log.Println("No domain Provided")
writeJson(w, http.StatusBadRequest, result{Msg: "No Domain Provided"})
return
}
i, format, err := image.Decode(r.Body)
if err != nil {
msg := fmt.Sprintf("Failed to decode Image: %s", err)
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
log.Printf("Decoded %s image from %s", format, user)
select {
case <-s.Context.Done():
log.Println("Recieved quit")
return
default:
}
s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.NewSource(domain), ID: ID}}
writeJson(w, http.StatusOK, result{Msg: "Success"})
}
func (s *Server) mapper(done func()) {
defer done()
for hash := range s.mappingQueue {
s.hashes.MapHashes(hash)
}
}
func (s *Server) hasher(workerID int, done func(int)) {
defer done(workerID)
for image := range s.hashingQueue {
start := time.Now()
if image.NewOnly && len(s.hashes.GetIDs(image.ID)) > 0 {
log.Printf("Skipping existing hash with ID: %s found", image.ID)
continue
}
hash := ch.HashImage(image)
if *hash.ID.Domain == "" || hash.ID.ID == "" {
continue
}
select {
// TODO: Check channel pipelines
case s.mappingQueue <- hash:
default:
}
elapsed := time.Since(start)
log.Printf("Hashing took %v: worker: %v. %s: %064b id: %s\n", elapsed, workerID, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
}
}
func (s *Server) reader(workerID int, done func(i int)) {
defer done(workerID)
for path := range s.readerQueue {
id := ch.ID{
Domain: ch.NewSource(filepath.Base(filepath.Dir(filepath.Dir(path)))),
ID: filepath.Base(filepath.Dir(path)),
}
if len(s.hashes.GetIDs(id)) > 0 {
continue
}
file, err := os.Open(path)
if err != nil {
panic(err)
}
i, format, err := image.Decode(bufio.NewReader(file))
file.Close()
if err != nil {
continue // skip this image
}
im := ch.Im{
Im: i,
Format: format,
ID: id,
NewOnly: s.onlyHashNewIDs,
}
select {
case s.hashingQueue <- im:
default:
}
}
}
func (s *Server) HashLocalImages(opts Opts) {
if opts.coverPath == "" {
return
}
go func() {
log.Println("Hashing covers at ", opts.coverPath)
start := time.Now()
err := filepath.WalkDir(opts.coverPath, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
select {
case <-s.Context.Done():
log.Println("Recieved quit")
err = s.httpServer.Shutdown(context.TODO())
return fmt.Errorf("Recieved quit: %w", err)
default:
}
if d.IsDir() {
return nil
}
s.readerQueue <- path
return nil
})
elapsed := time.Since(start)
log.Println("Err:", err, "local hashing took", elapsed)
}()
}

View File

@ -106,7 +106,7 @@ func main() {
debugImage(debugim, 8, 8)
}
hash := ch.HashImage(ch.Im{Im: im, Format: format, ID: ch.ID{Domain: ch.Source(ch.ComicVine), ID: "nothing"}})
hash := ch.HashImage(ch.Im{Im: im, Format: format, ID: ch.ID{Domain: ch.NewSource(ch.ComicVine), ID: "nothing"}})
fmt.Println("ahash: ", goimagehash.NewImageHash(hash.Hashes[0].Hash, hash.Hashes[0].Kind).BinString())
fmt.Println("dhash: ", goimagehash.NewImageHash(hash.Hashes[1].Hash, hash.Hashes[1].Kind).BinString())

213
cv/cv.go
View File

@ -34,8 +34,9 @@ type Download struct {
}
type Issue struct {
ID int `json:"id"`
Image struct {
ID int `json:"id"`
IssueNumber string `json:"issue_number"`
Image struct {
IconURL string `json:"icon_url,omitempty"`
MediumURL string `json:"medium_url,omitempty"`
ScreenURL string `json:"screen_url,omitempty"`
@ -86,8 +87,12 @@ type CVDownloader struct {
}
var (
ErrQuit = errors.New("Quit")
ErrInvalidPage = errors.New("Invalid ComicVine Page")
ErrQuit = errors.New("quit")
ErrInvalidPage = errors.New("invalid ComicVine page")
ErrInvalidIndex = errors.New("invalid page index")
ErrDownloadFail = errors.New("download failed")
ErrMissingPage = errors.New("page missing")
ErrUpdateNeeded = errors.New("update needed")
)
func (c *CVDownloader) readJson() ([]*CVResult, error) {
@ -145,9 +150,57 @@ func getOffset(name string) int {
return i
}
func (c *CVDownloader) findDownloadedPage(offset int) int {
index := offset / 100
if index < len(c.fileList) && getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
return index
}
index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
return cmp.Compare(getOffset(a), b)
})
if found {
return index
}
return -1
}
func (c *CVDownloader) getDownloadedIssues(offset int, update bool) (*CVResult, error) {
index := c.findDownloadedPage(offset)
if index < 0 {
return nil, ErrMissingPage
}
issue, err := c.loadIssues(c.fileList[index])
if err != nil || issue == nil {
err = fmt.Errorf("Failed to read page at offset %d: %w", offset, err)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
return nil, err
}
c.totalResults = max(c.totalResults, issue.NumberOfTotalResults)
if update && (len(issue.Results) == 0 || issue.Results[0].IssueNumber == "") {
err = fmt.Errorf("Deleting page %d to update records from cv", offset)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
return nil, err
}
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
if index != len(c.fileList)-1 {
err = fmt.Errorf("Wrong index: expected %d got %d", len(c.fileList), index)
return nil, err
}
log.Println("Deleting the last page to detect new comics")
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
}
return issue, nil
}
// updateIssues c.downloadQueue must not be closed before this function has returned
func (c *CVDownloader) updateIssues() int {
base_url, err := url.Parse("https://comicvine.gamespot.com/api/issues/?sort=date_added,id:asc&format=json&field_list=id,image,volume")
func (c *CVDownloader) updateIssues() (int, error) {
base_url, err := url.Parse("https://comicvine.gamespot.com/api/issues/?sort=date_added,id:asc&format=json&field_list=id,issue_number,image,volume")
if err != nil {
log.Fatal(err)
}
@ -182,84 +235,44 @@ func (c *CVDownloader) updateIssues() int {
offset -= 100
return failCount < 15
}
updated := 0
for offset = 0; offset <= c.totalResults; offset += 100 {
index := offset / 100
if c.hasQuit() {
return offset - 100
return offset - 100, ErrQuit
}
if index < len(c.fileList) {
if getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
c.totalResults = max(c.totalResults, issue.NumberOfTotalResults)
prev = -1
failCount = 0
// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
if index != len(c.fileList)-1 {
log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
return offset - 100
}
log.Println("Deleting the last page to detect new comics")
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
} else {
select {
case <-c.Context.Done():
case c.downloadQueue <- issue:
}
continue
}
} else {
log.Println("Failed to read page at offset", offset, issue, err)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
}
} else {
log.Printf("Expected Offset %d got Offset %d", offset, getOffset(c.fileList[index]))
issue, err := c.getDownloadedIssues(offset, updated < 9)
if err == nil && issue != nil {
prev = -1
failCount = 0
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
return offset - 100, ErrQuit
case c.downloadQueue <- issue:
}
continue
}
index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
return cmp.Compare(getOffset(a), b)
})
if found {
if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
prev = -1
failCount = 0
// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
select {
case <-c.Context.Done():
case c.downloadQueue <- issue:
}
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
if index != len(c.fileList)-1 {
log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
return offset - 100
}
log.Println("Deleting the last page to detect new comics")
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
} else {
continue
}
} else {
log.Println("Failed to read page at offset", offset, issue, err)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, (index)+1)
}
if errors.Is(err, ErrInvalidIndex) {
return offset - 100, err
}
if err != nil && !errors.Is(err, ErrMissingPage) {
log.Println(err)
}
log.Println("Starting download at offset", offset)
issue := &CVResult{}
issue = &CVResult{}
URI := (*base_url)
query = base_url.Query()
query.Add("offset", strconv.Itoa(offset))
URI.RawQuery = query.Encode()
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
return offset - 100
case <-time.After(10 * time.Second):
case <-c.Context.Done(): // Allows us to return immediately even during a timeout
return offset - 100, ErrQuit
case <-time.After(10 * time.Second): // Enforces a minimum 10s wait between API hits
}
resp, err, cancelDownloadCTX := Get(URI.String())
if err != nil {
cancelDownloadCTX()
@ -267,7 +280,7 @@ func (c *CVDownloader) updateIssues() int {
continue
}
// Fail and let comic-hasher try the whole thing again later
return offset - 100
return offset - 100, fmt.Errorf("%w: %w", ErrDownloadFail, err)
}
if resp.StatusCode != 200 {
cancelDownloadCTX()
@ -275,14 +288,11 @@ func (c *CVDownloader) updateIssues() int {
_ = resp.Body.Close()
continue
}
log.Println("Failed to download this page, we'll wait for an hour to see if it clears up")
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
_ = resp.Body.Close()
return offset - 100
case <-time.After(1 * time.Hour):
}
msg, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return offset - 100, fmt.Errorf("%w: response code: %d Message: %s", ErrDownloadFail, resp.StatusCode, string(msg))
}
file, err := os.Create(filepath.Join(c.JSONPath, "cv-"+strconv.Itoa(offset)+".json"))
if err != nil {
log.Fatal(err)
@ -297,7 +307,7 @@ func (c *CVDownloader) updateIssues() int {
if retry(URI.String(), err) {
continue
}
return offset - 100
return offset - 100, fmt.Errorf("%w: %w", ErrDownloadFail, err)
}
cancelDownloadCTX()
if issue.NumberOfTotalResults > c.totalResults {
@ -305,13 +315,14 @@ func (c *CVDownloader) updateIssues() int {
}
prev = -1
failCount = 0
updated += 1
select {
case c.downloadQueue <- issue:
}
c.fileList = ch.Insert(c.fileList, fmt.Sprintf("cv-%v.json", offset))
c.insertIssuePage(offset)
log.Printf("Downloaded %s/cv-%v.json", c.JSONPath, offset)
}
return offset
return offset, nil
}
type download struct {
@ -436,12 +447,19 @@ func (c *CVDownloader) downloadImages() {
for list := range c.downloadQueue {
log.Printf("Checking downloads at offset %v\r", list.Offset)
for _, issue := range list.Results {
type i struct {
type image struct {
url string
name string
}
imageURLs := []i{{issue.Image.IconURL, "icon_url"}, {issue.Image.MediumURL, "medium_url"}, {issue.Image.ScreenURL, "screen_url"}, {issue.Image.ScreenLargeURL, "screen_large_url"}, {issue.Image.SmallURL, "small_url"}, {issue.Image.SuperURL, "super_url"}, {issue.Image.ThumbURL, "thumb_url"}, {issue.Image.TinyURL, "tiny_url"}, {issue.Image.OriginalURL, "original_url"}}
imageURLs := []image{{issue.Image.IconURL, "icon_url"}, {issue.Image.MediumURL, "medium_url"}, {issue.Image.ScreenURL, "screen_url"}, {issue.Image.ScreenLargeURL, "screen_large_url"}, {issue.Image.SmallURL, "small_url"}, {issue.Image.SuperURL, "super_url"}, {issue.Image.ThumbURL, "thumb_url"}, {issue.Image.TinyURL, "tiny_url"}, {issue.Image.OriginalURL, "original_url"}}
for _, image := range imageURLs {
if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
continue
}
if c.chdb.CheckURL(image.url) {
log.Printf("Skipping known bad url %s", image.url)
continue
}
if strings.HasSuffix(image.url, "6373148-blank.png") {
c.notFound <- download{
url: image.url,
@ -452,14 +470,6 @@ func (c *CVDownloader) downloadImages() {
continue
}
if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
continue
}
if c.chdb.CheckURL(image.url) {
log.Printf("Skipping known bad url %s", image.url)
continue
}
uri, err := url.ParseRequestURI(image.url)
if err != nil {
c.notFound <- download{
@ -479,7 +489,7 @@ func (c *CVDownloader) downloadImages() {
path := filepath.Join(dir, image.name+ext)
ids := c.get_id(ch.ID{
Domain: ch.ComicVine,
Domain: ch.NewSource(ch.ComicVine),
ID: strconv.Itoa(issue.ID),
})
if c.chdb.PathDownloaded(path) || c.only_hash_new_ids && len(ids) > 0 {
@ -589,6 +599,15 @@ func (c *CVDownloader) cleanDirs() {
return nil
})
}
func (c *CVDownloader) insertIssuePage(offset int) {
index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
return cmp.Compare(getOffset(a), b)
})
if found {
return
}
c.fileList = slices.Insert(c.fileList, index, fmt.Sprintf("cv-%v.json", offset))
}
func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, only_hash_new_ids bool, get_id func(id ch.ID) ch.IDList, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
return &CVDownloader{
@ -596,7 +615,7 @@ func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, only_hash_new_ids
JSONPath: filepath.Join(workPath, "_json"),
ImagePath: filepath.Join(workPath, "_image"),
APIKey: APIKey,
bufPool: bufPool, // Only used if keepDownloadedImages is false to save space on byte buffers. The buffers get sent back via finishedDownloadQueue
bufPool: bufPool, // Only used if keepDownloadedImages is false to save memory on byte buffers. The buffers get sent back via finishedDownloadQueue
FinishedDownloadQueue: finishedDownloadQueue,
SendExistingImages: sendExistingImages,
KeepDownloadedImages: keepDownloadedImages,
@ -625,6 +644,11 @@ func DownloadCovers(c *CVDownloader) {
var d *os.File
d, err = os.Open(c.JSONPath)
c.fileList, err = d.Readdirnames(-1)
for i := len(c.fileList) - 1; i >= 0; i-- {
if !strings.Contains(c.fileList[i], "json") {
c.fileList = slices.Delete(c.fileList, i, i+1)
}
}
if err != nil {
panic(fmt.Errorf("Unable to open path for json files: %w", err))
}
@ -645,7 +669,10 @@ func DownloadCovers(c *CVDownloader) {
dwg.Done()
}()
offset := c.updateIssues()
offset, err := c.updateIssues()
if err != nil {
log.Printf("Failed to download CV Covers: %s", err)
}
issueCount := len(c.fileList) * 100
log.Println("Number of issues", issueCount, " expected:", c.totalResults)

66
go.mod
View File

@ -4,53 +4,67 @@ go 1.23.0
toolchain go1.24.0
// Main comic-hasher
require (
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09
github.com/fmartingr/go-comicinfo/v2 v2.0.2
github.com/kr/pretty v0.1.0
github.com/mattn/go-sqlite3 v1.14.24
github.com/mholt/archiver/v4 v4.0.0-alpha.8
github.com/ncruces/go-sqlite3 v0.23.1
github.com/json-iterator/go v1.1.12
github.com/kr/pretty v0.2.1
github.com/vmihailenco/msgpack v4.0.4+incompatible
go.etcd.io/bbolt v1.4.0
golang.org/x/image v0.24.0
golang.org/x/sys v0.30.0
golang.org/x/text v0.22.0
gonum.org/v1/gonum v0.15.1
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa
golang.org/x/image v0.25.0
)
// Storage types
require (
github.com/mattn/go-sqlite3 v1.14.24
github.com/ncruces/go-sqlite3 v0.26.0
gonum.org/v1/gonum v0.16.0
modernc.org/sqlite v1.35.0
)
// other commands
require (
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/bodgit/plumbing v1.2.0 // indirect
github.com/bodgit/sevenzip v1.3.0 // indirect
github.com/bodgit/windows v1.0.0 // indirect
github.com/connesc/cipherio v0.2.1 // indirect
github.com/dsnet/compress v0.0.1 // indirect
github.com/fmartingr/go-comicinfo/v2 v2.0.2
github.com/mholt/archiver/v4 v4.0.0-alpha.9
golang.org/x/text v0.25.0
)
require (
github.com/STARRY-S/zip v0.1.0 // indirect
github.com/andybalholm/brotli v1.1.1 // indirect
github.com/bodgit/plumbing v1.3.0 // indirect
github.com/bodgit/sevenzip v1.5.2 // indirect
github.com/bodgit/windows v1.0.1 // indirect
github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/golang/mock v1.6.0 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/errwrap v1.0.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/klauspost/pgzip v1.2.5 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/klauspost/compress v1.17.11 // indirect
github.com/klauspost/pgzip v1.2.6 // indirect
github.com/kr/text v0.1.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/ncruces/julianday v1.0.0 // indirect
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect
github.com/pierrec/lz4/v4 v4.1.15 // indirect
github.com/nwaples/rardecode/v2 v2.0.0-beta.4 // indirect
github.com/pierrec/lz4/v4 v4.1.21 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/sorairolake/lzip-go v0.3.5 // indirect
github.com/tetratelabs/wazero v1.9.0 // indirect
github.com/therootcompany/xz v1.0.1 // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
github.com/ulikunitz/xz v0.5.12 // indirect
go4.org v0.0.0-20230225012048-214862532bf5 // indirect
golang.org/x/sys v0.33.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.36.5 // indirect
google.golang.org/protobuf v1.26.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
modernc.org/libc v1.61.13 // indirect
modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.8.2 // indirect

128
go.sum
View File

@ -19,28 +19,28 @@ gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d h1:mFn
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20250130004139-e91c39c79e0d/go.mod h1:UDwa7njhbB5nzxIjHbT9Mjlve9GYn3wzxAcQax1XRvE=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY=
github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/bodgit/plumbing v1.2.0 h1:gg4haxoKphLjml+tgnecR4yLBV5zo4HAZGCtAh3xCzM=
github.com/bodgit/plumbing v1.2.0/go.mod h1:b9TeRi7Hvc6Y05rjm8VML3+47n4XTZPtQ/5ghqic2n8=
github.com/bodgit/sevenzip v1.3.0 h1:1ljgELgtHqvgIp8W8kgeEGHIWP4ch3xGI8uOBZgLVKY=
github.com/bodgit/sevenzip v1.3.0/go.mod h1:omwNcgZTEooWM8gA/IJ2Nk/+ZQ94+GsytRzOJJ8FBlM=
github.com/bodgit/windows v1.0.0 h1:rLQ/XjsleZvx4fR1tB/UxQrK+SJ2OFHzfPjLWWOhDIA=
github.com/bodgit/windows v1.0.0/go.mod h1:a6JLwrB4KrTR5hBpp8FI9/9W9jJfeQ2h4XDXU74ZCdM=
github.com/STARRY-S/zip v0.1.0 h1:eUER3jKmHKXjv+iy3BekLa+QnNSo1Lqz4eTzYBcGDqo=
github.com/STARRY-S/zip v0.1.0/go.mod h1:qj/mTZkvb3AvfGQ2e775/3AODRvB4peSw8KNMvrM8/I=
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/bodgit/plumbing v1.3.0 h1:pf9Itz1JOQgn7vEOE7v7nlEfBykYqvUYioC61TwWCFU=
github.com/bodgit/plumbing v1.3.0/go.mod h1:JOTb4XiRu5xfnmdnDJo6GmSbSbtSyufrsyZFByMtKEs=
github.com/bodgit/sevenzip v1.5.2 h1:acMIYRaqoHAdeu9LhEGGjL9UzBD4RNf9z7+kWDNignI=
github.com/bodgit/sevenzip v1.5.2/go.mod h1:gTGzXA67Yko6/HLSD0iK4kWaWzPlPmLfDO73jTjSRqc=
github.com/bodgit/windows v1.0.1 h1:tF7K6KOluPYygXa3Z2594zxlkbKPAOvqr97etrGNIz4=
github.com/bodgit/windows v1.0.1/go.mod h1:a6JLwrB4KrTR5hBpp8FI9/9W9jJfeQ2h4XDXU74ZCdM=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/connesc/cipherio v0.2.1 h1:FGtpTPMbKNNWByNrr9aEBtaJtXjqOzkIXNYJp6OEycw=
github.com/connesc/cipherio v0.2.1/go.mod h1:ukY0MWJDFnJEbXMQtOcn2VmTpRfzcTz4OoVrWGGJZcA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09 h1:MJFqtdxTq94XqUgg7DcGCaOIXrDTJE/tPHK66Jshguc=
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707 h1:2tV76y6Q9BB+NEBasnqvs7e49aEBFI8ejC89PSnWH+4=
github.com/dsnet/compress v0.0.2-0.20230904184137-39efe44ab707/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
@ -58,17 +58,13 @@ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@ -80,6 +76,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
@ -91,24 +88,30 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
@ -116,18 +119,22 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mholt/archiver/v4 v4.0.0-alpha.8 h1:tRGQuDVPh66WCOelqe6LIGh0gwmfwxUrSSDunscGsRM=
github.com/mholt/archiver/v4 v4.0.0-alpha.8/go.mod h1:5f7FUYGXdJWUjESffJaYR4R60VhnHxb2X3T1teMyv5A=
github.com/ncruces/go-sqlite3 v0.23.1 h1:zGAd76q+Tr18z/xKGatUlzBQdjR3J+rexfANUcjAgkY=
github.com/ncruces/go-sqlite3 v0.23.1/go.mod h1:Xg3FyAZl25HcBSFmcbymdfoTqD7jRnBUmv1jSrbIjdE=
github.com/mholt/archiver/v4 v4.0.0-alpha.9 h1:EZgAsW6DsuawxDgTtIdjCUBa2TQ6AOe9pnCidofSRtE=
github.com/mholt/archiver/v4 v4.0.0-alpha.9/go.mod h1:5D3uct315OMkMRXKwEuMB+wQi/2m5NQngKDmApqwVlo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/ncruces/go-sqlite3 v0.26.0 h1:dY6ASfuhSEbtSge6kJwjyJVC7bXCpgEVOycmdboKJek=
github.com/ncruces/go-sqlite3 v0.26.0/go.mod h1:46HIzeCQQ+aNleAxCli+vpA2tfh7ttSnw24kQahBc1o=
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M=
github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g=
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk=
github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY=
github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0=
github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/nwaples/rardecode/v2 v2.0.0-beta.4 h1:sdiJxQdPjECn2lh9nLFFhgLCf+0ulDU5rODbtERTlUY=
github.com/nwaples/rardecode/v2 v2.0.0-beta.4/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@ -135,21 +142,29 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk=
github.com/sorairolake/lzip-go v0.3.5 h1:ms5Xri9o1JBIWvOFAorYtUNik6HI3HgBTkISiqu0Cwg=
github.com/sorairolake/lzip-go v0.3.5/go.mod h1:N0KYq5iWrMXI0ZEXKXaS9hCyOjZUQdBDEIbXfoUwbdk=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tetratelabs/wazero v1.9.0 h1:IcZ56OuxrtaEz8UYNRHBrUa9bYeX9oVY93KspZZBf/I=
github.com/tetratelabs/wazero v1.9.0/go.mod h1:TSbcXCfFP0L2FGkRPxHphadXPjo1T6W+CseNNY7EkjM=
github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=
github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8=
github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI=
github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk=
go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk=
@ -157,8 +172,8 @@ go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go4.org v0.0.0-20200411211856-f5505b9728dd h1:BNJlw5kRTzdmyfh5U8F93HA2OwkP7ZGwA51eJ/0wKOU=
go4.org v0.0.0-20200411211856-f5505b9728dd/go.mod h1:CIiUVy99QCPfoE13bO4EZaz5GZMZXMSBGhxRdsvzbkg=
go4.org v0.0.0-20230225012048-214862532bf5 h1:nifaUDeh+rPaBCMPMQHZmvJf+QdpLFnuQPwx+LxVmtc=
go4.org v0.0.0-20230225012048-214862532bf5/go.mod h1:F57wTi5Lrj6WLyswp5EYV1ncrEbFGHD4hhz6S1ZYeaU=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
@ -177,8 +192,8 @@ golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa/go.mod h1:BHOTPb3L19zxehTsLo
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
golang.org/x/image v0.25.0 h1:Y6uW6rH1y5y/LK1J8BPWZtr6yZ7hrsy6hFrXjgsc2fQ=
golang.org/x/image v0.25.0/go.mod h1:tCAmOEGthTtkalusGp1g3xa2gke8J6c2N565dTyl9Rs=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@ -194,7 +209,6 @@ golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKG
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.23.0 h1:Zb7khfcRGKk+kqfxFaP5tZqCnDZMjC5VtUBs87Hr6QM=
golang.org/x/mod v0.23.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
@ -211,10 +225,9 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -226,10 +239,9 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -242,18 +254,17 @@ golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@ -261,8 +272,9 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@ -289,16 +301,14 @@ golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.30.0 h1:BgcpHewrV5AUp2G9MebG4XPFI1E2W41zU1SaqVA9vJY=
golang.org/x/tools v0.30.0/go.mod h1:c347cR/OJfw5TI+GfX7RUPNMdDRRbjvYTS0jPyvsVtY=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
@ -335,12 +345,12 @@ google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -7,10 +7,14 @@ import (
"image"
"log"
"math/bits"
"runtime"
"slices"
"strings"
"sync"
"unsafe"
"gitea.narnian.us/lordwelch/goimagehash"
json "github.com/json-iterator/go"
"github.com/vmihailenco/msgpack"
)
//go:embed hashes.gz
@ -43,6 +47,8 @@ const (
SavedHashVersion int = 2
)
var sources *sync.Map = newSourceMap()
type Source string
type Match struct {
@ -51,16 +57,16 @@ type Match struct {
}
type ID struct {
Domain Source
Domain *Source
ID string
}
type Result struct {
IDs IDList
Distance int
Hash Hash
Hash Hash
ID ID
Distance int
EquivalentIDs []ID
}
type Im struct {
Im image.Image
Format string
@ -78,21 +84,97 @@ type Hash struct {
Kind goimagehash.Kind
}
func (id *ID) Compare(target ID) int {
return cmp.Or(
strings.Compare(string(*id.Domain), string(*target.Domain)),
strings.Compare(id.ID, target.ID),
)
}
func newSourceMap() *sync.Map {
m := &sync.Map{}
for s := range []Source{ComicVine} {
m.Store(s, &s)
}
return m
}
func NewSource[E string | Source](s E) *Source {
s2 := Source(strings.ToLower(Clone(string(s))))
sp, _ := sources.LoadOrStore(s2, &s2)
return sp.(*Source)
}
// IDList is a map of domain to ID eg IDs["comicvine.gamespot.com"] = []string{"1235"}
// Maps are extremely expensive in go for small maps this should only be used to return info to a user or as a map containing all IDs for a source
type IDList map[Source][]string
//go:noinline pragma
func (a *ID) DecodeMsgpack(dec *msgpack.Decoder) error {
var s struct {
Domain, ID string
}
err := dec.Decode(&s)
if err != nil {
return err
}
a.ID = Clone(s.ID)
a.Domain = NewSource(s.Domain)
return nil
}
//go:noinline pragma
func Clone(s string) string {
if len(s) == 0 {
return ""
}
b := make([]byte, len(s))
copy(b, s)
return unsafe.String(&b[0], len(b))
}
//go:noinline pragma
func (a *ID) UnmarshalJSON(b []byte) error {
var s struct {
Domain, ID string
}
if err := json.Unmarshal(b, &s); err != nil {
return err
}
a.ID = Clone(s.ID)
domain := Clone(s.Domain)
a.Domain = NewSource(domain)
return nil
}
func ToIDList(ids []ID) IDList {
idlist := IDList{}
for _, id := range ids {
idlist[id.Domain] = Insert(idlist[id.Domain], id.ID)
idlist[*id.Domain] = Insert(idlist[*id.Domain], id.ID)
}
return idlist
}
func InsertID(ids []ID, id ID) []ID {
index, itemFound := slices.BinarySearchFunc(ids, id, func(existing ID, target ID) int {
func InsertIDp(ids []*ID, id *ID) []*ID {
index, itemFound := slices.BinarySearchFunc(ids, id, func(existing, target *ID) int {
return cmp.Or(
cmp.Compare(existing.Domain, target.Domain),
cmp.Compare(*existing.Domain, *target.Domain),
cmp.Compare(existing.ID, target.ID),
)
})
if itemFound {
return ids
}
return slices.Insert(ids, index, id)
}
func InsertID(ids []ID, id ID) []ID {
index, itemFound := slices.BinarySearchFunc(ids, id, func(existing, target ID) int {
return cmp.Or(
cmp.Compare(*existing.Domain, *target.Domain),
cmp.Compare(existing.ID, target.ID),
)
})
@ -110,8 +192,8 @@ type NewIDs struct {
type HashStorage interface {
GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error)
MapHashes(ImageHash)
DecodeHashes(hashes SavedHashes) error
EncodeHashes() (SavedHashes, error)
DecodeHashes(hashes *SavedHashes) error
EncodeHashes() (*SavedHashes, error)
AssociateIDs(newIDs []NewIDs) error
GetIDs(id ID) IDList
}
@ -127,14 +209,6 @@ func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
return matchingHashes
}
func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
index, itemFound := slices.BinarySearch(slice, item)
if itemFound {
return slice
}
return slices.Insert(slice, index, item)
}
func InsertIdx[S ~[]E, E cmp.Ordered](slice S, item E) (S, int) {
index, itemFound := slices.BinarySearch(slice, item)
if itemFound {
@ -143,10 +217,9 @@ func InsertIdx[S ~[]E, E cmp.Ordered](slice S, item E) (S, int) {
return slices.Insert(slice, index, item), index
}
func MemStats() uint64 {
var m runtime.MemStats
runtime.ReadMemStats(&m)
return m.Alloc
func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
slice, _ = InsertIdx(slice, item)
return slice
}
func HashImage(i Im) ImageHash {

View File

@ -2,12 +2,15 @@ package ch
import (
"cmp"
"encoding/json"
"errors"
"fmt"
"slices"
"strings"
// json "github.com/goccy/go-json"
json "github.com/json-iterator/go"
// "encoding/json"
"gitea.narnian.us/lordwelch/goimagehash"
"github.com/vmihailenco/msgpack"
)
@ -21,7 +24,11 @@ const (
CurrentSavedHashesVersion int = 2
)
var versionMap map[int]versionDecoder
var versionMap = map[int]versionDecoder{
0: DecodeHashesV0,
1: DecodeHashesV1,
2: DecodeHashesV2,
}
var formatNames = map[Format]string{
JSON: "json",
@ -52,6 +59,7 @@ type SavedHash struct {
Hash Hash
ID ID
}
type Encoder func(any) ([]byte, error)
type Decoder func([]byte, interface{}) error
type versionDecoder func(Decoder, []byte) (*SavedHashes, error)
@ -75,12 +83,25 @@ func (f *Format) Set(s string) error {
return nil
}
func (h *SavedHash) Clone() SavedHash {
return SavedHash{
Hash: Hash{
Hash: h.Hash.Hash,
Kind: h.Hash.Kind,
},
ID: ID{
Domain: NewSource(*h.ID.Domain),
ID: strings.Clone(h.ID.ID),
},
}
}
func (s *SavedHashes) InsertHash(hash SavedHash) {
index, itemFound := slices.BinarySearchFunc(s.Hashes, hash, func(existing SavedHash, target SavedHash) int {
return cmp.Or(
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
cmp.Compare(existing.Hash.Kind, target.Hash.Kind),
cmp.Compare(existing.ID.Domain, target.ID.Domain),
cmp.Compare(*existing.ID.Domain, *target.ID.Domain),
cmp.Compare(existing.ID.ID, target.ID.ID),
)
})
@ -95,23 +116,21 @@ func ConvertHashesV0(oldHashes OldSavedHashes) *SavedHashes {
for _, ids := range oldHashes {
idcount += len(ids)
}
t.IDs = make([][]ID, 0, idcount)
t.Hashes = make([]SavedHash, 0, idcount)
for domain, sourceHashes := range oldHashes {
for id, hashes := range sourceHashes {
t.IDs = append(t.IDs, []ID{{domain, id}})
for hashType, hash := range hashes {
t.Hashes = append(t.Hashes, SavedHash{
Hash: Hash{
Kind: goimagehash.Kind(hashType + 1),
Hash: hash,
},
ID: ID{domain, id},
ID: ID{NewSource(domain), id},
})
}
}
}
fmt.Println("length of hashes", len(t.Hashes))
fmt.Println("Length of hashes", len(t.Hashes))
fmt.Println("Length of ID lists", len(t.IDs))
return &t
}
@ -128,7 +147,7 @@ func ConvertHashesV1(oldHashes SavedHashesv1) *SavedHashes {
for hash, index := range sourceHashes {
for _, id := range oldHashes.IDs[index] {
t.Hashes = append(t.Hashes, SavedHash{
ID: id,
ID: ID{NewSource(*id.Domain), id.ID},
Hash: Hash{
Kind: goimagehash.Kind(hashType + 1),
Hash: hash,
@ -137,7 +156,7 @@ func ConvertHashesV1(oldHashes SavedHashesv1) *SavedHashes {
}
}
}
fmt.Println("length of hashes", len(t.Hashes))
fmt.Println("Length of hashes", len(t.Hashes))
fmt.Println("Length of ID lists", len(t.IDs))
return &t
}
@ -173,6 +192,7 @@ func DecodeHashesV1(decode Decoder, hashes []byte) (*SavedHashes, error) {
}
func DecodeHashesV2(decode Decoder, hashes []byte) (*SavedHashes, error) {
fmt.Println("Decode v2 hashes")
loadedHashes := SavedHashes{}
err := decode(hashes, &loadedHashes)
if err != nil {
@ -182,6 +202,8 @@ func DecodeHashesV2(decode Decoder, hashes []byte) (*SavedHashes, error) {
return nil, NoHashes
}
fmt.Println("Length of hashes", len(loadedHashes.Hashes))
fmt.Println("Length of ID lists", len(loadedHashes.IDs))
fmt.Println("Loaded V2 hashes")
return &loadedHashes, nil
}
@ -205,8 +227,10 @@ func DecodeHashes(format Format, hashes []byte) (*SavedHashes, error) {
switch format {
case Msgpack:
decode = msgpack.Unmarshal
fmt.Println("Decode Msgpack")
case JSON:
decode = json.Unmarshal
fmt.Println("Decode JSON")
default:
return nil, fmt.Errorf("Unknown format: %v", format)
@ -234,7 +258,7 @@ func DecodeHashes(format Format, hashes []byte) (*SavedHashes, error) {
return nil, NoHashes
}
func EncodeHashes(hashes SavedHashes, format Format) ([]byte, error) {
func EncodeHashes(hashes *SavedHashes, format Format) ([]byte, error) {
var encoder Encoder
switch format {
case Msgpack:

372
storage/basicmap.go Normal file
View File

@ -0,0 +1,372 @@
package storage
import (
"cmp"
"errors"
"fmt"
"math/bits"
"slices"
"sync"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
)
type basicMapStorage struct {
hashMutex *sync.RWMutex
ids IDMap
aHashes []ch.SavedHash
dHashes []ch.SavedHash
pHashes []ch.SavedHash
}
type IDs struct {
id *ch.ID
idList *[]*ch.ID
}
type IDMap struct {
ids []IDs
}
func (m *IDMap) InsertID(id *ch.ID) *ch.ID {
return m.insertID(id, &[]*ch.ID{id})
}
func (m *IDMap) insertID(id *ch.ID, idList *[]*ch.ID) *ch.ID {
index, found := slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ch.ID) int {
return id.id.Compare(*target)
})
if !found {
m.ids = slices.Insert(m.ids, index, IDs{
id,
idList,
})
}
return m.ids[index].id
}
func (m *IDMap) sort() {
slices.SortFunc(m.ids, func(a, b IDs) int {
return a.id.Compare(*b.id)
})
}
func (m *IDMap) FindID(id *ch.ID) (int, bool) {
return slices.BinarySearchFunc(m.ids, id, func(id IDs, target *ch.ID) int {
return id.id.Compare(*target)
})
}
func (m *IDMap) GetIDs(id *ch.ID) []ch.ID {
index, found := m.FindID(id)
if !found {
return nil
}
ids := make([]ch.ID, 0, len(*m.ids[index].idList))
for _, id := range *m.ids[index].idList {
ids = append(ids, *id)
}
return ids
}
func (m *IDMap) AssociateIDs(newids []ch.NewIDs) error {
for _, newid := range newids {
index, found := m.FindID(&newid.OldID)
if !found {
return ErrIDNotFound
}
*(m.ids[index].idList) = ch.InsertIDp(*(m.ids[index].idList), &newid.NewID)
m.insertID(&newid.NewID, m.ids[index].idList)
}
return nil
}
// func (m *IDMap) NewID(domain Source, id string) *ch.ID {
// newID := ch.ID{domain, id}
// index, found := slices.BinarySearchFunc(m.idList, newID, func(id *ch.ID, target ch.ID) int {
// return id.Compare(*target)
// })
// if !found {
// m.idList = slices.Insert(m.idList, index, &newID)
// }
// return m.idList[index]
// }
var ErrIDNotFound = errors.New("ID not found on this server")
// atleast must have a read lock before using
func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, searchHash uint64) []ch.Result {
matchingHashes := make([]ch.Result, 0, 20) // hope that we don't need more
mappedIds := map[int]bool{}
storedHash := ch.SavedHash{} // reduces allocations and ensures queries are <1s
for _, storedHash = range *b.getCurrentHashes(kind) {
distance := bits.OnesCount64(searchHash ^ storedHash.Hash.Hash)
if distance <= maxDistance {
index, _ := b.ids.FindID(&storedHash.ID)
if mappedIds[index] {
continue
}
mappedIds[index] = true
matchingHashes = append(matchingHashes, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: distance,
EquivalentIDs: b.ids.GetIDs(&storedHash.ID),
})
}
}
return matchingHashes
}
func (b *basicMapStorage) exactMatches(hashes []ch.Hash, max int) []ch.Result {
var foundMatches []ch.Result
for _, hash := range hashes {
mappedIds := map[int]bool{}
index, count := b.findHash(hash)
if count > 0 {
for _, storedHash := range (*b.getCurrentHashes(hash.Kind))[index : index+count] {
index, _ := b.ids.FindID(&storedHash.ID)
if mappedIds[index] {
continue
}
mappedIds[index] = true
foundMatches = append(foundMatches, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
EquivalentIDs: b.ids.GetIDs(&storedHash.ID),
})
}
}
}
return foundMatches
}
func (b *basicMapStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var (
foundMatches []ch.Result
tl ch.TimeLog
)
tl.ResetTime()
defer tl.LogTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = b.exactMatches(hashes, max)
tl.LogTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
}
foundHashes := make(map[uint64]struct{})
totalPartialHashes := 0
for _, hash := range hashes {
foundMatches = append(foundMatches, b.atleast(hash.Kind, max, hash.Hash)...)
}
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
return foundMatches, nil
}
// getCurrentHashes must have a read lock before using
func (b *basicMapStorage) getCurrentHashes(kind goimagehash.Kind) *[]ch.SavedHash {
if kind == goimagehash.AHash {
return &b.aHashes
}
if kind == goimagehash.DHash {
return &b.dHashes
}
if kind == goimagehash.PHash {
return &b.pHashes
}
panic("Unknown hash type: " + kind.String())
}
// findHash must have a read lock before using
// return value is index, count
// if count < 1 then no results were found
func (b *basicMapStorage) findHash(hash ch.Hash) (int, int) {
currentHashes := *b.getCurrentHashes(hash.Kind)
index, found := slices.BinarySearchFunc(currentHashes, hash, func(existing ch.SavedHash, target ch.Hash) int {
return cmp.Compare(existing.Hash.Hash, target.Hash)
})
if !found {
return index, 0
}
count := 0
for i := index + 1; i < len(currentHashes) && currentHashes[i].Hash.Hash == hash.Hash; i++ {
count++
}
return index, count
}
// insertHash must already have a lock
func (b *basicMapStorage) insertHash(hash ch.Hash, id ch.ID) {
currentHashes := b.getCurrentHashes(hash.Kind)
index, count := b.findHash(hash)
max := index + count
for ; index < max; index++ {
if (*currentHashes)[index].ID == id {
return
}
}
sh := ch.SavedHash{
Hash: hash,
ID: id,
}
*currentHashes = slices.Insert(*currentHashes, index, sh)
b.ids.InsertID(&sh.ID)
}
func (b *basicMapStorage) MapHashes(hash ch.ImageHash) {
b.hashMutex.Lock()
defer b.hashMutex.Unlock()
for _, ih := range hash.Hashes {
b.insertHash(ih, hash.ID)
}
}
// DecodeHashes must already have a lock
func (b *basicMapStorage) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil {
return nil
}
b.ids.ids = make([]IDs, 0, len(hashes.Hashes))
// Initialize all the known equal IDs
for _, ids := range hashes.IDs {
new_ids := make([]*ch.ID, 0, len(ids))
for _, id := range ids {
new_ids = append(new_ids, &id)
}
for _, id := range new_ids {
b.ids.ids = append(b.ids.ids, IDs{
id,
&new_ids,
})
}
}
b.ids.sort()
slices.SortFunc(hashes.Hashes, func(existing, target ch.SavedHash) int {
return cmp.Or(
cmp.Compare(*existing.ID.Domain, *target.ID.Domain), // Sorted for id insertion efficiency
cmp.Compare(existing.ID.ID, target.ID.ID), // Sorted for id insertion efficiency
cmp.Compare(existing.Hash.Kind, target.Hash.Kind),
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
)
})
aHashCount := 0
dHashCount := 0
pHashCount := 0
for _, savedHash := range hashes.Hashes {
if savedHash.Hash.Kind == goimagehash.AHash {
aHashCount += 1
}
if savedHash.Hash.Kind == goimagehash.DHash {
dHashCount += 1
}
if savedHash.Hash.Kind == goimagehash.PHash {
pHashCount += 1
}
}
// Assume they are probably fairly equally split between hash types
b.aHashes = make([]ch.SavedHash, 0, aHashCount)
b.dHashes = make([]ch.SavedHash, 0, dHashCount)
b.pHashes = make([]ch.SavedHash, 0, pHashCount)
for i := range hashes.Hashes {
hash := hashes.Hashes[i].Clone() // Not cloning this will keep strings/slices loaded from json wasting memory
if hashes.Hashes[i].Hash.Kind == goimagehash.AHash {
b.aHashes = append(b.aHashes, hash)
}
if hashes.Hashes[i].Hash.Kind == goimagehash.DHash {
b.dHashes = append(b.dHashes, hash)
}
if hashes.Hashes[i].Hash.Kind == goimagehash.PHash {
b.pHashes = append(b.pHashes, hash)
}
if hashes.Hashes[i].ID == (ch.ID{}) {
fmt.Println("Empty ID detected")
panic(hashes.Hashes[i])
}
// TODO: Make loading this more efficient
// All known equal IDs are already mapped we can add any missing ones from hashes
b.ids.InsertID(&hash.ID)
}
hashCmp := func(existing, target ch.SavedHash) int {
return cmp.Or(
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
cmp.Compare(*existing.ID.Domain, *target.ID.Domain),
cmp.Compare(existing.ID.ID, target.ID.ID),
)
}
slices.SortFunc(b.aHashes, hashCmp)
slices.SortFunc(b.dHashes, hashCmp)
slices.SortFunc(b.pHashes, hashCmp)
return nil
}
// EncodeHashes should already have a lock
func (b *basicMapStorage) EncodeHashes() (*ch.SavedHashes, error) {
savedHashes := ch.SavedHashes{
Hashes: make([]ch.SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
}
// savedHashes.Hashes = append(savedHashes.Hashes, b.aHashes...)
// savedHashes.Hashes = append(savedHashes.Hashes, b.dHashes...)
// savedHashes.Hashes = append(savedHashes.Hashes, b.pHashes...)
// // Only keep groups len>1 as they are mapped in SavedHashes.Hashes
// for _, ids := range b.ids.ids {
// if len(*ids.idList) > 1 {
// idl := make([]ID, 0, len(*ids.idList))
// for _, id := range *ids.idList {
// idl = append(idl, *id)
// }
// savedHashes.IDs = append(savedHashes.IDs, idl)
// }
// }
return &savedHashes, nil
}
func (b *basicMapStorage) AssociateIDs(newids []ch.NewIDs) error {
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
return b.ids.AssociateIDs(newids)
}
func (b *basicMapStorage) GetIDs(id ch.ID) ch.IDList {
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
ids := b.ids.GetIDs(&id)
return ch.ToIDList(ids)
}
func NewBasicMapStorage() (ch.HashStorage, error) {
storage := &basicMapStorage{
hashMutex: &sync.RWMutex{},
ids: IDMap{
ids: []IDs{},
},
aHashes: []ch.SavedHash{},
dHashes: []ch.SavedHash{},
pHashes: []ch.SavedHash{},
}
return storage, nil
}

View File

@ -1,10 +1,11 @@
package ch
package storage
import (
"fmt"
"slices"
"sync"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
)
@ -15,10 +16,10 @@ type MapStorage struct {
partialPHash [8]map[uint8][]uint64
}
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
func (m *MapStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var (
foundMatches []Result
tl timeLog
foundMatches []ch.Result
tl ch.TimeLog
)
m.hashMutex.RLock()
defer m.hashMutex.RUnlock()
@ -26,13 +27,13 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = m.exactMatches(hashes, max)
tl.logTime("Search Exact")
tl.LogTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
}
tl.resetTime()
defer tl.logTime("Search Complete")
tl.ResetTime()
defer tl.LogTime("Search Complete")
totalPartialHashes := 0
@ -40,30 +41,34 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
currentHashes, currentPartialHashes := m.getCurrentHashes(searchHash.Kind)
potentialMatches := []uint64{}
for i, partialHash := range SplitHash(searchHash.Hash) {
for i, partialHash := range ch.SplitHash(searchHash.Hash) {
potentialMatches = append(potentialMatches, currentPartialHashes[i][partialHash]...)
}
totalPartialHashes += len(potentialMatches)
mappedIds := map[*[]ID]bool{}
mappedIds := map[int]bool{}
for _, match := range Atleast(max, searchHash.Hash, potentialMatches) {
matchedHash := Hash{match.Hash, searchHash.Kind}
for _, match := range ch.Atleast(max, searchHash.Hash, potentialMatches) {
matchedHash := ch.Hash{
Hash: match.Hash,
Kind: searchHash.Kind,
}
index, count := m.findHash(matchedHash)
if count < 1 {
continue
}
for _, storedHash := range currentHashes[index : index+count] {
ids := m.ids[storedHash.ID]
if mappedIds[ids] {
idIndex, _ := m.ids.FindID(&storedHash.ID)
if mappedIds[idIndex] {
continue
}
mappedIds[ids] = true
mappedIds[idIndex] = true
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: storedHash.Hash,
IDs: ToIDList(*m.ids[storedHash.ID]),
foundMatches = append(foundMatches, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
EquivalentIDs: m.ids.GetIDs(&storedHash.ID),
})
}
@ -74,7 +79,7 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
}
// getCurrentHashes must have a read lock before using
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]SavedHash, [8]map[uint8][]uint64) {
func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]ch.SavedHash, [8]map[uint8][]uint64) {
if kind == goimagehash.AHash {
return m.aHashes, m.partialAHash
}
@ -87,17 +92,20 @@ func (m *MapStorage) getCurrentHashes(kind goimagehash.Kind) ([]SavedHash, [8]ma
panic("Unknown hash type: " + kind.String())
}
func (m *MapStorage) MapHashes(hash ImageHash) {
func (m *MapStorage) MapHashes(hash ch.ImageHash) {
m.basicMapStorage.MapHashes(hash)
for _, hash := range hash.Hashes {
_, partialHashes := m.getCurrentHashes(hash.Kind)
for i, partialHash := range SplitHash(hash.Hash) {
partialHashes[i][partialHash] = Insert(partialHashes[i][partialHash], hash.Hash)
for i, partialHash := range ch.SplitHash(hash.Hash) {
partialHashes[i][partialHash] = ch.Insert(partialHashes[i][partialHash], hash.Hash)
}
}
}
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
func (m *MapStorage) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil {
return nil
}
if err := m.basicMapStorage.DecodeHashes(hashes); err != nil {
return err
}
@ -113,15 +121,17 @@ func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
return nil
}
func NewMapStorage() (HashStorage, error) {
func NewMapStorage() (ch.HashStorage, error) {
storage := &MapStorage{
basicMapStorage: basicMapStorage{
hashMutex: &sync.RWMutex{},
ids: make(map[ID]*[]ID),
aHashes: []SavedHash{},
dHashes: []SavedHash{},
pHashes: []SavedHash{},
ids: IDMap{
ids: []IDs{},
},
aHashes: []ch.SavedHash{},
dHashes: []ch.SavedHash{},
pHashes: []ch.SavedHash{},
},
partialAHash: newPartialHash(),
partialDHash: newPartialHash(),
@ -143,9 +153,9 @@ func newPartialHash() [8]map[uint8][]uint64 {
}
}
func mapPartialHashes(hashes []SavedHash, partialHashMap [8]map[uint8][]uint64) {
func mapPartialHashes(hashes []ch.SavedHash, partialHashMap [8]map[uint8][]uint64) {
for _, savedHash := range hashes {
for i, partialHash := range SplitHash(savedHash.Hash.Hash) {
for i, partialHash := range ch.SplitHash(savedHash.Hash.Hash) {
partialHashMap[i][partialHash] = append(partialHashMap[i][partialHash], savedHash.Hash.Hash)
}
}

View File

@ -1,4 +1,4 @@
package ch
package storage
import (
"context"
@ -8,6 +8,7 @@ import (
"log"
"math/bits"
ch "gitea.narnian.us/lordwelch/comic-hasher"
_ "modernc.org/sqlite"
)
@ -26,63 +27,66 @@ type sqliteStorage struct {
idExists *sql.Stmt
}
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) ([]ID, error) {
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash ch.Hash) (map[ch.ID][]ch.ID, error) {
if statement == nil {
statement = s.hashExactMatchStatement
}
hashes := []ID{}
hashes := map[ch.ID][]ch.ID{}
rows, err := statement.Query(hash.Kind, int64(hash.Hash))
if err != nil {
return hashes, err
}
for rows.Next() {
var (
r = ID{}
id ch.ID
foundID ch.ID
)
err = rows.Scan(&r.Domain, &r.ID)
err = rows.Scan(&foundID.Domain, &foundID.ID, &id.Domain, &id.ID)
if err != nil {
rows.Close()
return hashes, err
}
hashes = append(hashes, r)
hashes[foundID] = append(hashes[foundID], id)
}
rows.Close()
return hashes, nil
}
func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max int, hash Hash) ([]Result, error) {
func (s *sqliteStorage) findPartialHashes(tl ch.TimeLog, statement *sql.Stmt, max int, hash ch.Hash) ([]ch.Result, error) {
if statement == nil {
statement = s.hashPartialMatchStatement
}
hashResults := []Result{}
hashResults := []ch.Result{}
rows, err := statement.Query(hash.Kind, int64(hash.Hash))
if err != nil {
return hashResults, err
}
results := map[Hash][]ID{}
results := map[ch.SavedHash][]ch.ID{}
for rows.Next() {
var (
tmpHash int64
sqlHash = Hash{Kind: hash.Kind}
id ID
sqlHash = ch.SavedHash{
Hash: ch.Hash{Kind: hash.Kind},
}
id ch.ID
)
err = rows.Scan(&tmpHash, &id.Domain, &id.ID)
err = rows.Scan(&sqlHash.ID.Domain, &sqlHash.ID.ID, &tmpHash, &id.Domain, &id.ID)
if err != nil {
rows.Close()
return hashResults, err
}
sqlHash.Hash = uint64(tmpHash)
sqlHash.Hash.Hash = uint64(tmpHash)
results[sqlHash] = append(results[sqlHash], id)
}
for sqlHash, ids := range results {
res := Result{
Hash: sqlHash,
Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash),
res := ch.Result{
Hash: sqlHash.Hash,
ID: sqlHash.ID,
Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash.Hash),
EquivalentIDs: ids,
}
if res.Distance <= max {
res.IDs = ToIDList(ids)
hashResults = append(hashResults, res)
}
}
@ -91,18 +95,18 @@ func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max i
func (s *sqliteStorage) dropIndexes() error {
_, err := s.db.Exec(`
DROP INDEX IF EXISTS hash_index;
DROP INDEX IF EXISTS hash_1_index;
DROP INDEX IF EXISTS hash_2_index;
DROP INDEX IF EXISTS hash_3_index;
DROP INDEX IF EXISTS hash_4_index;
DROP INDEX IF EXISTS hash_5_index;
DROP INDEX IF EXISTS hash_6_index;
DROP INDEX IF EXISTS hash_7_index;
DROP INDEX IF EXISTS hash_8_index;
DROP INDEX IF EXISTS hash_index;
DROP INDEX IF EXISTS hash_1_index;
DROP INDEX IF EXISTS hash_2_index;
DROP INDEX IF EXISTS hash_3_index;
DROP INDEX IF EXISTS hash_4_index;
DROP INDEX IF EXISTS hash_5_index;
DROP INDEX IF EXISTS hash_6_index;
DROP INDEX IF EXISTS hash_7_index;
DROP INDEX IF EXISTS hash_8_index;
DROP INDEX IF EXISTS id_domain;
`)
DROP INDEX IF EXISTS id_domain;
`)
if err != nil {
return err
}
@ -111,32 +115,32 @@ func (s *sqliteStorage) dropIndexes() error {
func (s *sqliteStorage) createIndexes() error {
_, err := s.db.Exec(`
CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, stringid);
PRAGMA shrink_memory;
ANALYZE;
`)
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, stringid);
PRAGMA shrink_memory;
ANALYZE;
`)
if err != nil {
return err
}
return nil
}
func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
func (s *sqliteStorage) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var (
foundMatches []Result
tl timeLog
foundMatches []ch.Result
tl ch.TimeLog
)
tl.resetTime()
tl.ResetTime()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes {
@ -144,13 +148,17 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
if err != nil {
return foundMatches, err
}
foundMatches = append(foundMatches, Result{
IDs: ToIDList(idlist),
Hash: hash,
})
for id, equivalentIDs := range idlist {
foundMatches = append(foundMatches, ch.Result{
Hash: hash,
ID: id,
Distance: 0,
EquivalentIDs: equivalentIDs,
})
}
}
tl.logTime("Search Exact")
tl.LogTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
@ -163,7 +171,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
if err != nil {
return foundMatches, err
}
tl.logTime(fmt.Sprintf("Search partial %v", hash.Kind))
tl.LogTime(fmt.Sprintf("Search partial %v", hash.Kind))
for _, hash := range results {
if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched {
@ -178,7 +186,7 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
return foundMatches, nil
}
func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) {
func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ch.ImageHash) {
var err error
insertHash := tx.Stmt(s.insertHash)
insertID := tx.Stmt(s.insertID)
@ -227,7 +235,7 @@ func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) {
}
}
}
func (s *sqliteStorage) MapHashes(hash ImageHash) {
func (s *sqliteStorage) MapHashes(hash ch.ImageHash) {
tx, err := s.db.BeginTx(context.Background(), nil)
if err != nil {
panic(err)
@ -239,7 +247,7 @@ func (s *sqliteStorage) MapHashes(hash ImageHash) {
}
}
func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
func (s *sqliteStorage) DecodeHashes(hashes *ch.SavedHashes) error {
return nil
err := s.dropIndexes()
if err != nil {
@ -278,8 +286,8 @@ func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
}
for _, savedHash := range hashes.Hashes {
s.mapHashes(tx, ImageHash{
Hashes: []Hash{savedHash.Hash},
s.mapHashes(tx, ch.ImageHash{
Hashes: []ch.Hash{savedHash.Hash},
ID: savedHash.ID,
})
}
@ -295,45 +303,45 @@ func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
return nil
}
func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
hashes := SavedHashes{}
func (s *sqliteStorage) EncodeHashes() (*ch.SavedHashes, error) {
hashes := ch.SavedHashes{}
tx, err := s.db.Begin()
if err != nil {
return hashes, err
return &hashes, err
}
rows, err := tx.Query("SELECT Hashes.kind, Hashes.hash, IDs.domain, IDs.stringid FROM Hashes JOIN IDs ON Hashes.id=IDs.id ORDER BY Hashes.kind, Hashes.hash;")
if err != nil {
return hashes, err
return &hashes, err
}
for rows.Next() {
var (
hash SavedHash
hash ch.SavedHash
tmpHash int64
)
err = rows.Scan(&hash.Hash.Kind, &tmpHash, &hash.ID.Domain, &hash.ID.ID)
if err != nil {
return hashes, err
return &hashes, err
}
hash.Hash.Hash = uint64(tmpHash)
hashes.InsertHash(hash)
}
rows, err = tx.Query("SELECT IEIDs.equivalentid, IDs.domain, IDs.stringid FROM IDs JOIN IDsToEquivalantIDs AS IEIDs ON IDs.id=IEIDs.idid ORDER BY IEIDs.equivalentid, IDs.domain, IDs.stringid;")
if err != nil {
return hashes, err
return &hashes, err
}
var (
previousEid int64 = -1
ids []ID
ids []ch.ID
)
for rows.Next() {
var (
id ID
id ch.ID
newEid int64
)
err = rows.Scan(&newEid, &id.Domain, &id.Domain)
if err != nil {
return hashes, err
return &hashes, err
}
if newEid != previousEid {
previousEid = newEid
@ -341,14 +349,14 @@ func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
if len(ids) > 1 {
hashes.IDs = append(hashes.IDs, ids)
}
ids = make([]ID, 0)
ids = make([]ch.ID, 0)
}
ids = append(ids, id)
}
return hashes, nil
return &hashes, nil
}
func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
func (s *sqliteStorage) AssociateIDs(newIDs []ch.NewIDs) error {
tx, err := s.db.BeginTx(context.Background(), nil)
if err != nil {
panic(err)
@ -390,21 +398,21 @@ func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
return nil
}
func (s *sqliteStorage) GetIDs(id ID) IDList {
var ids []ID
func (s *sqliteStorage) GetIDs(id ch.ID) ch.IDList {
var ids []ch.ID
rows, err := s.idMatchStatement.Query(id.Domain, id.ID)
if err != nil {
return nil
}
for rows.Next() {
var id ID
var id ch.ID
err = rows.Scan(&id.Domain, &id.ID)
if err != nil {
return nil
}
ids = append(ids, id)
}
return ToIDList(ids)
return ch.ToIDList(ids)
}
func (s *sqliteStorage) PrepareStatements() error {
@ -430,50 +438,50 @@ func (s *sqliteStorage) PrepareStatements() error {
return fmt.Errorf("failed to prepare database statements: %w", err)
}
s.hashExactMatchStatement, err = s.db.Prepare(`
select IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join (
select QEIDs.id as id from EquivalentIDs as QEIDs
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid
join Hashes on Hashes.id=QIDs.id
where (Hashes.kind=? AND Hashes.hash=?)
) as EIDs on EIDs.id=IEIDs.equivalentid;
`)
select QIDs.domain, QIDs.stringid, IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join (
select QEIDs.id as id from EquivalentIDs as QEIDs
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid
join Hashes on Hashes.id=QIDs.id
where (Hashes.kind=? AND Hashes.hash=?)
) as EIDs on EIDs.id=IEIDs.equivalentid;
`)
if err != nil {
return fmt.Errorf("failed to prepare database statements: %w", err)
}
s.hashPartialMatchStatement, err = s.db.Prepare(`
select EIDs.hash, IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join (
select Hashes.hash as hash, QEIDs.id as id from EquivalentIDs as QEIDs
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid
join Hashes on Hashes.id=QIDs.id
where (Hashes.kind=? AND (((Hashes.hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((Hashes.hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((Hashes.hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((Hashes.hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((Hashes.hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((Hashes.hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((Hashes.hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((Hashes.hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF))))
) as EIDs on EIDs.id=IEIDs.equivalentid;
`)
select QIDs.domain, QIDs.stringid, EIDs.hash, IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join (
select Hashes.hash as hash, QEIDs.id as id from EquivalentIDs as QEIDs
join IDsToEquivalantIDs as QIEIDs on QEIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid
join Hashes on Hashes.id=QIDs.id
where (Hashes.kind=? AND (((Hashes.hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((Hashes.hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((Hashes.hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((Hashes.hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((Hashes.hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((Hashes.hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((Hashes.hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((Hashes.hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF))))
) as EIDs on EIDs.id=IEIDs.equivalentid;
`)
if err != nil {
return fmt.Errorf("failed to prepare database statements: %w", err)
}
s.idMatchStatement, err = s.db.Prepare(`
select IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join (
select EIDs.* from EquivalentIDs as EIDs
join IDsToEquivalantIDs as QIEIDs on EIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid
where (QIDs.domain=? AND QIDs.stringid=?)
) as EIDs on EIDs.id=IEIDs.equivalentid;
`)
select IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join (
select EIDs.* from EquivalentIDs as EIDs
join IDsToEquivalantIDs as QIEIDs on EIDs.id=QIEIDs.equivalentid
join IDs as QIDs on QIDs.id=QIEIDs.idid
where (QIDs.domain=? AND QIDs.stringid=?)
) as EIDs on EIDs.id=IEIDs.equivalentid;
`)
if err != nil {
return fmt.Errorf("failed to prepare database statements: %w", err)
}
return nil
}
func NewSqliteStorage(db, path string) (HashStorage, error) {
func NewSqliteStorage(db, path string) (ch.HashStorage, error) {
sqlite := &sqliteStorage{}
sqlDB, err := sql.Open(db, fmt.Sprintf("file://%s?_pragma=cache_size(-200000)&_pragma=busy_timeout(500)&_pragma=hard_heap_limit(1073741824)&_pragma=journal_mode(wal)&_pragma=soft_heap_limit(314572800)", path))
if err != nil {
@ -481,34 +489,34 @@ func NewSqliteStorage(db, path string) (HashStorage, error) {
}
sqlite.db = sqlDB
_, err = sqlite.db.Exec(`
PRAGMA foreign_keys=ON;
CREATE TABLE IF NOT EXISTS IDs(
id INTEGER PRIMARY KEY,
stringid TEXT NOT NULL,
domain TEXT NOT NULL
);
PRAGMA foreign_keys=ON;
CREATE TABLE IF NOT EXISTS IDs(
id INTEGER PRIMARY KEY,
stringid TEXT NOT NULL,
domain TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS Hashes(
hash INTEGER NOT NULL,
kind INTEGER NOT NULL,
id INTEGER NOT NULL,
CREATE TABLE IF NOT EXISTS Hashes(
hash INTEGER NOT NULL,
kind INTEGER NOT NULL,
id INTEGER NOT NULL,
FOREIGN KEY(id) REFERENCES IDs(id)
);
FOREIGN KEY(id) REFERENCES IDs(id)
);
CREATE TABLE IF NOT EXISTS EquivalentIDs(
id integer primary key
);
CREATE TABLE IF NOT EXISTS EquivalentIDs(
id integer primary key
);
CREATE TABLE IF NOT EXISTS IDsToEquivalantIDs(
idid INTEGER NOT NULL,
equivalentid INTEGER NOT NULL,
PRIMARY KEY (idid, equivalentid),
CREATE TABLE IF NOT EXISTS IDsToEquivalantIDs(
idid INTEGER NOT NULL,
equivalentid INTEGER NOT NULL,
PRIMARY KEY (idid, equivalentid),
FOREIGN KEY(idid) REFERENCES IDs(id),
FOREIGN KEY(equivalentid) REFERENCES EquivalentIDs(id)
);
`)
FOREIGN KEY(idid) REFERENCES IDs(id),
FOREIGN KEY(equivalentid) REFERENCES EquivalentIDs(id)
);
`)
if err != nil {
panic(err)
}

View File

@ -1,6 +1,6 @@
//go:build cgo
//go:build cgo && !gokrazy
package ch
package storage
import (
_ "github.com/mattn/go-sqlite3"

View File

@ -1,6 +1,6 @@
//go:build !cgo
//go:build !cgo && !gokrazy
package ch
package storage
import (
_ "github.com/ncruces/go-sqlite3/driver"

View File

@ -1,10 +1,13 @@
package ch
//go:build !gokrazy
package storage
import (
"errors"
"fmt"
"math/bits"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
"gonum.org/v1/gonum/spatial/vptree"
)
@ -13,14 +16,14 @@ type VPTree struct {
aTree *vptree.Tree
dTree *vptree.Tree
pTree *vptree.Tree
ids map[ID]*[]ID
ids map[ch.ID]*[]ch.ID
aHashes []vptree.Comparable // temporary, only used for vptree creation
dHashes []vptree.Comparable // temporary, only used for vptree creation
pHashes []vptree.Comparable // temporary, only used for vptree creation
}
type VPHash struct {
SavedHash
ch.SavedHash
}
func (h *VPHash) Distance(c vptree.Comparable) float64 {
@ -31,22 +34,22 @@ func (h *VPHash) Distance(c vptree.Comparable) float64 {
return float64(bits.OnesCount64(h.Hash.Hash ^ h2.Hash.Hash))
}
func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
func (v *VPTree) GetMatches(hashes []ch.Hash, max int, exactOnly bool) ([]ch.Result, error) {
var (
matches []Result
exactMatches []Result
tl timeLog
matches []ch.Result
exactMatches []ch.Result
tl ch.TimeLog
)
tl.resetTime()
defer tl.logTime("Search Complete")
tl.ResetTime()
defer tl.LogTime("Search Complete")
for _, hash := range hashes {
results := vptree.NewDistKeeper(float64(max))
currentTree := v.getCurrentTree(hash.Kind)
currentTree.NearestSet(results, &VPHash{SavedHash{Hash: hash}})
currentTree.NearestSet(results, &VPHash{ch.SavedHash{Hash: hash}})
mappedIds := map[*[]ID]bool{}
mappedIds := map[*[]ch.ID]bool{}
for _, result := range results.Heap {
storedHash := result.Comparable.(*VPHash)
ids := v.ids[storedHash.ID]
@ -55,16 +58,18 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
}
mappedIds[ids] = true
if result.Dist == 0 {
exactMatches = append(exactMatches, Result{
IDs: ToIDList(*v.ids[storedHash.ID]),
Distance: int(result.Dist),
Hash: storedHash.Hash,
exactMatches = append(exactMatches, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
EquivalentIDs: *v.ids[storedHash.ID],
})
} else {
matches = append(matches, Result{
IDs: ToIDList(*v.ids[storedHash.ID]),
Distance: int(result.Dist),
Hash: storedHash.Hash,
matches = append(matches, ch.Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
EquivalentIDs: *v.ids[storedHash.ID],
})
}
}
@ -89,11 +94,14 @@ func (v *VPTree) getCurrentTree(kind goimagehash.Kind) *vptree.Tree {
panic("Unknown hash type: " + kind.String())
}
func (v *VPTree) MapHashes(ImageHash) {
func (v *VPTree) MapHashes(ch.ImageHash) {
panic("Not Implemented")
}
func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
func (v *VPTree) DecodeHashes(hashes *ch.SavedHashes) error {
if hashes == nil {
return nil
}
// Initialize all the known equal IDs
for _, ids := range hashes.IDs {
@ -113,13 +121,13 @@ func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
v.pHashes = append(v.pHashes, &VPHash{savedHash})
}
if savedHash.ID == (ID{}) {
if savedHash.ID == (ch.ID{}) {
fmt.Println("Empty ID detected")
panic(savedHash)
}
// All known equal IDs are already mapped we can add any missing ones from hashes
if _, ok := v.ids[savedHash.ID]; !ok {
v.ids[savedHash.ID] = &[]ID{savedHash.ID}
v.ids[savedHash.ID] = &[]ch.ID{savedHash.ID}
}
}
@ -137,23 +145,23 @@ func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
}
return nil
}
func (v *VPTree) EncodeHashes() (SavedHashes, error) {
return SavedHashes{}, errors.New("Not Implemented")
func (v *VPTree) EncodeHashes() (*ch.SavedHashes, error) {
return &ch.SavedHashes{}, errors.New("Not Implemented")
}
func (v *VPTree) AssociateIDs(newIDs []NewIDs) error {
func (v *VPTree) AssociateIDs(newIDs []ch.NewIDs) error {
return errors.New("Not Implemented")
}
func (v *VPTree) GetIDs(id ID) IDList {
func (v *VPTree) GetIDs(id ch.ID) ch.IDList {
ids, found := v.ids[id]
if !found {
return nil
}
return ToIDList(*ids)
return ch.ToIDList(*ids)
}
func NewVPStorage() (HashStorage, error) {
func NewVPStorage() (ch.HashStorage, error) {
var err error
v := &VPTree{
aHashes: []vptree.Comparable{},

View File

@ -0,0 +1,13 @@
//go:build gokrazy
package storage
import (
"errors"
ch "gitea.narnian.us/lordwelch/comic-hasher"
)
func NewVPStorage() (ch.HashStorage, error) {
return nil, errors.New("VPTree not available")
}

View File

@ -5,17 +5,17 @@ import (
"time"
)
type timeLog struct {
type TimeLog struct {
total time.Duration
last time.Time
}
func (t *timeLog) resetTime() {
func (t *TimeLog) ResetTime() {
t.total = 0
t.last = time.Now()
}
func (t *timeLog) logTime(log string) {
func (t *TimeLog) LogTime(log string) {
now := time.Now()
diff := now.Sub(t.last)
t.last = now