Compare commits

7 Commits

Author       SHA1        Message                                           Date
Timmy Welch  3e364b1858  Filter out non-json files                         2025-04-24 12:54:40 -07:00
Timmy Welch  0da76f7fb5  Fix inserting downloaded pages                    2025-04-24 12:53:55 -07:00
Timmy Welch  d7c42f5c1d  Fix issues with loading saved hashes              2025-04-24 12:46:04 -07:00
Timmy Welch  374c46bc48  Fix url handling order                            2025-04-24 12:46:04 -07:00
Timmy Welch  aca658e32d  Typo                                              2025-04-24 12:46:04 -07:00
Timmy Welch  acd71df302  Add the issue number to the downloaded CV pages   2025-04-24 12:46:04 -07:00
                         Update existing pages 10 at a time if they are missing data
                         Simplify checking for previously downloaded pages
Timmy Welch  0fd431f6f7  Update result type                                2025-04-19 15:41:04 -07:00
10 changed files with 572 additions and 589 deletions

@ -35,7 +35,12 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
continue
}
mappedIds[ids] = true
matchingHashes = append(matchingHashes, Result{ToIDList(*b.ids[storedHash.ID]), distance, storedHash.Hash})
matchingHashes = append(matchingHashes, Result{
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: distance,
EquivalentIDs: *b.ids[storedHash.ID],
})
}
}
return matchingHashes
@ -56,9 +61,10 @@ func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
mappedIds[ids] = true
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: storedHash.Hash,
IDs: ToIDList(*b.ids[storedHash.ID]),
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
EquivalentIDs: *b.ids[storedHash.ID],
})
}
}
@ -155,7 +161,10 @@ func (b *basicMapStorage) MapHashes(hash ImageHash) {
}
// DecodeHashes must already have a lock
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
if hashes == nil {
return nil
}
b.ids = make(map[ID]*[]ID, len(hashes.Hashes))
// Initialize all the known equal IDs
@ -215,7 +224,7 @@ func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
}
// EncodeHashes should already have a lock
func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
func (b *basicMapStorage) EncodeHashes() (*SavedHashes, error) {
savedHashes := SavedHashes{
Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
}
@ -230,7 +239,7 @@ func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
}
}
return savedHashes, nil
return &savedHashes, nil
}
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {
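Taken together, these hunks move the storage API from value to pointer semantics for SavedHashes: DecodeHashes now accepts a *SavedHashes and treats nil as a no-op, and EncodeHashes returns a pointer. A minimal sketch of the intended round trip, assuming a storage value implementing HashStorage and the package's existing serializers (imports and surrounding function elided):

// Sketch only: persist the in-memory hashes, then load them back
// through the new pointer-based API.
saved, err := storage.EncodeHashes() // *SavedHashes instead of SavedHashes
if err != nil {
	panic(err)
}
buf, err := ch.EncodeHashes(saved, ch.Msgpack) // the serializer now takes a pointer too
if err != nil {
	panic(err)
}
loaded, err := ch.DecodeHashes(ch.Msgpack, buf) // already returned *SavedHashes
if err != nil {
	panic(err)
}
// DecodeHashes now explicitly tolerates a nil *SavedHashes.
if err := storage.DecodeHashes(loaded); err != nil {
	panic(err)
}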

@ -3,10 +3,8 @@ package main
import (
"bufio"
"bytes"
"cmp"
"compress/gzip"
"context"
"encoding/json"
"errors"
"flag"
"fmt"
@ -15,17 +13,13 @@ import (
_ "image/jpeg"
_ "image/png"
"io"
"io/fs"
"log"
"net/http"
_ "net/http/pprof"
"net/url"
"os"
"path/filepath"
"runtime/debug"
"runtime/pprof"
"slices"
"strconv"
"strings"
"sync"
"time"
@ -40,24 +34,8 @@ import (
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/comic-hasher/cv"
"gitea.narnian.us/lordwelch/goimagehash"
)
type Server struct {
httpServer *http.Server
mux *CHMux
BaseURL *url.URL
hashes ch.HashStorage
Context context.Context
cancel func()
signalQueue chan os.Signal
readerQueue chan string
hashingQueue chan ch.Im
mappingQueue chan ch.ImageHash
onlyHashNewIDs bool
version string
}
var bufPool = &sync.Pool{
New: func() any {
// The Pool's New function should generally only return pointer
@ -219,411 +197,6 @@ func main() {
startServer(opts)
}
func (s *Server) authenticated(w http.ResponseWriter, r *http.Request) (string, bool) {
return strings.TrimSpace("lordwelch"), true
}
func (s *Server) setupAppHandlers() {
// s.mux.HandleFunc("/get_cover", s.getCover)
s.mux.HandleFunc("/add_cover", s.addCover)
s.mux.HandleFunc("/match_cover_hash", s.matchCoverHash)
s.mux.HandleFunc("/associate_ids", s.associateIDs)
}
func (s *Server) getCover(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
)
if ID == "" {
log.Println("No ID Provided")
http.Error(w, "No ID Provided", http.StatusBadRequest)
return
}
if domain == "" {
log.Println("No domain Provided")
http.Error(w, "No domain Provided", http.StatusBadRequest)
return
}
// if index, ok := s.IDToCover[domain+":"+ID]; ok {
// covers, err := json.Marshal(s.covers[index])
// if err == nil {
// w.Header().Add("Content-Type", "application/json")
// w.Write(covers)
// return
// }
// }
fmt.Fprintln(w, "Not implemented")
}
func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
newDomain = strings.TrimSpace(values.Get("newDomain"))
newID = strings.TrimSpace(values.Get("newID"))
)
if ID == "" {
msg := "No ID Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if domain == "" {
msg := "No domain Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newID == "" {
msg := "No newID Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newDomain == "" {
msg := "No newDomain Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newDomain == domain {
msg := "newDomain cannot be the same as the existing domain"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
// if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
// msg := "No IDs belonging to " + domain + "exist on this server"
// log.Println(msg)
// writeJson(w, http.StatusBadRequest, result{Msg: msg})
// }
log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
found := false
// for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
// for i, idlist := range hash {
// if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
// found = true
// hash[i] = ch.Insert(idlist, newDomain+":"+newID)
// if _, ok := s.ids[ch.Source(newDomain)]; !ok {
// s.ids[ch.Source(newDomain)] = make(map[string]struct{})
// }
// s.ids[ch.Source(newDomain)][newID] = struct{}{}
// }
// }
// }
if found {
writeJson(w, http.StatusOK, result{Msg: "New ID added"})
} else {
writeJson(w, http.StatusOK, result{Msg: "Old ID not found"})
}
}
type SimpleResult struct {
Distance int
IDList ch.IDList
}
func getSimpleResults(fullResults []ch.Result) []SimpleResult {
simpleResult := make([]SimpleResult, 0, len(fullResults))
slices.SortFunc(fullResults, func(a, b ch.Result) int {
return cmp.Compare(a.Distance, b.Distance) * -1 // Reverses sort
})
// Deduplicate IDs
idToDistance := make(map[ch.ID]int)
for _, fullResult := range fullResults {
for domain, idlist := range fullResult.IDs {
for _, idStr := range idlist {
id := ch.ID{
Domain: domain,
ID: idStr,
}
if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
idToDistance[id] = fullResult.Distance
}
}
}
}
// Group by distance
distanceMap := make(map[int]SimpleResult)
for id, distance := range idToDistance {
var (
sr SimpleResult
ok bool
)
if sr, ok = distanceMap[distance]; !ok {
sr.IDList = make(ch.IDList)
}
sr.Distance = distance
sr.IDList[id.Domain] = append(sr.IDList[id.Domain], id.ID)
distanceMap[distance] = sr
}
// turn into array
for _, sr := range distanceMap {
simpleResult = append(simpleResult, sr)
}
slices.SortFunc(simpleResult, func(a, b SimpleResult) int {
return cmp.Compare(a.Distance, b.Distance)
})
return simpleResult
}
type result struct {
Results any `json:"results,omitempty"`
Msg string `json:"msg,omitempty"`
}
func writeJson(w http.ResponseWriter, status int, res result) {
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.Header().Set("X-Content-Type-Options", "nosniff")
var (
bytes []byte
err error
)
if bytes, err = json.Marshal(res); err != nil {
bytes, _ = json.Marshal(result{Msg: fmt.Sprintf("Failed to create json: %s", err)})
}
w.WriteHeader(status)
_, _ = w.Write(bytes)
_, _ = w.Write([]byte("\n"))
}
func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
ahashStr = strings.TrimSpace(values.Get("ahash"))
dhashStr = strings.TrimSpace(values.Get("dhash"))
phashStr = strings.TrimSpace(values.Get("phash"))
maxStr = strings.TrimSpace(values.Get("max"))
exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
ahash uint64
dhash uint64
phash uint64
max int = 8
max_tmp int
err error
hashes []ch.Hash
)
if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
log.Printf("could not parse ahash: %s", ahashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if ahash > 0 {
hashes = append(hashes, ch.Hash{ahash, goimagehash.AHash})
}
if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" {
log.Printf("could not parse dhash: %s", dhashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if dhash > 0 {
hashes = append(hashes, ch.Hash{dhash, goimagehash.DHash})
}
if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" {
log.Printf("could not parse phash: %s", phashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if phash > 0 {
hashes = append(hashes, ch.Hash{phash, goimagehash.PHash})
}
if max_tmp, err = strconv.Atoi(maxStr); err != nil && maxStr != "" {
log.Printf("Invalid Max: %s", maxStr)
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Invalid Max: %s", maxStr)})
return
}
if maxStr != "" {
max = max_tmp
}
if max > 8 {
log.Printf("Max must be less than 9: %d", max)
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
return
}
matches, err := s.hashes.GetMatches(hashes, max, exactOnly)
slices.SortFunc(matches, func(a ch.Result, b ch.Result) int {
return cmp.Compare(a.Distance, b.Distance)
})
log.Println(err)
if len(matches) > 0 {
var msg string = ""
if err != nil {
msg = err.Error()
}
if simple {
writeJson(w, http.StatusOK, result{
Results: getSimpleResults(matches),
Msg: msg,
})
return
}
writeJson(w, http.StatusOK, result{
Results: matches,
Msg: msg,
})
return
}
writeJson(w, http.StatusNotFound, result{Msg: "No hashes found"})
}
func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
w.WriteHeader(http.StatusNotImplemented)
return
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
)
if ID == "" {
log.Println("No ID Provided")
writeJson(w, http.StatusBadRequest, result{Msg: "No ID Provided"})
return
}
if domain == "" {
log.Println("No domain Provided")
writeJson(w, http.StatusBadRequest, result{Msg: "No Domain Provided"})
return
}
i, format, err := image.Decode(r.Body)
if err != nil {
msg := fmt.Sprintf("Failed to decode Image: %s", err)
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
log.Printf("Decoded %s image from %s", format, user)
select {
case <-s.Context.Done():
log.Println("Recieved quit")
return
default:
}
s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.Source(domain), ID: ID}}
writeJson(w, http.StatusOK, result{Msg: "Success"})
}
func (s *Server) mapper(done func()) {
defer done()
for hash := range s.mappingQueue {
s.hashes.MapHashes(hash)
}
}
func (s *Server) hasher(workerID int, done func(int)) {
defer done(workerID)
for image := range s.hashingQueue {
start := time.Now()
if image.NewOnly && len(s.hashes.GetIDs(image.ID)) > 0 {
log.Printf("Skipping existing hash with ID: %s found", image.ID)
continue
}
hash := ch.HashImage(image)
if hash.ID.Domain == "" || hash.ID.ID == "" {
continue
}
select {
// TODO: Check channel pipelines
case s.mappingQueue <- hash:
default:
}
elapsed := time.Since(start)
log.Printf("Hashing took %v: worker: %v. %s: %064b id: %s\n", elapsed, workerID, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
}
}
func (s *Server) reader(workerID int, done func(i int)) {
defer done(workerID)
for path := range s.readerQueue {
id := ch.ID{Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path))}
if len(s.hashes.GetIDs(id)) > 0 {
continue
}
file, err := os.Open(path)
if err != nil {
panic(err)
}
i, format, err := image.Decode(bufio.NewReader(file))
file.Close()
if err != nil {
continue // skip this image
}
im := ch.Im{
Im: i,
Format: format,
ID: id,
NewOnly: s.onlyHashNewIDs,
}
select {
case s.hashingQueue <- im:
default:
}
}
}
func (s *Server) HashLocalImages(opts Opts) {
if opts.coverPath == "" {
return
}
go func() {
log.Println("Hashing covers at ", opts.coverPath)
start := time.Now()
err := filepath.WalkDir(opts.coverPath, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
select {
case <-s.Context.Done():
log.Println("Recieved quit")
err = s.httpServer.Shutdown(context.TODO())
return fmt.Errorf("Recieved quit: %w", err)
default:
}
if d.IsDir() {
return nil
}
s.readerQueue <- path
return nil
})
elapsed := time.Since(start)
log.Println("Err:", err, "local hashing took", elapsed)
}()
}
func signalHandler(s *Server) {
select {
case sig := <-s.signalQueue:
@ -698,13 +271,17 @@ func loadHashes(opts Opts) *ch.SavedHashes {
}
break
}
if errors.Is(err, ch.NoHashes) {
log.Println("No saved hashes to load")
return loadedHashes
}
if err != nil {
panic(fmt.Sprintf("Failed to decode hashes: %s", err))
}
fmt.Printf("Loaded %s hashes\n", format)
return loadedHashes
}
func saveHashes(opts Opts, hashes ch.SavedHashes) error {
func saveHashes(opts Opts, hashes *ch.SavedHashes) error {
if opts.loadEmbeddedHashes && !opts.saveEmbeddedHashes {
return errors.New("refusing to save embedded hashes")
}
@ -787,15 +364,6 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
}
}
type CHMux struct {
version string
*http.ServeMux
}
func (CHM *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Server", "Comic-Hasher "+CHM.version)
CHM.ServeMux.ServeHTTP(w, r)
}
func startServer(opts Opts) {
imaging.SetMaxProcs(2)
if opts.cpuprofile != "" {
@ -860,7 +428,7 @@ func startServer(opts Opts) {
// DecodeHashes would normally need a write lock
// nothing else has been started yet so we don't need one
if err := server.hashes.DecodeHashes(*loadHashes(opts)); err != nil {
if err := server.hashes.DecodeHashes(loadHashes(opts)); err != nil {
panic(err)
}

cmd/comic-hasher/server.go (new file)

@ -0,0 +1,362 @@
package main
import (
"bufio"
"cmp"
"context"
"encoding/json"
"fmt"
"image"
"io/fs"
"log"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"time"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
"golang.org/x/exp/slices"
)
type Server struct {
httpServer *http.Server
mux *CHMux
BaseURL *url.URL
hashes ch.HashStorage
Context context.Context
cancel func()
signalQueue chan os.Signal
readerQueue chan string
hashingQueue chan ch.Im
mappingQueue chan ch.ImageHash
onlyHashNewIDs bool
version string
}
type CHMux struct {
version string
*http.ServeMux
}
func (CHM *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Server", "Comic-Hasher "+CHM.version)
CHM.ServeMux.ServeHTTP(w, r)
}
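CHMux exists only to stamp a Server header on every response before delegating to the embedded ServeMux. A sketch of the assumed wiring (address and version string are hypothetical):

// Inside package main, next to the Server type.
mux := &CHMux{version: "dev", ServeMux: http.NewServeMux()}
srv := &http.Server{Addr: ":8080", Handler: mux}
// Every response from srv now carries "Server: Comic-Hasher dev".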
func (s *Server) authenticated(w http.ResponseWriter, r *http.Request) (string, bool) {
return strings.TrimSpace("lordwelch"), true
}
func (s *Server) setupAppHandlers() {
s.mux.HandleFunc("/add_cover", s.addCover)
s.mux.HandleFunc("/match_cover_hash", s.matchCoverHash)
s.mux.HandleFunc("/associate_ids", s.associateIDs)
}
func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
domain = ch.Source(strings.ToLower(strings.TrimSpace(values.Get("domain"))))
ID = strings.ToLower(strings.TrimSpace(values.Get("id")))
newDomain = ch.Source(strings.ToLower(strings.TrimSpace(values.Get("newDomain"))))
newID = strings.ToLower(strings.TrimSpace(values.Get("newID")))
)
if ID == "" {
msg := "No ID Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if domain == "" {
msg := "No domain Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newID == "" {
msg := "No newID Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newDomain == "" {
msg := "No newDomain Provided"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
if newDomain == domain {
msg := "newDomain cannot be the same as the existing domain"
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
err := s.hashes.AssociateIDs([]ch.NewIDs{{
OldID: ch.ID{
Domain: domain,
ID: ID,
},
NewID: ch.ID{
Domain: newDomain,
ID: newID,
},
}})
if err == nil {
writeJson(w, http.StatusOK, result{Msg: "New ID added"})
} else {
writeJson(w, http.StatusOK, result{Msg: err.Error()})
}
}
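For reference, a hedged sketch of calling this endpoint from Go; the host and both IDs are hypothetical, and the handler lowercases every parameter before using it:

u, _ := url.Parse("http://localhost:8080/associate_ids")
q := u.Query()
q.Set("domain", "comicvine.gamespot.com") // domain of the existing ID
q.Set("id", "140085")
q.Set("newDomain", "example.org") // must differ from domain
q.Set("newID", "abc123")
u.RawQuery = q.Encode()
resp, err := http.Get(u.String()) // body is a JSON result{msg: ...}
if err != nil {
	panic(err)
}
defer resp.Body.Close()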
type result struct {
Results []ch.Result `json:"results,omitempty"`
Msg string `json:"msg,omitempty"`
}
func writeJson(w http.ResponseWriter, status int, res result) {
w.Header().Set("Content-Type", "application/json; charset=utf-8")
w.Header().Set("X-Content-Type-Options", "nosniff")
var (
bytes []byte
err error
)
if bytes, err = json.Marshal(res); err != nil {
bytes, _ = json.Marshal(result{Msg: fmt.Sprintf("Failed to create json: %s", err)})
}
w.WriteHeader(status)
_, _ = w.Write(bytes)
_, _ = w.Write([]byte("\n"))
}
func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
ahashStr = strings.TrimSpace(values.Get("ahash"))
dhashStr = strings.TrimSpace(values.Get("dhash"))
phashStr = strings.TrimSpace(values.Get("phash"))
maxStr = strings.TrimSpace(values.Get("max"))
exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
ahash uint64
dhash uint64
phash uint64
max int = 8
max_tmp int
err error
hashes []ch.Hash
)
if simple {
writeJson(w, http.StatusBadRequest, result{Msg: "Simple results are no longer Supported"})
return
}
if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
log.Printf("could not parse ahash: %s", ahashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if ahash > 0 {
hashes = append(hashes, ch.Hash{Hash: ahash, Kind: goimagehash.AHash})
}
if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" {
log.Printf("could not parse dhash: %s", dhashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if dhash > 0 {
hashes = append(hashes, ch.Hash{Hash: dhash, Kind: goimagehash.DHash})
}
if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" {
log.Printf("could not parse phash: %s", phashStr)
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
return
}
if phash > 0 {
hashes = append(hashes, ch.Hash{Hash: phash, Kind: goimagehash.PHash})
}
if max_tmp, err = strconv.Atoi(maxStr); err != nil && maxStr != "" {
log.Printf("Invalid Max: %s", maxStr)
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Invalid Max: %s", maxStr)})
return
}
if maxStr != "" {
max = max_tmp
}
if max > 8 {
log.Printf("Max must be less than 9: %d", max)
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
return
}
matches, err := s.hashes.GetMatches(hashes, max, exactOnly)
slices.SortFunc(matches, func(a ch.Result, b ch.Result) int {
return cmp.Compare(a.Distance, b.Distance)
})
log.Println(err)
if len(matches) > 0 {
var msg string = ""
if err != nil {
msg = err.Error()
}
writeJson(w, http.StatusOK, result{
Results: matches,
Msg: msg,
})
return
}
writeJson(w, http.StatusNotFound, result{Msg: "No hashes found"})
}
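A sketch of querying the matcher under the constraints the handler enforces: hashes are hex-encoded (base-16) 64-bit values, max may not exceed 8, exactOnly defaults to true unless explicitly "false", and simple=true is now rejected. The host and hash value below are hypothetical:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	u, _ := url.Parse("http://localhost:8080/match_cover_hash")
	q := u.Query()
	q.Set("ahash", fmt.Sprintf("%016x", uint64(0xd1c1b1a191817161)))
	q.Set("max", "6")
	q.Set("exactOnly", "false") // allow distances 1..6, not only exact hits
	u.RawQuery = q.Encode()
	resp, err := http.Get(u.String())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	// Mirrors the server's result struct without importing the module.
	var res struct {
		Results []json.RawMessage `json:"results,omitempty"`
		Msg     string            `json:"msg,omitempty"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		panic(err)
	}
	fmt.Println(len(res.Results), res.Msg)
}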
func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
if true {
w.WriteHeader(http.StatusNotImplemented)
return
}
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
)
if ID == "" {
log.Println("No ID Provided")
writeJson(w, http.StatusBadRequest, result{Msg: "No ID Provided"})
return
}
if domain == "" {
log.Println("No domain Provided")
writeJson(w, http.StatusBadRequest, result{Msg: "No Domain Provided"})
return
}
i, format, err := image.Decode(r.Body)
if err != nil {
msg := fmt.Sprintf("Failed to decode Image: %s", err)
log.Println(msg)
writeJson(w, http.StatusBadRequest, result{Msg: msg})
return
}
log.Printf("Decoded %s image from %s", format, user)
select {
case <-s.Context.Done():
log.Println("Recieved quit")
return
default:
}
s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.Source(domain), ID: ID}}
writeJson(w, http.StatusOK, result{Msg: "Success"})
}
func (s *Server) mapper(done func()) {
defer done()
for hash := range s.mappingQueue {
s.hashes.MapHashes(hash)
}
}
func (s *Server) hasher(workerID int, done func(int)) {
defer done(workerID)
for image := range s.hashingQueue {
start := time.Now()
if image.NewOnly && len(s.hashes.GetIDs(image.ID)) > 0 {
log.Printf("Skipping existing hash with ID: %s found", image.ID)
continue
}
hash := ch.HashImage(image)
if hash.ID.Domain == "" || hash.ID.ID == "" {
continue
}
select {
// TODO: Check channel pipelines
case s.mappingQueue <- hash:
default:
}
elapsed := time.Since(start)
log.Printf("Hashing took %v: worker: %v. %s: %064b id: %s\n", elapsed, workerID, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
}
}
func (s *Server) reader(workerID int, done func(i int)) {
defer done(workerID)
for path := range s.readerQueue {
id := ch.ID{Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path))}
if len(s.hashes.GetIDs(id)) > 0 {
continue
}
file, err := os.Open(path)
if err != nil {
panic(err)
}
i, format, err := image.Decode(bufio.NewReader(file))
file.Close()
if err != nil {
continue // skip this image
}
im := ch.Im{
Im: i,
Format: format,
ID: id,
NewOnly: s.onlyHashNewIDs,
}
select {
case s.hashingQueue <- im:
default:
}
}
}
func (s *Server) HashLocalImages(opts Opts) {
if opts.coverPath == "" {
return
}
go func() {
log.Println("Hashing covers at ", opts.coverPath)
start := time.Now()
err := filepath.WalkDir(opts.coverPath, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
select {
case <-s.Context.Done():
log.Println("Recieved quit")
err = s.httpServer.Shutdown(context.TODO())
return fmt.Errorf("Recieved quit: %w", err)
default:
}
if d.IsDir() {
return nil
}
s.readerQueue <- path
return nil
})
elapsed := time.Since(start)
log.Println("Err:", err, "local hashing took", elapsed)
}()
}

cv/cv.go

@ -34,8 +34,9 @@ type Download struct {
}
type Issue struct {
ID int `json:"id"`
Image struct {
ID int `json:"id"`
IssueNumber string `json:"issue_number"`
Image struct {
IconURL string `json:"icon_url,omitempty"`
MediumURL string `json:"medium_url,omitempty"`
ScreenURL string `json:"screen_url,omitempty"`
@ -86,8 +87,12 @@ type CVDownloader struct {
}
var (
ErrQuit = errors.New("Quit")
ErrInvalidPage = errors.New("Invalid ComicVine Page")
ErrQuit = errors.New("quit")
ErrInvalidPage = errors.New("invalid ComicVine page")
ErrInvalidIndex = errors.New("invalid page index")
ErrDownloadFail = errors.New("download failed")
ErrMissingPage = errors.New("page missing")
ErrUpdateNeeded = errors.New("update needed")
)
func (c *CVDownloader) readJson() ([]*CVResult, error) {
@ -145,9 +150,57 @@ func getOffset(name string) int {
return i
}
func (c *CVDownloader) findDownloadedPage(offset int) int {
index := offset / 100
if index < len(c.fileList) && getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
return index
}
index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
return cmp.Compare(getOffset(a), b)
})
if found {
return index
}
return -1
}
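findDownloadedPage first probes the slot the page would occupy if no earlier page were missing (offset/100) and only falls back to a binary search over the sorted file list when that guess misses. An illustration with a hypothetical file list:

// Pages 0 and 200 are on disk; page 100 is missing.
c.fileList = []string{"cv-0.json", "cv-200.json"}
c.findDownloadedPage(0)   // fast path: slot 0 holds offset 0
c.findDownloadedPage(200) // slot 2 is out of range, binary search returns index 1
c.findDownloadedPage(100) // not on disk: returns -1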
func (c *CVDownloader) getDownloadedIssues(offset int, update bool) (*CVResult, error) {
index := c.findDownloadedPage(offset)
if index < 0 {
return nil, ErrMissingPage
}
issue, err := c.loadIssues(c.fileList[index])
if err != nil || issue == nil {
err = fmt.Errorf("Failed to read page at offset %d: %w", offset, err)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
return nil, err
}
c.totalResults = max(c.totalResults, issue.NumberOfTotalResults)
if update && (len(issue.Results) == 0 || issue.Results[0].IssueNumber == "") {
err = fmt.Errorf("Deleting page %d to update records from cv", offset)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
return nil, err
}
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
if index != len(c.fileList)-1 {
err = fmt.Errorf("Wrong index: expected %d got %d", len(c.fileList), index)
return nil, err
}
log.Println("Deleting the last page to detect new comics")
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
}
return issue, nil
}
// updateIssues c.downloadQueue must not be closed before this function has returned
func (c *CVDownloader) updateIssues() int {
base_url, err := url.Parse("https://comicvine.gamespot.com/api/issues/?sort=date_added,id:asc&format=json&field_list=id,image,volume")
func (c *CVDownloader) updateIssues() (int, error) {
base_url, err := url.Parse("https://comicvine.gamespot.com/api/issues/?sort=date_added,id:asc&format=json&field_list=id,issue_number,image,volume")
if err != nil {
log.Fatal(err)
}
@ -182,84 +235,44 @@ func (c *CVDownloader) updateIssues() int {
offset -= 100
return failCount < 15
}
updated := 0
for offset = 0; offset <= c.totalResults; offset += 100 {
index := offset / 100
if c.hasQuit() {
return offset - 100
return offset - 100, ErrQuit
}
if index < len(c.fileList) {
if getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
c.totalResults = max(c.totalResults, issue.NumberOfTotalResults)
prev = -1
failCount = 0
// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
if index != len(c.fileList)-1 {
log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
return offset - 100
}
log.Println("Deleting the last page to detect new comics")
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
} else {
select {
case <-c.Context.Done():
case c.downloadQueue <- issue:
}
continue
}
} else {
log.Println("Failed to read page at offset", offset, issue, err)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
}
} else {
log.Printf("Expected Offset %d got Offset %d", offset, getOffset(c.fileList[index]))
issue, err := c.getDownloadedIssues(offset, updated < 9)
if err == nil && issue != nil {
prev = -1
failCount = 0
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
return offset - 100, ErrQuit
case c.downloadQueue <- issue:
}
continue
}
index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
return cmp.Compare(getOffset(a), b)
})
if found {
if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
prev = -1
failCount = 0
// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
select {
case <-c.Context.Done():
case c.downloadQueue <- issue:
}
if c.totalResults == issue.Offset+issue.NumberOfPageResults {
if index != len(c.fileList)-1 {
log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
return offset - 100
}
log.Println("Deleting the last page to detect new comics")
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, index+1)
} else {
continue
}
} else {
log.Println("Failed to read page at offset", offset, issue, err)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, (index)+1)
}
if errors.Is(err, ErrInvalidIndex) {
return offset - 100, err
}
if err != nil && !errors.Is(err, ErrMissingPage) {
log.Println(err)
}
log.Println("Starting download at offset", offset)
issue := &CVResult{}
issue = &CVResult{}
URI := (*base_url)
query = base_url.Query()
query.Add("offset", strconv.Itoa(offset))
URI.RawQuery = query.Encode()
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
return offset - 100
case <-time.After(10 * time.Second):
case <-c.Context.Done(): // Allows us to return immediately even during a timeout
return offset - 100, ErrQuit
case <-time.After(10 * time.Second): // Enforces a minimum 10s wait between API hits
}
resp, err, cancelDownloadCTX := Get(URI.String())
if err != nil {
cancelDownloadCTX()
@ -267,7 +280,7 @@ func (c *CVDownloader) updateIssues() int {
continue
}
// Fail and let comic-hasher try the whole thing again later
return offset - 100
return offset - 100, fmt.Errorf("%w: %w", ErrDownloadFail, err)
}
if resp.StatusCode != 200 {
cancelDownloadCTX()
@ -275,14 +288,11 @@ func (c *CVDownloader) updateIssues() int {
_ = resp.Body.Close()
continue
}
log.Println("Failed to download this page, we'll wait for an hour to see if it clears up")
select {
case <-c.Context.Done(): // allows us to return immediately even during a timeout
_ = resp.Body.Close()
return offset - 100
case <-time.After(1 * time.Hour):
}
msg, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
return offset - 100, fmt.Errorf("%w: response code: %d Message: %s", ErrDownloadFail, resp.StatusCode, string(msg))
}
file, err := os.Create(filepath.Join(c.JSONPath, "cv-"+strconv.Itoa(offset)+".json"))
if err != nil {
log.Fatal(err)
@ -297,7 +307,7 @@ func (c *CVDownloader) updateIssues() int {
if retry(URI.String(), err) {
continue
}
return offset - 100
return offset - 100, fmt.Errorf("%w: %w", ErrDownloadFail, err)
}
cancelDownloadCTX()
if issue.NumberOfTotalResults > c.totalResults {
@ -305,13 +315,14 @@ func (c *CVDownloader) updateIssues() int {
}
prev = -1
failCount = 0
updated += 1
select {
case c.downloadQueue <- issue:
}
c.fileList = ch.Insert(c.fileList, fmt.Sprintf("cv-%v.json", offset))
c.insertIssuePage(offset)
log.Printf("Downloaded %s/cv-%v.json", c.JSONPath, offset)
}
return offset
return offset, nil
}
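This loop is where the commit's "update existing pages 10 at a time" behaviour lives: getDownloadedIssues is asked to enforce updates only while updated < 9, so pages saved before issue_number was requested get deleted and re-fetched in small batches rather than all in one run. A condensed view of the control flow (error handling and queueing elided):

updated := 0
for offset := 0; offset <= c.totalResults; offset += 100 {
	// While only a handful of pages have been fetched this run, stale
	// pages (records without issue_number) are deleted so the download
	// path below refreshes them.
	issue, err := c.getDownloadedIssues(offset, updated < 9)
	if err == nil && issue != nil {
		continue // the saved page is usable as-is
	}
	// ... download the page from the API, then:
	updated += 1
}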
type download struct {
@ -436,12 +447,19 @@ func (c *CVDownloader) downloadImages() {
for list := range c.downloadQueue {
log.Printf("Checking downloads at offset %v\r", list.Offset)
for _, issue := range list.Results {
type i struct {
type image struct {
url string
name string
}
imageURLs := []i{{issue.Image.IconURL, "icon_url"}, {issue.Image.MediumURL, "medium_url"}, {issue.Image.ScreenURL, "screen_url"}, {issue.Image.ScreenLargeURL, "screen_large_url"}, {issue.Image.SmallURL, "small_url"}, {issue.Image.SuperURL, "super_url"}, {issue.Image.ThumbURL, "thumb_url"}, {issue.Image.TinyURL, "tiny_url"}, {issue.Image.OriginalURL, "original_url"}}
imageURLs := []image{{issue.Image.IconURL, "icon_url"}, {issue.Image.MediumURL, "medium_url"}, {issue.Image.ScreenURL, "screen_url"}, {issue.Image.ScreenLargeURL, "screen_large_url"}, {issue.Image.SmallURL, "small_url"}, {issue.Image.SuperURL, "super_url"}, {issue.Image.ThumbURL, "thumb_url"}, {issue.Image.TinyURL, "tiny_url"}, {issue.Image.OriginalURL, "original_url"}}
for _, image := range imageURLs {
if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
continue
}
if c.chdb.CheckURL(image.url) {
log.Printf("Skipping known bad url %s", image.url)
continue
}
if strings.HasSuffix(image.url, "6373148-blank.png") {
c.notFound <- download{
url: image.url,
@ -452,14 +470,6 @@ func (c *CVDownloader) downloadImages() {
continue
}
if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
continue
}
if c.chdb.CheckURL(image.url) {
log.Printf("Skipping known bad url %s", image.url)
continue
}
uri, err := url.ParseRequestURI(image.url)
if err != nil {
c.notFound <- download{
@ -589,6 +599,15 @@ func (c *CVDownloader) cleanDirs() {
return nil
})
}
func (c *CVDownloader) insertIssuePage(offset int) {
index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
return cmp.Compare(getOffset(a), b)
})
if found {
return
}
c.fileList = slices.Insert(c.fileList, index, fmt.Sprintf("cv-%v.json", offset))
}
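insertIssuePage replaces the bare ch.Insert call and is the substance of the "Fix inserting downloaded pages" commit: the file list stays sorted by offset, and a page that is already present is not inserted twice. A hypothetical sequence:

c.fileList = nil
c.insertIssuePage(200) // []string{"cv-200.json"}
c.insertIssuePage(0)   // []string{"cv-0.json", "cv-200.json"}
c.insertIssuePage(200) // already present: no change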
func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, only_hash_new_ids bool, get_id func(id ch.ID) ch.IDList, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
return &CVDownloader{
@ -596,7 +615,7 @@ func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, only_hash_new_ids
JSONPath: filepath.Join(workPath, "_json"),
ImagePath: filepath.Join(workPath, "_image"),
APIKey: APIKey,
bufPool: bufPool, // Only used if keepDownloadedImages is false to save space on byte buffers. The buffers get sent back via finishedDownloadQueue
bufPool: bufPool, // Only used if keepDownloadedImages is false to save memory on byte buffers. The buffers get sent back via finishedDownloadQueue
FinishedDownloadQueue: finishedDownloadQueue,
SendExistingImages: sendExistingImages,
KeepDownloadedImages: keepDownloadedImages,
@ -625,6 +644,11 @@ func DownloadCovers(c *CVDownloader) {
var d *os.File
d, err = os.Open(c.JSONPath)
c.fileList, err = d.Readdirnames(-1)
for i := len(c.fileList) - 1; i >= 0; i-- {
if !strings.Contains(c.fileList[i], "json") {
c.fileList = slices.Delete(c.fileList, i, i+1)
}
}
if err != nil {
panic(fmt.Errorf("Unable to open path for json files: %w", err))
}
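This is the "Filter out non-json files" commit; strings.Contains keeps any name with "json" anywhere in it, so a name like backup.json.bak would also pass. If stricter matching were ever wanted, an extension check would be the usual alternative (a sketch, not what the commit does):

for i := len(c.fileList) - 1; i >= 0; i-- {
	if filepath.Ext(c.fileList[i]) != ".json" {
		c.fileList = slices.Delete(c.fileList, i, i+1)
	}
}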
@ -645,7 +669,10 @@ func DownloadCovers(c *CVDownloader) {
dwg.Done()
}()
offset := c.updateIssues()
offset, err := c.updateIssues()
if err != nil {
log.Printf("Failed to download CV Covers: %s", err)
}
issueCount := len(c.fileList) * 100
log.Println("Number of issues", issueCount, " expected:", c.totalResults)

go.mod

@ -14,8 +14,8 @@ require (
github.com/ncruces/go-sqlite3 v0.23.1
github.com/vmihailenco/msgpack v4.0.4+incompatible
go.etcd.io/bbolt v1.4.0
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa
golang.org/x/image v0.24.0
golang.org/x/sys v0.30.0
golang.org/x/text v0.22.0
gonum.org/v1/gonum v0.15.1
modernc.org/sqlite v1.35.0
@ -48,7 +48,7 @@ require (
github.com/therootcompany/xz v1.0.1 // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
golang.org/x/sys v0.30.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.36.5 // indirect
modernc.org/libc v1.61.13 // indirect

@ -56,11 +56,11 @@ type ID struct {
}
type Result struct {
IDs IDList
Distance int
Hash Hash
Hash Hash
ID ID
Distance int
EquivalentIDs []ID
}
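This is the "Update result type" commit: a Result now identifies the single stored hash it matched (ID) plus that ID's equivalents, instead of a pre-flattened IDList. Callers that still want the old domain-keyed map can rebuild it with ToIDList, the same helper the pre-change code used:

for _, res := range matches {
	fmt.Println(res.ID, res.Distance) // the matched hash's own ID
	legacy := ToIDList(res.EquivalentIDs)
	_ = legacy // old-style IDList view, when still needed
}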
type Im struct {
Im image.Image
Format string
@ -110,8 +110,8 @@ type NewIDs struct {
type HashStorage interface {
GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error)
MapHashes(ImageHash)
DecodeHashes(hashes SavedHashes) error
EncodeHashes() (SavedHashes, error)
DecodeHashes(hashes *SavedHashes) error
EncodeHashes() (*SavedHashes, error)
AssociateIDs(newIDs []NewIDs) error
GetIDs(id ID) IDList
}
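Every backend has to track these pointer signatures, so compile-time assertions are a cheap way to catch an implementation that was missed; a sketch covering the types touched in this diff:

var (
	_ HashStorage = (*basicMapStorage)(nil)
	_ HashStorage = (*MapStorage)(nil)
	_ HashStorage = (*sqliteStorage)(nil)
	_ HashStorage = (*VPTree)(nil)
)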

map.go

@ -61,9 +61,10 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
mappedIds[ids] = true
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: storedHash.Hash,
IDs: ToIDList(*m.ids[storedHash.ID]),
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
EquivalentIDs: *m.ids[storedHash.ID],
})
}
@ -97,7 +98,10 @@ func (m *MapStorage) MapHashes(hash ImageHash) {
}
}
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
func (m *MapStorage) DecodeHashes(hashes *SavedHashes) error {
if hashes == nil {
return nil
}
if err := m.basicMapStorage.DecodeHashes(hashes); err != nil {
return err
}

@ -52,6 +52,7 @@ type SavedHash struct {
Hash Hash
ID ID
}
type Encoder func(any) ([]byte, error)
type Decoder func([]byte, interface{}) error
type versionDecoder func(Decoder, []byte) (*SavedHashes, error)
@ -111,7 +112,7 @@ func ConvertHashesV0(oldHashes OldSavedHashes) *SavedHashes {
}
}
}
fmt.Println("length of hashes", len(t.Hashes))
fmt.Println("Length of hashes", len(t.Hashes))
fmt.Println("Length of ID lists", len(t.IDs))
return &t
}
@ -137,7 +138,7 @@ func ConvertHashesV1(oldHashes SavedHashesv1) *SavedHashes {
}
}
}
fmt.Println("length of hashes", len(t.Hashes))
fmt.Println("Length of hashes", len(t.Hashes))
fmt.Println("Length of ID lists", len(t.IDs))
return &t
}
@ -234,7 +235,7 @@ func DecodeHashes(format Format, hashes []byte) (*SavedHashes, error) {
return nil, NoHashes
}
func EncodeHashes(hashes SavedHashes, format Format) ([]byte, error) {
func EncodeHashes(hashes *SavedHashes, format Format) ([]byte, error) {
var encoder Encoder
switch format {
case Msgpack:

@ -26,26 +26,26 @@ type sqliteStorage struct {
idExists *sql.Stmt
}
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) ([]ID, error) {
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) (map[ID][]ID, error) {
if statement == nil {
statement = s.hashExactMatchStatement
}
hashes := []ID{}
hashes := map[ID][]ID{}
rows, err := statement.Query(hash.Kind, int64(hash.Hash))
if err != nil {
return hashes, err
}
for rows.Next() {
var (
r = ID{}
id ID
foundID ID
)
err = rows.Scan(&r.Domain, &r.ID)
err = rows.Scan(&foundID.Domain, &foundID.ID, &id.Domain, &id.ID)
if err != nil {
rows.Close()
return hashes, err
}
hashes = append(hashes, r)
hashes[foundID] = append(hashes[foundID], id)
}
rows.Close()
return hashes, nil
@ -61,28 +61,31 @@ func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max i
return hashResults, err
}
results := map[Hash][]ID{}
results := map[SavedHash][]ID{}
for rows.Next() {
var (
tmpHash int64
sqlHash = Hash{Kind: hash.Kind}
id ID
sqlHash = SavedHash{
Hash: Hash{Kind: hash.Kind},
}
id ID
)
err = rows.Scan(&tmpHash, &id.Domain, &id.ID)
err = rows.Scan(&sqlHash.ID.Domain, &sqlHash.ID.ID, &tmpHash, &id.Domain, &id.ID)
if err != nil {
rows.Close()
return hashResults, err
}
sqlHash.Hash = uint64(tmpHash)
sqlHash.Hash.Hash = uint64(tmpHash)
results[sqlHash] = append(results[sqlHash], id)
}
for sqlHash, ids := range results {
res := Result{
Hash: sqlHash,
Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash),
Hash: sqlHash.Hash,
ID: sqlHash.ID,
Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash.Hash),
EquivalentIDs: ids,
}
if res.Distance <= max {
res.IDs = ToIDList(ids)
hashResults = append(hashResults, res)
}
}
@ -144,10 +147,14 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
if err != nil {
return foundMatches, err
}
foundMatches = append(foundMatches, Result{
IDs: ToIDList(idlist),
Hash: hash,
})
for id, equivalentIDs := range idlist {
foundMatches = append(foundMatches, Result{
Hash: hash,
ID: id,
Distance: 0,
EquivalentIDs: equivalentIDs,
})
}
}
tl.logTime("Search Exact")
@ -239,7 +246,7 @@ func (s *sqliteStorage) MapHashes(hash ImageHash) {
}
}
func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
return nil
err := s.dropIndexes()
if err != nil {
@ -295,16 +302,16 @@ func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
return nil
}
func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
hashes := SavedHashes{}
tx, err := s.db.Begin()
if err != nil {
return hashes, err
return &hashes, err
}
rows, err := tx.Query("SELECT Hashes.kind, Hashes.hash, IDs.domain, IDs.stringid FROM Hashes JOIN IDs ON Hashes.id=IDs.id ORDER BY Hashes.kind, Hashes.hash;")
if err != nil {
return hashes, err
return &hashes, err
}
for rows.Next() {
var (
@ -313,14 +320,14 @@ func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
)
err = rows.Scan(&hash.Hash.Kind, &tmpHash, &hash.ID.Domain, &hash.ID.ID)
if err != nil {
return hashes, err
return &hashes, err
}
hash.Hash.Hash = uint64(tmpHash)
hashes.InsertHash(hash)
}
rows, err = tx.Query("SELECT IEIDs.equivalentid, IDs.domain, IDs.stringid FROM IDs JOIN IDsToEquivalantIDs AS IEIDs ON IDs.id=IEIDs.idid ORDER BY IEIDs.equivalentid, IDs.domain, IDs.stringid;")
if err != nil {
return hashes, err
return &hashes, err
}
var (
previousEid int64 = -1
@ -333,7 +340,7 @@ func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
)
err = rows.Scan(&newEid, &id.Domain, &id.Domain)
if err != nil {
return hashes, err
return &hashes, err
}
if newEid != previousEid {
previousEid = newEid
@ -345,7 +352,7 @@ func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
}
ids = append(ids, id)
}
return hashes, nil
return &hashes, nil
}
func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
@ -430,7 +437,7 @@ func (s *sqliteStorage) PrepareStatements() error {
return fmt.Errorf("failed to prepare database statements: %w", err)
}
s.hashExactMatchStatement, err = s.db.Prepare(`
select IDs.domain, IDs.stringid from IDs
select QIDs.domain, QIDs.stringid, IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join (
select QEIDs.id as id from EquivalentIDs as QEIDs
@ -444,7 +451,7 @@ func (s *sqliteStorage) PrepareStatements() error {
return fmt.Errorf("failed to prepare database statements: %w", err)
}
s.hashPartialMatchStatement, err = s.db.Prepare(`
select EIDs.hash, IDs.domain, IDs.stringid from IDs
select QIDs.domain, QIDs.stringid, EIDs.hash, IDs.domain, IDs.stringid from IDs
join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
join (
select Hashes.hash as hash, QEIDs.id as id from EquivalentIDs as QEIDs

@ -56,15 +56,17 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
mappedIds[ids] = true
if result.Dist == 0 {
exactMatches = append(exactMatches, Result{
IDs: ToIDList(*v.ids[storedHash.ID]),
Distance: int(result.Dist),
Hash: storedHash.Hash,
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: 0,
EquivalentIDs: *v.ids[storedHash.ID],
})
} else {
matches = append(matches, Result{
IDs: ToIDList(*v.ids[storedHash.ID]),
Distance: int(result.Dist),
Hash: storedHash.Hash,
Hash: storedHash.Hash,
ID: storedHash.ID,
Distance: int(result.Dist),
EquivalentIDs: *v.ids[storedHash.ID],
})
}
}
@ -93,7 +95,10 @@ func (v *VPTree) MapHashes(ImageHash) {
panic("Not Implemented")
}
func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
if hashes == nil {
return nil
}
// Initialize all the known equal IDs
for _, ids := range hashes.IDs {
@ -137,8 +142,8 @@ func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
}
return nil
}
func (v *VPTree) EncodeHashes() (SavedHashes, error) {
return SavedHashes{}, errors.New("Not Implemented")
func (v *VPTree) EncodeHashes() (*SavedHashes, error) {
return &SavedHashes{}, errors.New("Not Implemented")
}
func (v *VPTree) AssociateIDs(newIDs []NewIDs) error {