Compare commits
7 Commits
9322f754bf ... 3e364b1858

Commits:
3e364b1858
0da76f7fb5
d7c42f5c1d
374c46bc48
aca658e32d
acd71df302
0fd431f6f7
BasicMap.go (23 changes)
@@ -35,7 +35,12 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
				continue
			}
			mappedIds[ids] = true
-			matchingHashes = append(matchingHashes, Result{ToIDList(*b.ids[storedHash.ID]), distance, storedHash.Hash})
+			matchingHashes = append(matchingHashes, Result{
+				Hash:          storedHash.Hash,
+				ID:            storedHash.ID,
+				Distance:      distance,
+				EquivalentIDs: *b.ids[storedHash.ID],
+			})
		}
	}
	return matchingHashes
@@ -56,9 +61,10 @@ func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
			mappedIds[ids] = true

			foundMatches = append(foundMatches, Result{
-				Distance: 0,
-				Hash:     storedHash.Hash,
-				IDs:      ToIDList(*b.ids[storedHash.ID]),
+				Hash:          storedHash.Hash,
+				ID:            storedHash.ID,
+				Distance:      0,
+				EquivalentIDs: *b.ids[storedHash.ID],
			})
		}
	}
@@ -155,7 +161,10 @@ func (b *basicMapStorage) MapHashes(hash ImageHash) {
	}

// DecodeHashes must already have a lock
-func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
+func (b *basicMapStorage) DecodeHashes(hashes *SavedHashes) error {
+	if hashes == nil {
+		return nil
+	}
	b.ids = make(map[ID]*[]ID, len(hashes.Hashes))

	// Initialize all the known equal IDs
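The move to a pointer parameter lets callers hand DecodeHashes a nil result when nothing was loaded from disk, and each implementation now short-circuits on nil instead of decoding an empty value. A minimal sketch of that contract from the caller's side (the variable names are hypothetical):

	var saved *SavedHashes // nil when no saved hashes exist yet
	if err := storage.DecodeHashes(saved); err != nil {
		panic(err) // nil input returns a nil error, so this only fires on real decode failures
	}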
@@ -215,7 +224,7 @@ func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
	}

// EncodeHashes should already have a lock
-func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
+func (b *basicMapStorage) EncodeHashes() (*SavedHashes, error) {
	savedHashes := SavedHashes{
		Hashes: make([]SavedHash, 0, len(b.aHashes)+len(b.dHashes)+len(b.pHashes)),
	}
@@ -230,7 +239,7 @@ func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
		}
	}

-	return savedHashes, nil
+	return &savedHashes, nil
}

func (b *basicMapStorage) AssociateIDs(newids []NewIDs) error {

cmd/comic-hasher/main.go
@@ -3,10 +3,8 @@ package main

import (
	"bufio"
	"bytes"
	"cmp"
	"compress/gzip"
	"context"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
@@ -15,17 +13,13 @@ import (
	_ "image/jpeg"
	_ "image/png"
	"io"
	"io/fs"
	"log"
	"net/http"
	_ "net/http/pprof"
	"net/url"
	"os"
	"path/filepath"
	"runtime/debug"
	"runtime/pprof"
	"slices"
	"strconv"
	"strings"
	"sync"
	"time"
@@ -40,24 +34,8 @@ import (

	ch "gitea.narnian.us/lordwelch/comic-hasher"
	"gitea.narnian.us/lordwelch/comic-hasher/cv"
	"gitea.narnian.us/lordwelch/goimagehash"
)

type Server struct {
	httpServer     *http.Server
	mux            *CHMux
	BaseURL        *url.URL
	hashes         ch.HashStorage
	Context        context.Context
	cancel         func()
	signalQueue    chan os.Signal
	readerQueue    chan string
	hashingQueue   chan ch.Im
	mappingQueue   chan ch.ImageHash
	onlyHashNewIDs bool
	version        string
}

var bufPool = &sync.Pool{
	New: func() any {
		// The Pool's New function should generally only return pointer
@@ -219,411 +197,6 @@ func main() {
	startServer(opts)
}

func (s *Server) authenticated(w http.ResponseWriter, r *http.Request) (string, bool) {
	return strings.TrimSpace("lordwelch"), true
}

func (s *Server) setupAppHandlers() {
	// s.mux.HandleFunc("/get_cover", s.getCover)
	s.mux.HandleFunc("/add_cover", s.addCover)
	s.mux.HandleFunc("/match_cover_hash", s.matchCoverHash)
	s.mux.HandleFunc("/associate_ids", s.associateIDs)
}

func (s *Server) getCover(w http.ResponseWriter, r *http.Request) {
	user, authed := s.authenticated(w, r)
	if !authed || user == "" {
		http.Error(w, "Invalid Auth", http.StatusForbidden)
		return
	}
	var (
		values = r.URL.Query()
		domain = strings.TrimSpace(values.Get("domain"))
		ID     = strings.TrimSpace(values.Get("id"))
	)
	if ID == "" {
		log.Println("No ID Provided")
		http.Error(w, "No ID Provided", http.StatusBadRequest)
		return
	}
	if domain == "" {
		log.Println("No domain Provided")
		http.Error(w, "No domain Provided", http.StatusBadRequest)
		return
	}
	// if index, ok := s.IDToCover[domain+":"+ID]; ok {
	// 	covers, err := json.Marshal(s.covers[index])
	// 	if err == nil {
	// 		w.Header().Add("Content-Type", "application/json")
	// 		w.Write(covers)
	// 		return
	// 	}
	// }
	fmt.Fprintln(w, "Not implemented")
}

func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
	user, authed := s.authenticated(w, r)
	if !authed || user == "" {
		http.Error(w, "Invalid Auth", http.StatusForbidden)
		return
	}
	var (
		values    = r.URL.Query()
		domain    = strings.TrimSpace(values.Get("domain"))
		ID        = strings.TrimSpace(values.Get("id"))
		newDomain = strings.TrimSpace(values.Get("newDomain"))
		newID     = strings.TrimSpace(values.Get("newID"))
	)
	if ID == "" {
		msg := "No ID Provided"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	if domain == "" {
		msg := "No domain Provided"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	if newID == "" {
		msg := "No newID Provided"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	if newDomain == "" {
		msg := "No newDomain Provided"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	if newDomain == domain {
		msg := "newDomain cannot be the same as the existing domain"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	// if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
	// 	msg := "No IDs belonging to " + domain + "exist on this server"
	// 	log.Println(msg)
	// 	writeJson(w, http.StatusBadRequest, result{Msg: msg})
	// }
	log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
	found := false
	// for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
	// 	for i, idlist := range hash {
	// 		if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
	// 			found = true
	// 			hash[i] = ch.Insert(idlist, newDomain+":"+newID)
	// 			if _, ok := s.ids[ch.Source(newDomain)]; !ok {
	// 				s.ids[ch.Source(newDomain)] = make(map[string]struct{})
	// 			}
	// 			s.ids[ch.Source(newDomain)][newID] = struct{}{}
	// 		}
	// 	}
	// }
	if found {
		writeJson(w, http.StatusOK, result{Msg: "New ID added"})
	} else {
		writeJson(w, http.StatusOK, result{Msg: "Old ID not found"})
	}
}

type SimpleResult struct {
	Distance int
	IDList   ch.IDList
}

func getSimpleResults(fullResults []ch.Result) []SimpleResult {
	simpleResult := make([]SimpleResult, 0, len(fullResults))

	slices.SortFunc(fullResults, func(a, b ch.Result) int {
		return cmp.Compare(a.Distance, b.Distance) * -1 // Reverses sort
	})

	// Deduplicate IDs
	idToDistance := make(map[ch.ID]int)

	for _, fullResult := range fullResults {
		for domain, idlist := range fullResult.IDs {
			for _, idStr := range idlist {
				id := ch.ID{
					Domain: domain,
					ID:     idStr,
				}
				if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
					idToDistance[id] = fullResult.Distance
				}
			}
		}
	}
	// Group by distance
	distanceMap := make(map[int]SimpleResult)
	for id, distance := range idToDistance {
		var (
			sr SimpleResult
			ok bool
		)
		if sr, ok = distanceMap[distance]; !ok {
			sr.IDList = make(ch.IDList)
		}
		sr.Distance = distance
		sr.IDList[id.Domain] = append(sr.IDList[id.Domain], id.ID)
		distanceMap[distance] = sr
	}

	// turn into array
	for _, sr := range distanceMap {
		simpleResult = append(simpleResult, sr)
	}
	slices.SortFunc(simpleResult, func(a, b SimpleResult) int {
		return cmp.Compare(a.Distance, b.Distance)
	})
	return simpleResult
}

type result struct {
	Results any    `json:"results,omitempty"`
	Msg     string `json:"msg,omitempty"`
}

func writeJson(w http.ResponseWriter, status int, res result) {
	w.Header().Set("Content-Type", "application/json; charset=utf-8")
	w.Header().Set("X-Content-Type-Options", "nosniff")
	var (
		bytes []byte
		err   error
	)
	if bytes, err = json.Marshal(res); err != nil {
		bytes, _ = json.Marshal(result{Msg: fmt.Sprintf("Failed to create json: %s", err)})
	}
	w.WriteHeader(status)
	_, _ = w.Write(bytes)
	_, _ = w.Write([]byte("\n"))
}

func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
	user, authed := s.authenticated(w, r)
	if !authed || user == "" {
		http.Error(w, "Invalid Auth", http.StatusForbidden)
		return
	}
	var (
		values    = r.URL.Query()
		ahashStr  = strings.TrimSpace(values.Get("ahash"))
		dhashStr  = strings.TrimSpace(values.Get("dhash"))
		phashStr  = strings.TrimSpace(values.Get("phash"))
		maxStr    = strings.TrimSpace(values.Get("max"))
		exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
		simple    = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
		ahash     uint64
		dhash     uint64
		phash     uint64
		max       int = 8
		max_tmp   int
		err       error
		hashes    []ch.Hash
	)

	if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
		log.Printf("could not parse ahash: %s", ahashStr)
		writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
		return
	}
	if ahash > 0 {
		hashes = append(hashes, ch.Hash{ahash, goimagehash.AHash})
	}
	if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" {
		log.Printf("could not parse dhash: %s", dhashStr)
		writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
		return
	}
	if dhash > 0 {
		hashes = append(hashes, ch.Hash{dhash, goimagehash.DHash})
	}
	if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" {
		log.Printf("could not parse phash: %s", phashStr)
		writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
		return
	}
	if phash > 0 {
		hashes = append(hashes, ch.Hash{phash, goimagehash.PHash})
	}
	if max_tmp, err = strconv.Atoi(maxStr); err != nil && maxStr != "" {
		log.Printf("Invalid Max: %s", maxStr)
		writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Invalid Max: %s", maxStr)})
		return
	}
	if maxStr != "" {
		max = max_tmp
	}

	if max > 8 {
		log.Printf("Max must be less than 9: %d", max)
		writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
		return
	}
	matches, err := s.hashes.GetMatches(hashes, max, exactOnly)
	slices.SortFunc(matches, func(a ch.Result, b ch.Result) int {
		return cmp.Compare(a.Distance, b.Distance)
	})
	log.Println(err)
	if len(matches) > 0 {
		var msg string = ""
		if err != nil {
			msg = err.Error()
		}
		if simple {
			writeJson(w, http.StatusOK, result{
				Results: getSimpleResults(matches),
				Msg:     msg,
			})
			return
		}
		writeJson(w, http.StatusOK, result{
			Results: matches,
			Msg:     msg,
		})
		return
	}

	writeJson(w, http.StatusNotFound, result{Msg: "No hashes found"})
}

func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
	user, authed := s.authenticated(w, r)
	if !authed || user == "" {
		http.Error(w, "Invalid Auth", http.StatusForbidden)
		return
	}
	w.WriteHeader(http.StatusNotImplemented)
	return
	var (
		values = r.URL.Query()
		domain = strings.TrimSpace(values.Get("domain"))
		ID     = strings.TrimSpace(values.Get("id"))
	)
	if ID == "" {
		log.Println("No ID Provided")
		writeJson(w, http.StatusBadRequest, result{Msg: "No ID Provided"})
		return
	}
	if domain == "" {
		log.Println("No domain Provided")
		writeJson(w, http.StatusBadRequest, result{Msg: "No Domain Provided"})
		return
	}
	i, format, err := image.Decode(r.Body)
	if err != nil {
		msg := fmt.Sprintf("Failed to decode Image: %s", err)
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	log.Printf("Decoded %s image from %s", format, user)
	select {
	case <-s.Context.Done():
		log.Println("Recieved quit")
		return
	default:
	}
	s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.Source(domain), ID: ID}}
	writeJson(w, http.StatusOK, result{Msg: "Success"})
}

func (s *Server) mapper(done func()) {
	defer done()
	for hash := range s.mappingQueue {
		s.hashes.MapHashes(hash)
	}
}

func (s *Server) hasher(workerID int, done func(int)) {
	defer done(workerID)
	for image := range s.hashingQueue {
		start := time.Now()
		if image.NewOnly && len(s.hashes.GetIDs(image.ID)) > 0 {
			log.Printf("Skipping existing hash with ID: %s found", image.ID)
			continue
		}
		hash := ch.HashImage(image)
		if hash.ID.Domain == "" || hash.ID.ID == "" {
			continue
		}

		select {
		// TODO: Check channel pipelines
		case s.mappingQueue <- hash:
		default:
		}

		elapsed := time.Since(start)
		log.Printf("Hashing took %v: worker: %v. %s: %064b id: %s\n", elapsed, workerID, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
	}
}

func (s *Server) reader(workerID int, done func(i int)) {
	defer done(workerID)
	for path := range s.readerQueue {
		id := ch.ID{Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path))}
		if len(s.hashes.GetIDs(id)) > 0 {
			continue
		}
		file, err := os.Open(path)
		if err != nil {
			panic(err)
		}
		i, format, err := image.Decode(bufio.NewReader(file))
		file.Close()
		if err != nil {
			continue // skip this image
		}

		im := ch.Im{
			Im:      i,
			Format:  format,
			ID:      id,
			NewOnly: s.onlyHashNewIDs,
		}
		select {
		case s.hashingQueue <- im:
		default:
		}
	}
}

func (s *Server) HashLocalImages(opts Opts) {
	if opts.coverPath == "" {
		return
	}
	go func() {
		log.Println("Hashing covers at ", opts.coverPath)
		start := time.Now()
		err := filepath.WalkDir(opts.coverPath, func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				return err
			}
			select {
			case <-s.Context.Done():
				log.Println("Recieved quit")
				err = s.httpServer.Shutdown(context.TODO())
				return fmt.Errorf("Recieved quit: %w", err)
			default:
			}
			if d.IsDir() {
				return nil
			}

			s.readerQueue <- path
			return nil
		})
		elapsed := time.Since(start)
		log.Println("Err:", err, "local hashing took", elapsed)
	}()
}

func signalHandler(s *Server) {
	select {
	case sig := <-s.signalQueue:
@@ -698,13 +271,17 @@ func loadHashes(opts Opts) *ch.SavedHashes {
		}
		break
	}
	if errors.Is(err, ch.NoHashes) {
		log.Println("No saved hashes to load")
		return loadedHashes
	}
	if err != nil {
		panic(fmt.Sprintf("Failed to decode hashes: %s", err))
	}
	fmt.Printf("Loaded %s hashes\n", format)
	return loadedHashes
}

-func saveHashes(opts Opts, hashes ch.SavedHashes) error {
+func saveHashes(opts Opts, hashes *ch.SavedHashes) error {
	if opts.loadEmbeddedHashes && !opts.saveEmbeddedHashes {
		return errors.New("refusing to save embedded hashes")
	}
@@ -787,15 +364,6 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
	}
}

type CHMux struct {
	version string
	*http.ServeMux
}

func (CHM *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Server", "Comic-Hasher "+CHM.version)
	CHM.ServeMux.ServeHTTP(w, r)
}
func startServer(opts Opts) {
	imaging.SetMaxProcs(2)
	if opts.cpuprofile != "" {
@@ -860,7 +428,7 @@ func startServer(opts Opts) {

	// DecodeHashes would normally need a write lock
	// nothing else has been started yet so we don't need one
-	if err := server.hashes.DecodeHashes(*loadHashes(opts)); err != nil {
+	if err := server.hashes.DecodeHashes(loadHashes(opts)); err != nil {
		panic(err)
	}
cmd/comic-hasher/server.go (new file, 362 lines)
@@ -0,0 +1,362 @@
package main

import (
	"bufio"
	"cmp"
	"context"
	"encoding/json"
	"fmt"
	"image"
	"io/fs"
	"log"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	ch "gitea.narnian.us/lordwelch/comic-hasher"
	"gitea.narnian.us/lordwelch/goimagehash"
	"golang.org/x/exp/slices"
)

type Server struct {
	httpServer     *http.Server
	mux            *CHMux
	BaseURL        *url.URL
	hashes         ch.HashStorage
	Context        context.Context
	cancel         func()
	signalQueue    chan os.Signal
	readerQueue    chan string
	hashingQueue   chan ch.Im
	mappingQueue   chan ch.ImageHash
	onlyHashNewIDs bool
	version        string
}

type CHMux struct {
	version string
	*http.ServeMux
}

func (CHM *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Server", "Comic-Hasher "+CHM.version)
	CHM.ServeMux.ServeHTTP(w, r)
}
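CHMux is a thin wrapper whose only job is stamping a Server header with the running version on every response before the embedded ServeMux dispatches the request. A hypothetical wiring sketch (the version string and address are illustrative, not taken from this change):

	mux := &CHMux{version: "dev", ServeMux: http.NewServeMux()}
	srv := &http.Server{Addr: ":8080", Handler: mux} // every route registered on mux now gets the header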

func (s *Server) authenticated(w http.ResponseWriter, r *http.Request) (string, bool) {
	return strings.TrimSpace("lordwelch"), true
}

func (s *Server) setupAppHandlers() {
	s.mux.HandleFunc("/add_cover", s.addCover)
	s.mux.HandleFunc("/match_cover_hash", s.matchCoverHash)
	s.mux.HandleFunc("/associate_ids", s.associateIDs)
}

func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
	user, authed := s.authenticated(w, r)
	if !authed || user == "" {
		http.Error(w, "Invalid Auth", http.StatusForbidden)
		return
	}
	var (
		values    = r.URL.Query()
		domain    = ch.Source(strings.ToLower(strings.TrimSpace(values.Get("domain"))))
		ID        = strings.ToLower(strings.TrimSpace(values.Get("id")))
		newDomain = ch.Source(strings.ToLower(strings.TrimSpace(values.Get("newDomain"))))
		newID     = strings.ToLower(strings.TrimSpace(values.Get("newID")))
	)
	if ID == "" {
		msg := "No ID Provided"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	if domain == "" {
		msg := "No domain Provided"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	if newID == "" {
		msg := "No newID Provided"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	if newDomain == "" {
		msg := "No newDomain Provided"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	if newDomain == domain {
		msg := "newDomain cannot be the same as the existing domain"
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
	err := s.hashes.AssociateIDs([]ch.NewIDs{{
		OldID: ch.ID{
			Domain: domain,
			ID:     ID,
		},
		NewID: ch.ID{
			Domain: newDomain,
			ID:     newID,
		},
	}})

	if err == nil {
		writeJson(w, http.StatusOK, result{Msg: "New ID added"})
	} else {
		writeJson(w, http.StatusOK, result{Msg: err.Error()})
	}
}

type result struct {
	Results []ch.Result `json:"results,omitempty"`
	Msg     string      `json:"msg,omitempty"`
}

func writeJson(w http.ResponseWriter, status int, res result) {
	w.Header().Set("Content-Type", "application/json; charset=utf-8")
	w.Header().Set("X-Content-Type-Options", "nosniff")
	var (
		bytes []byte
		err   error
	)
	if bytes, err = json.Marshal(res); err != nil {
		bytes, _ = json.Marshal(result{Msg: fmt.Sprintf("Failed to create json: %s", err)})
	}
	w.WriteHeader(status)
	_, _ = w.Write(bytes)
	_, _ = w.Write([]byte("\n"))
}

func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
	user, authed := s.authenticated(w, r)
	if !authed || user == "" {
		http.Error(w, "Invalid Auth", http.StatusForbidden)
		return
	}
	var (
		values    = r.URL.Query()
		ahashStr  = strings.TrimSpace(values.Get("ahash"))
		dhashStr  = strings.TrimSpace(values.Get("dhash"))
		phashStr  = strings.TrimSpace(values.Get("phash"))
		maxStr    = strings.TrimSpace(values.Get("max"))
		exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
		simple    = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
		ahash     uint64
		dhash     uint64
		phash     uint64
		max       int = 8
		max_tmp   int
		err       error
		hashes    []ch.Hash
	)

	if simple {
		writeJson(w, http.StatusBadRequest, result{Msg: "Simple results are no longer Supported"})
		return
	}

	if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
		log.Printf("could not parse ahash: %s", ahashStr)
		writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
		return
	}
	if ahash > 0 {
		hashes = append(hashes, ch.Hash{Hash: ahash, Kind: goimagehash.AHash})
	}
	if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" {
		log.Printf("could not parse dhash: %s", dhashStr)
		writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
		return
	}
	if dhash > 0 {
		hashes = append(hashes, ch.Hash{Hash: dhash, Kind: goimagehash.DHash})
	}
	if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" {
		log.Printf("could not parse phash: %s", phashStr)
		writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
		return
	}
	if phash > 0 {
		hashes = append(hashes, ch.Hash{Hash: phash, Kind: goimagehash.PHash})
	}
	if max_tmp, err = strconv.Atoi(maxStr); err != nil && maxStr != "" {
		log.Printf("Invalid Max: %s", maxStr)
		writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Invalid Max: %s", maxStr)})
		return
	}
	if maxStr != "" {
		max = max_tmp
	}

	if max > 8 {
		log.Printf("Max must be less than 9: %d", max)
		writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
		return
	}
	matches, err := s.hashes.GetMatches(hashes, max, exactOnly)
	slices.SortFunc(matches, func(a ch.Result, b ch.Result) int {
		return cmp.Compare(a.Distance, b.Distance)
	})
	log.Println(err)
	if len(matches) > 0 {
		var msg string = ""
		if err != nil {
			msg = err.Error()
		}

		writeJson(w, http.StatusOK, result{
			Results: matches,
			Msg:     msg,
		})
		return
	}

	writeJson(w, http.StatusNotFound, result{Msg: "No hashes found"})
}
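For reference, a hypothetical client for this handler; the address and hash value are illustrative. The endpoint takes hex-encoded ahash/dhash/phash parameters, an optional max (rejected above 8), and exactOnly:

	package main

	import (
		"fmt"
		"io"
		"log"
		"net/http"
		"net/url"
	)

	func main() {
		q := url.Values{}
		q.Set("ahash", "8f373714acfcf4d0") // hex-encoded 64-bit average hash (example value)
		q.Set("max", "8")                  // maximum Hamming distance; the server rejects values above 8
		q.Set("exactOnly", "false")
		resp, err := http.Get("http://localhost:8080/match_cover_hash?" + q.Encode())
		if err != nil {
			log.Fatal(err)
		}
		defer resp.Body.Close()
		body, _ := io.ReadAll(resp.Body)
		fmt.Println(string(body)) // {"results":[...]} on a hit, {"msg":"No hashes found"} otherwise
	}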

func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
	user, authed := s.authenticated(w, r)
	if !authed || user == "" {
		http.Error(w, "Invalid Auth", http.StatusForbidden)
		return
	}
	if true {
		w.WriteHeader(http.StatusNotImplemented)
		return
	}
	var (
		values = r.URL.Query()
		domain = strings.TrimSpace(values.Get("domain"))
		ID     = strings.TrimSpace(values.Get("id"))
	)

	if ID == "" {
		log.Println("No ID Provided")
		writeJson(w, http.StatusBadRequest, result{Msg: "No ID Provided"})
		return
	}
	if domain == "" {
		log.Println("No domain Provided")
		writeJson(w, http.StatusBadRequest, result{Msg: "No Domain Provided"})
		return
	}
	i, format, err := image.Decode(r.Body)
	if err != nil {
		msg := fmt.Sprintf("Failed to decode Image: %s", err)
		log.Println(msg)
		writeJson(w, http.StatusBadRequest, result{Msg: msg})
		return
	}
	log.Printf("Decoded %s image from %s", format, user)
	select {
	case <-s.Context.Done():
		log.Println("Recieved quit")
		return
	default:
	}
	s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.Source(domain), ID: ID}}
	writeJson(w, http.StatusOK, result{Msg: "Success"})
}

func (s *Server) mapper(done func()) {
	defer done()
	for hash := range s.mappingQueue {
		s.hashes.MapHashes(hash)
	}
}

func (s *Server) hasher(workerID int, done func(int)) {
	defer done(workerID)
	for image := range s.hashingQueue {
		start := time.Now()
		if image.NewOnly && len(s.hashes.GetIDs(image.ID)) > 0 {
			log.Printf("Skipping existing hash with ID: %s found", image.ID)
			continue
		}
		hash := ch.HashImage(image)
		if hash.ID.Domain == "" || hash.ID.ID == "" {
			continue
		}

		select {
		// TODO: Check channel pipelines
		case s.mappingQueue <- hash:
		default:
		}

		elapsed := time.Since(start)
		log.Printf("Hashing took %v: worker: %v. %s: %064b id: %s\n", elapsed, workerID, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
	}
}

func (s *Server) reader(workerID int, done func(i int)) {
	defer done(workerID)
	for path := range s.readerQueue {
		id := ch.ID{Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path))}
		if len(s.hashes.GetIDs(id)) > 0 {
			continue
		}
		file, err := os.Open(path)
		if err != nil {
			panic(err)
		}
		i, format, err := image.Decode(bufio.NewReader(file))
		file.Close()
		if err != nil {
			continue // skip this image
		}

		im := ch.Im{
			Im:      i,
			Format:  format,
			ID:      id,
			NewOnly: s.onlyHashNewIDs,
		}
		select {
		case s.hashingQueue <- im:
		default:
		}
	}
}

func (s *Server) HashLocalImages(opts Opts) {
	if opts.coverPath == "" {
		return
	}
	go func() {
		log.Println("Hashing covers at ", opts.coverPath)
		start := time.Now()
		err := filepath.WalkDir(opts.coverPath, func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				return err
			}
			select {
			case <-s.Context.Done():
				log.Println("Recieved quit")
				err = s.httpServer.Shutdown(context.TODO())
				return fmt.Errorf("Recieved quit: %w", err)
			default:
			}
			if d.IsDir() {
				return nil
			}

			s.readerQueue <- path
			return nil
		})
		elapsed := time.Since(start)
		log.Println("Err:", err, "local hashing took", elapsed)
	}()
}
cv/cv.go (211 changes)
@@ -34,8 +34,9 @@ type Download struct {
}

type Issue struct {
	ID          int    `json:"id"`
+	IssueNumber string `json:"issue_number"`
	Image       struct {
		IconURL   string `json:"icon_url,omitempty"`
		MediumURL string `json:"medium_url,omitempty"`
		ScreenURL string `json:"screen_url,omitempty"`
@@ -86,8 +87,12 @@ type CVDownloader struct {
}

var (
-	ErrQuit        = errors.New("Quit")
-	ErrInvalidPage = errors.New("Invalid ComicVine Page")
+	ErrQuit         = errors.New("quit")
+	ErrInvalidPage  = errors.New("invalid ComicVine page")
+	ErrInvalidIndex = errors.New("invalid page index")
+	ErrDownloadFail = errors.New("download failed")
+	ErrMissingPage  = errors.New("page missing")
+	ErrUpdateNeeded = errors.New("update needed")
)

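Lower-casing the messages follows Go convention for errors that get wrapped, and the new sentinels let updateIssues report why it stopped instead of returning a bare offset. A sketch of how a caller can tell them apart, assuming the fmt.Errorf("%w: ...", ErrDownloadFail, ...) wrapping used later in this file:

	offset, err := c.updateIssues()
	switch {
	case errors.Is(err, ErrQuit):
		log.Println("shutdown requested; stopped at offset", offset)
	case errors.Is(err, ErrDownloadFail):
		log.Println("ComicVine download failed; retry later:", err)
	case err != nil:
		log.Println("unexpected error:", err)
	}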
func (c *CVDownloader) readJson() ([]*CVResult, error) {
@@ -145,9 +150,57 @@ func getOffset(name string) int {
	return i
}

func (c *CVDownloader) findDownloadedPage(offset int) int {
	index := offset / 100
	if index < len(c.fileList) && getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
		return index
	}
	index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
		return cmp.Compare(getOffset(a), b)
	})
	if found {
		return index
	}
	return -1
}
func (c *CVDownloader) getDownloadedIssues(offset int, update bool) (*CVResult, error) {
	index := c.findDownloadedPage(offset)
	if index < 0 {
		return nil, ErrMissingPage
	}
	issue, err := c.loadIssues(c.fileList[index])
	if err != nil || issue == nil {
		err = fmt.Errorf("Failed to read page at offset %d: %w", offset, err)
		os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
		c.fileList = slices.Delete(c.fileList, index, index+1)
		return nil, err
	}

	c.totalResults = max(c.totalResults, issue.NumberOfTotalResults)

	if update && (len(issue.Results) == 0 || issue.Results[0].IssueNumber == "") {
		err = fmt.Errorf("Deleting page %d to update records from cv", offset)
		os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
		c.fileList = slices.Delete(c.fileList, index, index+1)
		return nil, err
	}

	if c.totalResults == issue.Offset+issue.NumberOfPageResults {
		if index != len(c.fileList)-1 {
			err = fmt.Errorf("Wrong index: expected %d got %d", len(c.fileList), index)
			return nil, err
		}
		log.Println("Deleting the last page to detect new comics")
		os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
		c.fileList = slices.Delete(c.fileList, index, index+1)
	}

	return issue, nil
}
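findDownloadedPage leans on the cv-<offset>.json naming scheme: the fast path checks the slot the page would occupy if no pages were missing, then falls back to a binary search of the sorted file list. A small sketch of the offset/filename mapping it assumes:

	name := fmt.Sprintf("cv-%v.json", 300) // page covering results 300-399
	offset := getOffset(name)              // parses the offset back out of the name -> 300
	index := offset / 100                  // expected slot in c.fileList when nothing is missing -> 3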

// updateIssues c.downloadQueue must not be closed before this function has returned
-func (c *CVDownloader) updateIssues() int {
-	base_url, err := url.Parse("https://comicvine.gamespot.com/api/issues/?sort=date_added,id:asc&format=json&field_list=id,image,volume")
+func (c *CVDownloader) updateIssues() (int, error) {
+	base_url, err := url.Parse("https://comicvine.gamespot.com/api/issues/?sort=date_added,id:asc&format=json&field_list=id,issue_number,image,volume")
	if err != nil {
		log.Fatal(err)
	}
@@ -182,84 +235,44 @@ func (c *CVDownloader) updateIssues() int {
		offset -= 100
		return failCount < 15
	}
+	updated := 0
	for offset = 0; offset <= c.totalResults; offset += 100 {
-		index := offset / 100
		if c.hasQuit() {
-			return offset - 100
+			return offset - 100, ErrQuit
		}
-		if index < len(c.fileList) {
-			if getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
-				if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
-					c.totalResults = max(c.totalResults, issue.NumberOfTotalResults)
-					prev = -1
-					failCount = 0
-					// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
-					if c.totalResults == issue.Offset+issue.NumberOfPageResults {
-						if index != len(c.fileList)-1 {
-							log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
-							return offset - 100
-						}
-						log.Println("Deleting the last page to detect new comics")
-						os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
-						c.fileList = slices.Delete(c.fileList, index, index+1)
-					} else {
-						select {
-						case <-c.Context.Done():
-						case c.downloadQueue <- issue:
-						}
-						continue
-					}
-				} else {
-					log.Println("Failed to read page at offset", offset, issue, err)
-					os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
-					c.fileList = slices.Delete(c.fileList, index, index+1)
-				}
-			} else {
-				log.Printf("Expected Offset %d got Offset %d", offset, getOffset(c.fileList[index]))

+		issue, err := c.getDownloadedIssues(offset, updated < 9)
+		if err == nil && issue != nil {
+			prev = -1
+			failCount = 0
+			select {
+			case <-c.Context.Done(): // allows us to return immediately even during a timeout
+				return offset - 100, ErrQuit
+			case c.downloadQueue <- issue:
+			}
+			continue
+		}
-		index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
-			return cmp.Compare(getOffset(a), b)
-		})
-		if found {
-			if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
-				prev = -1
-				failCount = 0
-				// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
-				select {
-				case <-c.Context.Done():
-				case c.downloadQueue <- issue:
-				}
-				if c.totalResults == issue.Offset+issue.NumberOfPageResults {
-					if index != len(c.fileList)-1 {
-						log.Printf("Wrong index: expected %d got %d", len(c.fileList), index)
-						return offset - 100
-					}
-					log.Println("Deleting the last page to detect new comics")
-					os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
-					c.fileList = slices.Delete(c.fileList, index, index+1)
-				} else {
-					continue
-				}
-			} else {
-				log.Println("Failed to read page at offset", offset, issue, err)
-				os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
-				c.fileList = slices.Delete(c.fileList, index, (index)+1)
-			}
+		if errors.Is(err, ErrInvalidIndex) {
+			return offset - 100, err
+		}
+		if err != nil && !errors.Is(err, ErrMissingPage) {
+			log.Println(err)
+		}

		log.Println("Starting download at offset", offset)
-		issue := &CVResult{}
+		issue = &CVResult{}
		URI := (*base_url)
		query = base_url.Query()
		query.Add("offset", strconv.Itoa(offset))
		URI.RawQuery = query.Encode()

		select {
-		case <-c.Context.Done(): // allows us to return immediately even during a timeout
-			return offset - 100
-		case <-time.After(10 * time.Second):
+		case <-c.Context.Done(): // Allows us to return immediately even during a timeout
+			return offset - 100, ErrQuit
+		case <-time.After(10 * time.Second): // Enforces a minimum 10s wait between API hits
		}

		resp, err, cancelDownloadCTX := Get(URI.String())
		if err != nil {
			cancelDownloadCTX()
@@ -267,7 +280,7 @@ func (c *CVDownloader) updateIssues() int {
				continue
			}
			// Fail and let comic-hasher try the whole thing again later
-			return offset - 100
+			return offset - 100, fmt.Errorf("%w: %w", ErrDownloadFail, err)
		}
		if resp.StatusCode != 200 {
			cancelDownloadCTX()
@@ -275,14 +288,11 @@ func (c *CVDownloader) updateIssues() int {
				_ = resp.Body.Close()
				continue
			}
-			log.Println("Failed to download this page, we'll wait for an hour to see if it clears up")
-			select {
-			case <-c.Context.Done(): // allows us to return immediately even during a timeout
-				_ = resp.Body.Close()
-				return offset - 100
-			case <-time.After(1 * time.Hour):
-			}
+			msg, _ := io.ReadAll(resp.Body)
+			_ = resp.Body.Close()
+			return offset - 100, fmt.Errorf("%w: response code: %d Message: %s", ErrDownloadFail, resp.StatusCode, string(msg))
		}

		file, err := os.Create(filepath.Join(c.JSONPath, "cv-"+strconv.Itoa(offset)+".json"))
		if err != nil {
			log.Fatal(err)
@@ -297,7 +307,7 @@ func (c *CVDownloader) updateIssues() int {
			if retry(URI.String(), err) {
				continue
			}
-			return offset - 100
+			return offset - 100, fmt.Errorf("%w: %w", ErrDownloadFail, err)
		}
		cancelDownloadCTX()
		if issue.NumberOfTotalResults > c.totalResults {
@@ -305,13 +315,14 @@ func (c *CVDownloader) updateIssues() int {
		}
		prev = -1
		failCount = 0
+		updated += 1
		select {
		case c.downloadQueue <- issue:
		}
-		c.fileList = ch.Insert(c.fileList, fmt.Sprintf("cv-%v.json", offset))
+		c.insertIssuePage(offset)
		log.Printf("Downloaded %s/cv-%v.json", c.JSONPath, offset)
	}
-	return offset
+	return offset, nil
}
type download struct {
@@ -436,12 +447,19 @@ func (c *CVDownloader) downloadImages() {
	for list := range c.downloadQueue {
		log.Printf("Checking downloads at offset %v\r", list.Offset)
		for _, issue := range list.Results {
-			type i struct {
+			type image struct {
				url  string
				name string
			}
-			imageURLs := []i{{issue.Image.IconURL, "icon_url"}, {issue.Image.MediumURL, "medium_url"}, {issue.Image.ScreenURL, "screen_url"}, {issue.Image.ScreenLargeURL, "screen_large_url"}, {issue.Image.SmallURL, "small_url"}, {issue.Image.SuperURL, "super_url"}, {issue.Image.ThumbURL, "thumb_url"}, {issue.Image.TinyURL, "tiny_url"}, {issue.Image.OriginalURL, "original_url"}}
+			imageURLs := []image{{issue.Image.IconURL, "icon_url"}, {issue.Image.MediumURL, "medium_url"}, {issue.Image.ScreenURL, "screen_url"}, {issue.Image.ScreenLargeURL, "screen_large_url"}, {issue.Image.SmallURL, "small_url"}, {issue.Image.SuperURL, "super_url"}, {issue.Image.ThumbURL, "thumb_url"}, {issue.Image.TinyURL, "tiny_url"}, {issue.Image.OriginalURL, "original_url"}}
			for _, image := range imageURLs {
+				if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
+					continue
+				}
+				if c.chdb.CheckURL(image.url) {
+					log.Printf("Skipping known bad url %s", image.url)
+					continue
+				}
				if strings.HasSuffix(image.url, "6373148-blank.png") {
					c.notFound <- download{
						url: image.url,
@@ -452,14 +470,6 @@ func (c *CVDownloader) downloadImages() {
					continue
				}

-				if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
-					continue
-				}
-				if c.chdb.CheckURL(image.url) {
-					log.Printf("Skipping known bad url %s", image.url)
-					continue
-				}

				uri, err := url.ParseRequestURI(image.url)
				if err != nil {
					c.notFound <- download{
@@ -589,6 +599,15 @@ func (c *CVDownloader) cleanDirs() {
		return nil
	})
}
+func (c *CVDownloader) insertIssuePage(offset int) {
+	index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
+		return cmp.Compare(getOffset(a), b)
+	})
+	if found {
+		return
+	}
+	c.fileList = slices.Insert(c.fileList, index, fmt.Sprintf("cv-%v.json", offset))
+}

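insertIssuePage keeps c.fileList sorted by offset, which is exactly what makes findDownloadedPage's fast path and binary search valid. A minimal standalone sketch of the same insert-sorted pattern:

	package main

	import (
		"cmp"
		"fmt"
		"slices"
	)

	func main() {
		pages := []int{0, 100, 300}
		offset := 200
		index, found := slices.BinarySearchFunc(pages, offset, func(a, b int) int {
			return cmp.Compare(a, b)
		})
		if !found {
			pages = slices.Insert(pages, index, offset) // insert at the sorted position
		}
		fmt.Println(pages) // [0 100 200 300]
	}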
func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, only_hash_new_ids bool, get_id func(id ch.ID) ch.IDList, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
	return &CVDownloader{
@@ -596,7 +615,7 @@ func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, only_hash_new_ids
		JSONPath:  filepath.Join(workPath, "_json"),
		ImagePath: filepath.Join(workPath, "_image"),
		APIKey:    APIKey,
-		bufPool:   bufPool, // Only used if keepDownloadedImages is false to save space on byte buffers. The buffers get sent back via finishedDownloadQueue
+		bufPool:   bufPool, // Only used if keepDownloadedImages is false to save memory on byte buffers. The buffers get sent back via finishedDownloadQueue
		FinishedDownloadQueue: finishedDownloadQueue,
		SendExistingImages:    sendExistingImages,
		KeepDownloadedImages:  keepDownloadedImages,
@@ -625,6 +644,11 @@ func DownloadCovers(c *CVDownloader) {
	var d *os.File
	d, err = os.Open(c.JSONPath)
	c.fileList, err = d.Readdirnames(-1)
+	for i := len(c.fileList) - 1; i >= 0; i-- {
+		if !strings.Contains(c.fileList[i], "json") {
+			c.fileList = slices.Delete(c.fileList, i, i+1)
+		}
+	}
	if err != nil {
		panic(fmt.Errorf("Unable to open path for json files: %w", err))
	}
@@ -645,7 +669,10 @@ func DownloadCovers(c *CVDownloader) {
		dwg.Done()
	}()

-	offset := c.updateIssues()
+	offset, err := c.updateIssues()
+	if err != nil {
+		log.Printf("Failed to download CV Covers: %s", err)
+	}
	issueCount := len(c.fileList) * 100

	log.Println("Number of issues", issueCount, " expected:", c.totalResults)
go.mod (4 changes)
@@ -14,8 +14,8 @@ require (
	github.com/ncruces/go-sqlite3 v0.23.1
	github.com/vmihailenco/msgpack v4.0.4+incompatible
	go.etcd.io/bbolt v1.4.0
+	golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa
	golang.org/x/image v0.24.0
-	golang.org/x/sys v0.30.0
	golang.org/x/text v0.22.0
	gonum.org/v1/gonum v0.15.1
	modernc.org/sqlite v1.35.0
@@ -48,7 +48,7 @@ require (
	github.com/therootcompany/xz v1.0.1 // indirect
	github.com/ulikunitz/xz v0.5.10 // indirect
	go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
-	golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
+	golang.org/x/sys v0.30.0 // indirect
	google.golang.org/appengine v1.6.8 // indirect
	google.golang.org/protobuf v1.36.5 // indirect
	modernc.org/libc v1.61.13 // indirect
hashing.go (12 changes)
@@ -56,11 +56,11 @@ type ID struct {
}

type Result struct {
-	IDs      IDList
-	Distance int
-	Hash     Hash
+	Hash          Hash
+	ID            ID
+	Distance      int
+	EquivalentIDs []ID
}

type Im struct {
	Im     image.Image
	Format string
@@ -110,8 +110,8 @@ type NewIDs struct {
type HashStorage interface {
	GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error)
	MapHashes(ImageHash)
-	DecodeHashes(hashes SavedHashes) error
-	EncodeHashes() (SavedHashes, error)
+	DecodeHashes(hashes *SavedHashes) error
+	EncodeHashes() (*SavedHashes, error)
	AssociateIDs(newIDs []NewIDs) error
	GetIDs(id ID) IDList
}
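Result now names the single stored ID whose hash matched (ID) and carries the raw equivalent IDs ([]ID) instead of a pre-flattened IDList, leaving grouping to the caller. A sketch of consuming the new shape (the storage variable and hash slice are assumed):

	results, err := storage.GetMatches(hashes, 8, false)
	if err != nil {
		log.Println(err)
	}
	for _, r := range results {
		// r.ID is the matched cover; r.EquivalentIDs lists IDs known to refer to the same comic.
		fmt.Printf("%s:%s distance=%d equivalents=%d\n", r.ID.Domain, r.ID.ID, r.Distance, len(r.EquivalentIDs))
	}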
map.go (12 changes)
@@ -61,9 +61,10 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
			mappedIds[ids] = true

			foundMatches = append(foundMatches, Result{
-				Distance: 0,
-				Hash:     storedHash.Hash,
-				IDs:      ToIDList(*m.ids[storedHash.ID]),
+				Hash:          storedHash.Hash,
+				ID:            storedHash.ID,
+				Distance:      0,
+				EquivalentIDs: *m.ids[storedHash.ID],
			})

		}
@@ -97,7 +98,10 @@ func (m *MapStorage) MapHashes(hash ImageHash) {
	}
}

-func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
+func (m *MapStorage) DecodeHashes(hashes *SavedHashes) error {
+	if hashes == nil {
+		return nil
+	}
	if err := m.basicMapStorage.DecodeHashes(hashes); err != nil {
		return err
	}

@@ -52,6 +52,7 @@ type SavedHash struct {
	Hash Hash
	ID   ID
}

type Encoder func(any) ([]byte, error)
type Decoder func([]byte, interface{}) error
type versionDecoder func(Decoder, []byte) (*SavedHashes, error)
@@ -111,7 +112,7 @@ func ConvertHashesV0(oldHashes OldSavedHashes) *SavedHashes {
			}
		}
	}
-	fmt.Println("length of hashes", len(t.Hashes))
+	fmt.Println("Length of hashes", len(t.Hashes))
	fmt.Println("Length of ID lists", len(t.IDs))
	return &t
}
@@ -137,7 +138,7 @@ func ConvertHashesV1(oldHashes SavedHashesv1) *SavedHashes {
			}
		}
	}
-	fmt.Println("length of hashes", len(t.Hashes))
+	fmt.Println("Length of hashes", len(t.Hashes))
	fmt.Println("Length of ID lists", len(t.IDs))
	return &t
}
@@ -234,7 +235,7 @@ func DecodeHashes(format Format, hashes []byte) (*SavedHashes, error) {
	return nil, NoHashes
}

-func EncodeHashes(hashes SavedHashes, format Format) ([]byte, error) {
+func EncodeHashes(hashes *SavedHashes, format Format) ([]byte, error) {
	var encoder Encoder
	switch format {
	case Msgpack:
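With the interface methods and these free functions all passing *SavedHashes, an encode/decode round trip hands one pointer through every stage. A sketch of that flow (the storage value and the choice of Msgpack are illustrative):

	saved, err := storage.EncodeHashes() // *SavedHashes from live storage
	if err != nil {
		log.Fatal(err)
	}
	buf, err := EncodeHashes(saved, Msgpack) // serialize
	if err != nil {
		log.Fatal(err)
	}
	decoded, err := DecodeHashes(Msgpack, buf) // *SavedHashes again
	if err != nil {
		log.Fatal(err)
	}
	if err := storage.DecodeHashes(decoded); err != nil { // nil-safe by contract
		log.Fatal(err)
	}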
sqlite.go (63 changes)
@@ -26,26 +26,26 @@ type sqliteStorage struct {
	idExists *sql.Stmt
}

-func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) ([]ID, error) {
+func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, hash Hash) (map[ID][]ID, error) {
	if statement == nil {
		statement = s.hashExactMatchStatement
	}
-	hashes := []ID{}
+	hashes := map[ID][]ID{}
	rows, err := statement.Query(hash.Kind, int64(hash.Hash))
	if err != nil {
		return hashes, err
	}

	for rows.Next() {
		var (
-			r = ID{}
+			id      ID
+			foundID ID
		)
-		err = rows.Scan(&r.Domain, &r.ID)
+		err = rows.Scan(&foundID.Domain, &foundID.ID, &id.Domain, &id.ID)
		if err != nil {
			rows.Close()
			return hashes, err
		}
-		hashes = append(hashes, r)
+		hashes[foundID] = append(hashes[foundID], id)
	}
	rows.Close()
	return hashes, nil
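Each key in the returned map is a stored ID whose hash matched exactly, and the value is the list of IDs recorded as equivalent to it; GetMatches below emits one Result per key. A sketch of consuming the new return shape:

	matches, err := s.findExactHashes(nil, hash)
	if err != nil {
		return nil, err
	}
	for foundID, equivalent := range matches {
		fmt.Printf("%s:%s has %d equivalent IDs\n", foundID.Domain, foundID.ID, len(equivalent))
	}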
@@ -61,28 +61,31 @@ func (s *sqliteStorage) findPartialHashes(tl timeLog, statement *sql.Stmt, max i
		return hashResults, err
	}

-	results := map[Hash][]ID{}
+	results := map[SavedHash][]ID{}
	for rows.Next() {
		var (
			tmpHash int64
-			sqlHash = Hash{Kind: hash.Kind}
+			sqlHash = SavedHash{
+				Hash: Hash{Kind: hash.Kind},
+			}
			id ID
		)
-		err = rows.Scan(&tmpHash, &id.Domain, &id.ID)
+		err = rows.Scan(&sqlHash.ID.Domain, &sqlHash.ID.ID, &tmpHash, &id.Domain, &id.ID)
		if err != nil {
			rows.Close()
			return hashResults, err
		}
-		sqlHash.Hash = uint64(tmpHash)
+		sqlHash.Hash.Hash = uint64(tmpHash)
		results[sqlHash] = append(results[sqlHash], id)
	}
	for sqlHash, ids := range results {
		res := Result{
-			Hash:     sqlHash,
-			Distance: bits.OnesCount64(hash.Hash ^ sqlHash.Hash),
+			Hash:          sqlHash.Hash,
+			ID:            sqlHash.ID,
+			Distance:      bits.OnesCount64(hash.Hash ^ sqlHash.Hash.Hash),
+			EquivalentIDs: ids,
		}
		if res.Distance <= max {
-			res.IDs = ToIDList(ids)
			hashResults = append(hashResults, res)
		}
	}
@@ -144,10 +147,14 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
		if err != nil {
			return foundMatches, err
		}
-		foundMatches = append(foundMatches, Result{
-			IDs:  ToIDList(idlist),
-			Hash: hash,
-		})
+		for id, equivalentIDs := range idlist {
+			foundMatches = append(foundMatches, Result{
+				Hash:          hash,
+				ID:            id,
+				Distance:      0,
+				EquivalentIDs: equivalentIDs,
+			})
+		}
	}

	tl.logTime("Search Exact")
@@ -239,7 +246,7 @@ func (s *sqliteStorage) MapHashes(hash ImageHash) {
	}
}

-func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
+func (s *sqliteStorage) DecodeHashes(hashes *SavedHashes) error {
	return nil
	err := s.dropIndexes()
	if err != nil {
@@ -295,16 +302,16 @@ func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
	return nil
}

-func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
+func (s *sqliteStorage) EncodeHashes() (*SavedHashes, error) {
	hashes := SavedHashes{}
	tx, err := s.db.Begin()
	if err != nil {
-		return hashes, err
+		return &hashes, err
	}

	rows, err := tx.Query("SELECT Hashes.kind, Hashes.hash, IDs.domain, IDs.stringid FROM Hashes JOIN IDs ON Hashes.id=IDs.id ORDER BY Hashes.kind, Hashes.hash;")
	if err != nil {
-		return hashes, err
+		return &hashes, err
	}
	for rows.Next() {
		var (
@@ -313,14 +320,14 @@ func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
		)
		err = rows.Scan(&hash.Hash.Kind, &tmpHash, &hash.ID.Domain, &hash.ID.ID)
		if err != nil {
-			return hashes, err
+			return &hashes, err
		}
		hash.Hash.Hash = uint64(tmpHash)
		hashes.InsertHash(hash)
	}
	rows, err = tx.Query("SELECT IEIDs.equivalentid, IDs.domain, IDs.stringid FROM IDs JOIN IDsToEquivalantIDs AS IEIDs ON IDs.id=IEIDs.idid ORDER BY IEIDs.equivalentid, IDs.domain, IDs.stringid;")
	if err != nil {
-		return hashes, err
+		return &hashes, err
	}
	var (
		previousEid int64 = -1
@@ -333,7 +340,7 @@ func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
	)
	err = rows.Scan(&newEid, &id.Domain, &id.Domain)
	if err != nil {
-		return hashes, err
+		return &hashes, err
	}
	if newEid != previousEid {
		previousEid = newEid
@@ -345,7 +352,7 @@ func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
	}
	ids = append(ids, id)
}
-	return hashes, nil
+	return &hashes, nil
}

func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) error {
@@ -430,7 +437,7 @@ func (s *sqliteStorage) PrepareStatements() error {
		return fmt.Errorf("failed to prepare database statements: %w", err)
	}
	s.hashExactMatchStatement, err = s.db.Prepare(`
-		select IDs.domain, IDs.stringid from IDs
+		select QIDs.domain, QIDs.stringid, IDs.domain, IDs.stringid from IDs
			join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
			join (
				select QEIDs.id as id from EquivalentIDs as QEIDs
@@ -444,7 +451,7 @@ func (s *sqliteStorage) PrepareStatements() error {
		return fmt.Errorf("failed to prepare database statements: %w", err)
	}
	s.hashPartialMatchStatement, err = s.db.Prepare(`
-		select EIDs.hash, IDs.domain, IDs.stringid from IDs
+		select QIDs.domain, QIDs.stringid, EIDs.hash, IDs.domain, IDs.stringid from IDs
			join IDsToEquivalantIDs as IEIDs on IDs.id=IEIDs.idid
			join (
				select Hashes.hash as hash, QEIDs.id as id from EquivalentIDs as QEIDs
vp-tree.go (23 changes)
@@ -56,15 +56,17 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
			mappedIds[ids] = true
			if result.Dist == 0 {
				exactMatches = append(exactMatches, Result{
-					IDs:      ToIDList(*v.ids[storedHash.ID]),
-					Distance: int(result.Dist),
-					Hash:     storedHash.Hash,
+					Hash:          storedHash.Hash,
+					ID:            storedHash.ID,
+					Distance:      0,
+					EquivalentIDs: *v.ids[storedHash.ID],
				})
			} else {
				matches = append(matches, Result{
-					IDs:      ToIDList(*v.ids[storedHash.ID]),
-					Distance: int(result.Dist),
-					Hash:     storedHash.Hash,
+					Hash:          storedHash.Hash,
+					ID:            storedHash.ID,
+					Distance:      0,
+					EquivalentIDs: *v.ids[storedHash.ID],
				})
			}
		}
@@ -93,7 +95,10 @@ func (v *VPTree) MapHashes(ImageHash) {
	panic("Not Implemented")
}

-func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
+func (v *VPTree) DecodeHashes(hashes *SavedHashes) error {
+	if hashes == nil {
+		return nil
+	}

	// Initialize all the known equal IDs
	for _, ids := range hashes.IDs {
@@ -137,8 +142,8 @@ func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
	}
	return nil
}
-func (v *VPTree) EncodeHashes() (SavedHashes, error) {
-	return SavedHashes{}, errors.New("Not Implemented")
+func (v *VPTree) EncodeHashes() (*SavedHashes, error) {
+	return &SavedHashes{}, errors.New("Not Implemented")
}

func (v *VPTree) AssociateIDs(newIDs []NewIDs) error {