Make runtime hash storage modular
This commit is contained in:
parent
007a726764
commit
0069ffd5cb
@ -29,9 +29,10 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/kr/pretty"
|
||||||
|
|
||||||
"github.com/vmihailenco/msgpack/v5"
|
"github.com/vmihailenco/msgpack/v5"
|
||||||
|
|
||||||
"github.com/disintegration/imaging"
|
|
||||||
_ "golang.org/x/image/tiff"
|
_ "golang.org/x/image/tiff"
|
||||||
_ "golang.org/x/image/vp8"
|
_ "golang.org/x/image/vp8"
|
||||||
_ "golang.org/x/image/vp8l"
|
_ "golang.org/x/image/vp8l"
|
||||||
@ -39,37 +40,20 @@ import (
|
|||||||
|
|
||||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||||
"gitea.narnian.us/lordwelch/goimagehash"
|
"gitea.narnian.us/lordwelch/goimagehash"
|
||||||
// "github.com/google/uuid"
|
|
||||||
// "github.com/zitadel/oidc/pkg/client/rp"
|
|
||||||
// httphelper "github.com/zitadel/oidc/pkg/http"
|
|
||||||
// "github.com/zitadel/oidc/pkg/oidc"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Server struct {
|
type Server struct {
|
||||||
httpServer *http.Server
|
httpServer *http.Server
|
||||||
mux *http.ServeMux
|
mux *http.ServeMux
|
||||||
BaseURL *url.URL
|
BaseURL *url.URL
|
||||||
// token chan<- *oidc.Tokens
|
hashes ch.HashStorage
|
||||||
// Partial hashes are a uint64 split into 8 pieces or a unint64 for quick lookup, the value is an index to covers
|
|
||||||
PartialAhash [8]map[uint8][]uint64
|
|
||||||
PartialDhash [8]map[uint8][]uint64
|
|
||||||
PartialPhash [8]map[uint8][]uint64
|
|
||||||
FullAhash map[uint64][]string // Maps ahash's to lists of ID's domain:id
|
|
||||||
FullDhash map[uint64][]string // Maps dhash's to lists of ID's domain:id
|
|
||||||
FullPhash map[uint64][]string // Maps phash's to lists of ID's domain:id
|
|
||||||
ids map[ch.Source]map[string]struct{}
|
|
||||||
hashMutex sync.RWMutex
|
|
||||||
quit chan struct{}
|
quit chan struct{}
|
||||||
signalQueue chan os.Signal
|
signalQueue chan os.Signal
|
||||||
readerQueue chan string
|
readerQueue chan string
|
||||||
hashingQueue chan ch.Im
|
hashingQueue chan ch.Im
|
||||||
mappingQueue chan ch.Hash
|
mappingQueue chan ch.ImageHash
|
||||||
}
|
}
|
||||||
|
|
||||||
// var key = []byte(uuid.New().String())[:16]
|
|
||||||
|
|
||||||
type savedHashes map[ch.Source]map[string][3]uint64
|
|
||||||
|
|
||||||
type Format int
|
type Format int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -135,6 +119,8 @@ func main() {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
opts.sqlitePath, _ = filepath.Abs(opts.sqlitePath)
|
||||||
|
pretty.Logln(opts)
|
||||||
startServer(opts)
|
startServer(opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,25 +210,25 @@ func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeJson(w, http.StatusBadRequest, result{Msg: msg})
|
writeJson(w, http.StatusBadRequest, result{Msg: msg})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
|
// if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
|
||||||
msg := "No IDs belonging to " + domain + "exist on this server"
|
// msg := "No IDs belonging to " + domain + "exist on this server"
|
||||||
log.Println(msg)
|
// log.Println(msg)
|
||||||
writeJson(w, http.StatusBadRequest, result{Msg: msg})
|
// writeJson(w, http.StatusBadRequest, result{Msg: msg})
|
||||||
}
|
// }
|
||||||
log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
|
log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
|
||||||
found := false
|
found := false
|
||||||
for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
|
// for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
|
||||||
for i, idlist := range hash {
|
// for i, idlist := range hash {
|
||||||
if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
|
// if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
|
||||||
found = true
|
// found = true
|
||||||
hash[i] = ch.Insert(idlist, newDomain+":"+newID)
|
// hash[i] = ch.Insert(idlist, newDomain+":"+newID)
|
||||||
if _, ok := s.ids[ch.Source(newDomain)]; !ok {
|
// if _, ok := s.ids[ch.Source(newDomain)]; !ok {
|
||||||
s.ids[ch.Source(newDomain)] = make(map[string]struct{})
|
// s.ids[ch.Source(newDomain)] = make(map[string]struct{})
|
||||||
}
|
// }
|
||||||
s.ids[ch.Source(newDomain)][newID] = struct{}{}
|
// s.ids[ch.Source(newDomain)][newID] = struct{}{}
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
if found {
|
if found {
|
||||||
writeJson(w, http.StatusOK, result{Msg: "New ID added"})
|
writeJson(w, http.StatusOK, result{Msg: "New ID added"})
|
||||||
} else {
|
} else {
|
||||||
@ -250,70 +236,6 @@ func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) getMatches(ahash, dhash, phash uint64, max int, skipNonExact bool) []ch.Result {
|
|
||||||
var foundMatches []ch.Result
|
|
||||||
s.hashMutex.RLock()
|
|
||||||
defer s.hashMutex.RUnlock()
|
|
||||||
|
|
||||||
if skipNonExact { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
|
||||||
if matchedResults, ok := s.FullAhash[ahash]; ok && ahash != 0 {
|
|
||||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: ahash, Kind: goimagehash.AHash}})
|
|
||||||
}
|
|
||||||
if matchedResults, ok := s.FullDhash[dhash]; ok && dhash != 0 {
|
|
||||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: dhash, Kind: goimagehash.DHash}})
|
|
||||||
}
|
|
||||||
if matchedResults, ok := s.FullPhash[phash]; ok && phash != 0 {
|
|
||||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: phash, Kind: goimagehash.PHash}})
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have exact matches don't bother with other matches
|
|
||||||
if len(foundMatches) > 0 && skipNonExact {
|
|
||||||
return foundMatches
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foundHashes := make(map[uint64]struct{})
|
|
||||||
if ahash != 0 {
|
|
||||||
for i, partialHash := range ch.SplitHash(ahash) {
|
|
||||||
for _, match := range ch.Atleast(max, ahash, s.PartialAhash[i][partialHash]) {
|
|
||||||
_, alreadyMatched := foundHashes[match.Hash]
|
|
||||||
if matchedResults, ok := s.FullAhash[match.Hash]; ok && !alreadyMatched {
|
|
||||||
foundHashes[match.Hash] = struct{}{}
|
|
||||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.AHash}})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foundHashes = make(map[uint64]struct{})
|
|
||||||
if dhash != 0 {
|
|
||||||
for i, partialHash := range ch.SplitHash(dhash) {
|
|
||||||
for _, match := range ch.Atleast(max, dhash, s.PartialDhash[i][partialHash]) {
|
|
||||||
_, alreadyMatched := foundHashes[match.Hash]
|
|
||||||
if matchedResults, ok := s.FullDhash[match.Hash]; ok && !alreadyMatched {
|
|
||||||
foundHashes[match.Hash] = struct{}{}
|
|
||||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.DHash}})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foundHashes = make(map[uint64]struct{})
|
|
||||||
if phash != 0 {
|
|
||||||
for i, partialHash := range ch.SplitHash(phash) {
|
|
||||||
for _, match := range ch.Atleast(max, phash, s.PartialPhash[i][partialHash]) {
|
|
||||||
_, alreadyMatched := foundHashes[match.Hash]
|
|
||||||
if matchedResults, ok := s.FullPhash[match.Hash]; ok && !alreadyMatched {
|
|
||||||
foundHashes[match.Hash] = struct{}{}
|
|
||||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.PHash}})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return foundMatches
|
|
||||||
}
|
|
||||||
|
|
||||||
type SimpleResult struct {
|
type SimpleResult struct {
|
||||||
Distance int
|
Distance int
|
||||||
IDList ch.IDList
|
IDList ch.IDList
|
||||||
@ -323,67 +245,31 @@ func getSimpleResults(fullResults []ch.Result) []SimpleResult {
|
|||||||
simpleResult := make([]SimpleResult, 0, len(fullResults))
|
simpleResult := make([]SimpleResult, 0, len(fullResults))
|
||||||
|
|
||||||
slices.SortFunc(fullResults, func(a, b ch.Result) int {
|
slices.SortFunc(fullResults, func(a, b ch.Result) int {
|
||||||
return cmp.Compare(a.Distance, b.Distance)
|
return cmp.Compare(a.Distance, b.Distance) * -1 // Reverses sort
|
||||||
})
|
})
|
||||||
|
|
||||||
// Deduplicate IDs
|
// Deduplicate IDs
|
||||||
idToDistance := make(map[string]int)
|
distance := make(map[int]SimpleResult)
|
||||||
for _, fullResult := range fullResults {
|
|
||||||
for _, id := range fullResult.IDs {
|
|
||||||
if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
|
|
||||||
idToDistance[id] = fullResult.Distance
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Group by distance
|
for _, fullResult := range fullResults {
|
||||||
distanceMap := make(map[int]SimpleResult)
|
simple, ok := distance[fullResult.Distance]
|
||||||
for id, distance := range idToDistance {
|
if !ok {
|
||||||
var (
|
simple.IDList = make(ch.IDList)
|
||||||
sr SimpleResult
|
}
|
||||||
ok bool
|
for source, ids := range fullResult.IDs {
|
||||||
)
|
for _, id := range ids {
|
||||||
if sr, ok = distanceMap[distance]; !ok {
|
simple.IDList[source] = ch.Insert(simple.IDList[source], id)
|
||||||
sr.IDList = make(ch.IDList)
|
}
|
||||||
}
|
}
|
||||||
sourceID := strings.SplitN(id, ":", 2)
|
|
||||||
sr.Distance = distance
|
|
||||||
sr.IDList[ch.Source(sourceID[0])] = append(sr.IDList[ch.Source(sourceID[0])], sourceID[1])
|
|
||||||
distanceMap[distance] = sr
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// turn into array
|
// turn into array
|
||||||
for _, sr := range distanceMap {
|
for _, sr := range distance {
|
||||||
simpleResult = append(simpleResult, sr)
|
simpleResult = append(simpleResult, sr)
|
||||||
}
|
}
|
||||||
return simpleResult
|
return simpleResult
|
||||||
}
|
}
|
||||||
|
|
||||||
type APIResult struct {
|
|
||||||
IDList ch.IDList
|
|
||||||
Distance int
|
|
||||||
Hash ch.ImageHash
|
|
||||||
}
|
|
||||||
|
|
||||||
func getResults(fullResults []ch.Result) []APIResult {
|
|
||||||
apiResults := make([]APIResult, 0, len(fullResults))
|
|
||||||
for _, res := range fullResults {
|
|
||||||
idlist := make(ch.IDList)
|
|
||||||
for _, id := range res.IDs {
|
|
||||||
sourceID := strings.SplitN(id, ":", 2)
|
|
||||||
idlist[ch.Source(sourceID[0])] = append(idlist[ch.Source(sourceID[0])], sourceID[1])
|
|
||||||
}
|
|
||||||
apiResults = append(apiResults,
|
|
||||||
APIResult{
|
|
||||||
Distance: res.Distance,
|
|
||||||
Hash: res.Hash,
|
|
||||||
IDList: idlist,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return apiResults
|
|
||||||
}
|
|
||||||
|
|
||||||
type result struct {
|
type result struct {
|
||||||
Results any `json:"results,omitempty"`
|
Results any `json:"results,omitempty"`
|
||||||
Msg string `json:"msg,omitempty"`
|
Msg string `json:"msg,omitempty"`
|
||||||
@ -416,7 +302,7 @@ func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
|
|||||||
dhashStr = strings.TrimSpace(values.Get("dhash"))
|
dhashStr = strings.TrimSpace(values.Get("dhash"))
|
||||||
phashStr = strings.TrimSpace(values.Get("phash"))
|
phashStr = strings.TrimSpace(values.Get("phash"))
|
||||||
maxStr = strings.TrimSpace(values.Get("max"))
|
maxStr = strings.TrimSpace(values.Get("max"))
|
||||||
skipNonExact = strings.ToLower(strings.TrimSpace(values.Get("skipNonExact"))) != "false"
|
exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
|
||||||
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
|
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
|
||||||
ahash uint64
|
ahash uint64
|
||||||
dhash uint64
|
dhash uint64
|
||||||
@ -455,13 +341,24 @@ func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
|
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
matches := s.getMatches(ahash, dhash, phash, max, skipNonExact)
|
matches, err := s.hashes.GetMatches([]ch.Hash{{ahash, goimagehash.AHash}, {dhash, goimagehash.DHash}, {phash, goimagehash.PHash}}, max, exactOnly)
|
||||||
|
log.Println(err)
|
||||||
if len(matches) > 0 {
|
if len(matches) > 0 {
|
||||||
|
var msg string = ""
|
||||||
|
if err != nil {
|
||||||
|
msg = err.Error()
|
||||||
|
}
|
||||||
if simple {
|
if simple {
|
||||||
writeJson(w, http.StatusOK, result{Results: getSimpleResults(matches)})
|
writeJson(w, http.StatusOK, result{
|
||||||
|
Results: getSimpleResults(matches),
|
||||||
|
Msg: msg,
|
||||||
|
})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJson(w, http.StatusOK, result{Results: getResults(matches)})
|
writeJson(w, http.StatusOK, result{
|
||||||
|
Results: matches,
|
||||||
|
Msg: msg,
|
||||||
|
})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -503,69 +400,14 @@ func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
s.hashingQueue <- ch.Im{Im: i, Format: format, Domain: ch.Source(domain), ID: ID, Path: ""}
|
s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.Source(domain), ID: ID}, Path: ""}
|
||||||
writeJson(w, http.StatusOK, result{Msg: "Success"})
|
writeJson(w, http.StatusOK, result{Msg: "Success"})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) MapHashes(hash ch.Hash) {
|
|
||||||
s.hashMutex.Lock()
|
|
||||||
defer s.hashMutex.Unlock()
|
|
||||||
s.mapHashes(hash.Ahash.GetHash(), hash.Dhash.GetHash(), hash.Phash.GetHash(), hash.Domain, hash.ID)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Server) mapHashes(ahash, dhash, phash uint64, domain ch.Source, id string) {
|
|
||||||
|
|
||||||
if _, ok := s.ids[domain]; !ok {
|
|
||||||
s.ids[domain] = make(map[string]struct{})
|
|
||||||
}
|
|
||||||
s.ids[domain][id] = struct{}{}
|
|
||||||
|
|
||||||
if _, ok := s.FullAhash[ahash]; !ok {
|
|
||||||
s.FullAhash[ahash] = make([]string, 0, 3)
|
|
||||||
}
|
|
||||||
s.FullAhash[ahash] = ch.Insert(s.FullAhash[ahash], string(domain)+":"+id)
|
|
||||||
|
|
||||||
if _, ok := s.FullDhash[dhash]; !ok {
|
|
||||||
s.FullDhash[dhash] = make([]string, 0, 3)
|
|
||||||
}
|
|
||||||
s.FullDhash[dhash] = ch.Insert(s.FullDhash[dhash], string(domain)+":"+id)
|
|
||||||
|
|
||||||
if _, ok := s.FullPhash[phash]; !ok {
|
|
||||||
s.FullPhash[phash] = make([]string, 0, 3)
|
|
||||||
}
|
|
||||||
s.FullPhash[phash] = ch.Insert(s.FullPhash[phash], string(domain)+":"+id)
|
|
||||||
|
|
||||||
for i, partialHash := range ch.SplitHash(ahash) {
|
|
||||||
s.PartialAhash[i][partialHash] = append(s.PartialAhash[i][partialHash], ahash)
|
|
||||||
}
|
|
||||||
for i, partialHash := range ch.SplitHash(dhash) {
|
|
||||||
s.PartialDhash[i][partialHash] = append(s.PartialDhash[i][partialHash], dhash)
|
|
||||||
}
|
|
||||||
for i, partialHash := range ch.SplitHash(phash) {
|
|
||||||
s.PartialPhash[i][partialHash] = append(s.PartialPhash[i][partialHash], phash)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Server) initHashes() {
|
|
||||||
for i := range s.PartialAhash {
|
|
||||||
s.PartialAhash[i] = make(map[uint8][]uint64)
|
|
||||||
}
|
|
||||||
for i := range s.PartialDhash {
|
|
||||||
s.PartialDhash[i] = make(map[uint8][]uint64)
|
|
||||||
}
|
|
||||||
for i := range s.PartialPhash {
|
|
||||||
s.PartialPhash[i] = make(map[uint8][]uint64)
|
|
||||||
}
|
|
||||||
s.FullAhash = make(map[uint64][]string)
|
|
||||||
s.FullDhash = make(map[uint64][]string)
|
|
||||||
s.FullPhash = make(map[uint64][]string)
|
|
||||||
s.ids = make(map[ch.Source]map[string]struct{})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Server) mapper(done func()) {
|
func (s *Server) mapper(done func()) {
|
||||||
defer done()
|
defer done()
|
||||||
for hash := range s.mappingQueue {
|
for hash := range s.mappingQueue {
|
||||||
s.MapHashes(hash)
|
s.hashes.MapHashes(hash)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -575,7 +417,7 @@ func (s *Server) hasher(workerID int, done func()) {
|
|||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
hash := ch.HashImage(image)
|
hash := ch.HashImage(image)
|
||||||
if hash.Domain == "" {
|
if hash.ID.Domain == "" || hash.ID.ID == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -588,7 +430,7 @@ func (s *Server) hasher(workerID int, done func()) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
elapsed := time.Since(start)
|
elapsed := time.Since(start)
|
||||||
log.Printf("Hashing took %v: worker: %v. path: %s ahash: %064b id: %s\n", elapsed, workerID, image.Path, hash.Ahash.GetHash(), hash.ID)
|
log.Printf("Hashing took %v: worker: %v. path: %s %s: %064b id: %s\n", elapsed, workerID, image.Path, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -605,7 +447,11 @@ func (s *Server) reader(workerID int, done func()) {
|
|||||||
}
|
}
|
||||||
file.Close()
|
file.Close()
|
||||||
|
|
||||||
im := ch.Im{Im: i, Format: format, Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path)), Path: path}
|
im := ch.Im{
|
||||||
|
Im: i, Format: format,
|
||||||
|
ID: ch.ID{Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path))},
|
||||||
|
Path: path,
|
||||||
|
}
|
||||||
select {
|
select {
|
||||||
case <-s.quit:
|
case <-s.quit:
|
||||||
log.Println("Recieved quit")
|
log.Println("Recieved quit")
|
||||||
@ -616,94 +462,43 @@ func (s *Server) reader(workerID int, done func()) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) encodeHashes(e Encoder) ([]byte, error) {
|
|
||||||
hashes := make(savedHashes)
|
|
||||||
for source, ids := range s.ids {
|
|
||||||
hashes[source] = make(map[string][3]uint64, len(ids))
|
|
||||||
}
|
|
||||||
for hash, idlist := range s.FullAhash {
|
|
||||||
for _, id := range idlist {
|
|
||||||
sourceID := strings.SplitN(id, ":", 2)
|
|
||||||
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
|
|
||||||
h[0] = hash
|
|
||||||
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for hash, idlist := range s.FullDhash {
|
|
||||||
for _, id := range idlist {
|
|
||||||
sourceID := strings.SplitN(id, ":", 2)
|
|
||||||
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
|
|
||||||
h[1] = hash
|
|
||||||
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
for hash, idlist := range s.FullPhash {
|
|
||||||
for _, id := range idlist {
|
|
||||||
sourceID := strings.SplitN(id, ":", 2)
|
|
||||||
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
|
|
||||||
h[2] = hash
|
|
||||||
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
return e(hashes)
|
|
||||||
}
|
|
||||||
|
|
||||||
// EncodeHashes must have a lock to s.hashMutex
|
// EncodeHashes must have a lock to s.hashMutex
|
||||||
func (s *Server) EncodeHashes(format Format) ([]byte, error) {
|
func (s *Server) EncodeHashes(format Format) ([]byte, error) {
|
||||||
|
var encoder Encoder
|
||||||
switch format {
|
switch format {
|
||||||
case Msgpack:
|
case Msgpack:
|
||||||
return s.encodeHashes(msgpack.Marshal)
|
encoder = msgpack.Marshal
|
||||||
case JSON:
|
case JSON:
|
||||||
return s.encodeHashes(json.Marshal)
|
encoder = json.Marshal
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("Unknown format: %v", format)
|
return nil, fmt.Errorf("Unknown format: %v", format)
|
||||||
}
|
}
|
||||||
}
|
hashes, err := s.hashes.EncodeHashes()
|
||||||
|
|
||||||
func (s *Server) decodeHashes(d Decoder, hashes []byte) error {
|
|
||||||
loadedHashes := make(savedHashes)
|
|
||||||
err := d(hashes, &loadedHashes)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
return encoder(hashes)
|
||||||
for domain, ids := range loadedHashes {
|
|
||||||
for id := range ids {
|
|
||||||
if _, ok := s.ids[domain]; ok {
|
|
||||||
s.ids[domain][id] = struct{}{}
|
|
||||||
} else {
|
|
||||||
s.ids[domain] = make(map[string]struct{})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, sourceHashes := range loadedHashes {
|
|
||||||
s.FullAhash = make(map[uint64][]string, len(sourceHashes))
|
|
||||||
s.FullDhash = make(map[uint64][]string, len(sourceHashes))
|
|
||||||
s.FullPhash = make(map[uint64][]string, len(sourceHashes))
|
|
||||||
break
|
|
||||||
}
|
|
||||||
for domain, sourceHashes := range loadedHashes {
|
|
||||||
for id, h := range sourceHashes {
|
|
||||||
s.mapHashes(h[0], h[1], h[2], domain, id)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// DecodeHashes must have a lock to s.hashMutex
|
// DecodeHashes must have a lock to s.hashMutex
|
||||||
func (s *Server) DecodeHashes(format Format, hashes []byte) error {
|
func (s *Server) DecodeHashes(format Format, hashes []byte) error {
|
||||||
|
var decoder Decoder
|
||||||
switch format {
|
switch format {
|
||||||
case Msgpack:
|
case Msgpack:
|
||||||
return s.decodeHashes(msgpack.Unmarshal, hashes)
|
decoder = msgpack.Unmarshal
|
||||||
case JSON:
|
case JSON:
|
||||||
return s.decodeHashes(json.Unmarshal, hashes)
|
decoder = json.Unmarshal
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("Unknown format: %v", format)
|
return fmt.Errorf("Unknown format: %v", format)
|
||||||
}
|
}
|
||||||
|
loadedHashes := make(ch.SavedHashes)
|
||||||
|
err := decoder(hashes, &loadedHashes)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.hashes.DecodeHashes(loadedHashes)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Server) HashLocalImages(opts Opts) {
|
func (s *Server) HashLocalImages(opts Opts) {
|
||||||
@ -769,13 +564,13 @@ func startServer(opts Opts) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
server := Server{
|
server := Server{
|
||||||
// token: make(chan *oidc.Tokens),
|
|
||||||
quit: make(chan struct{}),
|
quit: make(chan struct{}),
|
||||||
signalQueue: make(chan os.Signal, 1),
|
signalQueue: make(chan os.Signal, 1),
|
||||||
readerQueue: make(chan string, 1120130), // Number gotten from checking queue size
|
readerQueue: make(chan string, 100),
|
||||||
hashingQueue: make(chan ch.Im),
|
hashingQueue: make(chan ch.Im),
|
||||||
mappingQueue: make(chan ch.Hash),
|
mappingQueue: make(chan ch.ImageHash),
|
||||||
mux: mux,
|
mux: mux,
|
||||||
httpServer: &http.Server{
|
httpServer: &http.Server{
|
||||||
Addr: ":8080",
|
Addr: ":8080",
|
||||||
@ -786,12 +581,16 @@ func startServer(opts Opts) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
Notify(server.signalQueue)
|
Notify(server.signalQueue)
|
||||||
imaging.SetMaxProcs(1)
|
var err error
|
||||||
fmt.Println("init hashes")
|
fmt.Println("init hashes")
|
||||||
server.initHashes()
|
server.hashes, err = ch.NewMapStorage()
|
||||||
// server.setupOauthHandlers()
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
fmt.Println("init handlers")
|
fmt.Println("init handlers")
|
||||||
server.setupAppHandlers()
|
server.setupAppHandlers()
|
||||||
|
|
||||||
fmt.Println("init hashers")
|
fmt.Println("init hashers")
|
||||||
rwg := sync.WaitGroup{}
|
rwg := sync.WaitGroup{}
|
||||||
for i := range 10 {
|
for i := range 10 {
|
||||||
@ -829,7 +628,7 @@ func startServer(opts Opts) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Sprintf("Failed to decode embedded hashes: %s", err))
|
panic(fmt.Sprintf("Failed to decode embedded hashes: %s", err))
|
||||||
}
|
}
|
||||||
fmt.Printf("Loaded embedded %s hashes ahashes: %d dhashes: %d phashes: %d\n", format, len(server.FullAhash), len(server.FullDhash), len(server.FullPhash))
|
fmt.Printf("Loaded embedded %s hashes\n", format)
|
||||||
} else {
|
} else {
|
||||||
if f, err := os.Open(opts.hashesPath); err == nil {
|
if f, err := os.Open(opts.hashesPath); err == nil {
|
||||||
var buf io.Reader = f
|
var buf io.Reader = f
|
||||||
@ -854,7 +653,7 @@ func startServer(opts Opts) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Sprintf("Failed to decode hashes from disk: %s", err))
|
panic(fmt.Sprintf("Failed to decode hashes from disk: %s", err))
|
||||||
}
|
}
|
||||||
fmt.Printf("Loaded hashes from %q %s hashes ahashes: %d dhashes: %d phashes: %d\n", opts.hashesPath, format, len(server.FullAhash), len(server.FullDhash), len(server.FullPhash))
|
fmt.Printf("Loaded hashes from %q %s\n", opts.hashesPath, format)
|
||||||
} else {
|
} else {
|
||||||
if errors.Is(err, os.ErrNotExist) {
|
if errors.Is(err, os.ErrNotExist) {
|
||||||
fmt.Println("No saved hashes to load")
|
fmt.Println("No saved hashes to load")
|
||||||
@ -867,7 +666,7 @@ func startServer(opts Opts) {
|
|||||||
server.HashLocalImages(opts)
|
server.HashLocalImages(opts)
|
||||||
|
|
||||||
fmt.Println("Listening on ", server.httpServer.Addr)
|
fmt.Println("Listening on ", server.httpServer.Addr)
|
||||||
err := server.httpServer.ListenAndServe()
|
err = server.httpServer.ListenAndServe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
}
|
}
|
||||||
|
@ -106,9 +106,9 @@ func main() {
|
|||||||
debugImage(debugim, 8, 8)
|
debugImage(debugim, 8, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
hash := ch.HashImage(ch.Im{Im: im, Format: format, Domain: ch.Source(ch.ComicVine), ID: "nothing"})
|
hash := ch.HashImage(ch.Im{Im: im, Format: format, ID: ch.ID{Domain: ch.Source(ch.ComicVine), ID: "nothing"}})
|
||||||
|
|
||||||
fmt.Println("ahash: ", hash.Ahash.BinString())
|
fmt.Println("ahash: ", goimagehash.NewImageHash(hash.Hashes[0].Hash, hash.Hashes[0].Kind).BinString())
|
||||||
fmt.Println("dhash: ", hash.Dhash.BinString())
|
fmt.Println("dhash: ", goimagehash.NewImageHash(hash.Hashes[1].Hash, hash.Hashes[1].Kind).BinString())
|
||||||
fmt.Println("phash: ", hash.Phash.BinString())
|
fmt.Println("phash: ", goimagehash.NewImageHash(hash.Hashes[2].Hash, hash.Hashes[2].Kind).BinString())
|
||||||
}
|
}
|
||||||
|
4
go.mod
4
go.mod
@ -6,8 +6,8 @@ toolchain go1.22.2
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00
|
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00
|
||||||
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09
|
|
||||||
github.com/fmartingr/go-comicinfo/v2 v2.0.2
|
github.com/fmartingr/go-comicinfo/v2 v2.0.2
|
||||||
|
github.com/kr/pretty v0.1.0
|
||||||
github.com/mholt/archiver/v4 v4.0.0-alpha.8
|
github.com/mholt/archiver/v4 v4.0.0-alpha.8
|
||||||
golang.org/x/image v0.19.0
|
golang.org/x/image v0.19.0
|
||||||
golang.org/x/text v0.17.0
|
golang.org/x/text v0.17.0
|
||||||
@ -24,6 +24,7 @@ require (
|
|||||||
github.com/bodgit/sevenzip v1.3.0 // indirect
|
github.com/bodgit/sevenzip v1.3.0 // indirect
|
||||||
github.com/bodgit/windows v1.0.0 // indirect
|
github.com/bodgit/windows v1.0.0 // indirect
|
||||||
github.com/connesc/cipherio v0.2.1 // indirect
|
github.com/connesc/cipherio v0.2.1 // indirect
|
||||||
|
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09 // indirect
|
||||||
github.com/dsnet/compress v0.0.1 // indirect
|
github.com/dsnet/compress v0.0.1 // indirect
|
||||||
github.com/golang/mock v1.6.0 // indirect
|
github.com/golang/mock v1.6.0 // indirect
|
||||||
github.com/golang/snappy v0.0.4 // indirect
|
github.com/golang/snappy v0.0.4 // indirect
|
||||||
@ -31,6 +32,7 @@ require (
|
|||||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||||
github.com/klauspost/compress v1.15.9 // indirect
|
github.com/klauspost/compress v1.15.9 // indirect
|
||||||
github.com/klauspost/pgzip v1.2.5 // indirect
|
github.com/klauspost/pgzip v1.2.5 // indirect
|
||||||
|
github.com/kr/text v0.1.0 // indirect
|
||||||
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect
|
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect
|
||||||
github.com/pierrec/lz4/v4 v4.1.15 // indirect
|
github.com/pierrec/lz4/v4 v4.1.15 // indirect
|
||||||
github.com/therootcompany/xz v1.0.1 // indirect
|
github.com/therootcompany/xz v1.0.1 // indirect
|
||||||
|
2
go.sum
2
go.sum
@ -94,8 +94,10 @@ github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHU
|
|||||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||||
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
|
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
|
||||||
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
||||||
|
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||||
|
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||||
github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f h1:RMKTfrT4gjJfmB/aWuvCcFxUSvWAJfOAc5khGL6ASjk=
|
github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f h1:RMKTfrT4gjJfmB/aWuvCcFxUSvWAJfOAc5khGL6ASjk=
|
||||||
github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||||
|
74
hashing.go
74
hashing.go
@ -50,35 +50,49 @@ type Match struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type ID struct {
|
type ID struct {
|
||||||
Domain, ID string
|
Domain Source
|
||||||
|
ID string
|
||||||
}
|
}
|
||||||
|
|
||||||
type Result struct {
|
type Result struct {
|
||||||
IDs []string // domain:id
|
IDs IDList
|
||||||
Distance int
|
Distance int
|
||||||
Hash ImageHash
|
Hash Hash
|
||||||
}
|
}
|
||||||
|
|
||||||
type Im struct {
|
type Im struct {
|
||||||
Im image.Image
|
Im image.Image
|
||||||
Format string
|
Format string
|
||||||
Domain Source
|
Path string
|
||||||
ID, Path string
|
ID ID
|
||||||
}
|
|
||||||
|
|
||||||
type Hash struct {
|
|
||||||
Ahash *goimagehash.ImageHash
|
|
||||||
Dhash *goimagehash.ImageHash
|
|
||||||
Phash *goimagehash.ImageHash
|
|
||||||
Domain Source
|
|
||||||
ID string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type ImageHash struct {
|
type ImageHash struct {
|
||||||
|
Hashes []Hash
|
||||||
|
ID ID
|
||||||
|
}
|
||||||
|
|
||||||
|
type Hash struct {
|
||||||
Hash uint64
|
Hash uint64
|
||||||
Kind goimagehash.Kind
|
Kind goimagehash.Kind
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SavedHashes is the serialisable form of the stored hashes: source domain ->
// ID within that domain -> three hash values. The map-backed storage reads and
// writes the array as {ahash, dhash, phash}.
type SavedHashes map[Source]map[string][3]uint64
|
||||||
|
|
||||||
|
type NewIDs struct {
|
||||||
|
OldID ID
|
||||||
|
NewID ID
|
||||||
|
}
|
||||||
|
|
||||||
|
type HashStorage interface {
|
||||||
|
GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error)
|
||||||
|
MapHashes(ImageHash)
|
||||||
|
DecodeHashes(hashes SavedHashes) error
|
||||||
|
EncodeHashes() (SavedHashes, error)
|
||||||
|
AssociateIDs(newIDs []NewIDs)
|
||||||
|
GetIDs(id ID) IDList
|
||||||
|
}
|
||||||
|
|
||||||
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
|
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
|
||||||
matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
|
matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
|
||||||
for _, storedHash := range hashes {
|
for _, storedHash := range hashes {
|
||||||
@ -98,47 +112,49 @@ func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
|
|||||||
return slices.Insert(slice, index, item)
|
return slices.Insert(slice, index, item)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// InsertIdx puts item into slice at the position reported by
// slices.BinarySearch and returns the resulting slice together with that
// position. When an equal element is already present the slice is returned
// untouched along with the index of the existing element. slice is expected to
// be sorted in ascending order, as slices.BinarySearch requires.
func InsertIdx[S ~[]E, E cmp.Ordered](slice S, item E) (S, int) {
	pos, exists := slices.BinarySearch(slice, item)
	if !exists {
		slice = slices.Insert(slice, pos, item)
	}
	return slice, pos
}
|
||||||
|
|
||||||
// MemStats returns the Alloc field of a freshly read runtime.MemStats.
func MemStats() uint64 {
	stats := new(runtime.MemStats)
	runtime.ReadMemStats(stats)
	return stats.Alloc
}
|
||||||
|
|
||||||
func HashImage(i Im) Hash {
|
func HashImage(i Im) ImageHash {
|
||||||
if i.Format == "webp" {
|
if i.Format == "webp" {
|
||||||
i.Im = goimagehash.FancyUpscale(i.Im.(*image.YCbCr))
|
i.Im = goimagehash.FancyUpscale(i.Im.(*image.YCbCr))
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
err error = nil
|
err error
|
||||||
ahash *goimagehash.ImageHash
|
|
||||||
dhash *goimagehash.ImageHash
|
|
||||||
phash *goimagehash.ImageHash
|
|
||||||
)
|
)
|
||||||
|
|
||||||
ahash, err = goimagehash.AverageHash(i.Im)
|
ahash, err := goimagehash.AverageHash(i.Im)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
|
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
|
||||||
log.Println(msg)
|
log.Println(msg)
|
||||||
return Hash{}
|
return ImageHash{}
|
||||||
}
|
}
|
||||||
dhash, err = goimagehash.DifferenceHash(i.Im)
|
dhash, err := goimagehash.DifferenceHash(i.Im)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
msg := fmt.Sprintf("Failed to dhash Image: %s", err)
|
msg := fmt.Sprintf("Failed to dhash Image: %s", err)
|
||||||
log.Println(msg)
|
log.Println(msg)
|
||||||
return Hash{}
|
return ImageHash{}
|
||||||
}
|
}
|
||||||
phash, err = goimagehash.PerceptionHash(i.Im)
|
phash, err := goimagehash.PerceptionHash(i.Im)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
msg := fmt.Sprintf("Failed to phash Image: %s", err)
|
msg := fmt.Sprintf("Failed to phash Image: %s", err)
|
||||||
log.Println(msg)
|
log.Println(msg)
|
||||||
return Hash{}
|
return ImageHash{}
|
||||||
}
|
}
|
||||||
return Hash{
|
return ImageHash{
|
||||||
Ahash: ahash,
|
Hashes: []Hash{{ahash.GetHash(), ahash.GetKind()}, {dhash.GetHash(), dhash.GetKind()}, {phash.GetHash(), phash.GetKind()}},
|
||||||
Dhash: dhash,
|
|
||||||
Phash: phash,
|
|
||||||
Domain: i.Domain,
|
|
||||||
ID: i.ID,
|
ID: i.ID,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
274
map.go
Normal file
274
map.go
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
package ch
|
||||||
|
|
||||||
|
import (
|
||||||
|
"cmp"
|
||||||
|
"math/bits"
|
||||||
|
"slices"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"gitea.narnian.us/lordwelch/goimagehash"
|
||||||
|
)
|
||||||
|
|
||||||
|
type mapStorage struct {
|
||||||
|
hashMutex sync.RWMutex
|
||||||
|
partialHash [3][8]map[uint8][]int
|
||||||
|
// partialAhash [8]map[uint8][]int
|
||||||
|
// partialDhash [8]map[uint8][]int
|
||||||
|
// partialPhash [8]map[uint8][]int
|
||||||
|
|
||||||
|
ids []ID
|
||||||
|
|
||||||
|
idToHash map[int][3][]int
|
||||||
|
|
||||||
|
hashes [3][]uint64
|
||||||
|
// ahashes []uint64
|
||||||
|
// dhashes []uint64
|
||||||
|
// phashes []uint64
|
||||||
|
|
||||||
|
hashToID [3]map[int][]int
|
||||||
|
// ahashToID map[int][]int
|
||||||
|
// dhashToID map[int][]int
|
||||||
|
// phashToID map[int][]int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mapStorage) addID(id ID) int {
|
||||||
|
index, itemFound := slices.BinarySearchFunc(m.ids, id, func(existing, new ID) int {
|
||||||
|
return cmp.Or(
|
||||||
|
cmp.Compare(existing.Domain, new.Domain),
|
||||||
|
cmp.Compare(existing.ID, new.ID),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
if itemFound {
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
m.ids = slices.Insert(m.ids, index, id)
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mapStorage) getID(id ID) (int, bool) {
|
||||||
|
return slices.BinarySearchFunc(m.ids, id, func(existing, new ID) int {
|
||||||
|
return cmp.Or(
|
||||||
|
cmp.Compare(existing.Domain, new.Domain),
|
||||||
|
cmp.Compare(existing.ID, new.ID),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64, hashes []int) []Result {
|
||||||
|
hashType := int(hashKind) - 1
|
||||||
|
matchingHashes := make([]Result, 0, len(hashes)/2) // hope that we don't need all of them
|
||||||
|
for _, idx := range hashes {
|
||||||
|
storedHash := m.hashes[hashType][idx]
|
||||||
|
distance := bits.OnesCount64(searchHash ^ storedHash)
|
||||||
|
if distance <= maxDistance {
|
||||||
|
ids := make(IDList)
|
||||||
|
for _, idLocation := range m.hashToID[hashType][idx] {
|
||||||
|
ids[m.ids[idLocation].Domain] = Insert(ids[m.ids[idLocation].Domain], m.ids[idLocation].ID)
|
||||||
|
}
|
||||||
|
matchingHashes = append(matchingHashes, Result{ids, distance, Hash{storedHash, hashKind}})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return matchingHashes
|
||||||
|
}
|
||||||
|
func (m *mapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||||
|
var foundMatches []Result
|
||||||
|
m.hashMutex.RLock()
|
||||||
|
defer m.hashMutex.RUnlock()
|
||||||
|
|
||||||
|
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||||
|
for _, hash := range hashes {
|
||||||
|
hashType := int(hash.Kind) - 1
|
||||||
|
if hashLocation, found := slices.BinarySearch(m.hashes[hashType], hash.Hash); found {
|
||||||
|
idlist := make(IDList)
|
||||||
|
for _, idLocation := range m.hashToID[hashType][hashLocation] {
|
||||||
|
|
||||||
|
for _, hashLocation := range m.idToHash[idLocation][0] {
|
||||||
|
for _, foundIDLocation := range m.hashToID[hashType][hashLocation] {
|
||||||
|
foundID := m.ids[foundIDLocation]
|
||||||
|
idlist[foundID.Domain] = Insert(idlist[foundID.Domain], foundID.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(idlist) > 0 {
|
||||||
|
foundMatches = append(foundMatches, Result{
|
||||||
|
Distance: 0,
|
||||||
|
Hash: hash,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have exact matches don't bother with other matches
|
||||||
|
if len(foundMatches) > 0 && exactOnly {
|
||||||
|
return foundMatches, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foundHashes := make(map[uint64]struct{})
|
||||||
|
for _, hash := range hashes {
|
||||||
|
if hash.Hash == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
hashType := int(hash.Kind) - 1
|
||||||
|
for i, partialHash := range SplitHash(hash.Hash) {
|
||||||
|
for _, match := range m.Atleast(hash.Kind, max, hash.Hash, m.partialHash[hashType][i][partialHash]) {
|
||||||
|
_, alreadyMatched := foundHashes[match.Hash.Hash]
|
||||||
|
if alreadyMatched {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
foundMatches = append(foundMatches, match)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return foundMatches, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mapStorage) MapHashes(hash ImageHash) {
|
||||||
|
|
||||||
|
idIndex := m.addID(hash.ID)
|
||||||
|
idHashes := m.idToHash[idIndex]
|
||||||
|
for _, hash := range hash.Hashes {
|
||||||
|
var (
|
||||||
|
hashIndex int
|
||||||
|
hashType = int(hash.Kind) - 1
|
||||||
|
)
|
||||||
|
m.hashes[hashType], hashIndex = InsertIdx(m.hashes[hashType], hash.Hash)
|
||||||
|
for i, partialHash := range SplitHash(hash.Hash) {
|
||||||
|
m.partialHash[hashType][i][partialHash] = append(m.partialHash[hashType][i][partialHash], hashIndex)
|
||||||
|
}
|
||||||
|
idHashes[hashType] = Insert(idHashes[hashType], hashIndex)
|
||||||
|
m.hashToID[hashType][hashIndex] = Insert(m.hashToID[hashType][hashIndex], idIndex)
|
||||||
|
}
|
||||||
|
m.idToHash[idIndex] = idHashes
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mapStorage) DecodeHashes(hashes SavedHashes) error {
|
||||||
|
|
||||||
|
for _, sourceHashes := range hashes {
|
||||||
|
m.hashes[0] = make([]uint64, 0, len(sourceHashes))
|
||||||
|
m.hashes[1] = make([]uint64, 0, len(sourceHashes))
|
||||||
|
m.hashes[2] = make([]uint64, 0, len(sourceHashes))
|
||||||
|
break
|
||||||
|
}
|
||||||
|
for domain, sourceHashes := range hashes {
|
||||||
|
for id, h := range sourceHashes {
|
||||||
|
m.ids = append(m.ids, ID{Domain: Source(domain), ID: id})
|
||||||
|
|
||||||
|
for _, hash := range []Hash{Hash{h[0], goimagehash.AHash}, Hash{h[1], goimagehash.DHash}, Hash{h[2], goimagehash.PHash}} {
|
||||||
|
var (
|
||||||
|
hashType = int(hash.Kind) - 1
|
||||||
|
)
|
||||||
|
m.hashes[hashType] = append(m.hashes[hashType], hash.Hash)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slices.SortFunc(m.ids, func(existing, new ID) int {
|
||||||
|
return cmp.Or(
|
||||||
|
cmp.Compare(existing.Domain, new.Domain),
|
||||||
|
cmp.Compare(existing.ID, new.ID),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
slices.Sort(m.hashes[0])
|
||||||
|
slices.Sort(m.hashes[1])
|
||||||
|
slices.Sort(m.hashes[2])
|
||||||
|
for domain, sourceHashes := range hashes {
|
||||||
|
for id, h := range sourceHashes {
|
||||||
|
m.MapHashes(ImageHash{
|
||||||
|
Hashes: []Hash{{h[0], goimagehash.AHash}, {h[1], goimagehash.DHash}, {h[2], goimagehash.PHash}},
|
||||||
|
ID: ID{Domain: Source(domain), ID: id},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mapStorage) EncodeHashes() (SavedHashes, error) {
|
||||||
|
hashes := make(SavedHashes)
|
||||||
|
for idLocation, hashLocation := range m.idToHash {
|
||||||
|
id := m.ids[idLocation]
|
||||||
|
_, ok := hashes[id.Domain]
|
||||||
|
if !ok {
|
||||||
|
hashes[id.Domain] = make(map[string][3]uint64)
|
||||||
|
}
|
||||||
|
// TODO: Add all hashes. Currently saved hashes does not allow multiple IDs for a single hash
|
||||||
|
hashes[id.Domain][id.ID] = [3]uint64{
|
||||||
|
m.hashes[0][hashLocation[0][0]],
|
||||||
|
m.hashes[1][hashLocation[1][0]],
|
||||||
|
m.hashes[2][hashLocation[2][0]],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return hashes, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mapStorage) AssociateIDs(newids []NewIDs) {
|
||||||
|
for _, ids := range newids {
|
||||||
|
oldIDLocation, found := m.getID(ids.OldID)
|
||||||
|
if !found {
|
||||||
|
msg := "No IDs belonging to " + ids.OldID.Domain + "exist on this server"
|
||||||
|
panic(msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
newIDLocation := m.addID(ids.NewID)
|
||||||
|
|
||||||
|
for _, hashType := range []int{int(goimagehash.AHash), int(goimagehash.DHash), int(goimagehash.PHash)} {
|
||||||
|
for _, hashLocation := range m.idToHash[oldIDLocation][hashType] {
|
||||||
|
m.hashToID[hashType][hashLocation] = Insert(m.hashToID[hashType][hashLocation], newIDLocation)
|
||||||
|
idHashes := m.idToHash[newIDLocation]
|
||||||
|
idHashes[hashType] = Insert(idHashes[hashType], hashLocation)
|
||||||
|
m.idToHash[newIDLocation] = idHashes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *mapStorage) GetIDs(id ID) IDList {
|
||||||
|
idIndex, found := m.getID(id)
|
||||||
|
if !found {
|
||||||
|
msg := "No IDs belonging to " + id.Domain + "exist on this server"
|
||||||
|
panic(msg)
|
||||||
|
}
|
||||||
|
ids := make(IDList)
|
||||||
|
|
||||||
|
for _, hashLocation := range m.idToHash[idIndex][0] {
|
||||||
|
for _, foundIDLocation := range m.hashToID[0][hashLocation] {
|
||||||
|
foundID := m.ids[foundIDLocation]
|
||||||
|
ids[foundID.Domain] = Insert(ids[foundID.Domain], foundID.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, hashLocation := range m.idToHash[idIndex][1] {
|
||||||
|
for _, foundIDLocation := range m.hashToID[1][hashLocation] {
|
||||||
|
foundID := m.ids[foundIDLocation]
|
||||||
|
ids[foundID.Domain] = Insert(ids[foundID.Domain], foundID.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, hashLocation := range m.idToHash[idIndex][2] {
|
||||||
|
for _, foundIDLocation := range m.hashToID[2][hashLocation] {
|
||||||
|
foundID := m.ids[foundIDLocation]
|
||||||
|
ids[foundID.Domain] = Insert(ids[foundID.Domain], foundID.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ids
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMapStorage() (HashStorage, error) {
|
||||||
|
storage := &mapStorage{
|
||||||
|
hashMutex: sync.RWMutex{},
|
||||||
|
idToHash: make(map[int][3][]int),
|
||||||
|
hashToID: [3]map[int][]int{
|
||||||
|
make(map[int][]int),
|
||||||
|
make(map[int][]int),
|
||||||
|
make(map[int][]int),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for i := range storage.partialHash[0] {
|
||||||
|
storage.partialHash[0][i] = make(map[uint8][]int)
|
||||||
|
}
|
||||||
|
for i := range storage.partialHash[1] {
|
||||||
|
storage.partialHash[1][i] = make(map[uint8][]int)
|
||||||
|
}
|
||||||
|
for i := range storage.partialHash[2] {
|
||||||
|
storage.partialHash[2][i] = make(map[uint8][]int)
|
||||||
|
}
|
||||||
|
return storage, nil
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user