Make runtime hash storage modular
This commit is contained in:
@ -29,9 +29,10 @@ import (
_ ""
_ ""
_ ""
@ -39,37 +40,20 @@ import (
ch ""
// ""
// ""
// httphelper ""
// ""
type Server struct {
httpServer *http.Server
mux *http.ServeMux
BaseURL *url.URL
// token chan<- *oidc.Tokens
// Partial hashes are a uint64 split into 8 pieces, or a uint64 for quick lookup; the value is an index to covers
PartialAhash [8]map[uint8][]uint64
PartialDhash [8]map[uint8][]uint64
PartialPhash [8]map[uint8][]uint64
FullAhash map[uint64][]string // Maps ahash's to lists of ID's domain:id
FullDhash map[uint64][]string // Maps dhash's to lists of ID's domain:id
FullPhash map[uint64][]string // Maps phash's to lists of ID's domain:id
ids map[ch.Source]map[string]struct{}
hashMutex sync.RWMutex
httpServer *http.Server
mux *http.ServeMux
BaseURL *url.URL
hashes ch.HashStorage
quit chan struct{}
signalQueue chan os.Signal
readerQueue chan string
hashingQueue chan ch.Im
mappingQueue chan ch.Hash
mappingQueue chan ch.ImageHash
// var key = []byte(uuid.New().String())[:16]
type savedHashes map[ch.Source]map[string][3]uint64
type Format int
const (
@ -135,6 +119,8 @@ func main() {
opts.sqlitePath, _ = filepath.Abs(opts.sqlitePath)
@ -224,25 +210,25 @@ func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
writeJson(w, http.StatusBadRequest, result{Msg: msg})
if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
msg := "No IDs belonging to " + domain + "exist on this server"
writeJson(w, http.StatusBadRequest, result{Msg: msg})
// if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
// msg := "No IDs belonging to " + domain + "exist on this server"
// log.Println(msg)
// writeJson(w, http.StatusBadRequest, result{Msg: msg})
// }
log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
found := false
for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
for i, idlist := range hash {
if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
found = true
hash[i] = ch.Insert(idlist, newDomain+":"+newID)
if _, ok := s.ids[ch.Source(newDomain)]; !ok {
s.ids[ch.Source(newDomain)] = make(map[string]struct{})
s.ids[ch.Source(newDomain)][newID] = struct{}{}
// for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
// for i, idlist := range hash {
// if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
// found = true
// hash[i] = ch.Insert(idlist, newDomain+":"+newID)
// if _, ok := s.ids[ch.Source(newDomain)]; !ok {
// s.ids[ch.Source(newDomain)] = make(map[string]struct{})
// }
// s.ids[ch.Source(newDomain)][newID] = struct{}{}
// }
// }
// }
if found {
writeJson(w, http.StatusOK, result{Msg: "New ID added"})
} else {
@ -250,70 +236,6 @@ func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
func (s *Server) getMatches(ahash, dhash, phash uint64, max int, skipNonExact bool) []ch.Result {
var foundMatches []ch.Result
defer s.hashMutex.RUnlock()
if skipNonExact { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
if matchedResults, ok := s.FullAhash[ahash]; ok && ahash != 0 {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: ahash, Kind: goimagehash.AHash}})
if matchedResults, ok := s.FullDhash[dhash]; ok && dhash != 0 {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: dhash, Kind: goimagehash.DHash}})
if matchedResults, ok := s.FullPhash[phash]; ok && phash != 0 {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: phash, Kind: goimagehash.PHash}})
// If we have exact matches don't bother with other matches
if len(foundMatches) > 0 && skipNonExact {
return foundMatches
foundHashes := make(map[uint64]struct{})
if ahash != 0 {
for i, partialHash := range ch.SplitHash(ahash) {
for _, match := range ch.Atleast(max, ahash, s.PartialAhash[i][partialHash]) {
_, alreadyMatched := foundHashes[match.Hash]
if matchedResults, ok := s.FullAhash[match.Hash]; ok && !alreadyMatched {
foundHashes[match.Hash] = struct{}{}
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.AHash}})
foundHashes = make(map[uint64]struct{})
if dhash != 0 {
for i, partialHash := range ch.SplitHash(dhash) {
for _, match := range ch.Atleast(max, dhash, s.PartialDhash[i][partialHash]) {
_, alreadyMatched := foundHashes[match.Hash]
if matchedResults, ok := s.FullDhash[match.Hash]; ok && !alreadyMatched {
foundHashes[match.Hash] = struct{}{}
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.DHash}})
foundHashes = make(map[uint64]struct{})
if phash != 0 {
for i, partialHash := range ch.SplitHash(phash) {
for _, match := range ch.Atleast(max, phash, s.PartialPhash[i][partialHash]) {
_, alreadyMatched := foundHashes[match.Hash]
if matchedResults, ok := s.FullPhash[match.Hash]; ok && !alreadyMatched {
foundHashes[match.Hash] = struct{}{}
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.PHash}})
return foundMatches
type SimpleResult struct {
Distance int
IDList ch.IDList
@ -323,67 +245,31 @@ func getSimpleResults(fullResults []ch.Result) []SimpleResult {
simpleResult := make([]SimpleResult, 0, len(fullResults))
slices.SortFunc(fullResults, func(a, b ch.Result) int {
return cmp.Compare(a.Distance, b.Distance)
return cmp.Compare(a.Distance, b.Distance) * -1 // Reverses sort
// Deduplicate IDs
idToDistance := make(map[string]int)
distance := make(map[int]SimpleResult)
for _, fullResult := range fullResults {
for _, id := range fullResult.IDs {
if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
idToDistance[id] = fullResult.Distance
simple, ok := distance[fullResult.Distance]
if !ok {
simple.IDList = make(ch.IDList)
for source, ids := range fullResult.IDs {
for _, id := range ids {
simple.IDList[source] = ch.Insert(simple.IDList[source], id)
// Group by distance
distanceMap := make(map[int]SimpleResult)
for id, distance := range idToDistance {
var (
sr SimpleResult
ok bool
if sr, ok = distanceMap[distance]; !ok {
sr.IDList = make(ch.IDList)
sourceID := strings.SplitN(id, ":", 2)
sr.Distance = distance
sr.IDList[ch.Source(sourceID[0])] = append(sr.IDList[ch.Source(sourceID[0])], sourceID[1])
distanceMap[distance] = sr
// turn into array
for _, sr := range distanceMap {
for _, sr := range distance {
simpleResult = append(simpleResult, sr)
return simpleResult
type APIResult struct {
IDList ch.IDList
Distance int
Hash ch.ImageHash
func getResults(fullResults []ch.Result) []APIResult {
apiResults := make([]APIResult, 0, len(fullResults))
for _, res := range fullResults {
idlist := make(ch.IDList)
for _, id := range res.IDs {
sourceID := strings.SplitN(id, ":", 2)
idlist[ch.Source(sourceID[0])] = append(idlist[ch.Source(sourceID[0])], sourceID[1])
apiResults = append(apiResults,
Distance: res.Distance,
Hash: res.Hash,
IDList: idlist,
return apiResults
type result struct {
Results any `json:"results,omitempty"`
Msg string `json:"msg,omitempty"`
@ -411,19 +297,19 @@ func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
var (
values = r.URL.Query()
ahashStr = strings.TrimSpace(values.Get("ahash"))
dhashStr = strings.TrimSpace(values.Get("dhash"))
phashStr = strings.TrimSpace(values.Get("phash"))
maxStr = strings.TrimSpace(values.Get("max"))
skipNonExact = strings.ToLower(strings.TrimSpace(values.Get("skipNonExact"))) != "false"
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
ahash uint64
dhash uint64
phash uint64
max int = 8
max_tmp int
err error
values = r.URL.Query()
ahashStr = strings.TrimSpace(values.Get("ahash"))
dhashStr = strings.TrimSpace(values.Get("dhash"))
phashStr = strings.TrimSpace(values.Get("phash"))
maxStr = strings.TrimSpace(values.Get("max"))
exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
ahash uint64
dhash uint64
phash uint64
max int = 8
max_tmp int
err error
if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
@ -455,13 +341,24 @@ func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
matches := s.getMatches(ahash, dhash, phash, max, skipNonExact)
matches, err := s.hashes.GetMatches([]ch.Hash{{ahash, goimagehash.AHash}, {dhash, goimagehash.DHash}, {phash, goimagehash.PHash}}, max, exactOnly)
if len(matches) > 0 {
var msg string = ""
if err != nil {
msg = err.Error()
if simple {
writeJson(w, http.StatusOK, result{Results: getSimpleResults(matches)})
writeJson(w, http.StatusOK, result{
Results: getSimpleResults(matches),
Msg: msg,
writeJson(w, http.StatusOK, result{Results: getResults(matches)})
writeJson(w, http.StatusOK, result{
Results: matches,
Msg: msg,
@ -503,69 +400,14 @@ func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
s.hashingQueue <- ch.Im{Im: i, Format: format, Domain: ch.Source(domain), ID: ID, Path: ""}
s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.Source(domain), ID: ID}, Path: ""}
writeJson(w, http.StatusOK, result{Msg: "Success"})
func (s *Server) MapHashes(hash ch.Hash) {
defer s.hashMutex.Unlock()
s.mapHashes(hash.Ahash.GetHash(), hash.Dhash.GetHash(), hash.Phash.GetHash(), hash.Domain, hash.ID)
func (s *Server) mapHashes(ahash, dhash, phash uint64, domain ch.Source, id string) {
if _, ok := s.ids[domain]; !ok {
s.ids[domain] = make(map[string]struct{})
s.ids[domain][id] = struct{}{}
if _, ok := s.FullAhash[ahash]; !ok {
s.FullAhash[ahash] = make([]string, 0, 3)
s.FullAhash[ahash] = ch.Insert(s.FullAhash[ahash], string(domain)+":"+id)
if _, ok := s.FullDhash[dhash]; !ok {
s.FullDhash[dhash] = make([]string, 0, 3)
s.FullDhash[dhash] = ch.Insert(s.FullDhash[dhash], string(domain)+":"+id)
if _, ok := s.FullPhash[phash]; !ok {
s.FullPhash[phash] = make([]string, 0, 3)
s.FullPhash[phash] = ch.Insert(s.FullPhash[phash], string(domain)+":"+id)
for i, partialHash := range ch.SplitHash(ahash) {
s.PartialAhash[i][partialHash] = append(s.PartialAhash[i][partialHash], ahash)
for i, partialHash := range ch.SplitHash(dhash) {
s.PartialDhash[i][partialHash] = append(s.PartialDhash[i][partialHash], dhash)
for i, partialHash := range ch.SplitHash(phash) {
s.PartialPhash[i][partialHash] = append(s.PartialPhash[i][partialHash], phash)
func (s *Server) initHashes() {
for i := range s.PartialAhash {
s.PartialAhash[i] = make(map[uint8][]uint64)
for i := range s.PartialDhash {
s.PartialDhash[i] = make(map[uint8][]uint64)
for i := range s.PartialPhash {
s.PartialPhash[i] = make(map[uint8][]uint64)
s.FullAhash = make(map[uint64][]string)
s.FullDhash = make(map[uint64][]string)
s.FullPhash = make(map[uint64][]string)
s.ids = make(map[ch.Source]map[string]struct{})
func (s *Server) mapper(done func()) {
defer done()
for hash := range s.mappingQueue {
@ -575,7 +417,7 @@ func (s *Server) hasher(workerID int, done func()) {
start := time.Now()
hash := ch.HashImage(image)
if hash.Domain == "" {
if hash.ID.Domain == "" || hash.ID.ID == "" {
@ -588,7 +430,7 @@ func (s *Server) hasher(workerID int, done func()) {
elapsed := time.Since(start)
log.Printf("Hashing took %v: worker: %v. path: %s ahash: %064b id: %s\n", elapsed, workerID, image.Path, hash.Ahash.GetHash(), hash.ID)
log.Printf("Hashing took %v: worker: %v. path: %s %s: %064b id: %s\n", elapsed, workerID, image.Path, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
@ -605,7 +447,11 @@ func (s *Server) reader(workerID int, done func()) {
im := ch.Im{Im: i, Format: format, Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path)), Path: path}
im := ch.Im{
Im: i, Format: format,
ID: ch.ID{Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path))},
Path: path,
select {
case <-s.quit:
log.Println("Recieved quit")
@ -616,94 +462,43 @@ func (s *Server) reader(workerID int, done func()) {
func (s *Server) encodeHashes(e Encoder) ([]byte, error) {
hashes := make(savedHashes)
for source, ids := range s.ids {
hashes[source] = make(map[string][3]uint64, len(ids))
for hash, idlist := range s.FullAhash {
for _, id := range idlist {
sourceID := strings.SplitN(id, ":", 2)
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
h[0] = hash
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
for hash, idlist := range s.FullDhash {
for _, id := range idlist {
sourceID := strings.SplitN(id, ":", 2)
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
h[1] = hash
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
for hash, idlist := range s.FullPhash {
for _, id := range idlist {
sourceID := strings.SplitN(id, ":", 2)
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
h[2] = hash
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
return e(hashes)
// EncodeHashes must have a lock to s.hashMutex
func (s *Server) EncodeHashes(format Format) ([]byte, error) {
var encoder Encoder
switch format {
case Msgpack:
return s.encodeHashes(msgpack.Marshal)
encoder = msgpack.Marshal
case JSON:
return s.encodeHashes(json.Marshal)
encoder = json.Marshal
return nil, fmt.Errorf("Unknown format: %v", format)
func (s *Server) decodeHashes(d Decoder, hashes []byte) error {
loadedHashes := make(savedHashes)
err := d(hashes, &loadedHashes)
hashes, err := s.hashes.EncodeHashes()
if err != nil {
return err
return nil, err
for domain, ids := range loadedHashes {
for id := range ids {
if _, ok := s.ids[domain]; ok {
s.ids[domain][id] = struct{}{}
} else {
s.ids[domain] = make(map[string]struct{})
for _, sourceHashes := range loadedHashes {
s.FullAhash = make(map[uint64][]string, len(sourceHashes))
s.FullDhash = make(map[uint64][]string, len(sourceHashes))
s.FullPhash = make(map[uint64][]string, len(sourceHashes))
for domain, sourceHashes := range loadedHashes {
for id, h := range sourceHashes {
s.mapHashes(h[0], h[1], h[2], domain, id)
return nil
return encoder(hashes)
// DecodeHashes must have a lock to s.hashMutex
func (s *Server) DecodeHashes(format Format, hashes []byte) error {
var decoder Decoder
switch format {
case Msgpack:
return s.decodeHashes(msgpack.Unmarshal, hashes)
decoder = msgpack.Unmarshal
case JSON:
return s.decodeHashes(json.Unmarshal, hashes)
decoder = json.Unmarshal
return fmt.Errorf("Unknown format: %v", format)
loadedHashes := make(ch.SavedHashes)
err := decoder(hashes, &loadedHashes)
if err != nil {
return err
return s.hashes.DecodeHashes(loadedHashes)
func (s *Server) HashLocalImages(opts Opts) {
@ -769,13 +564,13 @@ func startServer(opts Opts) {
mux := http.NewServeMux()
server := Server{
// token: make(chan *oidc.Tokens),
quit: make(chan struct{}),
signalQueue: make(chan os.Signal, 1),
readerQueue: make(chan string, 1120130), // Number gotten from checking queue size
readerQueue: make(chan string, 100),
hashingQueue: make(chan ch.Im),
mappingQueue: make(chan ch.Hash),
mappingQueue: make(chan ch.ImageHash),
mux: mux,
httpServer: &http.Server{
Addr: ":8080",
@ -786,12 +581,16 @@ func startServer(opts Opts) {
var err error
fmt.Println("init hashes")
// server.setupOauthHandlers()
server.hashes, err = ch.NewMapStorage()
if err != nil {
fmt.Println("init handlers")
fmt.Println("init hashers")
rwg := sync.WaitGroup{}
for i := range 10 {
@ -829,7 +628,7 @@ func startServer(opts Opts) {
if err != nil {
panic(fmt.Sprintf("Failed to decode embedded hashes: %s", err))
fmt.Printf("Loaded embedded %s hashes ahashes: %d dhashes: %d phashes: %d\n", format, len(server.FullAhash), len(server.FullDhash), len(server.FullPhash))
fmt.Printf("Loaded embedded %s hashes\n", format)
} else {
if f, err := os.Open(opts.hashesPath); err == nil {
var buf io.Reader = f
@ -854,7 +653,7 @@ func startServer(opts Opts) {
if err != nil {
panic(fmt.Sprintf("Failed to decode hashes from disk: %s", err))
fmt.Printf("Loaded hashes from %q %s hashes ahashes: %d dhashes: %d phashes: %d\n", opts.hashesPath, format, len(server.FullAhash), len(server.FullDhash), len(server.FullPhash))
fmt.Printf("Loaded hashes from %q %s\n", opts.hashesPath, format)
} else {
if errors.Is(err, os.ErrNotExist) {
fmt.Println("No saved hashes to load")
@ -867,7 +666,7 @@ func startServer(opts Opts) {
fmt.Println("Listening on ", server.httpServer.Addr)
err := server.httpServer.ListenAndServe()
err = server.httpServer.ListenAndServe()
if err != nil {
@ -106,9 +106,9 @@ func main() {
debugImage(debugim, 8, 8)
hash := ch.HashImage(ch.Im{Im: im, Format: format, Domain: ch.Source(ch.ComicVine), ID: "nothing"})
hash := ch.HashImage(ch.Im{Im: im, Format: format, ID: ch.ID{Domain: ch.Source(ch.ComicVine), ID: "nothing"}})
fmt.Println("ahash: ", hash.Ahash.BinString())
fmt.Println("dhash: ", hash.Dhash.BinString())
fmt.Println("phash: ", hash.Phash.BinString())
fmt.Println("ahash: ", goimagehash.NewImageHash(hash.Hashes[0].Hash, hash.Hashes[0].Kind).BinString())
fmt.Println("dhash: ", goimagehash.NewImageHash(hash.Hashes[1].Hash, hash.Hashes[1].Kind).BinString())
fmt.Println("phash: ", goimagehash.NewImageHash(hash.Hashes[2].Hash, hash.Hashes[2].Kind).BinString())
@ -6,8 +6,8 @@ toolchain go1.22.2
require (
|||| v0.0.0-20240812025715-33ff96e45f00
|||| v1.6.3-0.20201218193011-d40f48ce0f09
|||| v2.0.2
|||| v0.1.0
|||| v4.0.0-alpha.8
|||| v0.19.0
|||| v0.17.0
@ -24,6 +24,7 @@ require (
|||| v1.3.0 // indirect
|||| v1.0.0 // indirect
|||| v0.2.1 // indirect
|||| v1.6.3-0.20201218193011-d40f48ce0f09 // indirect
|||| v0.0.1 // indirect
|||| v1.6.0 // indirect
|||| v0.0.4 // indirect
@ -31,6 +32,7 @@ require (
|||| v1.1.1 // indirect
|||| v1.15.9 // indirect
|||| v1.2.5 // indirect
|||| v0.1.0 // indirect
|||| v2.0.0-beta.2 // indirect
|||| v4.1.15 // indirect
|||| v1.0.1 // indirect
@ -94,8 +94,10 @@ v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHU
|||| v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|||| v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
|||| v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|||| v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|||| v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|||| v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|||| v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|||| v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|||| v0.0.0-20240505231825-4893f344170f h1:RMKTfrT4gjJfmB/aWuvCcFxUSvWAJfOAc5khGL6ASjk=
|||| v0.0.0-20240505231825-4893f344170f/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
@ -50,35 +50,49 @@ type Match struct {
type ID struct {
Domain, ID string
type Result struct {
IDs []string // domain:id
Distance int
Hash ImageHash
type Im struct {
Im image.Image
Format string
Domain Source
ID, Path string
type Hash struct {
Ahash *goimagehash.ImageHash
Dhash *goimagehash.ImageHash
Phash *goimagehash.ImageHash
Domain Source
ID string
type Result struct {
IDs IDList
Distance int
Hash Hash
type Im struct {
Im image.Image
Format string
Path string
type ImageHash struct {
Hashes []Hash
type Hash struct {
Hash uint64
Kind goimagehash.Kind
type SavedHashes map[Source]map[string][3]uint64
type NewIDs struct {
// HashStorage is the storage backend behind the server's hash lookups; the
// server holds one as its hashes field and obtains it from NewMapStorage.
type HashStorage interface {
// GetMatches returns the stored results matching the supplied hashes.
// In mapStorage, max is the largest bit distance accepted and exactOnly
// makes exact hits take precedence over near matches.
GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error)
// DecodeHashes loads hashes that were previously saved as SavedHashes.
DecodeHashes(hashes SavedHashes) error
// EncodeHashes exports the stored hashes as SavedHashes for serialization.
EncodeHashes() (SavedHashes, error)
// AssociateIDs links each new ID to the hashes of an already known ID.
AssociateIDs(newIDs []NewIDs)
// GetIDs returns the IDs related to id, grouped by source.
GetIDs(id ID) IDList
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
for _, storedHash := range hashes {
@ -98,47 +112,49 @@ func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
return slices.Insert(slice, index, item)
// InsertIdx inserts item into the sorted slice if it is not already present
// and returns the resulting slice together with the position of item in it.
//
// slice must be sorted in ascending order; the position is found by binary
// search. When item already exists the slice is returned unchanged. Inserting
// shifts every later element one place to the right, so previously obtained
// positions at or after the returned index are no longer valid.
func InsertIdx[S ~[]E, E cmp.Ordered](slice S, item E) (S, int) {
	pos, exists := slices.BinarySearch(slice, item)
	if !exists {
		slice = slices.Insert(slice, pos, item)
	}
	return slice, pos
}
// MemStats returns the number of bytes of heap objects currently allocated,
// as reported in the Alloc field of runtime.MemStats.
func MemStats() uint64 {
	var m runtime.MemStats
	// Without this call m stays zero-valued and Alloc would always be 0.
	runtime.ReadMemStats(&m)
	return m.Alloc
}
func HashImage(i Im) Hash {
func HashImage(i Im) ImageHash {
if i.Format == "webp" {
i.Im = goimagehash.FancyUpscale(i.Im.(*image.YCbCr))
var (
err error = nil
ahash *goimagehash.ImageHash
dhash *goimagehash.ImageHash
phash *goimagehash.ImageHash
err error
ahash, err = goimagehash.AverageHash(i.Im)
ahash, err := goimagehash.AverageHash(i.Im)
if err != nil {
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
return Hash{}
return ImageHash{}
dhash, err = goimagehash.DifferenceHash(i.Im)
dhash, err := goimagehash.DifferenceHash(i.Im)
if err != nil {
msg := fmt.Sprintf("Failed to dhash Image: %s", err)
return Hash{}
return ImageHash{}
phash, err = goimagehash.PerceptionHash(i.Im)
phash, err := goimagehash.PerceptionHash(i.Im)
if err != nil {
msg := fmt.Sprintf("Failed to phash Image: %s", err)
return Hash{}
return ImageHash{}
return Hash{
Ahash: ahash,
Dhash: dhash,
Phash: phash,
Domain: i.Domain,
return ImageHash{
Hashes: []Hash{{ahash.GetHash(), ahash.GetKind()}, {dhash.GetHash(), dhash.GetKind()}, {phash.GetHash(), phash.GetKind()}},
ID: i.ID,
Normal file
Normal file
@ -0,0 +1,274 @@
package ch
import (
// mapStorage is the HashStorage implementation returned by NewMapStorage.
// Hashes and IDs live in slices and are cross-referenced by integer position.
// Wherever a field has three entries, the entry is selected with
// int(goimagehash.Kind) - 1.
type mapStorage struct {
// hashMutex guards the fields below; GetMatches releases a read lock on it.
hashMutex sync.RWMutex
// partialHash maps, per hash type and per each of the 8 pieces produced by
// SplitHash, a piece value to positions in hashes.
partialHash [3][8]map[uint8][]int
// partialAhash [8]map[uint8][]int
// partialDhash [8]map[uint8][]int
// partialPhash [8]map[uint8][]int
// ids holds every known ID; addID and getID binary-search it by Domain, then ID.
ids []ID
// idToHash maps a position in ids to, per hash type, positions in hashes.
idToHash map[int][3][]int
// hashes holds the full 64-bit hashes per hash type.
hashes [3][]uint64
// ahashes []uint64
// dhashes []uint64
// phashes []uint64
// hashToID maps, per hash type, a position in hashes to positions in ids.
hashToID [3]map[int][]int
// ahashToID map[int][]int
// dhashToID map[int][]int
// phashToID map[int][]int
// addID returns the position of id in m.ids, inserting it first when it is
// not already present. m.ids is searched with a binary search ordered by
// Domain and then by ID, and the insert happens at the position that search
// reports.
// NOTE(review): slices.Insert moves every later element one place to the
// right, while idToHash and hashToID store positions in m.ids — confirm the
// stored positions are still correct after an insert in the middle.
func (m *mapStorage) addID(id ID) int {
index, itemFound := slices.BinarySearchFunc(m.ids, id, func(existing, new ID) int {
return cmp.Or(
cmp.Compare(existing.Domain, new.Domain),
cmp.Compare(existing.ID, new.ID),
if itemFound {
return index
m.ids = slices.Insert(m.ids, index, id)
return index
// getID binary-searches m.ids for id, comparing by Domain and then by ID.
// It returns the position reported by the search and whether id was found;
// when it was not found the position is where id would be inserted.
func (m *mapStorage) getID(id ID) (int, bool) {
return slices.BinarySearchFunc(m.ids, id, func(existing, new ID) int {
return cmp.Or(
cmp.Compare(existing.Domain, new.Domain),
cmp.Compare(existing.ID, new.ID),
// Atleast filters candidate hashes by bit distance to searchHash.
// hashes holds positions in m.hashes for the given hashKind. For every
// candidate whose distance (number of differing bits) is at most maxDistance,
// a Result is produced carrying the stored hash, the distance and the IDs
// recorded for that hash in hashToID, grouped by domain.
func (m *mapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64, hashes []int) []Result {
// Kinds are mapped onto the 3-element arrays by subtracting one.
hashType := int(hashKind) - 1
matchingHashes := make([]Result, 0, len(hashes)/2) // hope that we don't need all of them
for _, idx := range hashes {
storedHash := m.hashes[hashType][idx]
// XOR leaves a 1 bit wherever the two hashes differ.
distance := bits.OnesCount64(searchHash ^ storedHash)
if distance <= maxDistance {
ids := make(IDList)
for _, idLocation := range m.hashToID[hashType][idx] {
ids[m.ids[idLocation].Domain] = Insert(ids[m.ids[idLocation].Domain], m.ids[idLocation].ID)
matchingHashes = append(matchingHashes, Result{ids, distance, Hash{storedHash, hashKind}})
return matchingHashes
// GetMatches looks up stored hashes matching the supplied hashes.
// With exactOnly set, each supplied hash is first binary-searched in m.hashes
// and, if anything was found, only those distance-0 results are returned.
// Otherwise each supplied hash is split with SplitHash, and the candidates in
// the corresponding partialHash buckets are filtered through Atleast using max
// as the distance limit. The returned error is nil on every path visible here.
// NOTE(review): the exact-match branch reads m.idToHash[idLocation][0] for
// every hash kind — confirm index 0 (rather than hashType) is intended.
// NOTE(review): nothing visible here writes to foundHashes — confirm that
// partial matches are actually de-duplicated.
func (m *mapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var foundMatches []Result
defer m.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes {
hashType := int(hash.Kind) - 1
if hashLocation, found := slices.BinarySearch(m.hashes[hashType], hash.Hash); found {
idlist := make(IDList)
// Gather every ID that shares a hash with any ID attached to the exact hit.
for _, idLocation := range m.hashToID[hashType][hashLocation] {
for _, hashLocation := range m.idToHash[idLocation][0] {
for _, foundIDLocation := range m.hashToID[hashType][hashLocation] {
foundID := m.ids[foundIDLocation]
idlist[foundID.Domain] = Insert(idlist[foundID.Domain], foundID.ID)
if len(idlist) > 0 {
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: hash,
// If we have exact matches don't bother with other matches
if len(foundMatches) > 0 && exactOnly {
return foundMatches, nil
foundHashes := make(map[uint64]struct{})
for _, hash := range hashes {
// A hash of 0 is special-cased (presumably skipped — TODO confirm).
if hash.Hash == 0 {
hashType := int(hash.Kind) - 1
for i, partialHash := range SplitHash(hash.Hash) {
for _, match := range m.Atleast(hash.Kind, max, hash.Hash, m.partialHash[hashType][i][partialHash]) {
_, alreadyMatched := foundHashes[match.Hash.Hash]
if alreadyMatched {
foundMatches = append(foundMatches, match)
return foundMatches, nil
// MapHashes records every hash in hash.Hashes against hash.ID.
// The ID is registered with addID. Each hash is then inserted into the
// per-type m.hashes slice with InsertIdx, and its position is added to the 8
// partialHash buckets, to the ID's entry in idToHash, and to hashToID.
// NOTE(review): no use of hashMutex is visible in this method — confirm
// callers serialize writes against GetMatches.
// NOTE(review): InsertIdx and addID insert in sorted order, which moves later
// elements; positions already stored in partialHash, idToHash and hashToID
// are not adjusted here — verify they stay valid.
func (m *mapStorage) MapHashes(hash ImageHash) {
idIndex := m.addID(hash.ID)
idHashes := m.idToHash[idIndex]
for _, hash := range hash.Hashes {
var (
hashIndex int
hashType = int(hash.Kind) - 1
m.hashes[hashType], hashIndex = InsertIdx(m.hashes[hashType], hash.Hash)
for i, partialHash := range SplitHash(hash.Hash) {
m.partialHash[hashType][i][partialHash] = append(m.partialHash[hashType][i][partialHash], hashIndex)
idHashes[hashType] = Insert(idHashes[hashType], hashIndex)
m.hashToID[hashType][hashIndex] = Insert(m.hashToID[hashType][hashIndex], idIndex)
// idHashes is a copy of the map value (an array), so it has to be stored back.
m.idToHash[idIndex] = idHashes
// DecodeHashes loads previously saved hashes into the storage.
// Every saved ID is appended to m.ids and every saved hash to the m.hashes
// slice of its kind. m.ids is then sorted by Domain and then ID, and the saved
// data is walked a second time building the hash triple and ID of each entry.
// The three values saved per ID are treated as ahash, dhash and phash, in that
// order. The visible code always returns nil.
// NOTE(review): the three m.hashes slices are re-made on every iteration of
// the first loop, so they end up sized for whichever source is visited last.
// NOTE(review): m.hashes is appended to without sorting or de-duplication,
// while GetMatches and InsertIdx binary-search it — confirm.
func (m *mapStorage) DecodeHashes(hashes SavedHashes) error {
for _, sourceHashes := range hashes {
m.hashes[0] = make([]uint64, 0, len(sourceHashes))
m.hashes[1] = make([]uint64, 0, len(sourceHashes))
m.hashes[2] = make([]uint64, 0, len(sourceHashes))
for domain, sourceHashes := range hashes {
for id, h := range sourceHashes {
m.ids = append(m.ids, ID{Domain: Source(domain), ID: id})
for _, hash := range []Hash{Hash{h[0], goimagehash.AHash}, Hash{h[1], goimagehash.DHash}, Hash{h[2], goimagehash.PHash}} {
var (
hashType = int(hash.Kind) - 1
m.hashes[hashType] = append(m.hashes[hashType], hash.Hash)
// addID and getID rely on this ordering for their binary search.
slices.SortFunc(m.ids, func(existing, new ID) int {
return cmp.Or(
cmp.Compare(existing.Domain, new.Domain),
cmp.Compare(existing.ID, new.ID),
for domain, sourceHashes := range hashes {
for id, h := range sourceHashes {
Hashes: []Hash{{h[0], goimagehash.AHash}, {h[1], goimagehash.DHash}, {h[2], goimagehash.PHash}},
ID: ID{Domain: Source(domain), ID: id},
return nil
// EncodeHashes exports the stored data as SavedHashes: one [3]uint64 entry
// per ID, grouped by the ID's domain. It walks idToHash and resolves each key
// to an ID through m.ids. The visible code always returns a nil error.
func (m *mapStorage) EncodeHashes() (SavedHashes, error) {
hashes := make(SavedHashes)
for idLocation, hashLocation := range m.idToHash {
id := m.ids[idLocation]
_, ok := hashes[id.Domain]
if !ok {
// First ID seen for this domain: create its inner map.
hashes[id.Domain] = make(map[string][3]uint64)
// TODO: Add all hashes. Currently saved hashes does not allow multiple IDs for a single hash
hashes[id.Domain][id.ID] = [3]uint64{
return hashes, nil
// AssociateIDs attaches each NewID to every hash already recorded for the
// matching OldID. The old ID is looked up with getID and the new one is
// registered with addID; the new ID's position is then added to hashToID for
// each of the old ID's hashes, and those hashes are added to the new ID's
// entry in idToHash.
// NOTE(review): hashType here is int(goimagehash.AHash/DHash/PHash) without
// the "- 1" that Atleast, GetMatches and MapHashes apply before indexing the
// 3-element arrays — confirm this cannot index out of range or hit the wrong
// hash kind.
// NOTE(review): the not-found message has no space before "exist".
func (m *mapStorage) AssociateIDs(newids []NewIDs) {
for _, ids := range newids {
oldIDLocation, found := m.getID(ids.OldID)
if !found {
msg := "No IDs belonging to " + ids.OldID.Domain + "exist on this server"
newIDLocation := m.addID(ids.NewID)
for _, hashType := range []int{int(goimagehash.AHash), int(goimagehash.DHash), int(goimagehash.PHash)} {
for _, hashLocation := range m.idToHash[oldIDLocation][hashType] {
m.hashToID[hashType][hashLocation] = Insert(m.hashToID[hashType][hashLocation], newIDLocation)
// The map value is an array, so modify a copy and store it back.
idHashes := m.idToHash[newIDLocation]
idHashes[hashType] = Insert(idHashes[hashType], hashLocation)
m.idToHash[newIDLocation] = idHashes
// GetIDs returns, grouped by domain, every ID that shares at least one hash
// with id. The three hash types are handled one after another: the hashes
// recorded for id in idToHash are followed through hashToID to the IDs
// attached to them.
// NOTE(review): the not-found message has no space before "exist".
func (m *mapStorage) GetIDs(id ID) IDList {
idIndex, found := m.getID(id)
if !found {
msg := "No IDs belonging to " + id.Domain + "exist on this server"
ids := make(IDList)
// Hash type 0.
for _, hashLocation := range m.idToHash[idIndex][0] {
for _, foundIDLocation := range m.hashToID[0][hashLocation] {
foundID := m.ids[foundIDLocation]
ids[foundID.Domain] = Insert(ids[foundID.Domain], foundID.ID)
// Hash type 1.
for _, hashLocation := range m.idToHash[idIndex][1] {
for _, foundIDLocation := range m.hashToID[1][hashLocation] {
foundID := m.ids[foundIDLocation]
ids[foundID.Domain] = Insert(ids[foundID.Domain], foundID.ID)
// Hash type 2.
for _, hashLocation := range m.idToHash[idIndex][2] {
for _, foundIDLocation := range m.hashToID[2][hashLocation] {
foundID := m.ids[foundIDLocation]
ids[foundID.Domain] = Insert(ids[foundID.Domain], foundID.ID)
return ids
// NewMapStorage creates an empty in-memory mapStorage and returns it as a
// HashStorage. All 3x8 partialHash maps are allocated up front so they can be
// written to without nil-map checks. The visible code always returns a nil
// error.
func NewMapStorage() (HashStorage, error) {
storage := &mapStorage{
hashMutex: sync.RWMutex{},
idToHash: make(map[int][3][]int),
hashToID: [3]map[int][]int{
for i := range storage.partialHash[0] {
storage.partialHash[0][i] = make(map[uint8][]int)
for i := range storage.partialHash[1] {
storage.partialHash[1][i] = make(map[uint8][]int)
for i := range storage.partialHash[2] {
storage.partialHash[2][i] = make(map[uint8][]int)
return storage, nil
Reference in New Issue
Block a user