Compare commits

...

3 Commits

Author SHA1 Message Date
Timmy Welch
bf23bb5a4c Add Server http Header and add -listen and -debug options 2025-04-13 15:58:34 -07:00
Timmy Welch
d7946c2aaf Fix simple results 2025-04-13 15:57:29 -07:00
Timmy Welch
f54b4b1d9d Fix exact matches to match previous behavior 2025-04-13 15:54:17 -07:00
7 changed files with 112 additions and 44 deletions

View File

@ -41,6 +41,32 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
return matchingHashes return matchingHashes
} }
// exactMatches collects, for every hash in hashes, the stored hashes that
// findHash reports as matching it, and returns one Result per distinct ID list
// with Distance set to 0.
//
// De-duplication is per queried hash: a stored ID list (compared by pointer)
// is reported at most once for a given input hash, but may appear again for a
// different input hash. The max parameter is not used here.
//
// This method takes no lock itself; basicMapStorage.GetMatches calls it while
// holding b.hashMutex for reading.
func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
	var results []Result
	for _, wanted := range hashes {
		start, n := b.findHash(wanted)
		if n <= 0 {
			continue
		}

		// Tracks which ID lists have already been emitted for this input hash.
		seen := map[*[]ID]bool{}
		stored := *b.getCurrentHashes(wanted.Kind)
		for _, candidate := range stored[start : start+n] {
			idList := b.ids[candidate.ID]
			if seen[idList] {
				continue
			}
			seen[idList] = true

			results = append(results, Result{
				Distance: 0,
				Hash:     candidate.Hash,
				IDs:      ToIDList(*idList),
			})
		}
	}
	return results
}
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) { func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var ( var (
foundMatches []Result foundMatches []Result
@ -52,31 +78,12 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
defer b.hashMutex.RUnlock() defer b.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
for _, hash := range hashes { foundMatches = b.exactMatches(hashes, max)
mappedIds := map[*[]ID]bool{}
index, count := b.findHash(hash)
if count > 0 {
for _, storedHash := range (*b.getCurrentHashes(hash.Kind))[index : index+count] {
ids := b.ids[storedHash.ID]
if mappedIds[ids] {
continue
}
mappedIds[ids] = true
foundMatches = append(foundMatches, Result{
Distance: 0,
Hash: storedHash.Hash,
IDs: ToIDList(*b.ids[storedHash.ID]),
})
}
}
}
tl.logTime("Search Exact") tl.logTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil return foundMatches, nil
}
} }
foundHashes := make(map[uint64]struct{}) foundHashes := make(map[uint64]struct{})

View File

@ -22,6 +22,7 @@ import (
"net/url" "net/url"
"os" "os"
"path/filepath" "path/filepath"
"runtime/debug"
"runtime/pprof" "runtime/pprof"
"slices" "slices"
"strconv" "strconv"
@ -44,7 +45,7 @@ import (
type Server struct { type Server struct {
httpServer *http.Server httpServer *http.Server
mux *http.ServeMux mux *CHMux
BaseURL *url.URL BaseURL *url.URL
hashes ch.HashStorage hashes ch.HashStorage
Context context.Context Context context.Context
@ -54,6 +55,7 @@ type Server struct {
hashingQueue chan ch.Im hashingQueue chan ch.Im
mappingQueue chan ch.ImageHash mappingQueue chan ch.ImageHash
onlyHashNewIDs bool onlyHashNewIDs bool
version string
} }
var bufPool = &sync.Pool{ var bufPool = &sync.Pool{
@ -128,12 +130,28 @@ type Opts struct {
onlyHashNewIDs bool onlyHashNewIDs bool
deleteHashedImages bool deleteHashedImages bool
path string path string
version string
addr string
debugPort string
cv CVOpts cv CVOpts
} }
func main() { func main() {
opts := Opts{format: ch.Msgpack, storageType: BasicMap} // flag is weird version := "devel"
buildInfo, buildInfoFound := debug.ReadBuildInfo()
versionInfo := strings.SplitN(buildInfo.Main.Version, "-", 3)
if buildInfoFound {
switch len(versionInfo) {
default:
version = buildInfo.Main.Version
case 2:
version = versionInfo[1]
case 3:
version = versionInfo[0] + "-" + versionInfo[2]
}
}
opts := Opts{format: ch.Msgpack, storageType: BasicMap, version: version} // flag is weird
wd, err := os.Getwd() wd, err := os.Getwd()
fmt.Println(err) fmt.Println(err)
if err != nil { if err != nil {
@ -141,10 +159,9 @@ func main() {
} else { } else {
wd = filepath.Join(wd, "comic-hasher") wd = filepath.Join(wd, "comic-hasher")
} }
go func() {
log.Println(http.ListenAndServe("localhost:6060", nil))
}()
flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file") flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file")
flag.StringVar(&opts.addr, "listen", ":8080", "Address to listen on")
flag.StringVar(&opts.debugPort, "debug-port", "", "Port to listen to for debug info")
flag.StringVar(&opts.path, "path", wd, "Path for comic-hasher to store files") flag.StringVar(&opts.path, "path", wd, "Path for comic-hasher to store files")
flag.StringVar(&opts.coverPath, "cover-path", "", "Path to local covers to add to hash database. Must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif") flag.StringVar(&opts.coverPath, "cover-path", "", "Path to local covers to add to hash database. Must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif")
@ -166,6 +183,11 @@ func main() {
flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded") flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded")
flag.Parse() flag.Parse()
if opts.debugPort != "" {
go func() {
log.Println(http.ListenAndServe("127.0.0.1:"+opts.debugPort, nil))
}()
}
if opts.coverPath != "" { if opts.coverPath != "" {
_, err := os.Stat(opts.coverPath) _, err := os.Stat(opts.coverPath)
if err != nil { if err != nil {
@ -322,24 +344,43 @@ func getSimpleResults(fullResults []ch.Result) []SimpleResult {
}) })
// Deduplicate IDs // Deduplicate IDs
distance := make(map[int]SimpleResult) idToDistance := make(map[ch.ID]int)
for _, fullResult := range fullResults { for _, fullResult := range fullResults {
simple, ok := distance[fullResult.Distance] for domain, idlist := range fullResult.IDs {
if !ok { for _, idStr := range idlist {
simple.IDList = make(ch.IDList) id := ch.ID{
} Domain: domain,
for source, ids := range fullResult.IDs { ID: idStr,
for _, id := range ids { }
simple.IDList[source] = ch.Insert(simple.IDList[source], id) if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
idToDistance[id] = fullResult.Distance
}
} }
} }
} }
// Group by distance
distanceMap := make(map[int]SimpleResult)
for id, distance := range idToDistance {
var (
sr SimpleResult
ok bool
)
if sr, ok = distanceMap[distance]; !ok {
sr.IDList = make(ch.IDList)
}
sr.Distance = distance
sr.IDList[id.Domain] = append(sr.IDList[id.Domain], id.ID)
distanceMap[distance] = sr
}
// turn into array // turn into array
for _, sr := range distance { for _, sr := range distanceMap {
simpleResult = append(simpleResult, sr) simpleResult = append(simpleResult, sr)
} }
slices.SortFunc(simpleResult, func(a, b SimpleResult) int {
return cmp.Compare(a.Distance, b.Distance)
})
return simpleResult return simpleResult
} }
@ -746,6 +787,14 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
} }
} }
// CHMux is an http.ServeMux that stamps every response with a Server header
// identifying the running Comic-Hasher version.
//
// Field order is significant: the type is constructed with a positional
// composite literal elsewhere in this file.
type CHMux struct {
	version string // appended to the Server header value
	*http.ServeMux
}

// ServeHTTP sets the Server response header and then dispatches the request to
// the embedded ServeMux.
//
// Because this method shadows the promoted (*http.ServeMux).ServeHTTP, it must
// delegate explicitly; otherwise no registered route is ever reached and every
// request is answered with an empty 200 response.
func (m *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// The header has to be set before the routed handler writes the response.
	w.Header().Set("Server", "Comic-Hasher "+m.version)
	m.ServeMux.ServeHTTP(w, r)
}
func startServer(opts Opts) { func startServer(opts Opts) {
imaging.SetMaxProcs(2) imaging.SetMaxProcs(2)
if opts.cpuprofile != "" { if opts.cpuprofile != "" {
@ -757,7 +806,7 @@ func startServer(opts Opts) {
defer pprof.StopCPUProfile() defer pprof.StopCPUProfile()
} }
mux := http.NewServeMux() mux := &CHMux{opts.version, &http.ServeMux{}}
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
server := Server{ server := Server{
@ -769,13 +818,14 @@ func startServer(opts Opts) {
mappingQueue: make(chan ch.ImageHash, 1), mappingQueue: make(chan ch.ImageHash, 1),
mux: mux, mux: mux,
httpServer: &http.Server{ httpServer: &http.Server{
Addr: ":8080", Addr: opts.addr,
Handler: mux, Handler: mux,
ReadTimeout: 10 * time.Second, ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second, WriteTimeout: 10 * time.Second,
MaxHeaderBytes: 1 << 20, MaxHeaderBytes: 1 << 20,
}, },
onlyHashNewIDs: opts.onlyHashNewIDs, onlyHashNewIDs: opts.onlyHashNewIDs,
version: opts.version,
} }
Notify(server.signalQueue) Notify(server.signalQueue)
var err error var err error

2
go.mod
View File

@ -15,6 +15,7 @@ require (
github.com/vmihailenco/msgpack v4.0.4+incompatible github.com/vmihailenco/msgpack v4.0.4+incompatible
go.etcd.io/bbolt v1.4.0 go.etcd.io/bbolt v1.4.0
golang.org/x/image v0.24.0 golang.org/x/image v0.24.0
golang.org/x/sys v0.30.0
golang.org/x/text v0.22.0 golang.org/x/text v0.22.0
gonum.org/v1/gonum v0.15.1 gonum.org/v1/gonum v0.15.1
modernc.org/sqlite v1.35.0 modernc.org/sqlite v1.35.0
@ -48,7 +49,6 @@ require (
github.com/ulikunitz/xz v0.5.10 // indirect github.com/ulikunitz/xz v0.5.10 // indirect
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
golang.org/x/sys v0.30.0 // indirect
google.golang.org/appengine v1.6.8 // indirect google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.36.5 // indirect google.golang.org/protobuf v1.36.5 // indirect
modernc.org/libc v1.61.13 // indirect modernc.org/libc v1.61.13 // indirect

9
map.go
View File

@ -23,8 +23,13 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
m.hashMutex.RLock() m.hashMutex.RLock()
defer m.hashMutex.RUnlock() defer m.hashMutex.RUnlock()
if exactOnly { if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
return m.basicMapStorage.GetMatches(hashes, max, exactOnly) foundMatches = m.exactMatches(hashes, max)
tl.logTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
} }
tl.resetTime() tl.resetTime()
defer tl.logTime("Search Complete") defer tl.logTime("Search Complete")

View File

@ -151,6 +151,7 @@ func DecodeHashesV0(decode Decoder, hashes []byte) (*SavedHashes, error) {
if len(loadedHashes) == 0 { if len(loadedHashes) == 0 {
return nil, NoHashes return nil, NoHashes
} }
fmt.Println("Loaded V0 hashes")
return ConvertHashesV0(loadedHashes), nil return ConvertHashesV0(loadedHashes), nil
} }
@ -167,6 +168,7 @@ func DecodeHashesV1(decode Decoder, hashes []byte) (*SavedHashes, error) {
if hashesCount < 1 { if hashesCount < 1 {
return nil, NoHashes return nil, NoHashes
} }
fmt.Println("Loaded V1 hashes")
return ConvertHashesV1(loadedHashes), nil return ConvertHashesV1(loadedHashes), nil
} }
@ -180,6 +182,7 @@ func DecodeHashesV2(decode Decoder, hashes []byte) (*SavedHashes, error) {
return nil, NoHashes return nil, NoHashes
} }
fmt.Println("Loaded V2 hashes")
return &loadedHashes, nil return &loadedHashes, nil
} }

View File

@ -150,7 +150,10 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
}) })
} }
return foundMatches, nil tl.logTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
} }
foundHashes := make(map[uint64]struct{}) foundHashes := make(map[uint64]struct{})

View File

@ -69,7 +69,7 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
} }
} }
} }
if exactOnly { if exactOnly && len(exactMatches) > 0 {
return exactMatches, nil return exactMatches, nil
} }
exactMatches = append(exactMatches, matches...) exactMatches = append(exactMatches, matches...)