Compare commits

...

3 Commits

Author SHA1 Message Date
Timmy Welch
bf23bb5a4c Add Server http Header and add -listen and -debug options 2025-04-13 15:58:34 -07:00
Timmy Welch
d7946c2aaf Fix simple results 2025-04-13 15:57:29 -07:00
Timmy Welch
f54b4b1d9d Fix exact matches to match previous behavior 2025-04-13 15:54:17 -07:00
7 changed files with 112 additions and 44 deletions

View File

@ -41,17 +41,8 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
return matchingHashes
}
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var (
foundMatches []Result
tl timeLog
)
tl.resetTime()
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
var foundMatches []Result
for _, hash := range hashes {
mappedIds := map[*[]ID]bool{}
@ -73,11 +64,27 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
}
}
return foundMatches
}
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
var (
foundMatches []Result
tl timeLog
)
tl.resetTime()
defer tl.logTime(fmt.Sprintf("Search Complete: max: %v ExactOnly: %v", max, exactOnly))
b.hashMutex.RLock()
defer b.hashMutex.RUnlock()
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = b.exactMatches(hashes, max)
tl.logTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
}
foundHashes := make(map[uint64]struct{})
totalPartialHashes := 0

View File

@ -22,6 +22,7 @@ import (
"net/url"
"os"
"path/filepath"
"runtime/debug"
"runtime/pprof"
"slices"
"strconv"
@ -44,7 +45,7 @@ import (
type Server struct {
httpServer *http.Server
mux *http.ServeMux
mux *CHMux
BaseURL *url.URL
hashes ch.HashStorage
Context context.Context
@ -54,6 +55,7 @@ type Server struct {
hashingQueue chan ch.Im
mappingQueue chan ch.ImageHash
onlyHashNewIDs bool
version string
}
var bufPool = &sync.Pool{
@ -128,12 +130,28 @@ type Opts struct {
onlyHashNewIDs bool
deleteHashedImages bool
path string
version string
addr string
debugPort string
cv CVOpts
}
func main() {
opts := Opts{format: ch.Msgpack, storageType: BasicMap} // flag is weird
// Derive the version string reported by the server from the module build
// info, falling back to "devel" when no build info is embedded.
version := "devel"
buildInfo, buildInfoFound := debug.ReadBuildInfo()
// buildInfo is nil when buildInfoFound is false, so it must only be
// dereferenced inside this guard.
if buildInfoFound {
	versionInfo := strings.SplitN(buildInfo.Main.Version, "-", 3)
	switch len(versionInfo) {
	default:
		// No "-" separator: use the version as-is.
		version = buildInfo.Main.Version
	case 2:
		version = versionInfo[1]
	case 3:
		// Drop the middle component and keep the first and last.
		version = versionInfo[0] + "-" + versionInfo[2]
	}
}
opts := Opts{format: ch.Msgpack, storageType: BasicMap, version: version} // flag is weird
wd, err := os.Getwd()
fmt.Println(err)
if err != nil {
@ -141,10 +159,9 @@ func main() {
} else {
wd = filepath.Join(wd, "comic-hasher")
}
go func() {
log.Println(http.ListenAndServe("localhost:6060", nil))
}()
flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file")
flag.StringVar(&opts.addr, "listen", ":8080", "Address to listen on")
flag.StringVar(&opts.debugPort, "debug-port", "", "Port to listen to for debug info")
flag.StringVar(&opts.path, "path", wd, "Path for comic-hasher to store files")
flag.StringVar(&opts.coverPath, "cover-path", "", "Path to local covers to add to hash database. Must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif")
@ -166,6 +183,11 @@ func main() {
flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded")
flag.Parse()
if opts.debugPort != "" {
go func() {
log.Println(http.ListenAndServe("127.0.0.1:"+opts.debugPort, nil))
}()
}
if opts.coverPath != "" {
_, err := os.Stat(opts.coverPath)
if err != nil {
@ -322,24 +344,43 @@ func getSimpleResults(fullResults []ch.Result) []SimpleResult {
})
// Deduplicate IDs
distance := make(map[int]SimpleResult)
idToDistance := make(map[ch.ID]int)
for _, fullResult := range fullResults {
simple, ok := distance[fullResult.Distance]
if !ok {
simple.IDList = make(ch.IDList)
for domain, idlist := range fullResult.IDs {
for _, idStr := range idlist {
id := ch.ID{
Domain: domain,
ID: idStr,
}
for source, ids := range fullResult.IDs {
for _, id := range ids {
simple.IDList[source] = ch.Insert(simple.IDList[source], id)
if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
idToDistance[id] = fullResult.Distance
}
}
}
}
// Group by distance
distanceMap := make(map[int]SimpleResult)
for id, distance := range idToDistance {
var (
sr SimpleResult
ok bool
)
if sr, ok = distanceMap[distance]; !ok {
sr.IDList = make(ch.IDList)
}
sr.Distance = distance
sr.IDList[id.Domain] = append(sr.IDList[id.Domain], id.ID)
distanceMap[distance] = sr
}
// turn into array
for _, sr := range distance {
for _, sr := range distanceMap {
simpleResult = append(simpleResult, sr)
}
slices.SortFunc(simpleResult, func(a, b SimpleResult) int {
return cmp.Compare(a.Distance, b.Distance)
})
return simpleResult
}
@ -746,6 +787,14 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
}
}
// CHMux wraps an *http.ServeMux and stamps every response with a Server
// header identifying the Comic-Hasher version before routing the request.
type CHMux struct {
	version string
	*http.ServeMux
}

// ServeHTTP sets the "Server" response header and then dispatches the request
// to the embedded ServeMux. Defining this method shadows the promoted
// ServeMux.ServeHTTP, so the explicit delegation is required; without it no
// registered handler would ever run.
func (CHM *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Server", "Comic-Hasher "+CHM.version)
	CHM.ServeMux.ServeHTTP(w, r)
}
func startServer(opts Opts) {
imaging.SetMaxProcs(2)
if opts.cpuprofile != "" {
@ -757,7 +806,7 @@ func startServer(opts Opts) {
defer pprof.StopCPUProfile()
}
mux := http.NewServeMux()
mux := &CHMux{opts.version, &http.ServeMux{}}
ctx, cancel := context.WithCancel(context.Background())
server := Server{
@ -769,13 +818,14 @@ func startServer(opts Opts) {
mappingQueue: make(chan ch.ImageHash, 1),
mux: mux,
httpServer: &http.Server{
Addr: ":8080",
Addr: opts.addr,
Handler: mux,
ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second,
MaxHeaderBytes: 1 << 20,
},
onlyHashNewIDs: opts.onlyHashNewIDs,
version: opts.version,
}
Notify(server.signalQueue)
var err error

2
go.mod
View File

@ -15,6 +15,7 @@ require (
github.com/vmihailenco/msgpack v4.0.4+incompatible
go.etcd.io/bbolt v1.4.0
golang.org/x/image v0.24.0
golang.org/x/sys v0.30.0
golang.org/x/text v0.22.0
gonum.org/v1/gonum v0.15.1
modernc.org/sqlite v1.35.0
@ -48,7 +49,6 @@ require (
github.com/ulikunitz/xz v0.5.10 // indirect
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
golang.org/x/sys v0.30.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.36.5 // indirect
modernc.org/libc v1.61.13 // indirect

9
map.go
View File

@ -23,8 +23,13 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
m.hashMutex.RLock()
defer m.hashMutex.RUnlock()
if exactOnly {
return m.basicMapStorage.GetMatches(hashes, max, exactOnly)
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
foundMatches = m.exactMatches(hashes, max)
tl.logTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
}
tl.resetTime()
defer tl.logTime("Search Complete")

View File

@ -151,6 +151,7 @@ func DecodeHashesV0(decode Decoder, hashes []byte) (*SavedHashes, error) {
if len(loadedHashes) == 0 {
return nil, NoHashes
}
fmt.Println("Loaded V0 hashes")
return ConvertHashesV0(loadedHashes), nil
}
@ -167,6 +168,7 @@ func DecodeHashesV1(decode Decoder, hashes []byte) (*SavedHashes, error) {
if hashesCount < 1 {
return nil, NoHashes
}
fmt.Println("Loaded V1 hashes")
return ConvertHashesV1(loadedHashes), nil
}
@ -180,6 +182,7 @@ func DecodeHashesV2(decode Decoder, hashes []byte) (*SavedHashes, error) {
return nil, NoHashes
}
fmt.Println("Loaded V2 hashes")
return &loadedHashes, nil
}

View File

@ -150,8 +150,11 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
})
}
tl.logTime("Search Exact")
if len(foundMatches) > 0 {
return foundMatches, nil
}
}
foundHashes := make(map[uint64]struct{})

View File

@ -69,7 +69,7 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
}
}
}
if exactOnly {
if exactOnly && len(exactMatches) > 0 {
return exactMatches, nil
}
exactMatches = append(exactMatches, matches...)