Compare commits
3 Commits
a2765b0582
...
bf23bb5a4c
Author | SHA1 | Date | |
---|---|---|---|
|
bf23bb5a4c | ||
|
d7946c2aaf | ||
|
f54b4b1d9d |
53
BasicMap.go
53
BasicMap.go
@ -41,6 +41,32 @@ func (b *basicMapStorage) atleast(kind goimagehash.Kind, maxDistance int, search
|
|||||||
return matchingHashes
|
return matchingHashes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (b *basicMapStorage) exactMatches(hashes []Hash, max int) []Result {
|
||||||
|
var foundMatches []Result
|
||||||
|
for _, hash := range hashes {
|
||||||
|
mappedIds := map[*[]ID]bool{}
|
||||||
|
|
||||||
|
index, count := b.findHash(hash)
|
||||||
|
if count > 0 {
|
||||||
|
for _, storedHash := range (*b.getCurrentHashes(hash.Kind))[index : index+count] {
|
||||||
|
ids := b.ids[storedHash.ID]
|
||||||
|
if mappedIds[ids] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
mappedIds[ids] = true
|
||||||
|
|
||||||
|
foundMatches = append(foundMatches, Result{
|
||||||
|
Distance: 0,
|
||||||
|
Hash: storedHash.Hash,
|
||||||
|
IDs: ToIDList(*b.ids[storedHash.ID]),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return foundMatches
|
||||||
|
}
|
||||||
|
|
||||||
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||||
var (
|
var (
|
||||||
foundMatches []Result
|
foundMatches []Result
|
||||||
@ -52,31 +78,12 @@ func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]
|
|||||||
defer b.hashMutex.RUnlock()
|
defer b.hashMutex.RUnlock()
|
||||||
|
|
||||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||||
for _, hash := range hashes {
|
foundMatches = b.exactMatches(hashes, max)
|
||||||
mappedIds := map[*[]ID]bool{}
|
|
||||||
|
|
||||||
index, count := b.findHash(hash)
|
|
||||||
if count > 0 {
|
|
||||||
for _, storedHash := range (*b.getCurrentHashes(hash.Kind))[index : index+count] {
|
|
||||||
ids := b.ids[storedHash.ID]
|
|
||||||
if mappedIds[ids] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
mappedIds[ids] = true
|
|
||||||
|
|
||||||
foundMatches = append(foundMatches, Result{
|
|
||||||
Distance: 0,
|
|
||||||
Hash: storedHash.Hash,
|
|
||||||
IDs: ToIDList(*b.ids[storedHash.ID]),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
tl.logTime("Search Exact")
|
tl.logTime("Search Exact")
|
||||||
|
if len(foundMatches) > 0 {
|
||||||
return foundMatches, nil
|
return foundMatches, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
foundHashes := make(map[uint64]struct{})
|
foundHashes := make(map[uint64]struct{})
|
||||||
|
@ -22,6 +22,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"runtime/debug"
|
||||||
"runtime/pprof"
|
"runtime/pprof"
|
||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
@ -44,7 +45,7 @@ import (
|
|||||||
|
|
||||||
type Server struct {
|
type Server struct {
|
||||||
httpServer *http.Server
|
httpServer *http.Server
|
||||||
mux *http.ServeMux
|
mux *CHMux
|
||||||
BaseURL *url.URL
|
BaseURL *url.URL
|
||||||
hashes ch.HashStorage
|
hashes ch.HashStorage
|
||||||
Context context.Context
|
Context context.Context
|
||||||
@ -54,6 +55,7 @@ type Server struct {
|
|||||||
hashingQueue chan ch.Im
|
hashingQueue chan ch.Im
|
||||||
mappingQueue chan ch.ImageHash
|
mappingQueue chan ch.ImageHash
|
||||||
onlyHashNewIDs bool
|
onlyHashNewIDs bool
|
||||||
|
version string
|
||||||
}
|
}
|
||||||
|
|
||||||
var bufPool = &sync.Pool{
|
var bufPool = &sync.Pool{
|
||||||
@ -128,12 +130,28 @@ type Opts struct {
|
|||||||
onlyHashNewIDs bool
|
onlyHashNewIDs bool
|
||||||
deleteHashedImages bool
|
deleteHashedImages bool
|
||||||
path string
|
path string
|
||||||
|
version string
|
||||||
|
addr string
|
||||||
|
debugPort string
|
||||||
|
|
||||||
cv CVOpts
|
cv CVOpts
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
opts := Opts{format: ch.Msgpack, storageType: BasicMap} // flag is weird
|
version := "devel"
|
||||||
|
buildInfo, buildInfoFound := debug.ReadBuildInfo()
|
||||||
|
versionInfo := strings.SplitN(buildInfo.Main.Version, "-", 3)
|
||||||
|
if buildInfoFound {
|
||||||
|
switch len(versionInfo) {
|
||||||
|
default:
|
||||||
|
version = buildInfo.Main.Version
|
||||||
|
case 2:
|
||||||
|
version = versionInfo[1]
|
||||||
|
case 3:
|
||||||
|
version = versionInfo[0] + "-" + versionInfo[2]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
opts := Opts{format: ch.Msgpack, storageType: BasicMap, version: version} // flag is weird
|
||||||
wd, err := os.Getwd()
|
wd, err := os.Getwd()
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -141,10 +159,9 @@ func main() {
|
|||||||
} else {
|
} else {
|
||||||
wd = filepath.Join(wd, "comic-hasher")
|
wd = filepath.Join(wd, "comic-hasher")
|
||||||
}
|
}
|
||||||
go func() {
|
|
||||||
log.Println(http.ListenAndServe("localhost:6060", nil))
|
|
||||||
}()
|
|
||||||
flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file")
|
flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file")
|
||||||
|
flag.StringVar(&opts.addr, "listen", ":8080", "Address to listen on")
|
||||||
|
flag.StringVar(&opts.debugPort, "debug-port", "", "Port to listen to for debug info")
|
||||||
|
|
||||||
flag.StringVar(&opts.path, "path", wd, "Path for comic-hasher to store files")
|
flag.StringVar(&opts.path, "path", wd, "Path for comic-hasher to store files")
|
||||||
flag.StringVar(&opts.coverPath, "cover-path", "", "Path to local covers to add to hash database. Must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif")
|
flag.StringVar(&opts.coverPath, "cover-path", "", "Path to local covers to add to hash database. Must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif")
|
||||||
@ -166,6 +183,11 @@ func main() {
|
|||||||
flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded")
|
flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
|
if opts.debugPort != "" {
|
||||||
|
go func() {
|
||||||
|
log.Println(http.ListenAndServe("127.0.0.1:"+opts.debugPort, nil))
|
||||||
|
}()
|
||||||
|
}
|
||||||
if opts.coverPath != "" {
|
if opts.coverPath != "" {
|
||||||
_, err := os.Stat(opts.coverPath)
|
_, err := os.Stat(opts.coverPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -322,24 +344,43 @@ func getSimpleResults(fullResults []ch.Result) []SimpleResult {
|
|||||||
})
|
})
|
||||||
|
|
||||||
// Deduplicate IDs
|
// Deduplicate IDs
|
||||||
distance := make(map[int]SimpleResult)
|
idToDistance := make(map[ch.ID]int)
|
||||||
|
|
||||||
for _, fullResult := range fullResults {
|
for _, fullResult := range fullResults {
|
||||||
simple, ok := distance[fullResult.Distance]
|
for domain, idlist := range fullResult.IDs {
|
||||||
if !ok {
|
for _, idStr := range idlist {
|
||||||
simple.IDList = make(ch.IDList)
|
id := ch.ID{
|
||||||
}
|
Domain: domain,
|
||||||
for source, ids := range fullResult.IDs {
|
ID: idStr,
|
||||||
for _, id := range ids {
|
}
|
||||||
simple.IDList[source] = ch.Insert(simple.IDList[source], id)
|
if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
|
||||||
|
idToDistance[id] = fullResult.Distance
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Group by distance
|
||||||
|
distanceMap := make(map[int]SimpleResult)
|
||||||
|
for id, distance := range idToDistance {
|
||||||
|
var (
|
||||||
|
sr SimpleResult
|
||||||
|
ok bool
|
||||||
|
)
|
||||||
|
if sr, ok = distanceMap[distance]; !ok {
|
||||||
|
sr.IDList = make(ch.IDList)
|
||||||
|
}
|
||||||
|
sr.Distance = distance
|
||||||
|
sr.IDList[id.Domain] = append(sr.IDList[id.Domain], id.ID)
|
||||||
|
distanceMap[distance] = sr
|
||||||
|
}
|
||||||
|
|
||||||
// turn into array
|
// turn into array
|
||||||
for _, sr := range distance {
|
for _, sr := range distanceMap {
|
||||||
simpleResult = append(simpleResult, sr)
|
simpleResult = append(simpleResult, sr)
|
||||||
}
|
}
|
||||||
|
slices.SortFunc(simpleResult, func(a, b SimpleResult) int {
|
||||||
|
return cmp.Compare(a.Distance, b.Distance)
|
||||||
|
})
|
||||||
return simpleResult
|
return simpleResult
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -746,6 +787,14 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CHMux is an http.ServeMux wrapper that stamps every response with a
// Server header naming the running comic-hasher version.
type CHMux struct {
	version string // appended to "Comic-Hasher " in the Server header
	*http.ServeMux
}

// ServeHTTP sets the Server response header and then dispatches the request
// to the embedded ServeMux.
//
// Declaring this method shadows the ServeHTTP promoted from the embedded
// *http.ServeMux, so the mux has to be invoked explicitly; without that call
// no registered route is ever reached and every request gets an empty 200.
func (m *CHMux) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Server", "Comic-Hasher "+m.version)
	m.ServeMux.ServeHTTP(w, r)
}
|
||||||
func startServer(opts Opts) {
|
func startServer(opts Opts) {
|
||||||
imaging.SetMaxProcs(2)
|
imaging.SetMaxProcs(2)
|
||||||
if opts.cpuprofile != "" {
|
if opts.cpuprofile != "" {
|
||||||
@ -757,7 +806,7 @@ func startServer(opts Opts) {
|
|||||||
defer pprof.StopCPUProfile()
|
defer pprof.StopCPUProfile()
|
||||||
}
|
}
|
||||||
|
|
||||||
mux := http.NewServeMux()
|
mux := &CHMux{opts.version, &http.ServeMux{}}
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
server := Server{
|
server := Server{
|
||||||
@ -769,13 +818,14 @@ func startServer(opts Opts) {
|
|||||||
mappingQueue: make(chan ch.ImageHash, 1),
|
mappingQueue: make(chan ch.ImageHash, 1),
|
||||||
mux: mux,
|
mux: mux,
|
||||||
httpServer: &http.Server{
|
httpServer: &http.Server{
|
||||||
Addr: ":8080",
|
Addr: opts.addr,
|
||||||
Handler: mux,
|
Handler: mux,
|
||||||
ReadTimeout: 10 * time.Second,
|
ReadTimeout: 10 * time.Second,
|
||||||
WriteTimeout: 10 * time.Second,
|
WriteTimeout: 10 * time.Second,
|
||||||
MaxHeaderBytes: 1 << 20,
|
MaxHeaderBytes: 1 << 20,
|
||||||
},
|
},
|
||||||
onlyHashNewIDs: opts.onlyHashNewIDs,
|
onlyHashNewIDs: opts.onlyHashNewIDs,
|
||||||
|
version: opts.version,
|
||||||
}
|
}
|
||||||
Notify(server.signalQueue)
|
Notify(server.signalQueue)
|
||||||
var err error
|
var err error
|
||||||
|
2
go.mod
2
go.mod
@ -15,6 +15,7 @@ require (
|
|||||||
github.com/vmihailenco/msgpack v4.0.4+incompatible
|
github.com/vmihailenco/msgpack v4.0.4+incompatible
|
||||||
go.etcd.io/bbolt v1.4.0
|
go.etcd.io/bbolt v1.4.0
|
||||||
golang.org/x/image v0.24.0
|
golang.org/x/image v0.24.0
|
||||||
|
golang.org/x/sys v0.30.0
|
||||||
golang.org/x/text v0.22.0
|
golang.org/x/text v0.22.0
|
||||||
gonum.org/v1/gonum v0.15.1
|
gonum.org/v1/gonum v0.15.1
|
||||||
modernc.org/sqlite v1.35.0
|
modernc.org/sqlite v1.35.0
|
||||||
@ -48,7 +49,6 @@ require (
|
|||||||
github.com/ulikunitz/xz v0.5.10 // indirect
|
github.com/ulikunitz/xz v0.5.10 // indirect
|
||||||
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
|
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
|
||||||
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
|
golang.org/x/exp v0.0.0-20250218142911-aa4b98e5adaa // indirect
|
||||||
golang.org/x/sys v0.30.0 // indirect
|
|
||||||
google.golang.org/appengine v1.6.8 // indirect
|
google.golang.org/appengine v1.6.8 // indirect
|
||||||
google.golang.org/protobuf v1.36.5 // indirect
|
google.golang.org/protobuf v1.36.5 // indirect
|
||||||
modernc.org/libc v1.61.13 // indirect
|
modernc.org/libc v1.61.13 // indirect
|
||||||
|
9
map.go
9
map.go
@ -23,8 +23,13 @@ func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Resul
|
|||||||
m.hashMutex.RLock()
|
m.hashMutex.RLock()
|
||||||
defer m.hashMutex.RUnlock()
|
defer m.hashMutex.RUnlock()
|
||||||
|
|
||||||
if exactOnly {
|
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||||
return m.basicMapStorage.GetMatches(hashes, max, exactOnly)
|
foundMatches = m.exactMatches(hashes, max)
|
||||||
|
|
||||||
|
tl.logTime("Search Exact")
|
||||||
|
if len(foundMatches) > 0 {
|
||||||
|
return foundMatches, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
tl.resetTime()
|
tl.resetTime()
|
||||||
defer tl.logTime("Search Complete")
|
defer tl.logTime("Search Complete")
|
||||||
|
@ -151,6 +151,7 @@ func DecodeHashesV0(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
|||||||
if len(loadedHashes) == 0 {
|
if len(loadedHashes) == 0 {
|
||||||
return nil, NoHashes
|
return nil, NoHashes
|
||||||
}
|
}
|
||||||
|
fmt.Println("Loaded V0 hashes")
|
||||||
return ConvertHashesV0(loadedHashes), nil
|
return ConvertHashesV0(loadedHashes), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -167,6 +168,7 @@ func DecodeHashesV1(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
|||||||
if hashesCount < 1 {
|
if hashesCount < 1 {
|
||||||
return nil, NoHashes
|
return nil, NoHashes
|
||||||
}
|
}
|
||||||
|
fmt.Println("Loaded V1 hashes")
|
||||||
return ConvertHashesV1(loadedHashes), nil
|
return ConvertHashesV1(loadedHashes), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,6 +182,7 @@ func DecodeHashesV2(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
|||||||
return nil, NoHashes
|
return nil, NoHashes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fmt.Println("Loaded V2 hashes")
|
||||||
return &loadedHashes, nil
|
return &loadedHashes, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -150,7 +150,10 @@ func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Re
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
return foundMatches, nil
|
tl.logTime("Search Exact")
|
||||||
|
if len(foundMatches) > 0 {
|
||||||
|
return foundMatches, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
foundHashes := make(map[uint64]struct{})
|
foundHashes := make(map[uint64]struct{})
|
||||||
|
@ -69,7 +69,7 @@ func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, e
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if exactOnly {
|
if exactOnly && len(exactMatches) > 0 {
|
||||||
return exactMatches, nil
|
return exactMatches, nil
|
||||||
}
|
}
|
||||||
exactMatches = append(exactMatches, matches...)
|
exactMatches = append(exactMatches, matches...)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user