Compare commits
4 Commits
007a726764
...
0928ed6ccf
Author | SHA1 | Date | |
---|---|---|---|
|
0928ed6ccf | ||
|
b1de95021a | ||
|
1955444dcf | ||
|
0069ffd5cb |
@ -13,7 +13,7 @@ repos:
|
||||
- id: go-imports
|
||||
args: [-w]
|
||||
- repo: https://github.com/golangci/golangci-lint
|
||||
rev: v1.59.1
|
||||
rev: v1.60.3
|
||||
hooks:
|
||||
- id: golangci-lint
|
||||
- repo: https://github.com/asottile/setup-cfg-fmt
|
||||
|
151
BasicMap.go
Normal file
151
BasicMap.go
Normal file
@ -0,0 +1,151 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/bits"
|
||||
"sync"
|
||||
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
)
|
||||
|
||||
type basicMapStorage struct {
|
||||
hashMutex sync.RWMutex
|
||||
|
||||
ids map[ID]*[]ID
|
||||
hashes [3]map[uint64]*[]ID
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) Atleast(hashKind goimagehash.Kind, maxDistance int, searchHash uint64) []Result {
|
||||
hashType := int(hashKind) - 1
|
||||
matchingHashes := make([]Result, 0, 100) // hope that we don't need all of them
|
||||
for storedHash, ids := range b.hashes[hashType] {
|
||||
distance := bits.OnesCount64(searchHash ^ storedHash)
|
||||
if distance <= maxDistance {
|
||||
matchingHashes = append(matchingHashes, Result{ToIDList(*ids), distance, Hash{storedHash, hashKind}})
|
||||
}
|
||||
}
|
||||
return matchingHashes
|
||||
}
|
||||
func (b *basicMapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
var foundMatches []Result
|
||||
b.hashMutex.RLock()
|
||||
defer b.hashMutex.RUnlock()
|
||||
resetTime()
|
||||
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
for _, hash := range hashes {
|
||||
hashType := int(hash.Kind) - 1
|
||||
ids := b.hashes[hashType][hash.Hash]
|
||||
if ids != nil && len(*ids) > 0 {
|
||||
foundMatches = append(foundMatches, Result{
|
||||
Distance: 0,
|
||||
Hash: hash,
|
||||
IDs: ToIDList(*ids),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// If we have exact matches don't bother with other matches
|
||||
if len(foundMatches) > 0 && exactOnly {
|
||||
return foundMatches, nil
|
||||
}
|
||||
logTime("Search Exact")
|
||||
}
|
||||
|
||||
foundHashes := make(map[uint64]struct{})
|
||||
totalPartialHashes := 0
|
||||
for _, hash := range hashes {
|
||||
for _, match := range b.Atleast(hash.Kind, max, hash.Hash) {
|
||||
_, alreadyMatched := foundHashes[match.Hash.Hash]
|
||||
if alreadyMatched {
|
||||
continue
|
||||
}
|
||||
foundHashes[match.Hash.Hash] = struct{}{}
|
||||
foundMatches = append(foundMatches, match)
|
||||
}
|
||||
|
||||
}
|
||||
fmt.Println("Total partial hashes tested:", totalPartialHashes, len(foundHashes))
|
||||
logTime("Search Complete")
|
||||
go b.printSizes()
|
||||
return foundMatches, nil
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) MapHashes(hash ImageHash) {
|
||||
for _, ih := range hash.Hashes {
|
||||
var (
|
||||
hashType = int(ih.Kind) - 1
|
||||
)
|
||||
|
||||
*b.hashes[hashType][ih.Hash] = InsertID((*b.hashes[hashType][ih.Hash]), hash.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) DecodeHashes(hashes SavedHashes) error {
|
||||
for hashType, sourceHashes := range hashes.Hashes {
|
||||
b.hashes[hashType] = make(map[uint64]*[]ID, len(sourceHashes))
|
||||
for savedHash, idlistLocation := range sourceHashes {
|
||||
b.hashes[hashType][savedHash] = &hashes.IDs[idlistLocation]
|
||||
}
|
||||
}
|
||||
b.printSizes()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) printSizes() {
|
||||
// fmt.Println("Size of", "hashes:", size.Of(b.hashes)/1024/1024, "MB")
|
||||
// fmt.Println("Size of", "ids:", size.Of(b.ids)/1024/1024, "MB")
|
||||
// fmt.Println("Size of", "basicMapStorage:", size.Of(b)/1024/1024, "MB")
|
||||
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) EncodeHashes() (SavedHashes, error) {
|
||||
hashes := SavedHashes{}
|
||||
idmap := map[*[]ID]int{}
|
||||
for _, ids := range b.ids {
|
||||
if _, ok := idmap[ids]; ok {
|
||||
continue
|
||||
}
|
||||
hashes.IDs = append(hashes.IDs, *ids)
|
||||
idmap[ids] = len(hashes.IDs)
|
||||
}
|
||||
for hashType, hashToID := range b.hashes {
|
||||
for hash, ids := range hashToID {
|
||||
hashes.Hashes[hashType][hash] = idmap[ids]
|
||||
}
|
||||
}
|
||||
return hashes, nil
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) AssociateIDs(newids []NewIDs) {
|
||||
for _, newid := range newids {
|
||||
ids, found := b.ids[newid.OldID]
|
||||
if !found {
|
||||
msg := "No IDs belonging to " + newid.OldID.Domain + "exist on this server"
|
||||
panic(msg)
|
||||
}
|
||||
*ids = InsertID(*ids, newid.NewID)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *basicMapStorage) GetIDs(id ID) IDList {
|
||||
ids, found := b.ids[id]
|
||||
if !found {
|
||||
msg := "No IDs belonging to " + id.Domain + "exist on this server"
|
||||
panic(msg)
|
||||
}
|
||||
return ToIDList(*ids)
|
||||
}
|
||||
|
||||
func NewBasicMapStorage() (HashStorage, error) {
|
||||
storage := &basicMapStorage{
|
||||
hashMutex: sync.RWMutex{},
|
||||
|
||||
hashes: [3]map[uint64]*[]ID{
|
||||
make(map[uint64]*[]ID),
|
||||
make(map[uint64]*[]ID),
|
||||
make(map[uint64]*[]ID),
|
||||
},
|
||||
}
|
||||
return storage, nil
|
||||
}
|
@ -29,9 +29,10 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/kr/pretty"
|
||||
|
||||
"github.com/vmihailenco/msgpack/v5"
|
||||
|
||||
"github.com/disintegration/imaging"
|
||||
_ "golang.org/x/image/tiff"
|
||||
_ "golang.org/x/image/vp8"
|
||||
_ "golang.org/x/image/vp8l"
|
||||
@ -39,37 +40,20 @@ import (
|
||||
|
||||
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
// "github.com/google/uuid"
|
||||
// "github.com/zitadel/oidc/pkg/client/rp"
|
||||
// httphelper "github.com/zitadel/oidc/pkg/http"
|
||||
// "github.com/zitadel/oidc/pkg/oidc"
|
||||
)
|
||||
|
||||
type Server struct {
|
||||
httpServer *http.Server
|
||||
mux *http.ServeMux
|
||||
BaseURL *url.URL
|
||||
// token chan<- *oidc.Tokens
|
||||
// Partial hashes are a uint64 split into 8 pieces or a unint64 for quick lookup, the value is an index to covers
|
||||
PartialAhash [8]map[uint8][]uint64
|
||||
PartialDhash [8]map[uint8][]uint64
|
||||
PartialPhash [8]map[uint8][]uint64
|
||||
FullAhash map[uint64][]string // Maps ahash's to lists of ID's domain:id
|
||||
FullDhash map[uint64][]string // Maps dhash's to lists of ID's domain:id
|
||||
FullPhash map[uint64][]string // Maps phash's to lists of ID's domain:id
|
||||
ids map[ch.Source]map[string]struct{}
|
||||
hashMutex sync.RWMutex
|
||||
httpServer *http.Server
|
||||
mux *http.ServeMux
|
||||
BaseURL *url.URL
|
||||
hashes ch.HashStorage
|
||||
quit chan struct{}
|
||||
signalQueue chan os.Signal
|
||||
readerQueue chan string
|
||||
hashingQueue chan ch.Im
|
||||
mappingQueue chan ch.Hash
|
||||
mappingQueue chan ch.ImageHash
|
||||
}
|
||||
|
||||
// var key = []byte(uuid.New().String())[:16]
|
||||
|
||||
type savedHashes map[ch.Source]map[string][3]uint64
|
||||
|
||||
type Format int
|
||||
|
||||
const (
|
||||
@ -94,9 +78,6 @@ func (f Format) String() string {
|
||||
return "Unknown"
|
||||
}
|
||||
|
||||
type Encoder func(any) ([]byte, error)
|
||||
type Decoder func([]byte, interface{}) error
|
||||
|
||||
func (f *Format) Set(s string) error {
|
||||
if format, known := formatValues[strings.ToLower(s)]; known {
|
||||
*f = format
|
||||
@ -106,27 +87,76 @@ func (f *Format) Set(s string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type Storage int
|
||||
|
||||
const (
|
||||
Map = iota + 1
|
||||
BasicMap
|
||||
Sqlite
|
||||
Sqlite3
|
||||
VPTree
|
||||
)
|
||||
|
||||
var storageNames = map[Storage]string{
|
||||
Map: "map",
|
||||
BasicMap: "basicmap",
|
||||
Sqlite: "sqlite",
|
||||
Sqlite3: "sqlite3",
|
||||
VPTree: "vptree",
|
||||
}
|
||||
|
||||
var storageValues = map[string]Storage{
|
||||
"map": Map,
|
||||
"basicmap": BasicMap,
|
||||
"sqlite": Sqlite,
|
||||
"sqlite3": Sqlite3,
|
||||
"vptree": VPTree,
|
||||
}
|
||||
|
||||
func (f Storage) String() string {
|
||||
if name, known := storageNames[f]; known {
|
||||
return name
|
||||
}
|
||||
return "Unknown"
|
||||
}
|
||||
|
||||
func (f *Storage) Set(s string) error {
|
||||
if storage, known := storageValues[strings.ToLower(s)]; known {
|
||||
*f = storage
|
||||
} else {
|
||||
return fmt.Errorf("Unknown storage type: %d", f)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type Encoder func(any) ([]byte, error)
|
||||
type Decoder func([]byte, interface{}) error
|
||||
|
||||
type Opts struct {
|
||||
cpuprofile string
|
||||
coverPath string
|
||||
sqlitePath string
|
||||
loadEmbeddedHashes bool
|
||||
saveEmbeddedHashes bool
|
||||
format Format
|
||||
hashesPath string
|
||||
storageType Storage
|
||||
}
|
||||
|
||||
func main() {
|
||||
opts := Opts{format: Msgpack} // flag is weird
|
||||
opts := Opts{format: Msgpack, storageType: BasicMap} // flag is weird
|
||||
go func() {
|
||||
log.Println(http.ListenAndServe("localhost:6060", nil))
|
||||
}()
|
||||
flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file")
|
||||
|
||||
flag.StringVar(&opts.coverPath, "cover-path", "", "Path to covers to add to hash database. must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif")
|
||||
flag.StringVar(&opts.sqlitePath, "sqlite-path", "tmp.sqlite", "Path to sqlite database to use for matching hashes, substantialy reduces memory usage")
|
||||
flag.BoolVar(&opts.loadEmbeddedHashes, "use-embedded-hashes", true, "Use hashes embedded in the application as a starting point")
|
||||
flag.BoolVar(&opts.saveEmbeddedHashes, "save-embedded-hashes", false, "Save hashes even if we loaded the embedded hashes")
|
||||
flag.StringVar(&opts.hashesPath, "hashes", "hashes.gz", "Path to optionally gziped hashes in msgpack or json format. You must disable embedded hashes to use this option")
|
||||
flag.Var(&opts.format, "save-format", "Specify the format to export hashes to (json, msgpack)")
|
||||
flag.Var(&opts.storageType, "storage-type", "Specify the storage type used internally to search hashes (sqlite,sqlite3,map,basicmap,vptree)")
|
||||
flag.Parse()
|
||||
|
||||
if opts.coverPath != "" {
|
||||
@ -135,6 +165,8 @@ func main() {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
opts.sqlitePath, _ = filepath.Abs(opts.sqlitePath)
|
||||
log.Println(pretty.Formatter(opts))
|
||||
startServer(opts)
|
||||
}
|
||||
|
||||
@ -224,25 +256,25 @@ func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
|
||||
writeJson(w, http.StatusBadRequest, result{Msg: msg})
|
||||
return
|
||||
}
|
||||
if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
|
||||
msg := "No IDs belonging to " + domain + "exist on this server"
|
||||
log.Println(msg)
|
||||
writeJson(w, http.StatusBadRequest, result{Msg: msg})
|
||||
}
|
||||
// if _, domainExists := s.ids[ch.Source(domain)]; !domainExists {
|
||||
// msg := "No IDs belonging to " + domain + "exist on this server"
|
||||
// log.Println(msg)
|
||||
// writeJson(w, http.StatusBadRequest, result{Msg: msg})
|
||||
// }
|
||||
log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID)
|
||||
found := false
|
||||
for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
|
||||
for i, idlist := range hash {
|
||||
if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
|
||||
found = true
|
||||
hash[i] = ch.Insert(idlist, newDomain+":"+newID)
|
||||
if _, ok := s.ids[ch.Source(newDomain)]; !ok {
|
||||
s.ids[ch.Source(newDomain)] = make(map[string]struct{})
|
||||
}
|
||||
s.ids[ch.Source(newDomain)][newID] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
// for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} {
|
||||
// for i, idlist := range hash {
|
||||
// if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash {
|
||||
// found = true
|
||||
// hash[i] = ch.Insert(idlist, newDomain+":"+newID)
|
||||
// if _, ok := s.ids[ch.Source(newDomain)]; !ok {
|
||||
// s.ids[ch.Source(newDomain)] = make(map[string]struct{})
|
||||
// }
|
||||
// s.ids[ch.Source(newDomain)][newID] = struct{}{}
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
if found {
|
||||
writeJson(w, http.StatusOK, result{Msg: "New ID added"})
|
||||
} else {
|
||||
@ -250,70 +282,6 @@ func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) getMatches(ahash, dhash, phash uint64, max int, skipNonExact bool) []ch.Result {
|
||||
var foundMatches []ch.Result
|
||||
s.hashMutex.RLock()
|
||||
defer s.hashMutex.RUnlock()
|
||||
|
||||
if skipNonExact { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
if matchedResults, ok := s.FullAhash[ahash]; ok && ahash != 0 {
|
||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: ahash, Kind: goimagehash.AHash}})
|
||||
}
|
||||
if matchedResults, ok := s.FullDhash[dhash]; ok && dhash != 0 {
|
||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: dhash, Kind: goimagehash.DHash}})
|
||||
}
|
||||
if matchedResults, ok := s.FullPhash[phash]; ok && phash != 0 {
|
||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: phash, Kind: goimagehash.PHash}})
|
||||
}
|
||||
|
||||
// If we have exact matches don't bother with other matches
|
||||
if len(foundMatches) > 0 && skipNonExact {
|
||||
return foundMatches
|
||||
}
|
||||
}
|
||||
|
||||
foundHashes := make(map[uint64]struct{})
|
||||
if ahash != 0 {
|
||||
for i, partialHash := range ch.SplitHash(ahash) {
|
||||
for _, match := range ch.Atleast(max, ahash, s.PartialAhash[i][partialHash]) {
|
||||
_, alreadyMatched := foundHashes[match.Hash]
|
||||
if matchedResults, ok := s.FullAhash[match.Hash]; ok && !alreadyMatched {
|
||||
foundHashes[match.Hash] = struct{}{}
|
||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.AHash}})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foundHashes = make(map[uint64]struct{})
|
||||
if dhash != 0 {
|
||||
for i, partialHash := range ch.SplitHash(dhash) {
|
||||
for _, match := range ch.Atleast(max, dhash, s.PartialDhash[i][partialHash]) {
|
||||
_, alreadyMatched := foundHashes[match.Hash]
|
||||
if matchedResults, ok := s.FullDhash[match.Hash]; ok && !alreadyMatched {
|
||||
foundHashes[match.Hash] = struct{}{}
|
||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.DHash}})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foundHashes = make(map[uint64]struct{})
|
||||
if phash != 0 {
|
||||
for i, partialHash := range ch.SplitHash(phash) {
|
||||
for _, match := range ch.Atleast(max, phash, s.PartialPhash[i][partialHash]) {
|
||||
_, alreadyMatched := foundHashes[match.Hash]
|
||||
if matchedResults, ok := s.FullPhash[match.Hash]; ok && !alreadyMatched {
|
||||
foundHashes[match.Hash] = struct{}{}
|
||||
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.PHash}})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return foundMatches
|
||||
}
|
||||
|
||||
type SimpleResult struct {
|
||||
Distance int
|
||||
IDList ch.IDList
|
||||
@ -323,67 +291,31 @@ func getSimpleResults(fullResults []ch.Result) []SimpleResult {
|
||||
simpleResult := make([]SimpleResult, 0, len(fullResults))
|
||||
|
||||
slices.SortFunc(fullResults, func(a, b ch.Result) int {
|
||||
return cmp.Compare(a.Distance, b.Distance)
|
||||
return cmp.Compare(a.Distance, b.Distance) * -1 // Reverses sort
|
||||
})
|
||||
|
||||
// Deduplicate IDs
|
||||
idToDistance := make(map[string]int)
|
||||
distance := make(map[int]SimpleResult)
|
||||
|
||||
for _, fullResult := range fullResults {
|
||||
for _, id := range fullResult.IDs {
|
||||
if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance {
|
||||
idToDistance[id] = fullResult.Distance
|
||||
simple, ok := distance[fullResult.Distance]
|
||||
if !ok {
|
||||
simple.IDList = make(ch.IDList)
|
||||
}
|
||||
for source, ids := range fullResult.IDs {
|
||||
for _, id := range ids {
|
||||
simple.IDList[source] = ch.Insert(simple.IDList[source], id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Group by distance
|
||||
distanceMap := make(map[int]SimpleResult)
|
||||
for id, distance := range idToDistance {
|
||||
var (
|
||||
sr SimpleResult
|
||||
ok bool
|
||||
)
|
||||
if sr, ok = distanceMap[distance]; !ok {
|
||||
sr.IDList = make(ch.IDList)
|
||||
}
|
||||
sourceID := strings.SplitN(id, ":", 2)
|
||||
sr.Distance = distance
|
||||
sr.IDList[ch.Source(sourceID[0])] = append(sr.IDList[ch.Source(sourceID[0])], sourceID[1])
|
||||
distanceMap[distance] = sr
|
||||
}
|
||||
|
||||
// turn into array
|
||||
for _, sr := range distanceMap {
|
||||
for _, sr := range distance {
|
||||
simpleResult = append(simpleResult, sr)
|
||||
}
|
||||
return simpleResult
|
||||
}
|
||||
|
||||
type APIResult struct {
|
||||
IDList ch.IDList
|
||||
Distance int
|
||||
Hash ch.ImageHash
|
||||
}
|
||||
|
||||
func getResults(fullResults []ch.Result) []APIResult {
|
||||
apiResults := make([]APIResult, 0, len(fullResults))
|
||||
for _, res := range fullResults {
|
||||
idlist := make(ch.IDList)
|
||||
for _, id := range res.IDs {
|
||||
sourceID := strings.SplitN(id, ":", 2)
|
||||
idlist[ch.Source(sourceID[0])] = append(idlist[ch.Source(sourceID[0])], sourceID[1])
|
||||
}
|
||||
apiResults = append(apiResults,
|
||||
APIResult{
|
||||
Distance: res.Distance,
|
||||
Hash: res.Hash,
|
||||
IDList: idlist,
|
||||
},
|
||||
)
|
||||
}
|
||||
return apiResults
|
||||
}
|
||||
|
||||
type result struct {
|
||||
Results any `json:"results,omitempty"`
|
||||
Msg string `json:"msg,omitempty"`
|
||||
@ -411,19 +343,20 @@ func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
var (
|
||||
values = r.URL.Query()
|
||||
ahashStr = strings.TrimSpace(values.Get("ahash"))
|
||||
dhashStr = strings.TrimSpace(values.Get("dhash"))
|
||||
phashStr = strings.TrimSpace(values.Get("phash"))
|
||||
maxStr = strings.TrimSpace(values.Get("max"))
|
||||
skipNonExact = strings.ToLower(strings.TrimSpace(values.Get("skipNonExact"))) != "false"
|
||||
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
|
||||
ahash uint64
|
||||
dhash uint64
|
||||
phash uint64
|
||||
max int = 8
|
||||
max_tmp int
|
||||
err error
|
||||
values = r.URL.Query()
|
||||
ahashStr = strings.TrimSpace(values.Get("ahash"))
|
||||
dhashStr = strings.TrimSpace(values.Get("dhash"))
|
||||
phashStr = strings.TrimSpace(values.Get("phash"))
|
||||
maxStr = strings.TrimSpace(values.Get("max"))
|
||||
exactOnly = strings.ToLower(strings.TrimSpace(values.Get("exactOnly"))) != "false"
|
||||
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
|
||||
ahash uint64
|
||||
dhash uint64
|
||||
phash uint64
|
||||
max int = 8
|
||||
max_tmp int
|
||||
err error
|
||||
hashes []ch.Hash
|
||||
)
|
||||
|
||||
if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
|
||||
@ -431,16 +364,25 @@ func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
|
||||
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
|
||||
return
|
||||
}
|
||||
if ahash > 0 {
|
||||
hashes = append(hashes, ch.Hash{ahash, goimagehash.AHash})
|
||||
}
|
||||
if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" {
|
||||
log.Printf("could not parse dhash: %s", dhashStr)
|
||||
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
|
||||
return
|
||||
}
|
||||
if dhash > 0 {
|
||||
hashes = append(hashes, ch.Hash{dhash, goimagehash.DHash})
|
||||
}
|
||||
if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" {
|
||||
log.Printf("could not parse phash: %s", phashStr)
|
||||
writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"})
|
||||
return
|
||||
}
|
||||
if phash > 0 {
|
||||
hashes = append(hashes, ch.Hash{phash, goimagehash.PHash})
|
||||
}
|
||||
if max_tmp, err = strconv.Atoi(maxStr); err != nil && maxStr != "" {
|
||||
log.Printf("Invalid Max: %s", maxStr)
|
||||
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Invalid Max: %s", maxStr)})
|
||||
@ -455,13 +397,27 @@ func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
|
||||
writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)})
|
||||
return
|
||||
}
|
||||
matches := s.getMatches(ahash, dhash, phash, max, skipNonExact)
|
||||
matches, err := s.hashes.GetMatches(hashes, max, exactOnly)
|
||||
slices.SortFunc(matches, func(a ch.Result, b ch.Result) int {
|
||||
return cmp.Compare(a.Distance, b.Distance)
|
||||
})
|
||||
log.Println(err)
|
||||
if len(matches) > 0 {
|
||||
var msg string = ""
|
||||
if err != nil {
|
||||
msg = err.Error()
|
||||
}
|
||||
if simple {
|
||||
writeJson(w, http.StatusOK, result{Results: getSimpleResults(matches)})
|
||||
writeJson(w, http.StatusOK, result{
|
||||
Results: getSimpleResults(matches),
|
||||
Msg: msg,
|
||||
})
|
||||
return
|
||||
}
|
||||
writeJson(w, http.StatusOK, result{Results: getResults(matches)})
|
||||
writeJson(w, http.StatusOK, result{
|
||||
Results: matches,
|
||||
Msg: msg,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
@ -503,69 +459,14 @@ func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
default:
|
||||
}
|
||||
s.hashingQueue <- ch.Im{Im: i, Format: format, Domain: ch.Source(domain), ID: ID, Path: ""}
|
||||
s.hashingQueue <- ch.Im{Im: i, Format: format, ID: ch.ID{Domain: ch.Source(domain), ID: ID}, Path: ""}
|
||||
writeJson(w, http.StatusOK, result{Msg: "Success"})
|
||||
}
|
||||
|
||||
func (s *Server) MapHashes(hash ch.Hash) {
|
||||
s.hashMutex.Lock()
|
||||
defer s.hashMutex.Unlock()
|
||||
s.mapHashes(hash.Ahash.GetHash(), hash.Dhash.GetHash(), hash.Phash.GetHash(), hash.Domain, hash.ID)
|
||||
}
|
||||
|
||||
func (s *Server) mapHashes(ahash, dhash, phash uint64, domain ch.Source, id string) {
|
||||
|
||||
if _, ok := s.ids[domain]; !ok {
|
||||
s.ids[domain] = make(map[string]struct{})
|
||||
}
|
||||
s.ids[domain][id] = struct{}{}
|
||||
|
||||
if _, ok := s.FullAhash[ahash]; !ok {
|
||||
s.FullAhash[ahash] = make([]string, 0, 3)
|
||||
}
|
||||
s.FullAhash[ahash] = ch.Insert(s.FullAhash[ahash], string(domain)+":"+id)
|
||||
|
||||
if _, ok := s.FullDhash[dhash]; !ok {
|
||||
s.FullDhash[dhash] = make([]string, 0, 3)
|
||||
}
|
||||
s.FullDhash[dhash] = ch.Insert(s.FullDhash[dhash], string(domain)+":"+id)
|
||||
|
||||
if _, ok := s.FullPhash[phash]; !ok {
|
||||
s.FullPhash[phash] = make([]string, 0, 3)
|
||||
}
|
||||
s.FullPhash[phash] = ch.Insert(s.FullPhash[phash], string(domain)+":"+id)
|
||||
|
||||
for i, partialHash := range ch.SplitHash(ahash) {
|
||||
s.PartialAhash[i][partialHash] = append(s.PartialAhash[i][partialHash], ahash)
|
||||
}
|
||||
for i, partialHash := range ch.SplitHash(dhash) {
|
||||
s.PartialDhash[i][partialHash] = append(s.PartialDhash[i][partialHash], dhash)
|
||||
}
|
||||
for i, partialHash := range ch.SplitHash(phash) {
|
||||
s.PartialPhash[i][partialHash] = append(s.PartialPhash[i][partialHash], phash)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) initHashes() {
|
||||
for i := range s.PartialAhash {
|
||||
s.PartialAhash[i] = make(map[uint8][]uint64)
|
||||
}
|
||||
for i := range s.PartialDhash {
|
||||
s.PartialDhash[i] = make(map[uint8][]uint64)
|
||||
}
|
||||
for i := range s.PartialPhash {
|
||||
s.PartialPhash[i] = make(map[uint8][]uint64)
|
||||
}
|
||||
s.FullAhash = make(map[uint64][]string)
|
||||
s.FullDhash = make(map[uint64][]string)
|
||||
s.FullPhash = make(map[uint64][]string)
|
||||
s.ids = make(map[ch.Source]map[string]struct{})
|
||||
}
|
||||
|
||||
func (s *Server) mapper(done func()) {
|
||||
defer done()
|
||||
for hash := range s.mappingQueue {
|
||||
s.MapHashes(hash)
|
||||
s.hashes.MapHashes(hash)
|
||||
}
|
||||
}
|
||||
|
||||
@ -575,7 +476,7 @@ func (s *Server) hasher(workerID int, done func()) {
|
||||
start := time.Now()
|
||||
|
||||
hash := ch.HashImage(image)
|
||||
if hash.Domain == "" {
|
||||
if hash.ID.Domain == "" || hash.ID.ID == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
@ -588,7 +489,7 @@ func (s *Server) hasher(workerID int, done func()) {
|
||||
}
|
||||
|
||||
elapsed := time.Since(start)
|
||||
log.Printf("Hashing took %v: worker: %v. path: %s ahash: %064b id: %s\n", elapsed, workerID, image.Path, hash.Ahash.GetHash(), hash.ID)
|
||||
log.Printf("Hashing took %v: worker: %v. path: %s %s: %064b id: %s\n", elapsed, workerID, image.Path, hash.Hashes[0].Kind, hash.Hashes[0].Hash, hash.ID)
|
||||
}
|
||||
}
|
||||
|
||||
@ -605,7 +506,11 @@ func (s *Server) reader(workerID int, done func()) {
|
||||
}
|
||||
file.Close()
|
||||
|
||||
im := ch.Im{Im: i, Format: format, Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path)), Path: path}
|
||||
im := ch.Im{
|
||||
Im: i, Format: format,
|
||||
ID: ch.ID{Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path))},
|
||||
Path: path,
|
||||
}
|
||||
select {
|
||||
case <-s.quit:
|
||||
log.Println("Recieved quit")
|
||||
@ -616,94 +521,48 @@ func (s *Server) reader(workerID int, done func()) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) encodeHashes(e Encoder) ([]byte, error) {
|
||||
hashes := make(savedHashes)
|
||||
for source, ids := range s.ids {
|
||||
hashes[source] = make(map[string][3]uint64, len(ids))
|
||||
}
|
||||
for hash, idlist := range s.FullAhash {
|
||||
for _, id := range idlist {
|
||||
sourceID := strings.SplitN(id, ":", 2)
|
||||
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
|
||||
h[0] = hash
|
||||
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
|
||||
}
|
||||
}
|
||||
for hash, idlist := range s.FullDhash {
|
||||
for _, id := range idlist {
|
||||
sourceID := strings.SplitN(id, ":", 2)
|
||||
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
|
||||
h[1] = hash
|
||||
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
|
||||
}
|
||||
|
||||
}
|
||||
for hash, idlist := range s.FullPhash {
|
||||
for _, id := range idlist {
|
||||
sourceID := strings.SplitN(id, ":", 2)
|
||||
h := hashes[ch.Source(sourceID[0])][sourceID[1]]
|
||||
h[2] = hash
|
||||
hashes[ch.Source(sourceID[0])][sourceID[1]] = h
|
||||
}
|
||||
|
||||
}
|
||||
return e(hashes)
|
||||
}
|
||||
|
||||
// EncodeHashes must have a lock to s.hashMutex
|
||||
func (s *Server) EncodeHashes(format Format) ([]byte, error) {
|
||||
var encoder Encoder
|
||||
switch format {
|
||||
case Msgpack:
|
||||
return s.encodeHashes(msgpack.Marshal)
|
||||
encoder = msgpack.Marshal
|
||||
case JSON:
|
||||
return s.encodeHashes(json.Marshal)
|
||||
|
||||
encoder = json.Marshal
|
||||
default:
|
||||
return nil, fmt.Errorf("Unknown format: %v", format)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) decodeHashes(d Decoder, hashes []byte) error {
|
||||
loadedHashes := make(savedHashes)
|
||||
err := d(hashes, &loadedHashes)
|
||||
hashes, err := s.hashes.EncodeHashes()
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for domain, ids := range loadedHashes {
|
||||
for id := range ids {
|
||||
if _, ok := s.ids[domain]; ok {
|
||||
s.ids[domain][id] = struct{}{}
|
||||
} else {
|
||||
s.ids[domain] = make(map[string]struct{})
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, sourceHashes := range loadedHashes {
|
||||
s.FullAhash = make(map[uint64][]string, len(sourceHashes))
|
||||
s.FullDhash = make(map[uint64][]string, len(sourceHashes))
|
||||
s.FullPhash = make(map[uint64][]string, len(sourceHashes))
|
||||
break
|
||||
}
|
||||
for domain, sourceHashes := range loadedHashes {
|
||||
for id, h := range sourceHashes {
|
||||
s.mapHashes(h[0], h[1], h[2], domain, id)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return encoder(hashes)
|
||||
}
|
||||
|
||||
// DecodeHashes must have a lock to s.hashMutex
|
||||
func (s *Server) DecodeHashes(format Format, hashes []byte) error {
|
||||
var decoder Decoder
|
||||
switch format {
|
||||
case Msgpack:
|
||||
return s.decodeHashes(msgpack.Unmarshal, hashes)
|
||||
decoder = msgpack.Unmarshal
|
||||
case JSON:
|
||||
return s.decodeHashes(json.Unmarshal, hashes)
|
||||
decoder = json.Unmarshal
|
||||
|
||||
default:
|
||||
return fmt.Errorf("Unknown format: %v", format)
|
||||
}
|
||||
loadedHashes := ch.SavedHashes{}
|
||||
err := decoder(hashes, &loadedHashes)
|
||||
if err != nil || len(loadedHashes.IDs) == 0 {
|
||||
fmt.Println("Failed to load hashes, checking if they are old hashes", err)
|
||||
oldHashes := make(ch.OldSavedHashes)
|
||||
if err = decoder(hashes, &oldHashes); err != nil {
|
||||
return err
|
||||
}
|
||||
loadedHashes = ch.ConvertSavedHashes(oldHashes)
|
||||
}
|
||||
|
||||
return s.hashes.DecodeHashes(loadedHashes)
|
||||
}
|
||||
|
||||
func (s *Server) HashLocalImages(opts Opts) {
|
||||
@ -718,10 +577,10 @@ func (s *Server) HashLocalImages(opts Opts) {
|
||||
log.Println("Recieved quit")
|
||||
}
|
||||
err := s.httpServer.Shutdown(context.TODO())
|
||||
fmt.Println("Err:", err)
|
||||
log.Println("Err:", err)
|
||||
return
|
||||
}
|
||||
fmt.Println("Hashing covers at ", opts.coverPath)
|
||||
log.Println("Hashing covers at ", opts.coverPath)
|
||||
start := time.Now()
|
||||
err := filepath.WalkDir(opts.coverPath, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
@ -747,7 +606,7 @@ func (s *Server) HashLocalImages(opts Opts) {
|
||||
return nil
|
||||
})
|
||||
elapsed := time.Since(start)
|
||||
fmt.Println("Err:", err, "local hashing took", elapsed)
|
||||
log.Println("Err:", err, "local hashing took", elapsed)
|
||||
|
||||
sig := <-s.signalQueue
|
||||
if !alreadyQuit {
|
||||
@ -758,6 +617,22 @@ func (s *Server) HashLocalImages(opts Opts) {
|
||||
}()
|
||||
}
|
||||
|
||||
func initializeStorage(opts Opts) (ch.HashStorage, error) {
|
||||
switch opts.storageType {
|
||||
case Map:
|
||||
return ch.NewMapStorage()
|
||||
case BasicMap:
|
||||
return ch.NewBasicMapStorage()
|
||||
case Sqlite:
|
||||
return ch.NewSqliteStorage("sqlite", opts.sqlitePath)
|
||||
case Sqlite3:
|
||||
return ch.NewSqliteStorage("sqlite3", opts.sqlitePath)
|
||||
case VPTree:
|
||||
return ch.NewVPStorage()
|
||||
}
|
||||
return nil, errors.New("Unknown storage type provided")
|
||||
}
|
||||
|
||||
func startServer(opts Opts) {
|
||||
if opts.cpuprofile != "" {
|
||||
f, err := os.Create(opts.cpuprofile)
|
||||
@ -769,13 +644,13 @@ func startServer(opts Opts) {
|
||||
}
|
||||
|
||||
mux := http.NewServeMux()
|
||||
|
||||
server := Server{
|
||||
// token: make(chan *oidc.Tokens),
|
||||
quit: make(chan struct{}),
|
||||
signalQueue: make(chan os.Signal, 1),
|
||||
readerQueue: make(chan string, 1120130), // Number gotten from checking queue size
|
||||
readerQueue: make(chan string, 100),
|
||||
hashingQueue: make(chan ch.Im),
|
||||
mappingQueue: make(chan ch.Hash),
|
||||
mappingQueue: make(chan ch.ImageHash),
|
||||
mux: mux,
|
||||
httpServer: &http.Server{
|
||||
Addr: ":8080",
|
||||
@ -786,29 +661,33 @@ func startServer(opts Opts) {
|
||||
},
|
||||
}
|
||||
Notify(server.signalQueue)
|
||||
imaging.SetMaxProcs(1)
|
||||
fmt.Println("init hashes")
|
||||
server.initHashes()
|
||||
// server.setupOauthHandlers()
|
||||
fmt.Println("init handlers")
|
||||
var err error
|
||||
log.Println("init hashes")
|
||||
server.hashes, err = initializeStorage(opts)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
log.Println("init handlers")
|
||||
server.setupAppHandlers()
|
||||
fmt.Println("init hashers")
|
||||
|
||||
log.Println("init hashers")
|
||||
rwg := sync.WaitGroup{}
|
||||
for i := range 10 {
|
||||
rwg.Add(1)
|
||||
go server.reader(i, func() { fmt.Println("Reader completed"); rwg.Done() })
|
||||
go server.reader(i, func() { log.Println("Reader completed"); rwg.Done() })
|
||||
}
|
||||
|
||||
hwg := sync.WaitGroup{}
|
||||
for i := range 10 {
|
||||
hwg.Add(1)
|
||||
go server.hasher(i, func() { fmt.Println("Hasher completed"); hwg.Done() })
|
||||
go server.hasher(i, func() { log.Println("Hasher completed"); hwg.Done() })
|
||||
}
|
||||
|
||||
fmt.Println("init mapper")
|
||||
log.Println("init mapper")
|
||||
mwg := sync.WaitGroup{}
|
||||
mwg.Add(1)
|
||||
go server.mapper(func() { fmt.Println("Mapper completed"); mwg.Done() })
|
||||
go server.mapper(func() { log.Println("Mapper completed"); mwg.Done() })
|
||||
|
||||
if opts.loadEmbeddedHashes && len(ch.Hashes) != 0 {
|
||||
var err error
|
||||
@ -829,7 +708,7 @@ func startServer(opts Opts) {
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to decode embedded hashes: %s", err))
|
||||
}
|
||||
fmt.Printf("Loaded embedded %s hashes ahashes: %d dhashes: %d phashes: %d\n", format, len(server.FullAhash), len(server.FullDhash), len(server.FullPhash))
|
||||
fmt.Printf("Loaded embedded %s hashes\n", format)
|
||||
} else {
|
||||
if f, err := os.Open(opts.hashesPath); err == nil {
|
||||
var buf io.Reader = f
|
||||
@ -854,35 +733,35 @@ func startServer(opts Opts) {
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("Failed to decode hashes from disk: %s", err))
|
||||
}
|
||||
fmt.Printf("Loaded hashes from %q %s hashes ahashes: %d dhashes: %d phashes: %d\n", opts.hashesPath, format, len(server.FullAhash), len(server.FullDhash), len(server.FullPhash))
|
||||
fmt.Printf("Loaded hashes from %q %s\n", opts.hashesPath, format)
|
||||
} else {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
fmt.Println("No saved hashes to load")
|
||||
log.Println("No saved hashes to load")
|
||||
} else {
|
||||
fmt.Println("Unable to load saved hashes", err)
|
||||
log.Println("Unable to load saved hashes", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
server.HashLocalImages(opts)
|
||||
|
||||
fmt.Println("Listening on ", server.httpServer.Addr)
|
||||
err := server.httpServer.ListenAndServe()
|
||||
log.Println("Listening on ", server.httpServer.Addr)
|
||||
err = server.httpServer.ListenAndServe()
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
log.Println(err)
|
||||
}
|
||||
close(server.readerQueue)
|
||||
fmt.Println("waiting on readers")
|
||||
log.Println("waiting on readers")
|
||||
rwg.Wait()
|
||||
for range server.readerQueue {
|
||||
}
|
||||
close(server.hashingQueue)
|
||||
fmt.Println("waiting on hashers")
|
||||
log.Println("waiting on hashers")
|
||||
hwg.Wait()
|
||||
for range server.hashingQueue {
|
||||
}
|
||||
close(server.mappingQueue)
|
||||
fmt.Println("waiting on mapper")
|
||||
log.Println("waiting on mapper")
|
||||
mwg.Wait()
|
||||
for range server.mappingQueue {
|
||||
}
|
||||
@ -897,14 +776,14 @@ func startServer(opts Opts) {
|
||||
gzw := gzip.NewWriter(f)
|
||||
_, err := gzw.Write(encodedHashes)
|
||||
if err != nil {
|
||||
fmt.Println("Failed to write hashes", err)
|
||||
log.Println("Failed to write hashes", err)
|
||||
} else {
|
||||
fmt.Println("Successfully saved hashes")
|
||||
log.Println("Successfully saved hashes")
|
||||
}
|
||||
gzw.Close()
|
||||
f.Close()
|
||||
} else {
|
||||
fmt.Println("Unabled to save hashes", err)
|
||||
log.Println("Unabled to save hashes", err)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("Unable to encode hashes as %v: %v", opts.format, err)
|
||||
|
17
cmd/comic-hasher/tmp.go
Normal file
17
cmd/comic-hasher/tmp.go
Normal file
@ -0,0 +1,17 @@
|
||||
//go:build main
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
func main() {
|
||||
tmp := make([]string, 0, 932456)
|
||||
for range 932460 {
|
||||
tmp = append(tmp, "comicvine.gamespot.com:123456")
|
||||
}
|
||||
fmt.Println(len(tmp))
|
||||
time.Sleep(time.Minute)
|
||||
}
|
@ -106,9 +106,9 @@ func main() {
|
||||
debugImage(debugim, 8, 8)
|
||||
}
|
||||
|
||||
hash := ch.HashImage(ch.Im{Im: im, Format: format, Domain: ch.Source(ch.ComicVine), ID: "nothing"})
|
||||
hash := ch.HashImage(ch.Im{Im: im, Format: format, ID: ch.ID{Domain: ch.Source(ch.ComicVine), ID: "nothing"}})
|
||||
|
||||
fmt.Println("ahash: ", hash.Ahash.BinString())
|
||||
fmt.Println("dhash: ", hash.Dhash.BinString())
|
||||
fmt.Println("phash: ", hash.Phash.BinString())
|
||||
fmt.Println("ahash: ", goimagehash.NewImageHash(hash.Hashes[0].Hash, hash.Hashes[0].Kind).BinString())
|
||||
fmt.Println("dhash: ", goimagehash.NewImageHash(hash.Hashes[1].Hash, hash.Hashes[1].Kind).BinString())
|
||||
fmt.Println("phash: ", goimagehash.NewImageHash(hash.Hashes[2].Hash, hash.Hashes[2].Kind).BinString())
|
||||
}
|
||||
|
27
go.mod
27
go.mod
@ -1,16 +1,18 @@
|
||||
module gitea.narnian.us/lordwelch/comic-hasher
|
||||
|
||||
go 1.22.1
|
||||
|
||||
toolchain go1.22.2
|
||||
go 1.23.0
|
||||
|
||||
require (
|
||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00
|
||||
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09
|
||||
github.com/fmartingr/go-comicinfo/v2 v2.0.2
|
||||
github.com/kr/pretty v0.1.0
|
||||
github.com/mattn/go-sqlite3 v1.14.22
|
||||
github.com/mholt/archiver/v4 v4.0.0-alpha.8
|
||||
github.com/ncruces/go-sqlite3 v0.18.1
|
||||
golang.org/x/image v0.19.0
|
||||
golang.org/x/text v0.17.0
|
||||
gonum.org/v1/gonum v0.15.1
|
||||
modernc.org/sqlite v1.32.0
|
||||
)
|
||||
|
||||
require (
|
||||
@ -24,19 +26,36 @@ require (
|
||||
github.com/bodgit/sevenzip v1.3.0 // indirect
|
||||
github.com/bodgit/windows v1.0.0 // indirect
|
||||
github.com/connesc/cipherio v0.2.1 // indirect
|
||||
github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09 // indirect
|
||||
github.com/dsnet/compress v0.0.1 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/golang/mock v1.6.0 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.0.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||
github.com/klauspost/compress v1.15.9 // indirect
|
||||
github.com/klauspost/pgzip v1.2.5 // indirect
|
||||
github.com/kr/text v0.1.0 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/ncruces/go-strftime v0.1.9 // indirect
|
||||
github.com/ncruces/julianday v1.0.0 // indirect
|
||||
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.15 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/tetratelabs/wazero v1.8.0 // indirect
|
||||
github.com/therootcompany/xz v1.0.1 // indirect
|
||||
github.com/ulikunitz/xz v0.5.10 // indirect
|
||||
go4.org v0.0.0-20200411211856-f5505b9728dd // indirect
|
||||
golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect
|
||||
golang.org/x/sys v0.24.0 // indirect
|
||||
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect
|
||||
modernc.org/libc v1.55.3 // indirect
|
||||
modernc.org/mathutil v1.6.0 // indirect
|
||||
modernc.org/memory v1.8.0 // indirect
|
||||
modernc.org/strutil v1.2.0 // indirect
|
||||
modernc.org/token v1.1.0 // indirect
|
||||
)
|
||||
|
||||
replace golang.org/x/text v0.17.0 => github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f
|
||||
|
61
go.sum
61
go.sum
@ -42,6 +42,8 @@ github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09/go.mod h1
|
||||
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
||||
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
|
||||
github.com/fmartingr/go-comicinfo/v2 v2.0.2 h1:VppvrHr8C4+iktBTOd7vzTMNbVecZ7F/Ji1kPTOIGg4=
|
||||
@ -75,7 +77,11 @@ github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXi
|
||||
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
|
||||
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
|
||||
github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
||||
github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo=
|
||||
github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw=
|
||||
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
|
||||
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
|
||||
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
|
||||
@ -84,6 +90,8 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l
|
||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||
@ -94,13 +102,25 @@ github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHU
|
||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
|
||||
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f h1:RMKTfrT4gjJfmB/aWuvCcFxUSvWAJfOAc5khGL6ASjk=
|
||||
github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
|
||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mholt/archiver/v4 v4.0.0-alpha.8 h1:tRGQuDVPh66WCOelqe6LIGh0gwmfwxUrSSDunscGsRM=
|
||||
github.com/mholt/archiver/v4 v4.0.0-alpha.8/go.mod h1:5f7FUYGXdJWUjESffJaYR4R60VhnHxb2X3T1teMyv5A=
|
||||
github.com/ncruces/go-sqlite3 v0.18.1 h1:iN8IMZV5EMxpH88NUac9vId23eTKNFUhP7jgY0EBbNc=
|
||||
github.com/ncruces/go-sqlite3 v0.18.1/go.mod h1:eEOyZnW1dGTJ+zDpMuzfYamEUBtdFz5zeYhqLBtHxvM=
|
||||
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
|
||||
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||
github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M=
|
||||
github.com/ncruces/julianday v1.0.0/go.mod h1:Dusn2KvZrrovOMJuOt0TNXL6tB7U2E8kvza5fFc9G7g=
|
||||
github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk=
|
||||
github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY=
|
||||
github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0=
|
||||
@ -108,6 +128,8 @@ github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFu
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||
github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
@ -115,6 +137,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/tetratelabs/wazero v1.8.0 h1:iEKu0d4c2Pd+QSRieYbnQC9yiFlMS9D+Jr0LsRmcF4g=
|
||||
github.com/tetratelabs/wazero v1.8.0/go.mod h1:yAI0XTsMBhREkM/YDAK/zNou3GoiAce1P6+rp/wQhjs=
|
||||
github.com/therootcompany/xz v1.0.1 h1:CmOtsn1CbtmyYiusbfmhmkpAAETj0wBIH6kCYaX+xzw=
|
||||
github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0BWbMn8qNMY=
|
||||
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
||||
@ -166,6 +190,8 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
|
||||
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
|
||||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
@ -193,6 +219,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
|
||||
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@ -209,6 +237,9 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
|
||||
golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
@ -242,10 +273,14 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK
|
||||
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
|
||||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
|
||||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
||||
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
||||
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
||||
@ -291,6 +326,32 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh
|
||||
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
|
||||
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
|
||||
modernc.org/cc/v4 v4.21.4 h1:3Be/Rdo1fpr8GrQ7IVw9OHtplU4gWbb+wNgeoBMmGLQ=
|
||||
modernc.org/cc/v4 v4.21.4/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ=
|
||||
modernc.org/ccgo/v4 v4.19.2 h1:lwQZgvboKD0jBwdaeVCTouxhxAyN6iawF3STraAal8Y=
|
||||
modernc.org/ccgo/v4 v4.19.2/go.mod h1:ysS3mxiMV38XGRTTcgo0DQTeTmAO4oCmJl1nX9VFI3s=
|
||||
modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE=
|
||||
modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ=
|
||||
modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw=
|
||||
modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU=
|
||||
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI=
|
||||
modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4=
|
||||
modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=
|
||||
modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=
|
||||
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
|
||||
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
|
||||
modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=
|
||||
modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=
|
||||
modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4=
|
||||
modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0=
|
||||
modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc=
|
||||
modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss=
|
||||
modernc.org/sqlite v1.32.0 h1:6BM4uGza7bWypsw4fdLRsLxut6bHe4c58VeqjRgST8s=
|
||||
modernc.org/sqlite v1.32.0/go.mod h1:UqoylwmTb9F+IqXERT8bW9zzOWN8qwAIcLdzeBZs4hA=
|
||||
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
|
||||
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
|
||||
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
|
||||
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
|
||||
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
|
||||
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
|
||||
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
|
||||
|
165
hashing.go
165
hashing.go
@ -50,35 +50,124 @@ type Match struct {
|
||||
}
|
||||
|
||||
type ID struct {
|
||||
Domain, ID string
|
||||
}
|
||||
|
||||
type Result struct {
|
||||
IDs []string // domain:id
|
||||
Distance int
|
||||
Hash ImageHash
|
||||
}
|
||||
|
||||
type Im struct {
|
||||
Im image.Image
|
||||
Format string
|
||||
Domain Source
|
||||
ID, Path string
|
||||
}
|
||||
|
||||
type Hash struct {
|
||||
Ahash *goimagehash.ImageHash
|
||||
Dhash *goimagehash.ImageHash
|
||||
Phash *goimagehash.ImageHash
|
||||
Domain Source
|
||||
ID string
|
||||
}
|
||||
|
||||
type Result struct {
|
||||
IDs IDList
|
||||
Distance int
|
||||
Hash Hash
|
||||
}
|
||||
|
||||
type Im struct {
|
||||
Im image.Image
|
||||
Format string
|
||||
Path string
|
||||
ID ID
|
||||
}
|
||||
|
||||
type ImageHash struct {
|
||||
Hashes []Hash
|
||||
ID ID
|
||||
}
|
||||
|
||||
type Hash struct {
|
||||
Hash uint64
|
||||
Kind goimagehash.Kind
|
||||
}
|
||||
|
||||
// IDList is a map of domain to ID eg IDs["comicvine.gamespot.com"] = []string{"1235"}
|
||||
// Maps are extremely expensive in go for small maps this should only be used to return info to a user no internal code should use this
|
||||
type IDList map[Source][]string
|
||||
|
||||
type OldSavedHashes map[Source]map[string][3]uint64
|
||||
|
||||
type SavedHashes struct {
|
||||
IDs [][]ID
|
||||
Hashes [3]map[uint64]int
|
||||
}
|
||||
|
||||
func ToIDList(ids []ID) IDList {
|
||||
idlist := IDList{}
|
||||
for _, id := range ids {
|
||||
idlist[id.Domain] = Insert(idlist[id.Domain], id.ID)
|
||||
}
|
||||
return idlist
|
||||
}
|
||||
func InsertID(ids []ID, id ID) []ID {
|
||||
index, itemFound := slices.BinarySearchFunc(ids, id, func(e ID, t ID) int {
|
||||
return cmp.Or(
|
||||
cmp.Compare(e.Domain, t.Domain),
|
||||
cmp.Compare(e.ID, t.ID),
|
||||
)
|
||||
})
|
||||
if itemFound {
|
||||
return ids
|
||||
}
|
||||
return slices.Insert(ids, index, id)
|
||||
}
|
||||
func (s *SavedHashes) InsertHash(hash Hash, id ID) {
|
||||
for i, h := range s.Hashes {
|
||||
if h == nil {
|
||||
s.Hashes[i] = make(map[uint64]int)
|
||||
}
|
||||
}
|
||||
|
||||
hashType := int(hash.Kind) - 1
|
||||
idx, hashFound := s.Hashes[hashType][hash.Hash]
|
||||
if !hashFound {
|
||||
idx = len(s.IDs)
|
||||
s.IDs = append(s.IDs, make([]ID, 0, 3))
|
||||
}
|
||||
s.IDs[idx] = InsertID(s.IDs[idx], id)
|
||||
s.Hashes[hashType][hash.Hash] = idx
|
||||
}
|
||||
|
||||
func ConvertSavedHashes(oldHashes OldSavedHashes) SavedHashes {
|
||||
t := SavedHashes{}
|
||||
idcount := 0
|
||||
for _, ids := range oldHashes {
|
||||
idcount += len(ids)
|
||||
}
|
||||
t.IDs = make([][]ID, 0, idcount)
|
||||
t.Hashes[0] = make(map[uint64]int, idcount)
|
||||
t.Hashes[1] = make(map[uint64]int, idcount)
|
||||
t.Hashes[2] = make(map[uint64]int, idcount)
|
||||
for domain, sourceHashes := range oldHashes {
|
||||
for id, hashes := range sourceHashes {
|
||||
idx := len(t.IDs)
|
||||
t.IDs = append(t.IDs, []ID{{domain, id}})
|
||||
for hashType, hash := range hashes {
|
||||
t.Hashes[hashType][hash] = idx
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("Expected number of IDs", idcount)
|
||||
idcount = 0
|
||||
for _, ids := range t.IDs {
|
||||
idcount += len(ids)
|
||||
}
|
||||
fmt.Println("length of hashes", len(t.Hashes[0])+len(t.Hashes[1])+len(t.Hashes[2]))
|
||||
fmt.Println("Length of ID lists", len(t.IDs))
|
||||
fmt.Println("Total number of IDs", idcount)
|
||||
return t
|
||||
}
|
||||
|
||||
type NewIDs struct {
|
||||
OldID ID
|
||||
NewID ID
|
||||
}
|
||||
|
||||
type HashStorage interface {
|
||||
GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error)
|
||||
MapHashes(ImageHash)
|
||||
DecodeHashes(hashes SavedHashes) error
|
||||
EncodeHashes() (SavedHashes, error)
|
||||
AssociateIDs(newIDs []NewIDs)
|
||||
GetIDs(id ID) IDList
|
||||
}
|
||||
|
||||
func Atleast(maxDistance int, searchHash uint64, hashes []uint64) []Match {
|
||||
matchingHashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
|
||||
for _, storedHash := range hashes {
|
||||
@ -98,47 +187,49 @@ func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
|
||||
return slices.Insert(slice, index, item)
|
||||
}
|
||||
|
||||
func InsertIdx[S ~[]E, E cmp.Ordered](slice S, item E) (S, int) {
|
||||
index, itemFound := slices.BinarySearch(slice, item)
|
||||
if itemFound {
|
||||
return slice, index
|
||||
}
|
||||
return slices.Insert(slice, index, item), index
|
||||
}
|
||||
|
||||
func MemStats() uint64 {
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
return m.Alloc
|
||||
}
|
||||
|
||||
func HashImage(i Im) Hash {
|
||||
func HashImage(i Im) ImageHash {
|
||||
if i.Format == "webp" {
|
||||
i.Im = goimagehash.FancyUpscale(i.Im.(*image.YCbCr))
|
||||
}
|
||||
|
||||
var (
|
||||
err error = nil
|
||||
ahash *goimagehash.ImageHash
|
||||
dhash *goimagehash.ImageHash
|
||||
phash *goimagehash.ImageHash
|
||||
err error
|
||||
)
|
||||
|
||||
ahash, err = goimagehash.AverageHash(i.Im)
|
||||
ahash, err := goimagehash.AverageHash(i.Im)
|
||||
if err != nil {
|
||||
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
|
||||
log.Println(msg)
|
||||
return Hash{}
|
||||
return ImageHash{}
|
||||
}
|
||||
dhash, err = goimagehash.DifferenceHash(i.Im)
|
||||
dhash, err := goimagehash.DifferenceHash(i.Im)
|
||||
if err != nil {
|
||||
msg := fmt.Sprintf("Failed to dhash Image: %s", err)
|
||||
log.Println(msg)
|
||||
return Hash{}
|
||||
return ImageHash{}
|
||||
}
|
||||
phash, err = goimagehash.PerceptionHash(i.Im)
|
||||
phash, err := goimagehash.PerceptionHash(i.Im)
|
||||
if err != nil {
|
||||
msg := fmt.Sprintf("Failed to phash Image: %s", err)
|
||||
log.Println(msg)
|
||||
return Hash{}
|
||||
return ImageHash{}
|
||||
}
|
||||
return Hash{
|
||||
Ahash: ahash,
|
||||
Dhash: dhash,
|
||||
Phash: phash,
|
||||
Domain: i.Domain,
|
||||
return ImageHash{
|
||||
Hashes: []Hash{{ahash.GetHash(), ahash.GetKind()}, {dhash.GetHash(), dhash.GetKind()}, {phash.GetHash(), phash.GetKind()}},
|
||||
ID: i.ID,
|
||||
}
|
||||
}
|
||||
@ -155,5 +246,3 @@ func SplitHash(hash uint64) [8]uint8 {
|
||||
uint8((hash & H0) >> Shift0),
|
||||
}
|
||||
}
|
||||
|
||||
type IDList map[Source][]string // IDs is a map of domain to ID eg IDs['comicvine.gamespot.com'] = []string{"1235"}
|
||||
|
147
map.go
Normal file
147
map.go
Normal file
@ -0,0 +1,147 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type MapStorage struct {
|
||||
basicMapStorage
|
||||
partialHash [3][8]map[uint8][]uint64
|
||||
}
|
||||
|
||||
func (m *MapStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
var foundMatches []Result
|
||||
m.hashMutex.RLock()
|
||||
defer m.hashMutex.RUnlock()
|
||||
resetTime()
|
||||
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
for _, hash := range hashes {
|
||||
hashType := int(hash.Kind) - 1
|
||||
idlist := m.hashes[hashType][hash.Hash]
|
||||
if idlist != nil && len(*idlist) > 0 {
|
||||
foundMatches = append(foundMatches, Result{
|
||||
Distance: 0,
|
||||
Hash: hash,
|
||||
IDs: ToIDList(*idlist),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// If we have exact matches don't bother with other matches
|
||||
if len(foundMatches) > 0 && exactOnly {
|
||||
return foundMatches, nil
|
||||
}
|
||||
logTime("Search Exact")
|
||||
}
|
||||
|
||||
totalPartialHashes := 0
|
||||
for _, searchHash := range hashes {
|
||||
foundHashes := make(map[uint64]struct{})
|
||||
hashType := int(searchHash.Kind) - 1
|
||||
for i, partialHash := range SplitHash(searchHash.Hash) {
|
||||
partialHashes := m.partialHash[hashType][i][partialHash]
|
||||
totalPartialHashes += len(partialHashes)
|
||||
for _, match := range Atleast(max, searchHash.Hash, partialHashes) {
|
||||
_, alreadyMatched := foundHashes[match.Hash]
|
||||
if matchedResults, ok := m.hashes[hashType][match.Hash]; ok && !alreadyMatched {
|
||||
foundHashes[match.Hash] = struct{}{}
|
||||
foundMatches = append(foundMatches, Result{IDs: ToIDList(*matchedResults), Distance: match.Distance, Hash: Hash{Hash: match.Hash, Kind: searchHash.Kind}})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("Total partial hashes tested:", totalPartialHashes)
|
||||
logTime("Search Complete")
|
||||
go m.printSizes()
|
||||
return foundMatches, nil
|
||||
}
|
||||
|
||||
func (m *MapStorage) MapHashes(hash ImageHash) {
|
||||
m.basicMapStorage.MapHashes(hash)
|
||||
for _, hash := range hash.Hashes {
|
||||
hashType := int(hash.Kind) - 1
|
||||
for i, partialHash := range SplitHash(hash.Hash) {
|
||||
m.partialHash[hashType][i][partialHash] = Insert(m.partialHash[hashType][i][partialHash], hash.Hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DecodeHashes loads the saved hashes into the in-memory maps and builds the
// 8-bit partial-hash index used to narrow searches, then sorts and
// de-duplicates every partial-hash bucket.
func (m *MapStorage) DecodeHashes(hashes SavedHashes) error {
	for hashType, sourceHashes := range hashes.Hashes {
		// Pre-size the map for this hash kind.
		m.hashes[hashType] = make(map[uint64]*[]ID, len(sourceHashes))
		for savedHash, idlistLocation := range sourceHashes {
			// Index every 8-bit slice of the hash for partial matching.
			for i, partialHash := range SplitHash(savedHash) {
				m.partialHash[hashType][i][partialHash] = append(m.partialHash[hashType][i][partialHash], savedHash)
			}
			// Store a pointer into hashes.IDs so ID lists are shared with
			// the SavedHashes value rather than copied.
			m.hashes[hashType][savedHash] = &hashes.IDs[idlistLocation]
		}
	}
	m.printSizes()
	// Sort and compact each bucket so lookups can rely on sorted, unique data.
	for _, partialHashes := range m.partialHash {
		for _, partMap := range partialHashes {
			for part, hashes := range partMap {
				slices.Sort(hashes)
				partMap[part] = slices.Compact(hashes)
			}
		}
	}
	m.printSizes()
	return nil
}
|
||||
|
||||
func (m *MapStorage) printSizes() {
|
||||
fmt.Println("Length of hashes:", len(m.hashes[0])+len(m.hashes[1])+len(m.hashes[2]))
|
||||
// fmt.Println("Size of", "hashes:", size.Of(m.hashes)/1024/1024, "MB")
|
||||
// fmt.Println("Size of", "ids:", size.Of(m.ids)/1024/1024, "MB")
|
||||
// fmt.Println("Size of", "MapStorage:", size.Of(m)/1024/1024, "MB")
|
||||
|
||||
}
|
||||
|
||||
func NewMapStorage() (HashStorage, error) {
|
||||
storage := &MapStorage{
|
||||
basicMapStorage: basicMapStorage{
|
||||
hashMutex: sync.RWMutex{},
|
||||
hashes: [3]map[uint64]*[]ID{
|
||||
make(map[uint64]*[]ID),
|
||||
make(map[uint64]*[]ID),
|
||||
make(map[uint64]*[]ID),
|
||||
},
|
||||
},
|
||||
partialHash: [3][8]map[uint8][]uint64{
|
||||
{
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
},
|
||||
{
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
},
|
||||
{
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
make(map[uint8][]uint64),
|
||||
},
|
||||
},
|
||||
}
|
||||
return storage, nil
|
||||
}
|
465
sqlite.go
Normal file
465
sqlite.go
Normal file
@ -0,0 +1,465 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"math/bits"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
// sqliteStorage implements HashStorage backed by a SQLite database.
type sqliteStorage struct {
	db *sql.DB // limited to one open connection (see NewSqliteStorage)
}

// sqliteHash pairs a search Result with the rowid of its row in the Hashes
// table so associated IDs can be joined in afterwards.
type sqliteHash struct {
	hashid int // rowid in the Hashes table
	Result
}
|
||||
|
||||
func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, items ...interface{}) ([]sqliteHash, error) { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
hashes := []sqliteHash{}
|
||||
rows, err := statement.Query(items...)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var (
|
||||
r = sqliteHash{Result: Result{IDs: make(IDList)}}
|
||||
h int64
|
||||
)
|
||||
err = rows.Scan(&r.hashid, &h, &r.Hash.Kind)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
return hashes, err
|
||||
}
|
||||
r.Hash.Hash = uint64(h)
|
||||
hashes = append(hashes, r)
|
||||
}
|
||||
rows.Close()
|
||||
statement, err = s.db.PrepareContext(context.Background(), `SELECT IDS.domain, IDs.id FROM IDs JOIN id_hash ON IDs.rowid = id_hash.idid WHERE (id_hash.hashid=?) ORDER BY IDs.domain, IDs.ID;`)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
for _, hash := range hashes {
|
||||
rows, err := statement.Query(hash.hashid)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
for rows.Next() {
|
||||
var source Source
|
||||
var id string
|
||||
err := rows.Scan(&source, &id)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
hash.IDs[source] = append(hash.IDs[source], id)
|
||||
}
|
||||
rows.Close()
|
||||
}
|
||||
return hashes, nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) findPartialHashes(max int, search_hash int64, kind goimagehash.Kind) ([]sqliteHash, error) { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
hashes := []sqliteHash{}
|
||||
statement, err := s.db.PrepareContext(context.Background(), `SELECT rowid,hash,kind FROM Hashes WHERE (kind=?) AND (((hash >> (0 * 8) & 0xFF)=(?2 >> (0 * 8) & 0xFF)) OR ((hash >> (1 * 8) & 0xFF)=(?2 >> (1 * 8) & 0xFF)) OR ((hash >> (2 * 8) & 0xFF)=(?2 >> (2 * 8) & 0xFF)) OR ((hash >> (3 * 8) & 0xFF)=(?2 >> (3 * 8) & 0xFF)) OR ((hash >> (4 * 8) & 0xFF)=(?2 >> (4 * 8) & 0xFF)) OR ((hash >> (5 * 8) & 0xFF)=(?2 >> (5 * 8) & 0xFF)) OR ((hash >> (6 * 8) & 0xFF)=(?2 >> (6 * 8) & 0xFF)) OR ((hash >> (7 * 8) & 0xFF)=(?2 >> (7 * 8) & 0xFF)));`)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
rows, err := statement.Query(kind, int64(search_hash))
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var (
|
||||
r = sqliteHash{Result: Result{IDs: make(IDList)}}
|
||||
h int64
|
||||
)
|
||||
err = rows.Scan(&r.hashid, &h, &r.Hash.Kind)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
return hashes, err
|
||||
}
|
||||
r.Hash.Hash = uint64(h)
|
||||
r.Distance = bits.OnesCount64(uint64(search_hash) ^ r.Hash.Hash)
|
||||
if r.Distance <= max {
|
||||
hashes = append(hashes, r)
|
||||
}
|
||||
}
|
||||
rows.Close()
|
||||
logTime("Filter partial " + kind.String())
|
||||
|
||||
statement, err = s.db.PrepareContext(context.Background(), `SELECT DISTINCT IDS.domain, IDs.id, id_hash.hashid FROM IDs JOIN id_hash ON IDs.rowid = id_hash.idid WHERE (id_hash.hashid in (`+strings.TrimRight(strings.Repeat("?,", len(hashes)), ",")+`)) ORDER BY IDs.domain, IDs.ID;`)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
|
||||
var ids []any
|
||||
for _, hash := range hashes {
|
||||
ids = append(ids, hash.hashid)
|
||||
}
|
||||
rows, err = statement.Query(ids...)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
for rows.Next() {
|
||||
var source Source
|
||||
var id string
|
||||
var hashid int
|
||||
err := rows.Scan(&source, &id, &hashid)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
for _, hash := range hashes {
|
||||
if hash.hashid == hashid {
|
||||
hash.IDs[source] = append(hash.IDs[source], id)
|
||||
}
|
||||
}
|
||||
}
|
||||
rows.Close()
|
||||
return hashes, nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) dropIndexes() error {
|
||||
_, err := s.db.Exec(`
|
||||
|
||||
DROP INDEX IF EXISTS hash_index;
|
||||
DROP INDEX IF EXISTS hash_1_index;
|
||||
DROP INDEX IF EXISTS hash_2_index;
|
||||
DROP INDEX IF EXISTS hash_3_index;
|
||||
DROP INDEX IF EXISTS hash_4_index;
|
||||
DROP INDEX IF EXISTS hash_5_index;
|
||||
DROP INDEX IF EXISTS hash_6_index;
|
||||
DROP INDEX IF EXISTS hash_7_index;
|
||||
DROP INDEX IF EXISTS hash_8_index;
|
||||
|
||||
DROP INDEX IF EXISTS id_domain;
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) createIndexes() error {
|
||||
_, err := s.db.Exec(`
|
||||
|
||||
CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
|
||||
CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
|
||||
CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
|
||||
|
||||
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, id);
|
||||
PRAGMA shrink_memory;
|
||||
ANALYZE;
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Ad-hoc timing helpers used while profiling searches.
var (
	total time.Duration // time accumulated since the last resetTime
	t     = time.Now()  // timestamp of the previous logTime/resetTime call
)

// resetTime zeroes the accumulated total and restarts the clock.
func resetTime() {
	total = 0
	t = time.Now()
}

// logTime prints the time elapsed since the previous call together with the
// running total, labelled with msg.
func logTime(msg string) {
	now := time.Now()
	elapsed := now.Sub(t)
	t = now
	total += elapsed
	fmt.Printf("total: %v, %s: %v\n", total, msg, elapsed)
}
|
||||
|
||||
func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
var (
|
||||
foundMatches []Result
|
||||
)
|
||||
resetTime()
|
||||
|
||||
if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
|
||||
|
||||
statement, err := s.db.Prepare(`SELECT rowid,hash,kind FROM Hashes WHERE ` + strings.TrimSuffix(strings.Repeat("(hash=? AND kind=?) OR", len(hashes)), "OR") + `ORDER BY kind,hash;`)
|
||||
if err != nil {
|
||||
logTime("Fail exact")
|
||||
return foundMatches, err
|
||||
}
|
||||
|
||||
args := make([]interface{}, 0, len(hashes)*2)
|
||||
for _, hash := range hashes {
|
||||
if hash.Hash != 0 {
|
||||
args = append(args, int64(hash.Hash), hash.Kind)
|
||||
}
|
||||
}
|
||||
hashes, err := s.findExactHashes(statement, args...)
|
||||
if err != nil {
|
||||
return foundMatches, err
|
||||
}
|
||||
for _, hash := range hashes {
|
||||
foundMatches = append(foundMatches, hash.Result)
|
||||
}
|
||||
|
||||
// If we have exact matches don't bother with other matches
|
||||
if len(foundMatches) > 0 && exactOnly {
|
||||
return foundMatches, nil
|
||||
}
|
||||
logTime("Search Exact")
|
||||
}
|
||||
|
||||
foundHashes := make(map[uint64]struct{})
|
||||
|
||||
for _, hash := range hashes {
|
||||
hashes, err := s.findPartialHashes(max, int64(hash.Hash), hash.Kind)
|
||||
if err != nil {
|
||||
return foundMatches, err
|
||||
}
|
||||
logTime("Search partial " + hash.Kind.String())
|
||||
|
||||
for _, hash := range hashes {
|
||||
if _, alreadyMatched := foundHashes[hash.Hash.Hash]; !alreadyMatched {
|
||||
foundHashes[hash.Hash.Hash] = struct{}{}
|
||||
foundMatches = append(foundMatches, hash.Result)
|
||||
} else {
|
||||
log.Println("Hash already found", hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return foundMatches, nil
|
||||
}
|
||||
|
||||
// MapHashes stores an image's hashes and its ID in the database inside a
// single transaction, linking them through the id_hash join table.
// Panics on any database failure.
func (s *sqliteStorage) MapHashes(hash ImageHash) {
	tx, err := s.db.BeginTx(context.Background(), nil)
	if err != nil {
		panic(err)
	}
	// Upsert: the no-op UPDATE makes RETURNING yield the rowid even when
	// the hash already exists.
	insertHashes, err := tx.Prepare(`
	INSERT INTO Hashes (hash,kind) VALUES (?,?) ON CONFLICT DO UPDATE SET hash=?1 RETURNING hashid
	`)
	if err != nil {
		panic(err)
	}
	// Upsert the image ID the same way, capturing its rowid.
	rows, err := tx.Query(`
	INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO UPDATE SET domain=?1 RETURNING idid
	`, hash.ID.Domain, hash.ID.ID)
	if err != nil {
		panic(err)
	}
	if !rows.Next() {
		panic("Unable to insert IDs")
	}
	var id_id int64
	err = rows.Scan(&id_id)
	if err != nil {
		panic(err)
	}
	rows.Close()
	// Insert every hash, collecting its rowid for the join-table insert below.
	hash_ids := []int64{}
	for _, hash := range hash.Hashes {
		// Hash values are stored as signed int64 in SQLite.
		rows, err := insertHashes.Query(int64(hash.Hash), hash.Kind)
		if err != nil {
			panic(err)
		}

		if !rows.Next() {
			panic("Unable to insert IDs")
		}
		var id int64
		err = rows.Scan(&id)
		rows.Close()
		if err != nil {
			panic(err)
		}
		hash_ids = append(hash_ids, id)
	}
	// Link every hash rowid to the ID rowid in one multi-row insert.
	var ids []any
	for _, hash_id := range hash_ids {
		ids = append(ids, hash_id, id_id)
	}
	_, err = tx.Exec(`INSERT INTO id_hash (hashid,idid) VALUES `+strings.TrimSuffix(strings.Repeat("(?, ?),", len(hash_ids)), ",")+` ON CONFLICT DO NOTHING;`, ids...)
	if err != nil {
		panic(fmt.Errorf("Failed inserting: %v,%v: %w", hash.ID.Domain, hash.ID.ID, err))
	}
	err = tx.Commit()
	if err != nil {
		panic(err)
	}
	insertHashes.Close()
}
|
||||
|
||||
func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
|
||||
err := s.dropIndexes()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for hashType, sourceHashes := range hashes.Hashes {
|
||||
hashKind := goimagehash.Kind(hashType + 1)
|
||||
for hash, idsLocations := range sourceHashes {
|
||||
for _, id := range hashes.IDs[idsLocations] {
|
||||
s.MapHashes(ImageHash{
|
||||
Hashes: []Hash{{hash, hashKind}},
|
||||
ID: id,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
err = s.createIndexes()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
|
||||
hashes := SavedHashes{}
|
||||
conn, err := s.db.Conn(context.Background())
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
defer conn.Close()
|
||||
rows, err := conn.QueryContext(context.Background(), "SELECT IDs.domain,IDs.id,Hashes.hash,Hashes.kind FROM Hashes JOIN id_hash ON id_hash.hashid = hashes.rowid JOIN IDs ON IDs.rowid = id_hash.idid ORDER BY IDs.ID,Hashes.kind,Hashes.hash;")
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
return hashes, err
|
||||
}
|
||||
var (
|
||||
id ID
|
||||
hash Hash
|
||||
)
|
||||
err = rows.Scan(&id.Domain, &id.ID, &hash.Hash, &hash.Kind)
|
||||
if err != nil {
|
||||
return hashes, err
|
||||
}
|
||||
hashes.InsertHash(hash, id)
|
||||
|
||||
return hashes, nil
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) {
|
||||
for _, ids := range newIDs {
|
||||
var oldIDID, newIDID int
|
||||
_, err := s.db.Exec(`INSERT INTO IDs domain,id VALUES (?,?)`, ids.NewID.Domain, ids.NewID.ID)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
rows, err := s.db.Query(`SELECT idid FROM IDs WHERE domain=? AND id=?`, ids.NewID.Domain, ids.NewID.ID)
|
||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||
panic(err)
|
||||
}
|
||||
if rows.Next() {
|
||||
rows.Scan(&newIDID)
|
||||
} else {
|
||||
panic("Unable to insert New ID into database")
|
||||
}
|
||||
rows.Close()
|
||||
rows, err = s.db.Query(`SELECT idid FROM IDs WHERE domain=? AND id=?`, ids.OldID.Domain, ids.OldID.ID)
|
||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||
panic(err)
|
||||
}
|
||||
if rows.Next() {
|
||||
rows.Scan(&oldIDID)
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
_, err = s.db.Exec(`INSERT INTO id_hash (hashid, id_id) SELECT hashid,? FROM id_hash where id_id=?`, newIDID, oldIDID)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *sqliteStorage) GetIDs(id ID) IDList {
|
||||
var idid int
|
||||
rows, err := s.db.Query(`SELECT idid FROM IDs WHERE domain=? AND id=?`, id.Domain, id.ID)
|
||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||
panic(err)
|
||||
}
|
||||
if rows.Next() {
|
||||
rows.Scan(&idid)
|
||||
} else {
|
||||
return nil
|
||||
}
|
||||
rows, err = s.db.Query(`SELECT id_hash FROM id_hash WHERE id_id=?`, idid)
|
||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||
panic(err)
|
||||
}
|
||||
var hashIDs []interface{}
|
||||
for rows.Next() {
|
||||
var hashID int
|
||||
rows.Scan(&hashID)
|
||||
hashIDs = append(hashIDs, hashID)
|
||||
}
|
||||
rows.Close()
|
||||
|
||||
IDs := make(IDList)
|
||||
rows, err = s.db.Query(`SELECT IDs.domain,IDs.id FROM id_hash JOIN IDs ON id_hash.idid==IDs.idid WHERE hash_id in (`+strings.TrimRight(strings.Repeat("?,", len(hashIDs)), ",")+`)`, hashIDs...)
|
||||
if err != nil && !errors.Is(err, sql.ErrNoRows) {
|
||||
panic(err)
|
||||
}
|
||||
for rows.Next() {
|
||||
var id ID
|
||||
rows.Scan(&id.Domain, id.ID)
|
||||
IDs[id.Domain] = append(IDs[id.Domain], id.ID)
|
||||
}
|
||||
return IDs
|
||||
}
|
||||
|
||||
func NewSqliteStorage(db, path string) (HashStorage, error) {
|
||||
sqlite := &sqliteStorage{}
|
||||
sqlDB, err := sql.Open(db, fmt.Sprintf("file://%s?_pragma=cache_size(-200000)&_pragma=busy_timeout(500)&_pragma=hard_heap_limit(1073741824)&_pragma=journal_mode(wal)&_pragma=soft_heap_limit(314572800)", path))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
sqlite.db = sqlDB
|
||||
_, err = sqlite.db.Exec(`
|
||||
PRAGMA foreign_keys=ON;
|
||||
CREATE TABLE IF NOT EXISTS Hashes(
|
||||
hashid INTEGER PRIMARY KEY,
|
||||
hash INT NOT NULL,
|
||||
kind int NOT NULL,
|
||||
UNIQUE(kind, hash)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS IDs(
|
||||
id TEXT NOT NULL,
|
||||
domain TEXT NOT NULL,
|
||||
idid INTEGER PRIMARY KEY,
|
||||
UNIQUE (domain, id)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, id);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS id_hash(
|
||||
hashid INTEGER,
|
||||
idid INTEGER,
|
||||
FOREIGN KEY(hashid) REFERENCES Hashes(hashid),
|
||||
FOREIGN KEY(idid) REFERENCES IDs(idid)
|
||||
UNIQUE (hashid, idid)
|
||||
);
|
||||
|
||||
`)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
sqlite.createIndexes()
|
||||
sqlite.db.SetMaxOpenConns(1)
|
||||
return sqlite, nil
|
||||
}
|
7
sqlite_cgo.go
Normal file
7
sqlite_cgo.go
Normal file
@ -0,0 +1,7 @@
|
||||
//go:build cgo
|
||||
|
||||
package ch
|
||||
|
||||
import (
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
8
sqlite_no_cgo.go
Normal file
8
sqlite_no_cgo.go
Normal file
@ -0,0 +1,8 @@
|
||||
//go:build !cgo
|
||||
|
||||
package ch
|
||||
|
||||
import (
|
||||
_ "github.com/ncruces/go-sqlite3/driver"
|
||||
_ "github.com/ncruces/go-sqlite3/embed"
|
||||
)
|
105
vp-tree.go
Normal file
105
vp-tree.go
Normal file
@ -0,0 +1,105 @@
|
||||
package ch
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/bits"
|
||||
|
||||
"gitea.narnian.us/lordwelch/goimagehash"
|
||||
"gonum.org/v1/gonum/spatial/vptree"
|
||||
)
|
||||
|
||||
// VPTree implements HashStorage using one vantage-point tree per hash kind.
type VPTree struct {
	trees  [3]*vptree.Tree        // one tree per hash kind, built in DecodeHashes
	hashes [3][]vptree.Comparable // backing entries the trees were built from
}

// VPHash is the vptree element: a hash together with the IDs it belongs to.
type VPHash struct {
	Hash Hash
	IDs  []ID
}
|
||||
|
||||
func (h *VPHash) Distance(c vptree.Comparable) float64 {
|
||||
h2, ok := c.(*VPHash)
|
||||
if !ok {
|
||||
return -99
|
||||
}
|
||||
return float64(bits.OnesCount64(h.Hash.Hash ^ h2.Hash.Hash))
|
||||
}
|
||||
|
||||
func (v *VPTree) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
|
||||
var matches []Result
|
||||
var exactMatches []Result
|
||||
fmt.Println(hashes)
|
||||
for _, hash := range hashes {
|
||||
results := vptree.NewDistKeeper(float64(max))
|
||||
hashType := int(hash.Kind) - 1
|
||||
v.trees[hashType].NearestSet(results, &VPHash{Hash: hash})
|
||||
for _, result := range results.Heap {
|
||||
vphash := result.Comparable.(*VPHash)
|
||||
if result.Dist == 0 {
|
||||
exactMatches = append(exactMatches, Result{
|
||||
IDs: ToIDList(vphash.IDs),
|
||||
Distance: int(result.Dist),
|
||||
Hash: vphash.Hash,
|
||||
})
|
||||
} else {
|
||||
matches = append(matches, Result{
|
||||
IDs: ToIDList(vphash.IDs),
|
||||
Distance: int(result.Dist),
|
||||
Hash: vphash.Hash,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(exactMatches) > 0 && exactOnly {
|
||||
return exactMatches, nil
|
||||
}
|
||||
matches = append(exactMatches[:len(exactMatches):len(exactMatches)], matches...)
|
||||
return matches, nil
|
||||
}
|
||||
|
||||
// MapHashes panics: adding hashes after the trees are built is not
// implemented for VPTree.
func (v *VPTree) MapHashes(ImageHash) {
	panic("Not Implemented")
}
|
||||
|
||||
func (v *VPTree) DecodeHashes(hashes SavedHashes) error {
|
||||
var err error
|
||||
for hashType, sourceHashes := range hashes.Hashes {
|
||||
for hash, idsLocation := range sourceHashes {
|
||||
var (
|
||||
hashKind = goimagehash.Kind(hashType + 1)
|
||||
)
|
||||
hash := &VPHash{Hash{hash, hashKind}, hashes.IDs[idsLocation]}
|
||||
v.hashes[hashType] = append(v.hashes[hashType], hash)
|
||||
}
|
||||
}
|
||||
for hashType := range 3 {
|
||||
v.trees[hashType], err = vptree.New(v.hashes[hashType], 3, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (v *VPTree) EncodeHashes() (SavedHashes, error) {
|
||||
return SavedHashes{}, errors.New("Not Implemented")
|
||||
}
|
||||
|
||||
// AssociateIDs panics: re-associating IDs is not implemented for VPTree.
func (v *VPTree) AssociateIDs(newIDs []NewIDs) {
	panic("Not Implemented")
}
|
||||
|
||||
// GetIDs always returns nil: ID lookup is not implemented for VPTree.
func (v *VPTree) GetIDs(id ID) IDList {
	return nil
}
|
||||
|
||||
func NewVPStorage() (HashStorage, error) {
|
||||
|
||||
return &VPTree{
|
||||
hashes: [3][]vptree.Comparable{
|
||||
make([]vptree.Comparable, 0, 1_000_000),
|
||||
make([]vptree.Comparable, 0, 1_000_000),
|
||||
make([]vptree.Comparable, 0, 1_000_000),
|
||||
},
|
||||
}, nil
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user