diff --git a/cmd/comic-hasher/main.go b/cmd/comic-hasher/main.go index a158e46..4e55ec4 100644 --- a/cmd/comic-hasher/main.go +++ b/cmd/comic-hasher/main.go @@ -2,30 +2,35 @@ package main import ( "bufio" + "bytes" "cmp" + "compress/gzip" "context" "encoding/json" + "errors" "flag" "fmt" "image" _ "image/gif" _ "image/jpeg" _ "image/png" + "io" "io/fs" "log" "net/http" _ "net/http/pprof" "net/url" "os" - "os/signal" "path/filepath" - "runtime" "runtime/pprof" "slices" "strconv" "strings" + "sync" "time" + "github.com/vmihailenco/msgpack/v5" + "github.com/disintegration/imaging" _ "golang.org/x/image/tiff" _ "golang.org/x/image/vp8" @@ -45,95 +50,103 @@ type Server struct { mux *http.ServeMux BaseURL *url.URL // token chan<- *oidc.Tokens - PartialAhash [8]map[uint8][]uint64 // Maps partial hashes to their potential full hashes - PartialDhash [8]map[uint8][]uint64 // Maps partial hashes to their potential full hashes - PartialPhash [8]map[uint8][]uint64 // Maps partial hashes to their potential full hashes - FullAhash map[uint64]ch.IDList // Maps ahash's to lists of ID's - FullDhash map[uint64]ch.IDList // Maps dhash's to lists of ID's - FullPhash map[uint64]ch.IDList // Maps phash's to lists of ID's - // IDToCover map[string]string // IDToCover is a map of domain:ID to an index to covers eg IDToCover['comicvine.gamespot.com:12345'] = 0 - // covers []ch.Cover + // Partial hashes are a uint64 split into 8 uint8 pieces for quick lookup, the value is the list of full uint64 hashes that contain that piece + PartialAhash [8]map[uint8][]uint64 + PartialDhash [8]map[uint8][]uint64 + PartialPhash [8]map[uint8][]uint64 + FullAhash map[uint64][]string // Maps ahash's to lists of ID's domain:id + FullDhash map[uint64][]string // Maps dhash's to lists of ID's domain:id + FullPhash map[uint64][]string // Maps phash's to lists of ID's domain:id + ids map[ch.Source]map[string]struct{} + hashMutex sync.RWMutex + quit chan struct{} + signalQueue chan os.Signal readerQueue chan string hashingQueue chan ch.Im 
mappingQueue chan ch.Hash - // hashes are a uint64 split into 8 pieces or a unint64 for quick lookup, the value is an index to covers } // var key = []byte(uuid.New().String())[:16] -var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") + +type savedHashes map[ch.Source]map[string][3]uint64 + +type Format int + +const ( + Msgpack = iota + 1 + JSON +) + +var formatNames = map[Format]string{ + JSON: "json", + Msgpack: "msgpack", +} + +var formatValues = map[string]Format{ + "json": JSON, + "msgpack": Msgpack, +} + +func (f Format) String() string { + if name, known := formatNames[f]; known { + return name + } + return "Unknown" +} + +type Encoder func(any) ([]byte, error) +type Decoder func([]byte, interface{}) error + +func (f *Format) Set(s string) error { + if format, known := formatValues[strings.ToLower(s)]; known { + *f = format + } else { + return fmt.Errorf("Unknown format: %d", f) + } + return nil +} + +type Opts struct { + cpuprofile string + coverPath string + loadEmbeddedHashes bool + saveEmbeddedHashes bool + format Format + hashesPath string +} func main() { + opts := Opts{format: Msgpack} // flag is weird go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }() + flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file") - // mustDropPrivileges() - coverPath := flag.String("cover_path", "", "path to covers to add to hash database") + flag.StringVar(&opts.coverPath, "cover-path", "", "Path to covers to add to hash database. 
must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif") + flag.BoolVar(&opts.loadEmbeddedHashes, "use-embedded-hashes", true, "Use hashes embedded in the application as a starting point") + flag.BoolVar(&opts.saveEmbeddedHashes, "save-embedded-hashes", false, "Save hashes even if we loaded the embedded hashes") + flag.StringVar(&opts.hashesPath, "hashes", "hashes.gz", "Path to optionally gziped hashes in msgpack or json format. You must disable embedded hashes to use this option") + flag.Var(&opts.format, "save-format", "Specify the format to export hashes to (json, msgpack)") flag.Parse() - if *coverPath == "" { - log.Fatal("You must supply a path") + + if opts.coverPath != "" { + _, err := os.Stat(opts.coverPath) + if err != nil { + panic(err) + } } - st, err := os.Stat(*coverPath) - if err != nil { - panic(err) - } - fmt.Println(st) - startServer(*coverPath) + startServer(opts) } func (s *Server) authenticated(w http.ResponseWriter, r *http.Request) (string, bool) { return strings.TrimSpace("lordwelch"), true } -// func (s *Server) setupOauthHandlers() error { -// redirectURI := *s.BaseURL -// redirectURI.Path = "/oauth/callback" -// successURI := *s.BaseURL -// successURI.Path = "/success" -// failURI := *s.BaseURL -// failURI.RawQuery = url.Values{"auth": []string{"fail"}}.Encode() - -// cookieHandler := httphelper.NewCookieHandler(key, key, httphelper.WithUnsecure()) - -// options := []rp.Option{ -// rp.WithCookieHandler(cookieHandler), -// rp.WithVerifierOpts(rp.WithIssuedAtOffset(5 * time.Second)), -// } - -// provider, err := rp.NewRelyingPartyOIDC(os.Getenv("COMICHASHER_PROVIDER_URL"), os.Getenv("COMICHASHER_CLIENT_ID"), os.Getenv("COMICHASHER_CLIENT_SECRET"), redirectURI.String(), strings.Split(os.Getenv("COMICHASHER_SCOPES"), ","), options...) 
-// if err != nil { -// return fmt.Errorf("error creating provider: %w", err) -// } - -// // generate some state (representing the state of the user in your application, -// // e.g. the page where he was before sending him to login -// state := func() string { -// return uuid.New().String() -// } - -// // register the AuthURLHandler at your preferred path -// // the AuthURLHandler creates the auth request and redirects the user to the auth server -// // including state handling with secure cookie and the possibility to use PKCE -// s.mux.Handle("/login", rp.AuthURLHandler(state, provider)) - -// // for demonstration purposes the returned userinfo response is written as JSON object onto response -// marshalUserinfo := func(w http.ResponseWriter, r *http.Request, tokens *oidc.Tokens, state string, rp rp.RelyingParty) { -// s.token <- tokens -// w.Header().Add("location", successURI.String()) -// w.WriteHeader(301) -// } - -// // register the CodeExchangeHandler at the callbackPath -// // the CodeExchangeHandler handles the auth response, creates the token request and calls the callback function -// // with the returned tokens from the token endpoint -// s.mux.Handle(redirectURI.Path, rp.CodeExchangeHandler(marshalUserinfo, provider)) -// return nil -// } - func (s *Server) setupAppHandlers() { - // s.mux.HandleFunc("/add_cover", s.addCover) // s.mux.HandleFunc("/get_cover", s.getCover) + s.mux.HandleFunc("/add_cover", s.addCover) s.mux.HandleFunc("/match_cover_hash", s.matchCoverHash) + s.mux.HandleFunc("/associate_ids", s.associateIDs) } func (s *Server) getCover(w http.ResponseWriter, r *http.Request) { @@ -168,44 +181,132 @@ func (s *Server) getCover(w http.ResponseWriter, r *http.Request) { fmt.Fprintln(w, "Not implemented") } -func (s *Server) getMatches(ahash, dhash, phash uint64) []ch.Result { +func (s *Server) associateIDs(w http.ResponseWriter, r *http.Request) { + user, authed := s.authenticated(w, r) + if !authed || user == "" { + http.Error(w, "Invalid 
Auth", http.StatusForbidden) + return + } + var ( + values = r.URL.Query() + domain = strings.TrimSpace(values.Get("domain")) + ID = strings.TrimSpace(values.Get("id")) + newDomain = strings.TrimSpace(values.Get("newDomain")) + newID = strings.TrimSpace(values.Get("newID")) + ) + if ID == "" { + msg := "No ID Provided" + log.Println(msg) + writeJson(w, http.StatusBadRequest, result{Msg: msg}) + return + } + if domain == "" { + msg := "No domain Provided" + log.Println(msg) + writeJson(w, http.StatusBadRequest, result{Msg: msg}) + return + } + if newID == "" { + msg := "No newID Provided" + log.Println(msg) + writeJson(w, http.StatusBadRequest, result{Msg: msg}) + return + } + if newDomain == "" { + msg := "No newDomain Provided" + log.Println(msg) + writeJson(w, http.StatusBadRequest, result{Msg: msg}) + return + } + if newDomain == domain { + msg := "newDomain cannot be the same as the existing domain" + log.Println(msg) + writeJson(w, http.StatusBadRequest, result{Msg: msg}) + return + } + if _, domainExists := s.ids[ch.Source(domain)]; !domainExists { + msg := "No IDs belonging to " + domain + "exist on this server" + log.Println(msg) + writeJson(w, http.StatusBadRequest, result{Msg: msg}) + } + log.Printf("Attempting to associate %s:%s to %s:%s", domain, ID, newDomain, newID) + found := false + for _, hash := range []map[uint64][]string{s.FullAhash, s.FullDhash, s.FullPhash} { + for i, idlist := range hash { + if _, found_in_hash := slices.BinarySearch(idlist, domain+":"+ID); found_in_hash { + found = true + hash[i] = ch.Insert(idlist, newDomain+":"+newID) + if _, ok := s.ids[ch.Source(newDomain)]; !ok { + s.ids[ch.Source(newDomain)] = make(map[string]struct{}) + } + s.ids[ch.Source(newDomain)][newID] = struct{}{} + } + } + } + if found { + writeJson(w, http.StatusOK, result{Msg: "New ID added"}) + } else { + writeJson(w, http.StatusOK, result{Msg: "Old ID not found"}) + } +} + +func (s *Server) getMatches(ahash, dhash, phash uint64, max int, skipNonExact bool) 
[]ch.Result { var foundMatches []ch.Result + s.hashMutex.RLock() + defer s.hashMutex.RUnlock() - if matchedResults, ok := s.FullAhash[ahash]; ok { - foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: ahash, Kind: goimagehash.AHash}}) - } - if matchedResults, ok := s.FullDhash[dhash]; ok { - foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: ahash, Kind: goimagehash.DHash}}) - } - if matchedResults, ok := s.FullPhash[phash]; ok { - foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: ahash, Kind: goimagehash.PHash}}) + if skipNonExact { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate + if matchedResults, ok := s.FullAhash[ahash]; ok && ahash != 0 { + foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: ahash, Kind: goimagehash.AHash}}) + } + if matchedResults, ok := s.FullDhash[dhash]; ok && dhash != 0 { + foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: dhash, Kind: goimagehash.DHash}}) + } + if matchedResults, ok := s.FullPhash[phash]; ok && phash != 0 { + foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: phash, Kind: goimagehash.PHash}}) + } + + // If we have exact matches don't bother with other matches + if len(foundMatches) > 0 && skipNonExact { + return foundMatches + } } - // If we have exact matches don't bother with other matches - if len(foundMatches) > 0 { - return foundMatches - } - - for i, partialHash := range ch.SplitHash(ahash) { - for _, match := range ch.Atleast(8, ahash, s.PartialAhash[i][partialHash]) { - if matchedResults, ok := s.FullAhash[match.Hash]; ok { - foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: 
ch.ImageHash{Hash: match.Hash, Kind: goimagehash.AHash}}) + foundHashes := make(map[uint64]struct{}) + if ahash != 0 { + for i, partialHash := range ch.SplitHash(ahash) { + for _, match := range ch.Atleast(max, ahash, s.PartialAhash[i][partialHash]) { + _, alreadyMatched := foundHashes[match.Hash] + if matchedResults, ok := s.FullAhash[match.Hash]; ok && !alreadyMatched { + foundHashes[match.Hash] = struct{}{} + foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.AHash}}) + } } } } - for i, partialHash := range ch.SplitHash(dhash) { - for _, match := range ch.Atleast(8, dhash, s.PartialDhash[i][partialHash]) { - if matchedResults, ok := s.FullDhash[match.Hash]; ok { - foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.DHash}}) + foundHashes = make(map[uint64]struct{}) + if dhash != 0 { + for i, partialHash := range ch.SplitHash(dhash) { + for _, match := range ch.Atleast(max, dhash, s.PartialDhash[i][partialHash]) { + _, alreadyMatched := foundHashes[match.Hash] + if matchedResults, ok := s.FullDhash[match.Hash]; ok && !alreadyMatched { + foundHashes[match.Hash] = struct{}{} + foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.DHash}}) + } } } } - for i, partialHash := range ch.SplitHash(phash) { - for _, match := range ch.Atleast(8, phash, s.PartialPhash[i][partialHash]) { - if matchedResults, ok := s.FullPhash[match.Hash]; ok { - foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.PHash}}) + foundHashes = make(map[uint64]struct{}) + if phash != 0 { + for i, partialHash := range ch.SplitHash(phash) { + for _, match := range ch.Atleast(max, phash, s.PartialPhash[i][partialHash]) { + _, 
alreadyMatched := foundHashes[match.Hash] + if matchedResults, ok := s.FullPhash[match.Hash]; ok && !alreadyMatched { + foundHashes[match.Hash] = struct{}{} + foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.PHash}}) + } } } } @@ -219,36 +320,88 @@ type SimpleResult struct { } func getSimpleResults(fullResults []ch.Result) []SimpleResult { - simpleMap := make(map[string]int, len(fullResults)) + simpleResult := make([]SimpleResult, 0, len(fullResults)) + slices.SortFunc(fullResults, func(a, b ch.Result) int { return cmp.Compare(a.Distance, b.Distance) }) + // Deduplicate IDs + idToDistance := make(map[string]int) for _, fullResult := range fullResults { - for _, id := range fullResult.IDs[ch.ComicVine] { - simpleDistance, ok := simpleMap[id] - if !ok { - simpleDistance = 99 - } - if simpleDistance > fullResult.Distance { - simpleMap[id] = fullResult.Distance + for _, id := range fullResult.IDs { + if distance, ok := idToDistance[id]; !ok || fullResult.Distance < distance { + idToDistance[id] = fullResult.Distance } } } - simpleList := make([]SimpleResult, 0, len(simpleMap)) - distanceMap := make(map[int][]string) - for id, distance := range simpleMap { - distanceMap[distance] = ch.Insert(distanceMap[distance], id) + // Group by distance + distanceMap := make(map[int]SimpleResult) + for id, distance := range idToDistance { + var ( + sr SimpleResult + ok bool + ) + if sr, ok = distanceMap[distance]; !ok { + sr.IDList = make(ch.IDList) + } + sourceID := strings.SplitN(id, ":", 2) + sr.Distance = distance + sr.IDList[ch.Source(sourceID[0])] = append(sr.IDList[ch.Source(sourceID[0])], sourceID[1]) + distanceMap[distance] = sr } - for distance, idlist := range distanceMap { - simpleList = append(simpleList, SimpleResult{ - Distance: distance, - IDList: ch.IDList{ch.ComicVine: idlist}, - }) + + // turn into array + for _, sr := range distanceMap { + simpleResult = 
append(simpleResult, sr) } - fmt.Println(simpleList) - return simpleList + return simpleResult +} + +type APIResult struct { + IDList ch.IDList + Distance int + Hash ch.ImageHash +} + +func getResults(fullResults []ch.Result) []APIResult { + apiResults := make([]APIResult, 0, len(fullResults)) + for _, res := range fullResults { + idlist := make(ch.IDList) + for _, id := range res.IDs { + sourceID := strings.SplitN(id, ":", 2) + idlist[ch.Source(sourceID[0])] = append(idlist[ch.Source(sourceID[0])], sourceID[1]) + } + apiResults = append(apiResults, + APIResult{ + Distance: res.Distance, + Hash: res.Hash, + IDList: idlist, + }, + ) + } + return apiResults +} + +type result struct { + Results any `json:"results,omitempty"` + Msg string `json:"msg,omitempty"` +} + +func writeJson(w http.ResponseWriter, status int, res result) { + w.Header().Set("Content-Type", "application/json; charset=utf-8") + w.Header().Set("X-Content-Type-Options", "nosniff") + var ( + bytes []byte + err error + ) + if bytes, err = json.Marshal(res); err != nil { + bytes, _ = json.Marshal(result{Msg: fmt.Sprintf("Failed to create json: %s", err)}) + } + w.WriteHeader(status) + _, _ = w.Write(bytes) + _, _ = w.Write([]byte("\n")) } func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) { @@ -258,49 +411,61 @@ func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) { return } var ( - values = r.URL.Query() - ahashStr = strings.TrimSpace(values.Get("ahash")) - dhashStr = strings.TrimSpace(values.Get("dhash")) - phashStr = strings.TrimSpace(values.Get("phash")) - simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true" - ahash uint64 - dhash uint64 - phash uint64 - err error + values = r.URL.Query() + ahashStr = strings.TrimSpace(values.Get("ahash")) + dhashStr = strings.TrimSpace(values.Get("dhash")) + phashStr = strings.TrimSpace(values.Get("phash")) + maxStr = strings.TrimSpace(values.Get("max")) + skipNonExact = 
strings.ToLower(strings.TrimSpace(values.Get("skipNonExact"))) != "false" + simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true" + ahash uint64 + dhash uint64 + phash uint64 + max int = 8 + max_tmp int + err error ) + if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" { log.Printf("could not parse ahash: %s", ahashStr) - http.Error(w, "parse fail", http.StatusBadRequest) + writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"}) return } if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" { log.Printf("could not parse dhash: %s", dhashStr) - http.Error(w, "parse fail", http.StatusBadRequest) + writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"}) return } if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" { log.Printf("could not parse phash: %s", phashStr) - http.Error(w, "parse fail", http.StatusBadRequest) + writeJson(w, http.StatusBadRequest, result{Msg: "hash parse failed"}) return } - matches := s.getMatches(ahash, dhash, phash) + if max_tmp, err = strconv.Atoi(maxStr); err != nil && maxStr != "" { + log.Printf("Invalid Max: %s", maxStr) + writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Invalid Max: %s", maxStr)}) + return + } + if maxStr != "" { + max = max_tmp + } + + if max > 8 { + log.Printf("Max must be less than 9: %d", max) + writeJson(w, http.StatusBadRequest, result{Msg: fmt.Sprintf("Max must be less than 9: %d", max)}) + return + } + matches := s.getMatches(ahash, dhash, phash, max, skipNonExact) if len(matches) > 0 { - var covers []byte if simple { - covers, err = json.Marshal(getSimpleResults(matches)) - } else { - covers, err = json.Marshal(matches) + writeJson(w, http.StatusOK, result{Results: getSimpleResults(matches)}) + return } - - log.Println(err) - w.Header().Add("Content-Type", "application/json") - w.Write(covers) - w.Write([]byte{'\n'}) + writeJson(w, http.StatusOK, result{Results: 
getResults(matches)}) return } - w.Header().Add("Content-Type", "application/json") - fmt.Fprintln(w, "{\"msg\":\"No hashes found\"}") + writeJson(w, http.StatusNotFound, result{Msg: "No hashes found"}) } func (s *Server) addCover(w http.ResponseWriter, r *http.Request) { @@ -316,50 +481,68 @@ func (s *Server) addCover(w http.ResponseWriter, r *http.Request) { ) if ID == "" { log.Println("No ID Provided") - http.Error(w, "No ID Provided", http.StatusBadRequest) + writeJson(w, http.StatusBadRequest, result{Msg: "No ID Provided"}) return } if domain == "" { log.Println("No domain Provided") - http.Error(w, "No domain Provided", http.StatusBadRequest) + writeJson(w, http.StatusBadRequest, result{Msg: "No Domain Provided"}) return } i, format, err := image.Decode(r.Body) if err != nil { msg := fmt.Sprintf("Failed to decode Image: %s", err) log.Println(msg) - http.Error(w, msg, http.StatusBadRequest) + writeJson(w, http.StatusBadRequest, result{Msg: msg}) return } log.Printf("Decoded %s image from %s", format, user) + select { + case <-s.quit: + log.Println("Recieved quit") + return + default: + } s.hashingQueue <- ch.Im{Im: i, Format: format, Domain: ch.Source(domain), ID: ID, Path: ""} - fmt.Fprintln(w, "Success") + writeJson(w, http.StatusOK, result{Msg: "Success"}) } -func (s *Server) mapHashes(hash ch.Hash) { - if _, ok := s.FullAhash[hash.Ahash.GetHash()]; !ok { - s.FullAhash[hash.Ahash.GetHash()] = make(ch.IDList) - } - s.FullAhash[hash.Ahash.GetHash()][hash.Domain] = ch.Insert(s.FullAhash[hash.Ahash.GetHash()][hash.Domain], hash.ID) +func (s *Server) MapHashes(hash ch.Hash) { + s.hashMutex.Lock() + defer s.hashMutex.Unlock() + s.mapHashes(hash.Ahash.GetHash(), hash.Dhash.GetHash(), hash.Phash.GetHash(), hash.Domain, hash.ID) +} - if _, ok := s.FullDhash[hash.Dhash.GetHash()]; !ok { - s.FullDhash[hash.Dhash.GetHash()] = make(ch.IDList) - } - s.FullDhash[hash.Dhash.GetHash()][hash.Domain] = ch.Insert(s.FullDhash[hash.Dhash.GetHash()][hash.Domain], hash.ID) +func 
(s *Server) mapHashes(ahash, dhash, phash uint64, domain ch.Source, id string) { - if _, ok := s.FullPhash[hash.Phash.GetHash()]; !ok { - s.FullPhash[hash.Phash.GetHash()] = make(ch.IDList) + if _, ok := s.ids[domain]; !ok { + s.ids[domain] = make(map[string]struct{}) } - s.FullPhash[hash.Phash.GetHash()][hash.Domain] = ch.Insert(s.FullPhash[hash.Phash.GetHash()][hash.Domain], hash.ID) + s.ids[domain][id] = struct{}{} - for i, partialHash := range ch.SplitHash(hash.Ahash.GetHash()) { - s.PartialAhash[i][partialHash] = ch.Insert(s.PartialAhash[i][partialHash], hash.Ahash.GetHash()) + if _, ok := s.FullAhash[ahash]; !ok { + s.FullAhash[ahash] = make([]string, 0, 3) } - for i, partialHash := range ch.SplitHash(hash.Dhash.GetHash()) { - s.PartialDhash[i][partialHash] = ch.Insert(s.PartialDhash[i][partialHash], hash.Dhash.GetHash()) + s.FullAhash[ahash] = ch.Insert(s.FullAhash[ahash], string(domain)+":"+id) + + if _, ok := s.FullDhash[dhash]; !ok { + s.FullDhash[dhash] = make([]string, 0, 3) } - for i, partialHash := range ch.SplitHash(hash.Phash.GetHash()) { - s.PartialPhash[i][partialHash] = ch.Insert(s.PartialPhash[i][partialHash], hash.Phash.GetHash()) + s.FullDhash[dhash] = ch.Insert(s.FullDhash[dhash], string(domain)+":"+id) + + if _, ok := s.FullPhash[phash]; !ok { + s.FullPhash[phash] = make([]string, 0, 3) + } + s.FullPhash[phash] = ch.Insert(s.FullPhash[phash], string(domain)+":"+id) + + for i, partialHash := range ch.SplitHash(ahash) { + s.PartialAhash[i][partialHash] = append(s.PartialAhash[i][partialHash], ahash) + } + for i, partialHash := range ch.SplitHash(dhash) { + s.PartialDhash[i][partialHash] = append(s.PartialDhash[i][partialHash], dhash) + } + for i, partialHash := range ch.SplitHash(phash) { + s.PartialPhash[i][partialHash] = append(s.PartialPhash[i][partialHash], phash) } } @@ -373,29 +556,21 @@ func (s *Server) initHashes() { for i := range s.PartialPhash { s.PartialPhash[i] = make(map[uint8][]uint64) } - s.FullAhash = 
make(map[uint64]ch.IDList) - s.FullDhash = make(map[uint64]ch.IDList) - s.FullPhash = make(map[uint64]ch.IDList) - // s.IDToCover = make(map[string]string) + s.FullAhash = make(map[uint64][]string) + s.FullDhash = make(map[uint64][]string) + s.FullPhash = make(map[uint64][]string) + s.ids = make(map[ch.Source]map[string]struct{}) } -func (s *Server) mapper() { - var total uint64 = 0 +func (s *Server) mapper(done func()) { + defer done() for hash := range s.mappingQueue { - if total%1000 == 0 { - mem := ch.MemStats() - if mem > 10*1024*1024*1024 { - fmt.Println("Forcing gc", mem, "G") - runtime.GC() - } - } - total++ - - s.mapHashes(hash) + s.MapHashes(hash) } } -func (s *Server) hasher(workerID int) { +func (s *Server) hasher(workerID int, done func()) { + defer done() for image := range s.hashingQueue { start := time.Now() @@ -404,17 +579,21 @@ func (s *Server) hasher(workerID int) { continue } - s.mappingQueue <- hash + select { + case <-s.quit: + log.Println("Recieved quit") + return + case s.mappingQueue <- hash: + default: + } elapsed := time.Since(start) - // fmt.Printf("%#064b\n", ahash.GetHash()) - // fmt.Printf("%#064b\n", dhash.GetHash()) - // fmt.Printf("%#064b\n", phash.GetHash()) log.Printf("Hashing took %v: worker: %v. 
path: %s ahash: %064b id: %s\n", elapsed, workerID, image.Path, hash.Ahash.GetHash(), hash.ID) } } -func (s *Server) reader(workerID int) { +func (s *Server) reader(workerID int, done func()) { + defer done() for path := range s.readerQueue { file, err := os.Open(path) if err != nil { @@ -426,28 +605,174 @@ func (s *Server) reader(workerID int) { } file.Close() - im := ch.Im{Im: i, Format: format, Domain: ch.ComicVine, ID: filepath.Base(filepath.Dir(path)), Path: path} - s.hashingQueue <- im + im := ch.Im{Im: i, Format: format, Domain: ch.Source(filepath.Base(filepath.Dir(filepath.Dir(path)))), ID: filepath.Base(filepath.Dir(path)), Path: path} + select { + case <-s.quit: + log.Println("Recieved quit") + return + case s.hashingQueue <- im: + default: + } } } -func (s *Server) FindHashes() { +func (s *Server) encodeHashes(e Encoder) ([]byte, error) { + hashes := make(savedHashes) + for source, ids := range s.ids { + hashes[source] = make(map[string][3]uint64, len(ids)) + } + for hash, idlist := range s.FullAhash { + for _, id := range idlist { + sourceID := strings.SplitN(id, ":", 2) + h := hashes[ch.Source(sourceID[0])][sourceID[1]] + h[0] = hash + hashes[ch.Source(sourceID[0])][sourceID[1]] = h + } + } + for hash, idlist := range s.FullDhash { + for _, id := range idlist { + sourceID := strings.SplitN(id, ":", 2) + h := hashes[ch.Source(sourceID[0])][sourceID[1]] + h[1] = hash + hashes[ch.Source(sourceID[0])][sourceID[1]] = h + } + + } + for hash, idlist := range s.FullPhash { + for _, id := range idlist { + sourceID := strings.SplitN(id, ":", 2) + h := hashes[ch.Source(sourceID[0])][sourceID[1]] + h[2] = hash + hashes[ch.Source(sourceID[0])][sourceID[1]] = h + } + + } + return e(hashes) } -func startServer(coverPath string) { - if *cpuprofile != "" { - f, err := os.Create(*cpuprofile) +// EncodeHashes must have a lock to s.hashMutex +func (s *Server) EncodeHashes(format Format) ([]byte, error) { + switch format { + case Msgpack: + return 
s.encodeHashes(msgpack.Marshal) + case JSON: + return s.encodeHashes(json.Marshal) + + default: + return nil, fmt.Errorf("Unknown format: %v", format) + } +} + +func (s *Server) decodeHashes(d Decoder, hashes []byte) error { + loadedHashes := make(savedHashes) + err := d(hashes, &loadedHashes) + if err != nil { + return err + } + + for domain, ids := range loadedHashes { + for id := range ids { + if _, ok := s.ids[domain]; ok { + s.ids[domain][id] = struct{}{} + } else { + s.ids[domain] = make(map[string]struct{}) + } + } + } + for _, sourceHashes := range loadedHashes { + s.FullAhash = make(map[uint64][]string, len(sourceHashes)) + s.FullDhash = make(map[uint64][]string, len(sourceHashes)) + s.FullPhash = make(map[uint64][]string, len(sourceHashes)) + break + } + for domain, sourceHashes := range loadedHashes { + for id, h := range sourceHashes { + s.mapHashes(h[0], h[1], h[2], domain, id) + } + } + return nil +} + +// DecodeHashes must have a lock to s.hashMutex +func (s *Server) DecodeHashes(format Format, hashes []byte) error { + switch format { + case Msgpack: + return s.decodeHashes(msgpack.Unmarshal, hashes) + case JSON: + return s.decodeHashes(json.Unmarshal, hashes) + + default: + return fmt.Errorf("Unknown format: %v", format) + } +} + +func (s *Server) HashLocalImages(opts Opts) { + go func() { + alreadyQuit := false + if opts.coverPath == "" { + select { + case sig := <-s.signalQueue: + log.Printf("Signal: %v\n", sig) + close(s.quit) + case <-s.quit: + log.Println("Recieved quit") + } + err := s.httpServer.Shutdown(context.TODO()) + fmt.Println("Err:", err) + return + } + fmt.Println("Hashing covers at ", opts.coverPath) + start := time.Now() + err := filepath.WalkDir(opts.coverPath, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + select { + case signal := <-s.signalQueue: + err = s.httpServer.Shutdown(context.TODO()) + alreadyQuit = true + close(s.quit) + return fmt.Errorf("signal: %v, %w", signal, err) + case 
<-s.quit: + log.Println("Recieved quit") + err = s.httpServer.Shutdown(context.TODO()) + return fmt.Errorf("Recieved quit: %w", err) + default: + } + if d.IsDir() { + return nil + } + + s.readerQueue <- path + return nil + }) + elapsed := time.Since(start) + fmt.Println("Err:", err, "local hashing took", elapsed) + + sig := <-s.signalQueue + if !alreadyQuit { + close(s.quit) + } + err = s.httpServer.Shutdown(context.TODO()) + log.Printf("Signal: %v, error: %v", sig, err) + }() +} + +func startServer(opts Opts) { + if opts.cpuprofile != "" { + f, err := os.Create(opts.cpuprofile) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } - sig := make(chan os.Signal, 1) - signal.Notify(sig, os.Interrupt) + mux := http.NewServeMux() server := Server{ // token: make(chan *oidc.Tokens), + quit: make(chan struct{}), + signalQueue: make(chan os.Signal, 1), readerQueue: make(chan string, 1120130), // Number gotten from checking queue size hashingQueue: make(chan ch.Im), mappingQueue: make(chan ch.Hash), @@ -460,6 +785,7 @@ func startServer(coverPath string) { MaxHeaderBytes: 1 << 20, }, } + Notify(server.signalQueue) imaging.SetMaxProcs(1) fmt.Println("init hashes") server.initHashes() @@ -467,71 +793,121 @@ func startServer(coverPath string) { fmt.Println("init handlers") server.setupAppHandlers() fmt.Println("init hashers") - go server.reader(1) - go server.reader(2) - go server.reader(3) - go server.reader(4) - go server.reader(5) - go server.reader(6) - go server.reader(7) - go server.reader(8) - go server.reader(9) - go server.reader(10) + rwg := sync.WaitGroup{} + for i := range 10 { + rwg.Add(1) + go server.reader(i, func() { fmt.Println("Reader completed"); rwg.Done() }) + } - go server.hasher(1) - go server.hasher(2) - go server.hasher(3) - go server.hasher(4) - go server.hasher(5) - go server.hasher(6) - go server.hasher(7) - go server.hasher(8) - go server.hasher(9) - go server.hasher(10) + hwg := sync.WaitGroup{} + for i := range 
10 { + hwg.Add(1) + go server.hasher(i, func() { fmt.Println("Hasher completed"); hwg.Done() }) + } fmt.Println("init mapper") - go server.mapper() + mwg := sync.WaitGroup{} + mwg.Add(1) + go server.mapper(func() { fmt.Println("Mapper completed"); mwg.Done() }) - fmt.Println("Starting local hashing go routine") - go func() { - fmt.Println("Hashing covers at ", coverPath) - start := time.Now() - err := filepath.WalkDir(coverPath, func(path string, d fs.DirEntry, err error) error { - select { - case signal := <-sig: - server.httpServer.Shutdown(context.TODO()) - return fmt.Errorf("signal: %v", signal) - default: + if opts.loadEmbeddedHashes && len(ch.Hashes) != 0 { + var err error + hashes := ch.Hashes + if gr, err := gzip.NewReader(bytes.NewReader(ch.Hashes)); err == nil { + hashes, err = io.ReadAll(gr) + if err != nil { + panic(fmt.Sprintf("Failed to read embedded hashes: %s", err)) } - if d.IsDir() || !strings.Contains(path, "thumb") { - return nil - } - fmt.Println(len(server.readerQueue)) - server.readerQueue <- path - return nil - }) - elapsed := time.Since(start) - fmt.Println("Err:", err, "local hashing took", elapsed) + } - s := <-sig - err = server.httpServer.Shutdown(context.TODO()) - log.Printf("Signal: %v, error: %s", s, err) - }() + var format Format + for _, format = range []Format{Msgpack, JSON} { + if err = server.DecodeHashes(format, hashes); err == nil { + break + } + } + if err != nil { + panic(fmt.Sprintf("Failed to decode embedded hashes: %s", err)) + } + fmt.Printf("Loaded embedded %s hashes ahashes: %d dhashes: %d phashes: %d\n", format, len(server.FullAhash), len(server.FullDhash), len(server.FullPhash)) + } else { + if f, err := os.Open(opts.hashesPath); err == nil { + var buf io.Reader = f + if gr, err := gzip.NewReader(buf); err == nil { + buf = bufio.NewReader(gr) + } else { + _, _ = f.Seek(0, io.SeekStart) + } + hashes, err := io.ReadAll(buf) + f.Close() + if err != nil { + panic(fmt.Sprintf("Failed to load hashes from disk: %s", err)) + 
} + + var format Format + for _, format = range []Format{Msgpack, JSON} { + if err = server.DecodeHashes(format, hashes); err == nil { + break + } + } + + if err != nil { + panic(fmt.Sprintf("Failed to decode hashes from disk: %s", err)) + } + fmt.Printf("Loaded hashes from %q %s hashes ahashes: %d dhashes: %d phashes: %d\n", opts.hashesPath, format, len(server.FullAhash), len(server.FullDhash), len(server.FullPhash)) + } else { + if errors.Is(err, os.ErrNotExist) { + fmt.Println("No saved hashes to load") + } else { + fmt.Println("Unable to load saved hashes", err) + } + } + } + + server.HashLocalImages(opts) fmt.Println("Listening on ", server.httpServer.Addr) err := server.httpServer.ListenAndServe() if err != nil { fmt.Println(err) } - f, er := os.Create("memprofile") - if er != nil { - fmt.Println("Error in creating file for writing memory profile to: ", er) - return + close(server.readerQueue) + fmt.Println("waiting on readers") + rwg.Wait() + for range server.readerQueue { } - defer f.Close() - runtime.GC() - if e := pprof.WriteHeapProfile(f); e != nil { - fmt.Println("Error in writing memory profile: ", e) - return + close(server.hashingQueue) + fmt.Println("waiting on hashers") + hwg.Wait() + for range server.hashingQueue { + } + close(server.mappingQueue) + fmt.Println("waiting on mapper") + mwg.Wait() + for range server.mappingQueue { + } + close(server.signalQueue) + for range server.signalQueue { + } + + if !opts.loadEmbeddedHashes || opts.saveEmbeddedHashes { + encodedHashes, err := server.EncodeHashes(opts.format) + if err == nil { + if f, err := os.Create(opts.hashesPath); err == nil { + gzw := gzip.NewWriter(f) + _, err := gzw.Write(encodedHashes) + if err != nil { + fmt.Println("Failed to write hashes", err) + } else { + fmt.Println("Successfully saved hashes") + } + gzw.Close() + f.Close() + } else { + fmt.Println("Unabled to save hashes", err) + } + } else { + fmt.Printf("Unable to encode hashes as %v: %v", opts.format, err) + } } } diff --git 
a/cmd/comic-hasher/main_not_unix.go b/cmd/comic-hasher/main_not_unix.go new file mode 100644 index 0000000..d9bd43b --- /dev/null +++ b/cmd/comic-hasher/main_not_unix.go @@ -0,0 +1,12 @@ +//go:build !unix + +package main + +import ( + "os" + "os/signal" +) + +func Notify(sig chan os.Signal) { + signal.Notify(sig, os.Interrupt, os.Kill) +} diff --git a/cmd/comic-hasher/main_unix.go b/cmd/comic-hasher/main_unix.go new file mode 100644 index 0000000..dc041f1 --- /dev/null +++ b/cmd/comic-hasher/main_unix.go @@ -0,0 +1,13 @@ +//go:build unix + +package main + +import ( + "os" + "os/signal" + "syscall" +) + +func Notify(sig chan os.Signal) { + signal.Notify(sig, os.Interrupt, syscall.SIGABRT, syscall.SIGQUIT, syscall.SIGTERM) +} diff --git a/cmd/quick_tag.py b/cmd/quick_tag.py index 54f5acb..7b01539 100644 --- a/cmd/quick_tag.py +++ b/cmd/quick_tag.py @@ -2,10 +2,14 @@ from __future__ import annotations import argparse import itertools +import json import logging import pathlib from datetime import datetime +from enum import auto from io import BytesIO +from typing import Any +from typing import cast from typing import TypedDict from urllib.parse import urljoin @@ -19,7 +23,15 @@ from comicapi import merge from comicapi import utils from comicapi.genericmetadata import GenericMetadata from comicapi.issuestring import IssueString +from comictalker.comiccacher import ComicCacher +from comictalker.comiccacher import Issue +from comictalker.comiccacher import Series +from comictalker.comictalker import ComicSeries from comictalker.talker_utils import cleanup_html +from comictalker.talkers.comicvine import ComicVineTalker +from comictalker.talkers.comicvine import CVIssue +from comictalker.talkers.comicvine import CVResult +from comictalker.talkers.comicvine import CVSeries from PIL import Image logger = logging.getLogger('quick_tag') @@ -27,12 +39,169 @@ logger = logging.getLogger('quick_tag') __version__ = '0.1' +class CV(ComicVineTalker): + def fetch_comics(self, *, 
issue_ids: list[str]) -> list[GenericMetadata]: + # before we search online, look in our cache, since we might already have this info + cvc = ComicCacher(self.cache_folder, self.version) + cached_results: list[GenericMetadata] = [] + needed_issues: list[int] = [] + for issue_id in issue_ids: + cached_issue = cvc.get_issue_info(issue_id, self.id) + + if cached_issue and cached_issue[1]: + cached_results.append( + self._map_comic_issue_to_metadata( + json.loads(cached_issue[0].data), self._fetch_series([int(cached_issue[0].series_id)])[0][0], + ), + ) + else: + needed_issues.append(int(issue_id)) # CV uses integers for it's IDs + + if not needed_issues: + return cached_results + issue_filter = "" + for iid in needed_issues: + issue_filter += str(iid) + "|" + flt = "id:" + issue_filter.rstrip('|') + + issue_url = urljoin(self.api_url, "issues/") + params: dict[str, Any] = { + "api_key": self.api_key, + "format": "json", + "filter": flt, + } + cv_response: CVResult[list[CVIssue]] = self._get_cv_content(issue_url, params) + + issue_results = cv_response["results"] + page = 1 + offset = 0 + current_result_count = cv_response["number_of_page_results"] + total_result_count = cv_response["number_of_total_results"] + + # see if we need to keep asking for more pages... 
+ while current_result_count < total_result_count: + page += 1 + offset += cv_response["number_of_page_results"] + + params["offset"] = offset + cv_response = self._get_cv_content(issue_url, params) + + issue_results.extend(cv_response["results"]) + current_result_count += cv_response["number_of_page_results"] + + series_info = {s[0].id: s[0] for s in self._fetch_series([int(i["volume"]["id"]) for i in issue_results])} + + for issue in issue_results: + cvc.add_issues_info( + self.id, + [ + Issue( + id=str(issue["id"]), + series_id=str(issue["volume"]["id"]), + data=json.dumps(issue).encode("utf-8"), + ), + ], + True, + ) + cached_results.append( + self._map_comic_issue_to_metadata(issue, series_info[str(issue["volume"]["id"])]), + ) + + return cached_results + + def _fetch_series(self, series_ids: list[int]) -> list[tuple[ComicSeries, bool]]: + # before we search online, look in our cache, since we might already have this info + cvc = ComicCacher(self.cache_folder, self.version) + cached_results: list[tuple[ComicSeries, bool]] = [] + needed_series: list[int] = [] + for series_id in series_ids: + cached_series = cvc.get_series_info(str(series_id), self.id) + if cached_series is not None: + cached_results.append((self._format_series(json.loads(cached_series[0].data)), cached_series[1])) + else: + needed_series.append(series_id) + + if needed_series == []: + return cached_results + + series_filter = "" + for vid in needed_series: + series_filter += str(vid) + "|" + flt = "id:" + series_filter.rstrip('|') # CV uses volume to mean series + + series_url = urljoin(self.api_url, "volumes/") # CV uses volume to mean series + params: dict[str, Any] = { + "api_key": self.api_key, + "format": "json", + "filter": flt, + } + cv_response: CVResult[list[CVSeries]] = self._get_cv_content(series_url, params) + + series_results = cv_response["results"] + page = 1 + offset = 0 + current_result_count = cv_response["number_of_page_results"] + total_result_count = 
cv_response["number_of_total_results"] + + # see if we need to keep asking for more pages... + while current_result_count < total_result_count: + page += 1 + offset += cv_response["number_of_page_results"] + + params["offset"] = offset + cv_response = self._get_cv_content(series_url, params) + + series_results.extend(cv_response["results"]) + current_result_count += cv_response["number_of_page_results"] + + if series_results: + for series in series_results: + cvc.add_series_info( + self.id, Series(id=str(series["id"]), data=json.dumps(series).encode("utf-8")), True, + ) + cached_results.append((self._format_series(series), True)) + + return cached_results + + +class HashType(utils.StrEnum): + AHASH = auto() + DHASH = auto() + PHASH = auto() + + class SimpleResult(TypedDict): Distance: int # Mapping of domains (eg comicvine.gamespot.com) to IDs IDList: dict[str, list[str]] +class Hash(TypedDict): + Hash: int + Kind: str + + +class Result(TypedDict): + # Mapping of domains (eg comicvine.gamespot.com) to IDs + IDList: dict[str, list[str]] + Distance: int + Hash: Hash + + +def ihash(types: str) -> list[str]: + result = [] + types = types.casefold() + choices = ", ".join(HashType) + for typ in utils.split(types, ","): + if typ not in list(HashType): + raise argparse.ArgumentTypeError(f"invalid choice: {typ} (choose from {choices.upper()})") + result.append(HashType[typ.upper()]) + + if not result: + raise argparse.ArgumentTypeError(f"invalid choice: {types} (choose from {choices.upper()})") + return result + + def settings(manager: settngs.Manager): manager.add_setting( '--url', '-u', default='https://comic-hasher.narnian.us', @@ -43,7 +212,7 @@ def settings(manager: settngs.Manager): help='Maximum score to allow. 
Lower score means more accurate', ) manager.add_setting( - '--simple', '-s', default=True, action=argparse.BooleanOptionalAction, + '--simple', '-s', default=False, action=argparse.BooleanOptionalAction, help='Whether to retrieve simple results or full results', ) manager.add_setting( @@ -54,11 +223,19 @@ def settings(manager: settngs.Manager): '--aggressive-filtering', '-a', default=False, action=argparse.BooleanOptionalAction, help='Will filter out worse matches if better matches are found', ) + manager.add_setting( + '--hash', default=['ahash', 'dhash', 'phash'], type=ihash, + help='Pick what hashes you want to use to search', + ) + manager.add_setting( + '--skip-non-exact', default=True, action=argparse.BooleanOptionalAction, + help='Skip non-exact matches if we have exact matches', + ) manager.add_setting('--cv-api-key', '-c') manager.add_setting('comic_archive', type=pathlib.Path) -def SearchHashes(url: str, simple: bool, max: int, ahash: str, dhash: str, phash: str) -> list[SimpleResult]: +def SearchHashes(url: str, simple: bool, max: int, ahash: str, dhash: str, phash: str, skip_non_exact: bool) -> list[SimpleResult] | list[Result]: resp = requests.get( urljoin(url, '/match_cover_hash'), { @@ -67,49 +244,116 @@ def SearchHashes(url: str, simple: bool, max: int, ahash: str, dhash: str, phash 'ahash': ahash, 'dhash': dhash, 'phash': phash, + 'skipNonExact': skip_non_exact, }, ) if resp.status_code != 200: - logger.error('bad response from server: %s', resp.text) + try: + text = resp.json()['msg'] + except Exception: + text = resp.text + logger.error('message from server: %s', text) raise SystemExit(3) - return resp.json() + return resp.json()['results'] def get_simple_results(results: list[SimpleResult], cv_api_key: str | None = None) -> list[tuple[int, GenericMetadata]]: - from comictalker.talkers.comicvine import ComicVineTalker cache_dir = pathlib.Path(appdirs.user_cache_dir('quick_tag')) cache_dir.mkdir(parents=True, exist_ok=True) - cv = 
ComicVineTalker(f"quick_tag/{__version__}", cache_dir) + cv = CV(f"quick_tag/{__version__}", cache_dir) cv.parse_settings({ 'comicvine_key': cv_api_key, 'cv_use_series_start_as_volume': True, }) md_results: list[tuple[int, GenericMetadata]] = [] results.sort(key=lambda r: r['Distance']) - for result in results: - for cv_id in result['IDList']['comicvine.gamespot.com']: - for md in cv.fetch_comics(issue_ids=result['IDList']['comicvine.gamespot.com']): - md_results.append((result['Distance'], md)) + all_cv_ids = set() + for res in results: + all_cv_ids.update(res['IDList']['comicvine.gamespot.com']) + # Do a bulk feth of basic issue data + mds = cv.fetch_comics(issue_ids=list(all_cv_ids)) + + # Re-associate the md to the distance + for res in results: + for md in mds: + if md.issue_id in res['IDList']['comicvine.gamespot.com']: + md_results.append((res['Distance'], md)) + return md_results + + +def get_results(results: list[Result], cv_api_key: str | None = None) -> list[tuple[int, Hash, GenericMetadata]]: + cache_dir = pathlib.Path(appdirs.user_cache_dir('quick_tag')) + cache_dir.mkdir(parents=True, exist_ok=True) + cv = CV(f"quick_tag/{__version__}", cache_dir) + cv.parse_settings({ + 'comicvine_key': cv_api_key, + 'cv_use_series_start_as_volume': True, + }) + md_results: list[tuple[int, Hash, GenericMetadata]] = [] + results.sort(key=lambda r: r['Distance']) + all_cv_ids = set() + for res in results: + all_cv_ids.update(res['IDList']['comicvine.gamespot.com']) + # Do a bulk feth of basic issue data + mds = cv.fetch_comics(issue_ids=list(all_cv_ids)) + + # Re-associate the md to the distance + for res in results: + for md in mds: + if md.issue_id in res['IDList']['comicvine.gamespot.com']: + md_results.append((res['Distance'], res['Hash'], md)) return md_results def filter_simple_results(results: list[SimpleResult], force_interactive=True, aggressive_filtering=False) -> list[SimpleResult]: if not force_interactive: + # If there is a single exact match return it 
exact = [r for r in results if r['Distance'] == 0] if len(exact) == 1: return exact - if len(results) > 4: - dist: list[tuple[int, list[SimpleResult]]] = [] - filtered_results: list[SimpleResult] = [] - for distance, group in itertools.groupby(results, key=lambda r: r['Distance']): + + # If ther are more than 4 results and any are better than 6 return the first group of results + if len(results) > 4: + dist: list[tuple[int, list[SimpleResult]]] = [] + filtered_results: list[SimpleResult] = [] + for distance, group in itertools.groupby(results, key=lambda r: r['Distance']): + dist.append((distance, list(group))) + if aggressive_filtering and dist[0][0] < 6: + for _, res in dist[:1]: + filtered_results.extend(res) + return filtered_results + + return results + + +def filter_results(results: list[Result], force_interactive=True, aggressive_filtering=False) -> list[Result]: + ahash_results = sorted([r for r in results if r['Hash']['Kind'] == 'ahash'], key=lambda r: r['Distance']) + dhash_results = sorted([r for r in results if r['Hash']['Kind'] == 'dhash'], key=lambda r: r['Distance']) + phash_results = sorted([r for r in results if r['Hash']['Kind'] == 'phash'], key=lambda r: r['Distance']) + hash_results = [phash_results, dhash_results, ahash_results] + if not force_interactive: + # If any of the hash types have a single exact match return it. 
Prefer phash for no particular reason + for hashed_results in (phash_results, dhash_results, ahash_results): + exact = [r for r in hashed_results if r['Distance'] == 0] + if len(exact) == 1: + return exact + + # If any of the hash types have more than 4 results and they have results better than 6 return the first group of results for each hash type + for i, hashed_results in enumerate(hash_results): + filtered_results: list[Result] = [] + if len(hashed_results) > 4: + dist: list[tuple[int, list[Result]]] = [] + for distance, group in itertools.groupby(hashed_results, key=lambda r: r['Distance']): dist.append((distance, list(group))) + if aggressive_filtering and dist[0][0] < 6: for _, res in dist[:1]: filtered_results.extend(res) - return filtered_results + if filtered_results: + hash_results[i] = filtered_results - return results + return list(itertools.chain(*hash_results)) def display_simple_results(md_results: list[tuple[int, GenericMetadata]], ca: comictaggerlib.cli.ComicArchive, force_interactive=True) -> GenericMetadata: @@ -132,17 +376,19 @@ def display_simple_results(md_results: list[tuple[int, GenericMetadata]], ca: co series_match.append(md) if len(series_match) == 1: return series_match[0] + + md_results.sort(key=lambda r: (r[0], len(r[1].publisher or ''))) for counter, r in enumerate(md_results, 1): print( - ' {}. {} #{} [{}] ({}/{}) - {} score: {}'.format( + ' {:2}. 
score: {} [{:15}] ({:02}/{:04}) - {} #{} - {}'.format( counter, + r[0], + r[1].publisher, + r[1].month or 0, + r[1].year or 0, r[1].series, r[1].issue, - r[1].publisher, - r[1].month, - r[1].year, r[1].title, - r[0], ), ) while True: @@ -158,6 +404,66 @@ def display_simple_results(md_results: list[tuple[int, GenericMetadata]], ca: co return md_results[int(i) - 1][1] +def display_results(md_results: list[tuple[int, Hash, GenericMetadata]], ca: comictaggerlib.cli.ComicArchive, force_interactive=True) -> GenericMetadata: + filename_md = ca.metadata_from_filename(utils.Parser.COMICFN2DICT) + if len(md_results) < 1: + logger.warning('No results found for comic') + raise SystemExit(4) + if not force_interactive: + if len(md_results) == 1 and md_results[0][0] <= 4: + return md_results[0][2] + series_match = [] + for score, hash, md in md_results: + if ( + score < 10 + and filename_md.series + and md.series + and utils.titles_match(filename_md.series, md.series) + and IssueString(filename_md.issue).as_string() == IssueString(md.issue).as_string() + ): + series_match.append(md) + if len(series_match) == 1: + return series_match[0] + md_results.sort(key=lambda r: (r[0], len(r[2].publisher or ''), r[1]["Kind"])) + for counter, r in enumerate(md_results, 1): + print( + ' {:2}. 
score: {} {}: {:064b} [{:15}] ({:02}/{:04}) - {} #{} - {}'.format( + counter, + r[0], + r[1]["Kind"], + r[1]["Hash"], + r[2].publisher, + r[2].month or 0, + r[2].year or 0, + r[2].series, + r[2].issue, + r[2].title, + ), + ) + while True: + i = input( + f'Please select a result to tag the comic with or "q" to quit: [1-{len(md_results)}] ', + ).casefold() + if (i.isdigit() and int(i) in range(1, len(md_results) + 1)): + break + if i == 'q': + logger.warning('User quit without saving metadata') + raise SystemExit(4) + + return md_results[int(i) - 1][2] + + +def fetch_full_issue_data(md: GenericMetadata, cv_api_key: str | None = None) -> GenericMetadata: + cache_dir = pathlib.Path(appdirs.user_cache_dir('quick_tag')) + cache_dir.mkdir(parents=True, exist_ok=True) + cv = CV(f"quick_tag/{__version__}", cache_dir) + cv.parse_settings({ + 'comicvine_key': cv_api_key, + 'cv_use_series_start_as_volume': True, + }) + return cv.fetch_comic_data(issue_id=md.issue_id) + + def prepare_metadata(md: GenericMetadata, new_md: GenericMetadata, clear_tags: bool, auto_imprint: bool, remove_html_tables: bool) -> GenericMetadata: final_md = md.copy() @@ -190,12 +496,9 @@ def main(): manager.add_group('runtime', settings) opts, _ = manager.parse_cmdline() url: utils.Url = opts['runtime']['url'] - print(url) max_hamming_distance: int = opts['runtime']['max'] simple: bool = opts['runtime']['simple'] - if not simple: - logger.error('Full results not implemented yet') - raise SystemExit(1) + ca = comicarchive.ComicArchive(opts['runtime']['comic_archive']) if not ca.seems_to_be_a_comic_archive(): logger.error('Could not open %s as an archive', ca.path) @@ -211,23 +514,30 @@ def main(): print('Tagging: ', ca.path) print("hashing cover") - ahash = imagehash.average_hash(cover_image) - dhash = imagehash.dhash(cover_image) - phash = imagehash.phash(cover_image) + phash = dhash = ahash = '' + if HashType.AHASH in opts['runtime']['hash']: + ahash = imagehash.average_hash(cover_image) + if 
HashType.DHASH in opts['runtime']['hash']: + dhash = imagehash.dhash(cover_image) + if HashType.PHASH in opts['runtime']['hash']: + phash = imagehash.phash(cover_image) print("Searching hashes") - results = SearchHashes(url.url, simple, max_hamming_distance, str(ahash), str(dhash), str(phash)) + results = SearchHashes(url.url, simple, max_hamming_distance, str(ahash), str(dhash), str(phash), opts['runtime']['skip_non_exact']) - print("Retrieving ComicVine data") + print("Retrieving basic ComicVine data") if simple: - filtered_results = filter_simple_results(results, opts['runtime']['force_interactive'], opts['runtime']['aggressive_filtering']) + filtered_results = filter_simple_results(cast(list[SimpleResult], results), opts['runtime']['force_interactive'], opts['runtime']['aggressive_filtering']) metadata_results = get_simple_results(filtered_results, opts['runtime']['cv_api_key']) chosen_result = display_simple_results(metadata_results, ca, opts['runtime']['force_interactive']) else: - metadata_results = get_full_results(results) - chosen_result = display_full_results(metadata_results) + filtered_results = filter_results(cast(list[Result], results), opts['runtime']['force_interactive'], opts['runtime']['aggressive_filtering']) + metadata_results = get_results(filtered_results, opts['runtime']['cv_api_key']) + chosen_result = display_results(metadata_results, ca, opts['runtime']['force_interactive']) - if ca.write_tags(prepare_metadata(GenericMetadata(), chosen_result, clear_tags=False, auto_imprint=True, remove_html_tables=True), 'cr'): + full_cv_md = fetch_full_issue_data(chosen_result, opts['runtime']['cv_api_key']) + + if ca.write_tags(prepare_metadata(tags, full_cv_md, clear_tags=False, auto_imprint=True, remove_html_tables=True), 'cr'): print(f'successfully saved metadata to {ca.path}') raise SystemExit(0) logger.error('Failed to save metadata to %s', ca.path) diff --git a/go.mod b/go.mod index e443af2..d035163 100644 --- a/go.mod +++ b/go.mod @@ -5,11 +5,17 
@@ go 1.22.1 toolchain go1.22.2 require ( - gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240502010648-cb5a8237c420 + gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00 github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09 github.com/fmartingr/go-comicinfo/v2 v2.0.2 github.com/mholt/archiver/v4 v4.0.0-alpha.8 - golang.org/x/image v0.7.0 + golang.org/x/image v0.19.0 + golang.org/x/text v0.17.0 +) + +require ( + github.com/vmihailenco/msgpack/v5 v5.4.1 + github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect ) require ( @@ -30,11 +36,7 @@ require ( github.com/therootcompany/xz v1.0.1 // indirect github.com/ulikunitz/xz v0.5.10 // indirect go4.org v0.0.0-20200411211856-f5505b9728dd // indirect - golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f // indirect + golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect ) -require golang.org/x/text v0.14.0 - -replace golang.org/x/text v0.14.0 => /home/timmy/build/source/text/ - -replace gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240502010648-cb5a8237c420 => ../goimagehash +replace golang.org/x/text v0.17.0 => github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f diff --git a/go.sum b/go.sum index 0921230..46c0491 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+ cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00 h1:RNqy72W8N/mlnZGxvPoC9ch+zI3GlAGVYbBGpXOHmuY= +gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00/go.mod h1:kLCabSskchnLGV41s6YVXZdnLYwAxKwdXPlEuyFhC9E= github.com/BurntSushi/toml v0.3.1/go.mod 
h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= @@ -95,6 +97,8 @@ github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f h1:RMKTfrT4gjJfmB/aWuvCcFxUSvWAJfOAc5khGL6ASjk= +github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= github.com/mholt/archiver/v4 v4.0.0-alpha.8 h1:tRGQuDVPh66WCOelqe6LIGh0gwmfwxUrSSDunscGsRM= github.com/mholt/archiver/v4 v4.0.0-alpha.8/go.mod h1:5f7FUYGXdJWUjESffJaYR4R60VhnHxb2X3T1teMyv5A= github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= @@ -116,8 +120,11 @@ github.com/therootcompany/xz v1.0.1/go.mod h1:3K3UH1yCKgBneZYhuQUvJ9HPD19UEXEI0B github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8= github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= +github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= +github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -github.com/yuin/goldmark v1.4.13/go.mod 
h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= @@ -128,7 +135,6 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -137,13 +143,13 @@ golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= -golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f h1:99ci1mjWVBWwJiEKYY6jWa4d2nTQVIEhZIptnrVb1XY= -golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f/go.mod h1:/lliqkxwWAhPjf5oSOIJup2XcqJaw8RGS6k3TGEc7GI= +golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa h1:ELnwvuAXPNtPk1TJRuGkI9fDTwym6AYBu0qzT8AcHdI= +golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= 
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= -golang.org/x/image v0.7.0 h1:gzS29xtG1J5ybQlv0PuyfE3nmc6R4qB73m6LUUmvFuw= -golang.org/x/image v0.7.0/go.mod h1:nd/q4ef1AKKYl/4kft7g+6UyGbdiqWqTP1ZAbRoV7Rg= +golang.org/x/image v0.19.0 h1:D9FX4QWkLfkeqaC62SonffIIuYdOk/UE2XKUBgRIBIQ= +golang.org/x/image v0.19.0/go.mod h1:y0zrRqlQRWQ5PXaYCOMLTW2fpsxZ8Qh9I/ohnInJEys= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -160,8 +166,6 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -176,10 +180,7 @@ golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net 
v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -192,8 +193,6 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -210,21 +209,12 @@ 
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -252,8 +242,6 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/hashes.gz b/hashes.gz new file mode 100644 index 0000000..a596e4a Binary files /dev/null and b/hashes.gz differ diff --git a/main.go b/hashing.go similarity index 95% rename from main.go rename to hashing.go index b2dd8dd..de74e42 100644 --- a/main.go +++ b/hashing.go @@ -2,6 +2,7 @@ package ch import ( "cmp" + _ "embed" "fmt" "image" "log" @@ -12,6 +13,9 @@ import ( "gitea.narnian.us/lordwelch/goimagehash" ) +//go:embed hashes.gz +var Hashes []byte + const ( H0 uint64 = 0b11111111 << (8 * iota) H1 @@ -45,8 +49,12 @@ type Match struct { Hash uint64 } +type ID struct { + Domain, ID string +} + type Result struct { - IDs IDList + IDs []string // domain:id Distance int Hash ImageHash } diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..22cc467 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]>=3.4"] +build-backend 
= "setuptools.build_meta" + +[tool.setuptools_scm] +local_scheme = "no-local-version" diff --git a/setup.cfg b/setup.cfg index 0ddbeb4..1b9455e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,14 +16,16 @@ classifiers = Programming Language :: Python :: Implementation :: PyPy [options] -packages = find: +py_modules = quick_tag install_requires = - comictagger>=1.6.0a21 + comictagger==1.6.0a20 + imagehash python_requires = >=3.9 -include_package_data = True +package_dir = + =cmd -[options.package_data] -settngs = py.typed +[options.entry_points] +console_scripts = quick-tag=quick_tag:main [pep8] ignore = E265,E501