Compare commits

...

5 Commits

SHA1 Message Date
aa7b613f4b Add an option to return simple results 2024-08-05 13:54:00 -07:00
c078c60f29 static check fixes 2024-08-04 18:26:43 -07:00
f6631a01a2 more stuff 2024-08-04 18:12:00 -07:00
921019b0d4 stuff 2024-07-31 11:35:17 -07:00
04f3ae1e64 add output from inital image hash comparison 2024-05-10 14:12:39 -07:00
11 changed files with 1253 additions and 598 deletions

537
cmd/comic-hasher/main.go Normal file

@ -0,0 +1,537 @@
package main
import (
"bufio"
"cmp"
"context"
"encoding/json"
"flag"
"fmt"
"image"
_ "image/gif"
_ "image/jpeg"
_ "image/png"
"io/fs"
"log"
"net/http"
_ "net/http/pprof"
"net/url"
"os"
"os/signal"
"path/filepath"
"runtime"
"runtime/pprof"
"slices"
"strconv"
"strings"
"time"
"github.com/disintegration/imaging"
_ "golang.org/x/image/tiff"
_ "golang.org/x/image/vp8"
_ "golang.org/x/image/vp8l"
_ "golang.org/x/image/webp"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
// "github.com/google/uuid"
// "github.com/zitadel/oidc/pkg/client/rp"
// httphelper "github.com/zitadel/oidc/pkg/http"
// "github.com/zitadel/oidc/pkg/oidc"
)
type Server struct {
httpServer *http.Server
mux *http.ServeMux
BaseURL *url.URL
// token chan<- *oidc.Tokens
PartialAhash [8]map[uint8][]uint64 // Maps partial hashes to their potential full hashes
PartialDhash [8]map[uint8][]uint64 // Maps partial hashes to their potential full hashes
PartialPhash [8]map[uint8][]uint64 // Maps partial hashes to their potential full hashes
FullAhash map[uint64]ch.IDList // Maps ahash's to lists of ID's
FullDhash map[uint64]ch.IDList // Maps dhash's to lists of ID's
FullPhash map[uint64]ch.IDList // Maps phash's to lists of ID's
// IDToCover map[string]string // IDToCover is a map of domain:ID to an index to covers eg IDToCover['comicvine.gamespot.com:12345'] = 0
// covers []ch.Cover
readerQueue chan string
hashingQueue chan ch.Im
mappingQueue chan ch.Hash
// hashes are a uint64 split into 8 pieces or a full uint64 for quick lookup; the value is an index into covers
}
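
The struct comments above describe a two-level index: FullAhash/FullDhash/FullPhash map an exact 64-bit hash to the IDs that produced it, while PartialAhash[i] maps the i-th byte of a hash to every full hash containing that byte, so near-misses can be found without scanning everything. A minimal sketch of the byte-splitting step, assuming ch.SplitHash works roughly like this (the real helper may differ in details such as byte order):

package main

import "fmt"

// splitHash breaks a 64-bit hash into its 8 bytes, most significant byte first.
func splitHash(h uint64) [8]uint8 {
    var parts [8]uint8
    for i := 0; i < 8; i++ {
        parts[i] = uint8(h >> (56 - 8*i))
    }
    return parts
}

func main() {
    h := uint64(0xffd4c19f0f0f0303) // placeholder hash value
    // Each byte is a key into PartialAhash[i].
    fmt.Printf("%016x -> %v\n", h, splitHash(h))
}

Because two hashes within Hamming distance 7 must, by pigeonhole, agree on at least one of their 8 bytes, probing the 8 partial maps is enough to surface every close candidate.
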
// var key = []byte(uuid.New().String())[:16]
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
func main() {
go func() {
log.Println(http.ListenAndServe("localhost:6060", nil))
}()
// mustDropPrivileges()
coverPath := flag.String("cover_path", "", "path to covers to add to hash database")
flag.Parse()
if *coverPath == "" {
log.Fatal("You must supply a path")
}
st, err := os.Stat(*coverPath)
if err != nil {
panic(err)
}
fmt.Println(st)
startServer(*coverPath)
}
func (s *Server) authenticated(w http.ResponseWriter, r *http.Request) (string, bool) {
return strings.TrimSpace("lordwelch"), true
}
// func (s *Server) setupOauthHandlers() error {
// redirectURI := *s.BaseURL
// redirectURI.Path = "/oauth/callback"
// successURI := *s.BaseURL
// successURI.Path = "/success"
// failURI := *s.BaseURL
// failURI.RawQuery = url.Values{"auth": []string{"fail"}}.Encode()
// cookieHandler := httphelper.NewCookieHandler(key, key, httphelper.WithUnsecure())
// options := []rp.Option{
// rp.WithCookieHandler(cookieHandler),
// rp.WithVerifierOpts(rp.WithIssuedAtOffset(5 * time.Second)),
// }
// provider, err := rp.NewRelyingPartyOIDC(os.Getenv("COMICHASHER_PROVIDER_URL"), os.Getenv("COMICHASHER_CLIENT_ID"), os.Getenv("COMICHASHER_CLIENT_SECRET"), redirectURI.String(), strings.Split(os.Getenv("COMICHASHER_SCOPES"), ","), options...)
// if err != nil {
// return fmt.Errorf("error creating provider: %w", err)
// }
// // generate some state (representing the state of the user in your application,
// // e.g. the page where he was before sending him to login
// state := func() string {
// return uuid.New().String()
// }
// // register the AuthURLHandler at your preferred path
// // the AuthURLHandler creates the auth request and redirects the user to the auth server
// // including state handling with secure cookie and the possibility to use PKCE
// s.mux.Handle("/login", rp.AuthURLHandler(state, provider))
// // for demonstration purposes the returned userinfo response is written as JSON object onto response
// marshalUserinfo := func(w http.ResponseWriter, r *http.Request, tokens *oidc.Tokens, state string, rp rp.RelyingParty) {
// s.token <- tokens
// w.Header().Add("location", successURI.String())
// w.WriteHeader(301)
// }
// // register the CodeExchangeHandler at the callbackPath
// // the CodeExchangeHandler handles the auth response, creates the token request and calls the callback function
// // with the returned tokens from the token endpoint
// s.mux.Handle(redirectURI.Path, rp.CodeExchangeHandler(marshalUserinfo, provider))
// return nil
// }
func (s *Server) setupAppHandlers() {
// s.mux.HandleFunc("/add_cover", s.addCover)
// s.mux.HandleFunc("/get_cover", s.getCover)
s.mux.HandleFunc("/match_cover_hash", s.matchCoverHash)
}
func (s *Server) getCover(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
)
if ID == "" {
log.Println("No ID Provided")
http.Error(w, "No ID Provided", http.StatusBadRequest)
return
}
if domain == "" {
log.Println("No domain Provided")
http.Error(w, "No domain Provided", http.StatusBadRequest)
return
}
// if index, ok := s.IDToCover[domain+":"+ID]; ok {
// covers, err := json.Marshal(s.covers[index])
// if err == nil {
// w.Header().Add("Content-Type", "application/json")
// w.Write(covers)
// return
// }
// }
fmt.Fprintln(w, "Not implemented")
}
func (s *Server) getMatches(ahash, dhash, phash uint64) []ch.Result {
var foundMatches []ch.Result
if matchedResults, ok := s.FullAhash[ahash]; ok {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: ahash, Kind: goimagehash.AHash}})
}
if matchedResults, ok := s.FullDhash[dhash]; ok {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: dhash, Kind: goimagehash.DHash}})
}
if matchedResults, ok := s.FullPhash[phash]; ok {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: 0, Hash: ch.ImageHash{Hash: phash, Kind: goimagehash.PHash}})
}
// If we have exact matches don't bother with other matches
if len(foundMatches) > 0 {
return foundMatches
}
for i, partialHash := range ch.SplitHash(ahash) {
for _, match := range ch.Atleast(8, ahash, s.PartialAhash[i][partialHash]) {
if matchedResults, ok := s.FullAhash[match.Hash]; ok {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.AHash}})
}
}
}
for i, partialHash := range ch.SplitHash(dhash) {
for _, match := range ch.Atleast(8, dhash, s.PartialDhash[i][partialHash]) {
if matchedResults, ok := s.FullDhash[match.Hash]; ok {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.DHash}})
}
}
}
for i, partialHash := range ch.SplitHash(phash) {
for _, match := range ch.Atleast(8, phash, s.PartialPhash[i][partialHash]) {
if matchedResults, ok := s.FullPhash[match.Hash]; ok {
foundMatches = append(foundMatches, ch.Result{IDs: matchedResults, Distance: match.Distance, Hash: ch.ImageHash{Hash: match.Hash, Kind: goimagehash.PHash}})
}
}
}
return foundMatches
}
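
getMatches returns exact hits from the Full maps when it can, and only falls back to the partial-hash buckets otherwise, filtering candidates with ch.Atleast. A hedged sketch of such a distance filter, assuming Atleast keeps candidates whose Hamming distance from the search hash is at most the given limit (the real signature and result type may differ):

package main

import (
    "fmt"
    "math/bits"
)

// match mirrors what a ch.Atleast result presumably carries: the candidate
// hash and its Hamming distance from the search hash.
type match struct {
    Distance int
    Hash     uint64
}

// withinDistance keeps candidates no further than maxDistance bits away.
func withinDistance(maxDistance int, search uint64, candidates []uint64) []match {
    var out []match
    for _, c := range candidates {
        if d := bits.OnesCount64(search ^ c); d <= maxDistance {
            out = append(out, match{Distance: d, Hash: c})
        }
    }
    return out
}

func main() {
    // First candidate differs by 1 bit and is kept; second differs by 64 bits and is dropped.
    fmt.Println(withinDistance(8, 0xf0f0f0f0f0f0f0f0, []uint64{0xf0f0f0f0f0f0f0f1, 0x0f0f0f0f0f0f0f0f}))
}
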
type SimpleResult struct {
Distance int
IDList ch.IDList
}
func getSimpleResults(fullResults []ch.Result) []SimpleResult {
simpleMap := make(map[string]int, len(fullResults))
slices.SortFunc(fullResults, func(a, b ch.Result) int {
return cmp.Compare(a.Distance, b.Distance)
})
for _, fullResult := range fullResults {
for _, id := range fullResult.IDs[ch.ComicVine] {
simpleDistance, ok := simpleMap[id]
if !ok {
simpleDistance = 99
}
if simpleDistance > fullResult.Distance {
simpleMap[id] = fullResult.Distance
}
}
}
simpleList := make([]SimpleResult, 0, len(simpleMap))
distanceMap := make(map[int][]string)
for id, distance := range simpleMap {
distanceMap[distance] = ch.Insert(distanceMap[distance], id)
}
for distance, idlist := range distanceMap {
simpleList = append(simpleList, SimpleResult{
Distance: distance,
IDList: ch.IDList{ch.ComicVine: idlist},
})
}
fmt.Println(simpleList)
return simpleList
}
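
getSimpleResults keeps the best (lowest) distance seen for each ComicVine ID and then regroups the IDs by distance; matchCoverHash marshals that list as JSON when simple=true. Illustrative only: the shape one entry would serialize to, assuming ch.ComicVine is the domain string "comicvine.gamespot.com" (as the quick_tag.py client below expects) and a made-up ID:

package main

import (
    "encoding/json"
    "fmt"
)

// SimpleResult mirrors the struct above, with ch.IDList written out as a
// plain map for the example.
type SimpleResult struct {
    Distance int
    IDList   map[string][]string
}

func main() {
    results := []SimpleResult{
        {Distance: 4, IDList: map[string][]string{"comicvine.gamespot.com": {"123456"}}}, // hypothetical ID
    }
    out, _ := json.Marshal(results)
    fmt.Println(string(out))
    // Prints: [{"Distance":4,"IDList":{"comicvine.gamespot.com":["123456"]}}]
}
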
func (s *Server) matchCoverHash(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
ahashStr = strings.TrimSpace(values.Get("ahash"))
dhashStr = strings.TrimSpace(values.Get("dhash"))
phashStr = strings.TrimSpace(values.Get("phash"))
simple = strings.ToLower(strings.TrimSpace(values.Get("simple"))) == "true"
ahash uint64
dhash uint64
phash uint64
err error
)
if ahash, err = strconv.ParseUint(ahashStr, 16, 64); err != nil && ahashStr != "" {
log.Printf("could not parse ahash: %s", ahashStr)
http.Error(w, "parse fail", http.StatusBadRequest)
return
}
if dhash, err = strconv.ParseUint(dhashStr, 16, 64); err != nil && dhashStr != "" {
log.Printf("could not parse dhash: %s", dhashStr)
http.Error(w, "parse fail", http.StatusBadRequest)
return
}
if phash, err = strconv.ParseUint(phashStr, 16, 64); err != nil && phashStr != "" {
log.Printf("could not parse phash: %s", phashStr)
http.Error(w, "parse fail", http.StatusBadRequest)
return
}
matches := s.getMatches(ahash, dhash, phash)
if len(matches) > 0 {
var covers []byte
if simple {
covers, err = json.Marshal(getSimpleResults(matches))
} else {
covers, err = json.Marshal(matches)
}
if err != nil {
log.Println(err)
http.Error(w, "failed to marshal results", http.StatusInternalServerError)
return
}
w.Header().Add("Content-Type", "application/json")
w.Write(covers)
w.Write([]byte{'\n'})
return
}
w.Header().Add("Content-Type", "application/json")
fmt.Fprintln(w, "{\"msg\":\"No hashes found\"}")
}
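
The handler takes hex-encoded ahash, dhash and phash query parameters plus simple=true, which is exactly what the quick_tag.py SearchHashes helper further down sends. A minimal client sketch, assuming the server above is listening locally on :8080 and using placeholder hash values:

package main

import (
    "fmt"
    "io"
    "net/http"
    "net/url"
)

func main() {
    q := url.Values{
        "ahash":  {"ffd4c19f0f0f0303"}, // placeholder hex-encoded 64-bit hashes
        "dhash":  {"c4e4b49494b0d0f0"},
        "phash":  {"a95a4b5a4ab5a5a5"},
        "simple": {"true"},
    }
    resp, err := http.Get("http://localhost:8080/match_cover_hash?" + q.Encode())
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()
    body, _ := io.ReadAll(resp.Body)
    fmt.Println(string(body)) // JSON match list, or {"msg":"No hashes found"}
}
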
func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
user, authed := s.authenticated(w, r)
if !authed || user == "" {
http.Error(w, "Invalid Auth", http.StatusForbidden)
return
}
var (
values = r.URL.Query()
domain = strings.TrimSpace(values.Get("domain"))
ID = strings.TrimSpace(values.Get("id"))
)
if ID == "" {
log.Println("No ID Provided")
http.Error(w, "No ID Provided", http.StatusBadRequest)
return
}
if domain == "" {
log.Println("No domain Provided")
http.Error(w, "No domain Provided", http.StatusBadRequest)
return
}
i, format, err := image.Decode(r.Body)
if err != nil {
msg := fmt.Sprintf("Failed to decode Image: %s", err)
log.Println(msg)
http.Error(w, msg, http.StatusBadRequest)
return
}
log.Printf("Decoded %s image from %s", format, user)
s.hashingQueue <- ch.Im{Im: i, Format: format, Domain: ch.Source(domain), ID: ID, Path: ""}
fmt.Fprintln(w, "Success")
}
func (s *Server) mapHashes(hash ch.Hash) {
if _, ok := s.FullAhash[hash.Ahash.GetHash()]; !ok {
s.FullAhash[hash.Ahash.GetHash()] = make(ch.IDList)
}
s.FullAhash[hash.Ahash.GetHash()][hash.Domain] = ch.Insert(s.FullAhash[hash.Ahash.GetHash()][hash.Domain], hash.ID)
if _, ok := s.FullDhash[hash.Dhash.GetHash()]; !ok {
s.FullDhash[hash.Dhash.GetHash()] = make(ch.IDList)
}
s.FullDhash[hash.Dhash.GetHash()][hash.Domain] = ch.Insert(s.FullDhash[hash.Dhash.GetHash()][hash.Domain], hash.ID)
if _, ok := s.FullPhash[hash.Phash.GetHash()]; !ok {
s.FullPhash[hash.Phash.GetHash()] = make(ch.IDList)
}
s.FullPhash[hash.Phash.GetHash()][hash.Domain] = ch.Insert(s.FullPhash[hash.Phash.GetHash()][hash.Domain], hash.ID)
for i, partialHash := range ch.SplitHash(hash.Ahash.GetHash()) {
s.PartialAhash[i][partialHash] = ch.Insert(s.PartialAhash[i][partialHash], hash.Ahash.GetHash())
}
for i, partialHash := range ch.SplitHash(hash.Dhash.GetHash()) {
s.PartialDhash[i][partialHash] = ch.Insert(s.PartialDhash[i][partialHash], hash.Dhash.GetHash())
}
for i, partialHash := range ch.SplitHash(hash.Phash.GetHash()) {
s.PartialPhash[i][partialHash] = ch.Insert(s.PartialPhash[i][partialHash], hash.Phash.GetHash())
}
}
func (s *Server) initHashes() {
for i := range s.PartialAhash {
s.PartialAhash[i] = make(map[uint8][]uint64)
}
for i := range s.PartialDhash {
s.PartialDhash[i] = make(map[uint8][]uint64)
}
for i := range s.PartialPhash {
s.PartialPhash[i] = make(map[uint8][]uint64)
}
s.FullAhash = make(map[uint64]ch.IDList)
s.FullDhash = make(map[uint64]ch.IDList)
s.FullPhash = make(map[uint64]ch.IDList)
// s.IDToCover = make(map[string]string)
}
func (s *Server) mapper() {
var total uint64 = 0
for hash := range s.mappingQueue {
if total%1000 == 0 {
mem := ch.MemStats()
if mem > 10*1024*1024*1024 {
fmt.Println("Forcing gc", mem, "G")
runtime.GC()
}
}
total++
s.mapHashes(hash)
}
}
func (s *Server) hasher(workerID int) {
for image := range s.hashingQueue {
start := time.Now()
hash := ch.HashImage(image)
if hash.Domain == "" {
continue
}
s.mappingQueue <- hash
elapsed := time.Since(start)
// fmt.Printf("%#064b\n", ahash.GetHash())
// fmt.Printf("%#064b\n", dhash.GetHash())
// fmt.Printf("%#064b\n", phash.GetHash())
log.Printf("Hashing took %v: worker: %v. path: %s ahash: %064b id: %s\n", elapsed, workerID, image.Path, hash.Ahash.GetHash(), hash.ID)
}
}
func (s *Server) reader(workerID int) {
for path := range s.readerQueue {
file, err := os.Open(path)
if err != nil {
panic(err)
}
i, format, err := image.Decode(bufio.NewReader(file))
file.Close()
if err != nil {
continue // skip images that fail to decode
}
im := ch.Im{Im: i, Format: format, Domain: ch.ComicVine, ID: filepath.Base(filepath.Dir(path)), Path: path}
s.hashingQueue <- im
}
}
func (s *Server) FindHashes() {
}
func startServer(coverPath string) {
if *cpuprofile != "" {
f, err := os.Create(*cpuprofile)
if err != nil {
log.Fatal(err)
}
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}
sig := make(chan os.Signal, 1)
signal.Notify(sig, os.Interrupt)
mux := http.NewServeMux()
server := Server{
// token: make(chan *oidc.Tokens),
readerQueue: make(chan string, 1120130), // Number gotten from checking queue size
hashingQueue: make(chan ch.Im),
mappingQueue: make(chan ch.Hash),
mux: mux,
httpServer: &http.Server{
Addr: ":8080",
Handler: mux,
ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second,
MaxHeaderBytes: 1 << 20,
},
}
imaging.SetMaxProcs(1)
fmt.Println("init hashes")
server.initHashes()
// server.setupOauthHandlers()
fmt.Println("init handlers")
server.setupAppHandlers()
fmt.Println("init hashers")
for i := 1; i <= 10; i++ {
go server.reader(i)
go server.hasher(i)
}
fmt.Println("init mapper")
go server.mapper()
fmt.Println("Starting local hashing go routine")
go func() {
fmt.Println("Hashing covers at ", coverPath)
start := time.Now()
err := filepath.WalkDir(coverPath, func(path string, d fs.DirEntry, err error) error {
select {
case signal := <-sig:
server.httpServer.Shutdown(context.TODO())
return fmt.Errorf("signal: %v", signal)
default:
}
if d.IsDir() || !strings.Contains(path, "thumb") {
return nil
}
fmt.Println(len(server.readerQueue))
server.readerQueue <- path
return nil
})
elapsed := time.Since(start)
fmt.Println("Err:", err, "local hashing took", elapsed)
s := <-sig
err = server.httpServer.Shutdown(context.TODO())
log.Printf("Signal: %v, error: %s", s, err)
}()
fmt.Println("Listening on ", server.httpServer.Addr)
err := server.httpServer.ListenAndServe()
if err != nil {
fmt.Println(err)
}
f, er := os.Create("memprofile")
if er != nil {
fmt.Println("Error in creating file for writing memory profile to: ", er)
return
}
defer f.Close()
runtime.GC()
if e := pprof.WriteHeapProfile(f); e != nil {
fmt.Println("Error in writing memory profile: ", e)
return
}
}


@ -1,10 +1,11 @@
from typing import Collection, Sequence
from PIL import Image
import argparse,pathlib,numpy,imagehash
import argparse,pathlib,numpy,imagehash,sys
ap = argparse.ArgumentParser()
ap.add_argument("--file", type=pathlib.Path)
ap.add_argument("--debug", action='store_true')
opts = ap.parse_args()
opts.file = pathlib.Path(opts.file)
@ -18,38 +19,41 @@ resized = gray.copy().resize((hash_size, hash_size), Image.Resampling.LANCZOS)
def print_image(image: Image.Image) -> None:
for row in numpy.asarray(image):
print('[ ', end='')
print('[ ', end='', file=sys.stderr)
for i in row:
if isinstance(i, Collection):
print('{ ', end='')
print('{ ', end='', file=sys.stderr)
for idx, x in enumerate(i):
if idx == len(i)-1:
print(f'{int(x):03d} ', end='')
print(f'{int(x):03d} ', end='', file=sys.stderr)
else:
print(f'{int(x):03d}, ', end='')
print('}, ', end='')
print(f'{int(x):03d}, ', end='', file=sys.stderr)
print('}, ', end='', file=sys.stderr)
else:
print(f'{int(i):03d}, ', end='')
print(']')
print(f'{int(i):03d}, ', end='', file=sys.stderr)
print(']', file=sys.stderr)
def bin_str(hash):
return ''.join(str(b) for b in 1 * hash.hash.flatten())
print("rgb")
print_image(image)
print()
if opts.debug:
image.save("py.rgb.png")
print("rgb", file=sys.stderr)
print_image(image)
print(file=sys.stderr)
print("gray")
print_image(gray)
if opts.debug:
gray.save("py.gray.png")
print()
print("gray", file=sys.stderr)
print_image(gray)
print(file=sys.stderr)
print("resized")
print_image(resized)
if opts.debug:
resized.save("py.resized.png")
print()
print("resized", file=sys.stderr)
print_image(resized)
print(file=sys.stderr)
print('ahash: ', bin_str(imagehash.average_hash(image)))
print('dhash: ', bin_str(imagehash.dhash(image)))


@ -0,0 +1,91 @@
package main
import (
"context"
"flag"
"fmt"
"io"
"log"
"os"
"strings"
"github.com/fmartingr/go-comicinfo/v2"
"github.com/mholt/archiver/v4"
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
func main() {
c := collate.New(language.English, collate.Loose, collate.Numeric, collate.Force)
fileArchive := flag.String("file", "", "archive to extract cover")
flag.Parse()
if fileArchive == nil || *fileArchive == "" {
flag.Usage()
os.Exit(1)
}
file, err := os.Open(*fileArchive)
if err != nil {
log.Printf("Failed to open file %s: %s", *fileArchive, err)
return
}
unrar := archiver.Rar{}
fileList := []string{}
err = unrar.Extract(context.TODO(), file, nil, func(ctx context.Context, f archiver.File) error {
if !strings.HasSuffix(f.NameInArchive, ".xml") {
fileList = append(fileList, f.NameInArchive)
}
return nil
})
if err != nil {
panic(err)
}
_, err = file.Seek(0, io.SeekStart)
if err != nil {
panic(err)
}
c.SortStrings(fileList)
var (
image []byte
issueID string
files = []string{"ComicInfo.xml", fileList[0]}
)
fmt.Printf("Extracting %s\n", fileList[0])
err = unrar.Extract(context.TODO(), file, files, func(ctx context.Context, f archiver.File) error {
r, err := f.Open()
if err != nil {
return err
}
if f.Name() == "ComicInfo.xml" {
ci, err := comicinfo.Read(r)
if err != nil {
return err
}
parts := strings.Split(strings.TrimRight(ci.Web, "/"), "/")
ids := strings.Split(parts[len(parts)-1], "-")
issueID = ids[1]
} else {
image, err = io.ReadAll(r)
if err != nil {
return err
}
}
return nil
})
if err != nil {
panic(err)
}
file.Close()
file, err = os.Create(*fileArchive + "." + issueID + ".image")
if err != nil {
panic(err)
}
defer file.Close()
_, err = file.Write(image)
if err != nil {
panic(err)
}
// os.Remove(*fileArchive)
// fmt.Println("removed " + *fileArchive)
}
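
The ComicInfo.xml branch above derives the ComicVine issue ID from the last path segment of the Web URL, keeping the digits after the first dash. A worked example with a made-up ComicVine-style URL:

package main

import (
    "fmt"
    "strings"
)

func main() {
    web := "https://comicvine.gamespot.com/example-issue-1/4000-123456/" // hypothetical URL
    parts := strings.Split(strings.TrimRight(web, "/"), "/")
    ids := strings.Split(parts[len(parts)-1], "-")
    fmt.Println(ids[1]) // prints 123456
}
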


@ -1,27 +1,20 @@
package main
import (
"bufio"
"flag"
"fmt"
"image"
"image/draw"
_ "image/gif"
_ "image/jpeg"
// "github.com/pixiv/go-libjpeg/jpeg"
"image/png"
"log"
"os"
"strings"
"gitea.narnian.us/lordwelch/goimagehash"
"gitea.narnian.us/lordwelch/goimagehash/transforms"
"github.com/anthonynsimon/bild/transform"
_ "github.com/gen2brain/avif"
_ "github.com/spakin/netpbm"
_ "golang.org/x/image/bmp"
_ "golang.org/x/image/tiff"
_ "golang.org/x/image/webp"
ch "gitea.narnian.us/lordwelch/comic-hasher"
"gitea.narnian.us/lordwelch/goimagehash"
)
func init() {
@ -30,23 +23,9 @@ func init() {
// DisableBlockSmoothing: false,
// DCTMethod: jpeg.DCTFloat,
// })}, jpeg.DecodeConfig)
}
func ToGray(img image.Image) *image.Gray {
gray := image.NewGray(image.Rect(0, 0, img.Bounds().Dx(), img.Bounds().Dy()))
gray.Pix = transforms.Rgb2Gray(img)
return gray
}
func resize(img image.Image, w, h int) *image.Gray {
resized := transform.Resize(img, w, h, transform.Lanczos)
r_gray := image.NewGray(image.Rect(0, 0, resized.Bounds().Dx(), resized.Bounds().Dy()))
draw.Draw(r_gray, resized.Bounds(), resized, resized.Bounds().Min, draw.Src)
return r_gray
}
func save_image(im image.Image, name string) {
func saveImage(im image.Image, name string) {
file, err := os.Create(name)
if err != nil {
log.Printf("Failed to open file %s: %s", "tmp.png", err)
@ -80,22 +59,26 @@ func fmtImage(im image.Image) string {
}
func debugImage(im image.Image, width, height int) {
gray := ToGray(im)
resized := resize(gray, width, height)
gray := goimagehash.ToGray(im, nil)
resized := goimagehash.Resize(gray, width, height, nil)
fmt.Println("rgb")
fmt.Println(fmtImage(im))
save_image(im, "go.rgb.png")
fmt.Println("gray")
fmt.Println(fmtImage(gray))
save_image(gray, "go.gray.png")
fmt.Println("resized")
fmt.Println(fmtImage(resized))
save_image(resized, "go.resized.png")
saveImage(im, "go.rgb.png")
log.Println("rgb")
log.Println(fmtImage(im))
saveImage(gray, "go.gray.png")
log.Println("gray")
log.Println(fmtImage(gray))
saveImage(resized, "go.resized.png")
log.Println("resized")
log.Println(fmtImage(resized))
}
func main() {
log.SetFlags(0)
imPath := flag.String("file", "", "image file to hash")
debug := flag.Bool("debug", false, "Enable debug output")
flag.Parse()
if imPath == nil || *imPath == "" {
flag.Usage()
@ -108,47 +91,24 @@ func main() {
return
}
defer file.Close()
im, format, err := image.Decode(file)
im, format, err := image.Decode(bufio.NewReader(file))
if err != nil {
msg := fmt.Sprintf("Failed to decode Image: %s", err)
log.Println(msg)
return
}
debugim := im
if format == "webp" {
im = goimagehash.FancyUpscale(im.(*image.YCbCr))
debugim = goimagehash.FancyUpscale(im.(*image.YCbCr))
}
debugImage(im, 8, 8)
var (
ahash *goimagehash.ImageHash
dhash *goimagehash.ImageHash
phash *goimagehash.ImageHash
)
ahash, err = goimagehash.AverageHash(im)
if err != nil {
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
log.Println(msg)
return
if *debug {
debugImage(debugim, 8, 8)
}
dhash, err = goimagehash.DifferenceHash(im)
if err != nil {
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
log.Println(msg)
return
}
hash := ch.HashImage(ch.Im{Im: im, Format: format, Domain: ch.Source(ch.ComicVine), ID: "nothing"})
phash, err = goimagehash.PerceptionHash(im)
if err != nil {
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
log.Println(msg)
return
}
fmt.Println("ahash: ", ahash.BinString())
fmt.Println("dhash: ", dhash.BinString())
fmt.Println("phash: ", phash.BinString())
fmt.Println("ahash: ", hash.Ahash.BinString())
fmt.Println("dhash: ", hash.Dhash.BinString())
fmt.Println("phash: ", hash.Phash.BinString())
}

34
cmd/hash/natsort/main.go Normal file

@ -0,0 +1,34 @@
package main
import (
"fmt"
"strings"
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
func main() {
c := collate.New(language.English, collate.Loose, collate.Numeric, collate.Force)
list := []string{
"11.jpg",
"12.jpg",
"2.jpg",
"99999999999999999.jpg",
"02.jpg",
"00.jpg",
"0.jpg",
"00.jpg",
"1.jpg",
"01.jpg",
"Page3.gif",
"page0.jpg",
"Page1.jpeg",
"Page2.png",
"!cover.jpg", // Depending on locale punctuation or numbers might come first (Linux)
"page4.webp",
"page10.jpg",
}
c.SortStrings(list)
fmt.Println(strings.Join(list, "\n"))
}

154
cmd/quick_tag.py Normal file

@ -0,0 +1,163 @@
import argparse
import logging
from typing import TypedDict
from urllib.parse import urljoin
from PIL import Image
import appdirs
from comicapi.genericmetadata import GenericMetadata
import pathlib, imagehash, requests
import settngs, comictaggerlib.cli
from io import BytesIO
from comicapi import comicarchive, merge
from datetime import datetime
from comicapi import utils
from comictaggerlib import ctversion
from comictaggerlib.cbltransformer import CBLTransformer
from comictaggerlib.ctsettings.settngs_namespace import SettngsNS
from comictalker.talker_utils import cleanup_html
logger = logging.getLogger("quick_tag")
__version__ = '0.1'
class SimpleResult(TypedDict):
Distance: int
IDList: dict[str, list[str]] # Mapping of domains (eg comicvine.gamespot.com) to IDs
def settings(manager: settngs.Manager):
manager.add_setting("--url", '-u', default='https://comic-hasher.narnian.us', type=utils.parse_url, help='Website to use for searching cover hashes')
manager.add_setting("--max","-m", default=8, type=int, help='Maximum score to allow. Lower score means more accurate')
manager.add_setting("--simple", "-s", default=True, action=argparse.BooleanOptionalAction, help='Whether to retrieve simple results or full results')
manager.add_setting("--force-interactive", "-f", default=True, action=argparse.BooleanOptionalAction, help='When not set will automatically tag comics that have a single match with a score of 4 or lower')
manager.add_setting("--cv-api-key", "-c")
manager.add_setting("comic_archive", type=pathlib.Path)
def SearchHashes(url: str, simple: bool, max: int, ahash: str, dhash: str, phash: str) -> list[SimpleResult]:
resp = requests.get(urljoin(url, '/match_cover_hash'), {"simple": simple, "max": max, "ahash":ahash, "dhash": dhash, "phash": phash})
if resp.status_code != 200:
logger.error("bad response from server: %s", resp.text)
raise SystemExit(3)
return resp.json()
def get_simple_results(results: list[SimpleResult], cv_api_key: str | None = None) -> list[tuple[int, GenericMetadata]]:
from comictalker.talkers.comicvine import ComicVineTalker
cache_dir = pathlib.Path(appdirs.user_cache_dir('quick_tag'))
cache_dir.mkdir(parents=True, exist_ok=True)
cv = ComicVineTalker(f"quick_tag/{__version__}",cache_dir)
cv.parse_settings({"comicvine_key": cv_api_key})
md_results: list[tuple[int, GenericMetadata]] = []
results.sort(key=lambda r: r['Distance'])
for result in results:
for cv_id in result['IDList']['comicvine.gamespot.com']:
md_results.append((result['Distance'], cv.fetch_comic_data(issue_id=cv_id)))
return md_results
def display_simple_results(md_results: list[tuple[int, GenericMetadata]], force_interactive=True) -> GenericMetadata:
if len(md_results) == 1 and md_results[0][0] <= 4 and not force_interactive:
return md_results[0][1]
for counter, r in enumerate(md_results, 1):
print(
" {}. {} #{} [{}] ({}/{}) - {} score: {}".format(
counter,
r[1].series,
r[1].issue,
r[1].publisher,
r[1].month,
r[1].year,
r[1].title,
r[0]
),
)
while True:
i = input(f'Please select a result to tag the comic with or "q" to quit: [1-{len(md_results)}] ').casefold()
if (i.isdigit() and int(i) in range(1, len(md_results) + 1)):
break
if i == 'q':
logger.warning("User quit without saving metadata")
raise SystemExit(4)
return md_results[int(i)-1][1]
def prepare_metadata(md: GenericMetadata, new_md: GenericMetadata, clear_tags: bool, auto_imprint: bool, remove_html_tables: bool) -> GenericMetadata:
final_md = md.copy()
if clear_tags:
final_md = GenericMetadata()
final_md.overlay(new_md, merge.Mode.OVERLAY, True)
issue_id = ""
if final_md.issue_id:
issue_id = f" [Issue ID {final_md.issue_id}]"
origin = ""
if final_md.data_origin is not None:
origin = f" using info from {final_md.data_origin.name}"
notes = f"Tagged with quick_tag {__version__}{origin} on {datetime.now():%Y-%m-%d %H:%M:%S}.{issue_id}"
if auto_imprint:
final_md.fix_publisher()
return final_md.replace(
is_empty=False,
notes=utils.combine_notes(final_md.notes, notes, "Tagged with quick_tag"),
description=cleanup_html(final_md.description, remove_html_tables) or None,
)
def main():
manager = settngs.Manager('Simple comictagging script using ImageHash: https://pypi.org/project/ImageHash/')
manager.add_group("runtime", settings)
opts,_ = manager.parse_cmdline()
url: utils.Url = opts['runtime']['url']
print(url)
max_hamming_distance: int = opts['runtime']['max']
simple: bool = opts['runtime']['simple']
if not simple:
logger.error("Full results not implemented yet")
raise SystemExit(1)
ca = comicarchive.ComicArchive(opts['runtime']['comic_archive'])
if not ca.seems_to_be_a_comic_archive():
logger.error("Could not open %s as an archive", ca.path)
raise SystemExit(1)
try:
tags = ca.read_tags('cr')
cover_index = tags.get_cover_page_index_list()[0]
cover_image = Image.open(BytesIO(ca.get_page(cover_index)))
except Exception:
logger.exception("Unable to read cover image from archive")
raise SystemExit(2)
print('Tagging: ', ca.path)
ahash = imagehash.average_hash(cover_image)
dhash = imagehash.dhash(cover_image)
phash = imagehash.phash(cover_image)
results = SearchHashes(url.url, simple,max_hamming_distance,str(ahash),str(dhash),str(phash))
print(results)
if simple:
metadata_results = get_simple_results(results, opts['runtime']['cv_api_key'])
chosen_result = display_simple_results(metadata_results, opts['runtime']['force_interactive'])
else:
metadata_results = get_full_results(results)
chosen_result = display_full_results(metadata_results)
if ca.write_tags(prepare_metadata(GenericMetadata(), chosen_result, clear_tags=False, auto_imprint=True, remove_html_tables=True), 'cr'):