comic-hasher/main.go

148 lines
2.6 KiB
Go
Raw Normal View History

2024-08-04 18:12:00 -07:00
package ch
2024-05-01 18:09:02 -07:00
import (
2024-08-04 18:12:00 -07:00
"cmp"
2024-05-01 18:09:02 -07:00
"fmt"
"image"
"log"
2024-08-04 18:12:00 -07:00
"math/bits"
2024-07-31 11:35:17 -07:00
"runtime"
2024-08-04 18:12:00 -07:00
"slices"
2024-05-01 18:09:02 -07:00
2024-07-31 11:35:17 -07:00
"gitea.narnian.us/lordwelch/goimagehash"
2024-05-01 18:09:02 -07:00
)
// H_0 through H_7 are byte masks over a 64-bit hash. H_n selects byte n,
// counting from the least significant byte (H_0 == 0xFF) up to the most
// significant byte (H_7 == 0xFF << 56).
const (
	H_0 uint64 = 0xFF << (iota * 8)
	H_1
	H_2
	H_3
	H_4
	H_5
	H_6
	H_7
)
// Shift_0 through Shift_7 are the bit offsets of the eight bytes of a 64-bit
// hash: Shift_n is 8*n, i.e. how far byte n must be shifted right to land in
// the low 8 bits.
const (
	Shift_0 = iota * 8
	Shift_1
	Shift_2
	Shift_3
	Shift_4
	Shift_5
	Shift_6
	Shift_7
)
2024-08-04 18:12:00 -07:00
// Source names the domain an ID belongs to; it is the key type of IDList
// (for example "comicvine.gamespot.com").
type Source string
2024-05-01 18:09:02 -07:00
2024-08-04 18:12:00 -07:00
// Match is a single hit produced by Atleast.
type Match struct {
	// Distance is the number of differing bits between the searched hash
	// and Hash.
	Distance int
	// Hash is the stored hash that matched.
	Hash uint64
}
2024-08-04 18:12:00 -07:00
type Result struct {
IDs IDList
Distance int
Hash ImageHash
2024-07-31 11:35:17 -07:00
}
2024-08-04 18:12:00 -07:00
type Im struct {
Im image.Image
Format string
Domain Source
ID, Path string
2024-07-31 11:35:17 -07:00
}
2024-08-04 18:12:00 -07:00
type Hash struct {
Ahash *goimagehash.ImageHash
Dhash *goimagehash.ImageHash
Phash *goimagehash.ImageHash
Domain Source
ID string
2024-05-01 18:09:02 -07:00
}
2024-08-04 18:12:00 -07:00
type ImageHash struct {
Hash uint64
Kind goimagehash.Kind
2024-05-01 18:09:02 -07:00
}
2024-08-04 18:12:00 -07:00
func Atleast(maxDistance int, search_hash uint64, hashes []uint64) []Match {
matching_hashes := make([]Match, 0, len(hashes)/2) // hope that we don't need all of them
for _, stored_hash := range hashes {
distance := bits.OnesCount64(search_hash ^ stored_hash)
if distance <= maxDistance {
matching_hashes = append(matching_hashes, Match{distance, stored_hash})
2024-05-01 18:09:02 -07:00
}
}
2024-08-04 18:12:00 -07:00
return matching_hashes
2024-07-31 11:35:17 -07:00
}
2024-08-04 18:12:00 -07:00
// Insert adds item to slice at its sorted position and returns the resulting
// slice. If item is already present the slice is returned unchanged, so the
// slice behaves like an ordered set.
//
// slice must already be sorted in ascending order, since the position is
// located with a binary search. The returned slice may share (and have
// modified) the backing array of the argument.
func Insert[S ~[]E, E cmp.Ordered](slice S, item E) S {
	pos, present := slices.BinarySearch(slice, item)
	if !present {
		slice = slices.Insert(slice, pos, item)
	}
	return slice
}
// MemStats takes a fresh snapshot of the runtime memory statistics and
// returns its Alloc field, the number of bytes of allocated heap objects.
func MemStats() uint64 {
	stats := new(runtime.MemStats)
	runtime.ReadMemStats(stats)
	return stats.Alloc
}
2024-08-04 18:12:00 -07:00
func HashImage(i Im) Hash {
if i.Format == "webp" {
i.Im = goimagehash.FancyUpscale(i.Im.(*image.YCbCr))
2024-07-31 11:35:17 -07:00
}
2024-05-01 18:09:02 -07:00
var (
2024-07-31 11:35:17 -07:00
err error = nil
2024-05-01 18:09:02 -07:00
ahash *goimagehash.ImageHash
dhash *goimagehash.ImageHash
phash *goimagehash.ImageHash
)
2024-08-04 18:12:00 -07:00
ahash, err = goimagehash.AverageHash(i.Im)
2024-05-01 18:09:02 -07:00
if err != nil {
msg := fmt.Sprintf("Failed to ahash Image: %s", err)
log.Println(msg)
2024-08-04 18:12:00 -07:00
return Hash{}
2024-05-01 18:09:02 -07:00
}
2024-08-04 18:12:00 -07:00
dhash, err = goimagehash.DifferenceHash(i.Im)
2024-05-01 18:09:02 -07:00
if err != nil {
msg := fmt.Sprintf("Failed to dhash Image: %s", err)
log.Println(msg)
2024-08-04 18:12:00 -07:00
return Hash{}
2024-05-01 18:09:02 -07:00
}
2024-08-04 18:12:00 -07:00
phash, err = goimagehash.PerceptionHash(i.Im)
2024-05-01 18:09:02 -07:00
if err != nil {
msg := fmt.Sprintf("Failed to phash Image: %s", err)
log.Println(msg)
2024-08-04 18:12:00 -07:00
return Hash{}
2024-05-01 18:09:02 -07:00
}
2024-08-04 18:12:00 -07:00
return Hash{
Ahash: ahash,
Dhash: dhash,
Phash: phash,
Domain: i.Domain,
ID: i.ID,
2024-05-01 18:09:02 -07:00
}
}
func SplitHash(hash uint64) [8]uint8 {
return [8]uint8{
2024-08-04 18:12:00 -07:00
uint8((hash & H_7) >> Shift_7),
uint8((hash & H_6) >> Shift_6),
uint8((hash & H_5) >> Shift_5),
uint8((hash & H_4) >> Shift_4),
uint8((hash & H_3) >> Shift_3),
uint8((hash & H_2) >> Shift_2),
uint8((hash & H_1) >> Shift_1),
uint8((hash & H_0) >> Shift_0),
2024-05-01 18:09:02 -07:00
}
}
2024-08-04 18:12:00 -07:00
// IDList maps a source domain to the list of IDs known for that domain,
// e.g. IDs["comicvine.gamespot.com"] = []string{"1235"}.
type IDList map[Source][]string