248 lines
5.8 KiB
Go
248 lines
5.8 KiB
Go
package ch
|
|
|
|
import (
|
|
"cmp"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"slices"
|
|
"strings"
|
|
|
|
"gitea.narnian.us/lordwelch/goimagehash"
|
|
"github.com/vmihailenco/msgpack"
|
|
)
|
|
|
|
// Format identifies a serialization format for saved hashes.
type Format int

// Known formats. Numbering starts at 1, so the zero value of Format matches
// neither constant.
const (
	Msgpack Format = iota + 1
	JSON
)

// CurrentSavedHashesVersion is the value EncodeHashes writes into
// SavedHashes.Version before encoding.
const CurrentSavedHashesVersion int = 2
|
|
|
|
// versionMap maps a saved-hashes version number to the decoder for that
// version. DecodeHashes consults it first and only tries the decoders one by
// one when the version is not present here.
// NOTE(review): no initializer is visible in this file; a lookup on a nil map
// simply reports "not found". Presumably it is populated elsewhere - confirm.
var versionMap map[int]versionDecoder
|
|
|
|
var formatNames = map[Format]string{
|
|
JSON: "json",
|
|
Msgpack: "msgpack",
|
|
}
|
|
|
|
var formatValues = map[string]Format{
|
|
"json": JSON,
|
|
"msgpack": Msgpack,
|
|
}
|
|
|
|
// OldSavedHashes is the layout decoded by DecodeHashesV0: for each Source, a
// map from an ID string to that ID's three hash values. ConvertHashesV0 turns
// array index i into goimagehash.Kind(i + 1).
type OldSavedHashes map[Source]map[string][3]uint64
|
|
type SavedHashesv1 struct {
|
|
IDs [][]ID
|
|
Hashes [3]map[uint64]int
|
|
}
|
|
|
|
// SavedHashes The IDs and Hashes fields have no direct correlation
|
|
// It is perfectly valid to have an empty IDs or an empty Hashes field
|
|
// If two covers have identical hashes then they should be two entries in Hashes not a set in IDs with two IDs from the same source
|
|
type SavedHashes struct {
|
|
Version int
|
|
IDs [][]ID // List of sets of IDs that are the same across Sources, should generally only have one Source per set
|
|
Hashes []SavedHash // List of all known hashes, hashes will be duplicated for each source
|
|
}
|
|
|
|
type SavedHash struct {
|
|
Hash Hash
|
|
ID ID
|
|
}
|
|
type (
	// Encoder serializes a value into bytes. EncodeHashes assigns
	// json.Marshal or msgpack.Marshal to it.
	Encoder func(any) ([]byte, error)

	// Decoder deserializes bytes into the value pointed to by its second
	// argument. DecodeHashes assigns json.Unmarshal or msgpack.Unmarshal to it.
	Decoder func([]byte, any) error
)
|
|
// versionDecoder decodes the given bytes with the supplied Decoder and returns
// the result as a *SavedHashes. DecodeHashesV0, DecodeHashesV1 and
// DecodeHashesV2 all have this signature.
type versionDecoder func(Decoder, []byte) (*SavedHashes, error)
|
|
|
|
var (
	// NoHashes is returned when data decodes cleanly but contains nothing to
	// load, and by DecodeHashes when no decoder accepted the data.
	NoHashes = errors.New("no hashes")

	// DecodeError is wrapped, together with the underlying error, whenever a
	// Decoder call fails.
	DecodeError = errors.New("decoder failure")
)
|
|
|
|
func (f Format) String() string {
|
|
if name, known := formatNames[f]; known {
|
|
return name
|
|
}
|
|
return "Unknown"
|
|
}
|
|
|
|
func (f *Format) Set(s string) error {
|
|
if format, known := formatValues[strings.ToLower(s)]; known {
|
|
*f = format
|
|
} else {
|
|
return fmt.Errorf("Unknown format: %d", f)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *SavedHashes) InsertHash(hash SavedHash) {
|
|
index, itemFound := slices.BinarySearchFunc(s.Hashes, hash, func(existing SavedHash, target SavedHash) int {
|
|
return cmp.Or(
|
|
cmp.Compare(existing.Hash.Hash, target.Hash.Hash),
|
|
cmp.Compare(existing.Hash.Kind, target.Hash.Kind),
|
|
cmp.Compare(existing.ID.Domain, target.ID.Domain),
|
|
cmp.Compare(existing.ID.ID, target.ID.ID),
|
|
)
|
|
})
|
|
if !itemFound {
|
|
s.Hashes = slices.Insert(s.Hashes, index, hash)
|
|
}
|
|
}
|
|
|
|
func ConvertHashesV0(oldHashes OldSavedHashes) *SavedHashes {
|
|
t := SavedHashes{}
|
|
idcount := 0
|
|
for _, ids := range oldHashes {
|
|
idcount += len(ids)
|
|
}
|
|
t.IDs = make([][]ID, 0, idcount)
|
|
t.Hashes = make([]SavedHash, 0, idcount)
|
|
for domain, sourceHashes := range oldHashes {
|
|
for id, hashes := range sourceHashes {
|
|
t.IDs = append(t.IDs, []ID{{domain, id}})
|
|
for hashType, hash := range hashes {
|
|
t.Hashes = append(t.Hashes, SavedHash{
|
|
Hash: Hash{
|
|
Kind: goimagehash.Kind(hashType + 1),
|
|
Hash: hash,
|
|
},
|
|
ID: ID{domain, id},
|
|
})
|
|
}
|
|
}
|
|
}
|
|
fmt.Println("length of hashes", len(t.Hashes))
|
|
fmt.Println("Length of ID lists", len(t.IDs))
|
|
return &t
|
|
}
|
|
|
|
func ConvertHashesV1(oldHashes SavedHashesv1) *SavedHashes {
|
|
t := SavedHashes{}
|
|
hashCount := 0
|
|
for _, hashes := range oldHashes.Hashes {
|
|
hashCount += len(hashes)
|
|
}
|
|
t.IDs = oldHashes.IDs
|
|
t.Hashes = make([]SavedHash, 0, hashCount)
|
|
for hashType, sourceHashes := range oldHashes.Hashes {
|
|
for hash, index := range sourceHashes {
|
|
for _, id := range oldHashes.IDs[index] {
|
|
t.Hashes = append(t.Hashes, SavedHash{
|
|
ID: id,
|
|
Hash: Hash{
|
|
Kind: goimagehash.Kind(hashType + 1),
|
|
Hash: hash,
|
|
},
|
|
})
|
|
}
|
|
}
|
|
}
|
|
fmt.Println("length of hashes", len(t.Hashes))
|
|
fmt.Println("Length of ID lists", len(t.IDs))
|
|
return &t
|
|
}
|
|
|
|
func DecodeHashesV0(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
|
loadedHashes := OldSavedHashes{}
|
|
err := decode(hashes, &loadedHashes)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", DecodeError, err)
|
|
}
|
|
if len(loadedHashes) == 0 {
|
|
return nil, NoHashes
|
|
}
|
|
return ConvertHashesV0(loadedHashes), nil
|
|
}
|
|
|
|
func DecodeHashesV1(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
|
loadedHashes := SavedHashesv1{}
|
|
err := decode(hashes, &loadedHashes)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", DecodeError, err)
|
|
}
|
|
hashesCount := 0
|
|
for _, hashes := range loadedHashes.Hashes {
|
|
hashesCount += len(hashes)
|
|
}
|
|
if hashesCount < 1 {
|
|
return nil, NoHashes
|
|
}
|
|
return ConvertHashesV1(loadedHashes), nil
|
|
}
|
|
|
|
func DecodeHashesV2(decode Decoder, hashes []byte) (*SavedHashes, error) {
|
|
loadedHashes := SavedHashes{}
|
|
err := decode(hashes, &loadedHashes)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%w: %w", DecodeError, err)
|
|
}
|
|
if len(loadedHashes.Hashes) < 1 && len(loadedHashes.IDs) < 1 {
|
|
return nil, NoHashes
|
|
}
|
|
|
|
return &loadedHashes, nil
|
|
}
|
|
|
|
func getSavedHashesVersion(decode Decoder, hashes []byte) (int, error) {
|
|
type version struct {
|
|
Version int
|
|
}
|
|
var savedVersion version
|
|
err := decode(hashes, &savedVersion)
|
|
if err != nil {
|
|
return -1, fmt.Errorf("%w: %w", DecodeError, err)
|
|
}
|
|
if savedVersion.Version > 1 {
|
|
return savedVersion.Version, nil
|
|
}
|
|
return -1, nil
|
|
}
|
|
func DecodeHashes(format Format, hashes []byte) (*SavedHashes, error) {
|
|
var decode Decoder
|
|
switch format {
|
|
case Msgpack:
|
|
decode = msgpack.Unmarshal
|
|
case JSON:
|
|
decode = json.Unmarshal
|
|
|
|
default:
|
|
return nil, fmt.Errorf("Unknown format: %v", format)
|
|
}
|
|
version, err := getSavedHashesVersion(decode, hashes)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if decodeVersion, knownVersion := versionMap[version]; knownVersion {
|
|
return decodeVersion(decode, hashes)
|
|
}
|
|
|
|
for _, decodeVersion := range []versionDecoder{
|
|
DecodeHashesV0,
|
|
DecodeHashesV1,
|
|
DecodeHashesV2,
|
|
} {
|
|
loadedHashes, err := decodeVersion(decode, hashes)
|
|
if err == nil {
|
|
return loadedHashes, nil
|
|
}
|
|
}
|
|
|
|
return nil, NoHashes
|
|
}
|
|
|
|
func EncodeHashes(hashes SavedHashes, format Format) ([]byte, error) {
|
|
var encoder Encoder
|
|
switch format {
|
|
case Msgpack:
|
|
encoder = msgpack.Marshal
|
|
case JSON:
|
|
encoder = json.Marshal
|
|
default:
|
|
return nil, fmt.Errorf("Unknown format: %v", format)
|
|
}
|
|
|
|
hashes.Version = CurrentSavedHashesVersion
|
|
return encoder(hashes)
|
|
}
|