From 1955444dcf825570ffa1a964c4eab52ed096ba53 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Sun, 1 Sep 2024 18:05:58 -0700 Subject: [PATCH] Add sqlite implementation --- cmd/comic-hasher/main.go | 2 + go.mod | 15 ++ go.sum | 51 +++++ sqlite.go | 439 +++++++++++++++++++++++++++++++++++++++ sqlite_cgo.go | 7 + 5 files changed, 514 insertions(+) create mode 100644 sqlite.go create mode 100644 sqlite_cgo.go diff --git a/cmd/comic-hasher/main.go b/cmd/comic-hasher/main.go index 2a3ffe7..a5a4c2c 100644 --- a/cmd/comic-hasher/main.go +++ b/cmd/comic-hasher/main.go @@ -93,6 +93,7 @@ func (f *Format) Set(s string) error { type Opts struct { cpuprofile string coverPath string + sqlitePath string loadEmbeddedHashes bool saveEmbeddedHashes bool format Format @@ -107,6 +108,7 @@ func main() { flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file") flag.StringVar(&opts.coverPath, "cover-path", "", "Path to covers to add to hash database. must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif") + flag.StringVar(&opts.sqlitePath, "sqlite-path", "tmp.sqlite", "Path to sqlite database to use for matching hashes, substantialy reduces memory usage") flag.BoolVar(&opts.loadEmbeddedHashes, "use-embedded-hashes", true, "Use hashes embedded in the application as a starting point") flag.BoolVar(&opts.saveEmbeddedHashes, "save-embedded-hashes", false, "Save hashes even if we loaded the embedded hashes") flag.StringVar(&opts.hashesPath, "hashes", "hashes.gz", "Path to optionally gziped hashes in msgpack or json format. 
You must disable embedded hashes to use this option") diff --git a/go.mod b/go.mod index 145d914..3f7a260 100644 --- a/go.mod +++ b/go.mod @@ -8,9 +8,11 @@ require ( gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00 github.com/fmartingr/go-comicinfo/v2 v2.0.2 github.com/kr/pretty v0.1.0 + github.com/mattn/go-sqlite3 v1.14.22 github.com/mholt/archiver/v4 v4.0.0-alpha.8 golang.org/x/image v0.19.0 golang.org/x/text v0.17.0 + modernc.org/sqlite v1.32.0 ) require ( @@ -26,19 +28,32 @@ require ( github.com/connesc/cipherio v0.2.1 // indirect github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09 // indirect github.com/dsnet/compress v0.0.1 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/golang/mock v1.6.0 // indirect github.com/golang/snappy v0.0.4 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/klauspost/compress v1.15.9 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/kr/text v0.1.0 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v0.1.9 // indirect github.com/nwaples/rardecode/v2 v2.0.0-beta.2 // indirect github.com/pierrec/lz4/v4 v4.1.15 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/therootcompany/xz v1.0.1 // indirect github.com/ulikunitz/xz v0.5.10 // indirect go4.org v0.0.0-20200411211856-f5505b9728dd // indirect golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect + golang.org/x/sys v0.22.0 // indirect + modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect + modernc.org/libc v1.55.3 // indirect + modernc.org/mathutil v1.6.0 // indirect + modernc.org/memory v1.8.0 // indirect + modernc.org/strutil v1.2.0 // indirect + modernc.org/token v1.1.0 // indirect ) replace golang.org/x/text 
v0.17.0 => github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f diff --git a/go.sum b/go.sum index 4535962..ae9d7be 100644 --- a/go.sum +++ b/go.sum @@ -42,6 +42,8 @@ github.com/disintegration/imaging v1.6.3-0.20201218193011-d40f48ce0f09/go.mod h1 github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q= github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fmartingr/go-comicinfo/v2 v2.0.2 h1:VppvrHr8C4+iktBTOd7vzTMNbVecZ7F/Ji1kPTOIGg4= @@ -75,7 +77,11 @@ github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXi github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo= +github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 
v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= @@ -84,6 +90,8 @@ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+l github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= @@ -101,8 +109,14 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f h1:RMKTfrT4gjJfmB/aWuvCcFxUSvWAJfOAc5khGL6ASjk= github.com/lordwelch/text v0.0.0-20240505231825-4893f344170f/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/mholt/archiver/v4 v4.0.0-alpha.8 
h1:tRGQuDVPh66WCOelqe6LIGh0gwmfwxUrSSDunscGsRM= github.com/mholt/archiver/v4 v4.0.0-alpha.8/go.mod h1:5f7FUYGXdJWUjESffJaYR4R60VhnHxb2X3T1teMyv5A= +github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= +github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= @@ -110,6 +124,8 @@ github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFu github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -168,6 +184,8 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= 
+golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -195,6 +213,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -211,6 +231,9 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term 
v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -244,6 +267,8 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= +golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -293,6 +318,32 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +modernc.org/cc/v4 v4.21.4 h1:3Be/Rdo1fpr8GrQ7IVw9OHtplU4gWbb+wNgeoBMmGLQ= +modernc.org/cc/v4 v4.21.4/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ= +modernc.org/ccgo/v4 v4.19.2 h1:lwQZgvboKD0jBwdaeVCTouxhxAyN6iawF3STraAal8Y= +modernc.org/ccgo/v4 v4.19.2/go.mod h1:ysS3mxiMV38XGRTTcgo0DQTeTmAO4oCmJl1nX9VFI3s= +modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE= +modernc.org/fileutil 
v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ= +modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw= +modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU= +modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 h1:5D53IMaUuA5InSeMu9eJtlQXS2NxAhyWQvkKEgXZhHI= +modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6/go.mod h1:Qz0X07sNOR1jWYCrJMEnbW/X55x206Q7Vt4mz6/wHp4= +modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U= +modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w= +modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4= +modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo= +modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E= +modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU= +modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4= +modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc= +modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss= +modernc.org/sqlite v1.32.0 h1:6BM4uGza7bWypsw4fdLRsLxut6bHe4c58VeqjRgST8s= +modernc.org/sqlite v1.32.0/go.mod h1:UqoylwmTb9F+IqXERT8bW9zzOWN8qwAIcLdzeBZs4hA= +modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA= +modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/sqlite.go b/sqlite.go new file mode 100644 index 
0000000..44c7cb0
--- /dev/null
+++ b/sqlite.go
@@ -0,0 +1,498 @@
+package ch
+
+import (
+	"context"
+	"database/sql"
+	"errors"
+	"fmt"
+	"log"
+	"math/bits"
+	"strings"
+
+	"gitea.narnian.us/lordwelch/goimagehash"
+	_ "modernc.org/sqlite"
+)
+
+// sqliteStorage keeps hashes and the IDs they belong to in a SQLite database.
+type sqliteStorage struct {
+	db *sql.DB
+}
+
+// sqliteHash is a Result together with the rowid of its row in the Hashes table.
+type sqliteHash struct {
+	hashid int
+	Result
+}
+
+// readHashes drains rows of (rowid, hash, kind) into sqliteHash values and
+// closes rows. When keep is non-nil only the hashes it accepts are returned.
+func (s *sqliteStorage) readHashes(rows *sql.Rows, keep func(*sqliteHash) bool) ([]sqliteHash, error) {
+	// Open rows keep a connection checked out and NewSqliteStorage limits the
+	// pool to one, so they must be closed before the next query is issued.
+	defer rows.Close()
+
+	hashes := []sqliteHash{}
+	for rows.Next() {
+		var (
+			r = sqliteHash{Result: Result{IDs: make(IDList)}}
+			h int64
+		)
+		if err := rows.Scan(&r.hashid, &h, &r.Hash.Kind); err != nil {
+			return nil, err
+		}
+		// Hashes are written as int64; converting back restores the uint64 bits.
+		r.Hash.Hash = uint64(h)
+		if keep == nil || keep(&r) {
+			hashes = append(hashes, r)
+		}
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return hashes, nil
+}
+
+// attachIDs appends to each hash's IDs every ID linked to it through id_hash,
+// ordered by domain and then id.
+func (s *sqliteStorage) attachIDs(hashes []sqliteHash) error {
+	if len(hashes) == 0 {
+		return nil
+	}
+	idsByHash := make(map[int]IDList, len(hashes))
+	hashIDs := make([]interface{}, 0, len(hashes))
+	for _, hash := range hashes {
+		idsByHash[hash.hashid] = hash.IDs
+		hashIDs = append(hashIDs, hash.hashid)
+	}
+
+	placeholders := strings.TrimSuffix(strings.Repeat("?,", len(hashIDs)), ",")
+	rows, err := s.db.QueryContext(context.Background(), `SELECT DISTINCT IDs.domain, IDs.id, id_hash.hashid FROM IDs JOIN id_hash ON IDs.idid = id_hash.idid WHERE (id_hash.hashid IN (`+placeholders+`)) ORDER BY IDs.domain, IDs.id;`, hashIDs...)
+	if err != nil {
+		return err
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var (
+			source Source
+			id     string
+			hashid int
+		)
+		if err := rows.Scan(&source, &id, &hashid); err != nil {
+			return err
+		}
+		// IDList is a map, so this also updates the entry in hashes.
+		if ids, ok := idsByHash[hashid]; ok {
+			ids[source] = append(ids[source], id)
+		}
+	}
+	return rows.Err()
+}
+
+// findExactHashes runs statement with items and returns one sqliteHash, with
+// its IDs filled in, per row. statement must select (rowid, hash, kind) from
+// Hashes; closing it is left to the caller.
+func (s *sqliteStorage) findExactHashes(statement *sql.Stmt, items ...interface{}) ([]sqliteHash, error) {
+	rows, err := statement.Query(items...)
+	if err != nil {
+		return nil, err
+	}
+	hashes, err := s.readHashes(rows, nil)
+	if err != nil {
+		return nil, err
+	}
+	if err = s.attachIDs(hashes); err != nil {
+		return nil, err
+	}
+	return hashes, nil
+}
+
+// findPartialHashes returns the stored hashes of the given kind whose Hamming
+// distance to searchHash is at most max, with Distance and IDs filled in.
+//
+// Only rows sharing at least one byte with searchHash at the same byte
+// position are fetched, so a hash that differs in all eight bytes is never
+// returned, whatever max is.
+func (s *sqliteStorage) findPartialHashes(max int, searchHash int64, kind goimagehash.Kind) ([]sqliteHash, error) {
+	// The per-byte expressions are spelled like the hash_N_index expressions in
+	// createIndexes.
+	var byteMatches [8]string
+	args := []interface{}{kind}
+	for i := range byteMatches {
+		byteMatches[i] = fmt.Sprintf("((hash >> (%d * 8) & 0xFF)=(? >> (%d * 8) & 0xFF))", i, i)
+		args = append(args, searchHash)
+	}
+	query := `SELECT rowid,hash,kind FROM Hashes WHERE (kind=?) AND (` + strings.Join(byteMatches[:], " OR ") + `) ORDER BY kind,hash;`
+
+	rows, err := s.db.QueryContext(context.Background(), query, args...)
+	if err != nil {
+		return nil, err
+	}
+	hashes, err := s.readHashes(rows, func(r *sqliteHash) bool {
+		r.Distance = bits.OnesCount64(uint64(searchHash) ^ r.Hash.Hash)
+		return r.Distance <= max
+	})
+	if err != nil {
+		return nil, err
+	}
+	if err = s.attachIDs(hashes); err != nil {
+		return nil, err
+	}
+	return hashes, nil
+}
+
+// dropIndexes removes the indexes that createIndexes creates.
+func (s *sqliteStorage) dropIndexes() error {
+	_, err := s.db.Exec(`
+DROP INDEX IF EXISTS hash_index;
+DROP INDEX IF EXISTS hash_1_index;
+DROP INDEX IF EXISTS hash_2_index;
+DROP INDEX IF EXISTS hash_3_index;
+DROP INDEX IF EXISTS hash_4_index;
+DROP INDEX IF EXISTS hash_5_index;
+DROP INDEX IF EXISTS hash_6_index;
+DROP INDEX IF EXISTS hash_7_index;
+DROP INDEX IF EXISTS hash_8_index;
+
+DROP INDEX IF EXISTS id_domain;
+`)
+	return err
+}
+
+// createIndexes creates, where missing, the index on (kind, hash), one index
+// per byte of hash and the index on (domain, id), then has SQLite release
+// memory with PRAGMA shrink_memory.
+func (s *sqliteStorage) createIndexes() error {
+	_, err := s.db.Exec(`
+CREATE INDEX IF NOT EXISTS hash_index ON Hashes (kind, hash);
+CREATE INDEX IF NOT EXISTS hash_1_index ON Hashes ((hash >> (0 * 8) & 0xFF));
+CREATE INDEX IF NOT EXISTS hash_2_index ON Hashes ((hash >> (1 * 8) & 0xFF));
+CREATE INDEX IF NOT EXISTS hash_3_index ON Hashes ((hash >> (2 * 8) & 0xFF));
+CREATE INDEX IF NOT EXISTS hash_4_index ON Hashes ((hash >> (3 * 8) & 0xFF));
+CREATE INDEX IF NOT EXISTS hash_5_index ON Hashes ((hash >> (4 * 8) & 0xFF));
+CREATE INDEX IF NOT EXISTS hash_6_index ON Hashes ((hash >> (5 * 8) & 0xFF));
+CREATE INDEX IF NOT EXISTS hash_7_index ON Hashes ((hash >> (6 * 8) & 0xFF));
+CREATE INDEX IF NOT EXISTS hash_8_index ON Hashes ((hash >> (7 * 8) & 0xFF));
+
+CREATE INDEX IF NOT EXISTS id_domain ON IDs (domain, id);
+PRAGMA shrink_memory;
+`)
+	return err
+}
+
+// GetMatches returns the stored hashes that match any of hashes, each with the
+// IDs linked to it.
+//
+// With exactOnly set, stored hashes equal to a non-zero input hash are looked
+// up first and returned on their own if there are any. Otherwise, or when there
+// is no exact match, each input hash is searched for stored hashes of the same
+// kind within a Hamming distance of max.
+func (s *sqliteStorage) GetMatches(hashes []Hash, max int, exactOnly bool) ([]Result, error) {
+	var foundMatches []Result
+
+	if exactOnly { // exact matches are also found by partial matches. Don't bother with exact matches so we don't have to de-duplicate
+		args := make([]interface{}, 0, len(hashes)*2)
+		for _, hash := range hashes {
+			if hash.Hash != 0 {
+				args = append(args, int64(hash.Hash), hash.Kind)
+			}
+		}
+		// Build one condition per hash that is actually bound. Nothing is
+		// queried when every hash was skipped, which would otherwise leave
+		// the WHERE clause empty.
+		if len(args) > 0 {
+			conditions := strings.TrimSuffix(strings.Repeat("(hash=? AND kind=?) OR ", len(args)/2), " OR ")
+			statement, err := s.db.Prepare(`SELECT rowid,hash,kind FROM Hashes WHERE ` + conditions + ` ORDER BY kind,hash;`)
+			if err != nil {
+				return foundMatches, err
+			}
+			exact, err := s.findExactHashes(statement, args...)
+			statement.Close()
+			if err != nil {
+				return foundMatches, err
+			}
+			for _, hash := range exact {
+				foundMatches = append(foundMatches, hash.Result)
+			}
+		}
+
+		// If we have exact matches don't bother with other matches
+		if len(foundMatches) > 0 {
+			return foundMatches, nil
+		}
+	}
+
+	// The same value can be stored under several kinds, so kind is part of the key.
+	type hashKey struct {
+		kind goimagehash.Kind
+		hash uint64
+	}
+	foundHashes := make(map[hashKey]struct{})
+
+	for _, hash := range hashes {
+		partial, err := s.findPartialHashes(max, int64(hash.Hash), hash.Kind)
+		if err != nil {
+			return foundMatches, err
+		}
+
+		for _, match := range partial {
+			key := hashKey{kind: match.Hash.Kind, hash: match.Hash.Hash}
+			if _, alreadyMatched := foundHashes[key]; alreadyMatched {
+				log.Println("Hash already found", match)
+				continue
+			}
+			foundHashes[key] = struct{}{}
+			foundMatches = append(foundMatches, match.Result)
+		}
+	}
+
+	return foundMatches, nil
+}
+
+// mapHashes stores hash.ID and every hash in hash.Hashes inside tx, unless
+// they are stored already, and links each of the hashes to the ID.
+func (s *sqliteStorage) mapHashes(tx *sql.Tx, hash ImageHash) error {
+	// LastInsertId is not usable here: when ON CONFLICT DO NOTHING skips the
+	// insert no row is added, so the existing row is looked up instead.
+	_, err := tx.Exec(`INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO NOTHING;`, hash.ID.Domain, hash.ID.ID)
+	if err != nil {
+		return fmt.Errorf("inserting id %v,%v: %w", hash.ID.Domain, hash.ID.ID, err)
+	}
+	var idid int64
+	err = tx.QueryRow(`SELECT idid FROM IDs WHERE domain=? AND id=?;`, hash.ID.Domain, hash.ID.ID).Scan(&idid)
+	if err != nil {
+		return fmt.Errorf("looking up id %v,%v: %w", hash.ID.Domain, hash.ID.ID, err)
+	}
+
+	for _, h := range hash.Hashes {
+		_, err = tx.Exec(`INSERT INTO Hashes (hash,kind) VALUES (?,?) ON CONFLICT DO NOTHING;`, int64(h.Hash), h.Kind)
+		if err != nil {
+			return fmt.Errorf("inserting hash for %v,%v: %w", hash.ID.Domain, hash.ID.ID, err)
+		}
+		var hashid int64
+		err = tx.QueryRow(`SELECT hashid FROM Hashes WHERE kind=? AND hash=?;`, h.Kind, int64(h.Hash)).Scan(&hashid)
+		if err != nil {
+			return fmt.Errorf("looking up hash for %v,%v: %w", hash.ID.Domain, hash.ID.ID, err)
+		}
+		_, err = tx.Exec(`INSERT INTO id_hash (hashid, idid) VALUES (?, ?) ON CONFLICT DO NOTHING;`, hashid, idid)
+		if err != nil {
+			return fmt.Errorf("linking hash to %v,%v: %w", hash.ID.Domain, hash.ID.ID, err)
+		}
+	}
+	return nil
+}
+
+// MapHashes stores hash in the database in a single transaction. It panics if
+// that fails.
+func (s *sqliteStorage) MapHashes(hash ImageHash) {
+	tx, err := s.db.Begin()
+	if err != nil {
+		panic(err)
+	}
+	if err = s.mapHashes(tx, hash); err != nil {
+		_ = tx.Rollback() // the insert error is the one worth reporting
+		panic(err)
+	}
+	if err = tx.Commit(); err != nil {
+		panic(err)
+	}
+}
+
+// DecodeHashes loads hashes into the database. Each entry holds its AHash,
+// DHash and PHash, in that order.
+//
+// The indexes are dropped for the duration of the load, which runs in one
+// transaction, and are recreated afterwards even when the load fails.
+func (s *sqliteStorage) DecodeHashes(hashes SavedHashes) error {
+	if err := s.dropIndexes(); err != nil {
+		return err
+	}
+	loadErr := s.loadHashes(hashes)
+	indexErr := s.createIndexes()
+	if loadErr != nil {
+		return loadErr
+	}
+	return indexErr
+}
+
+// loadHashes stores every entry of hashes in a single transaction, which is
+// rolled back if any entry fails.
+func (s *sqliteStorage) loadHashes(hashes SavedHashes) error {
+	tx, err := s.db.Begin()
+	if err != nil {
+		return err
+	}
+	for domain, sourceHashes := range hashes {
+		for id, h := range sourceHashes {
+			err = s.mapHashes(tx, ImageHash{[]Hash{{h[0], goimagehash.AHash}, {h[1], goimagehash.DHash}, {h[2], goimagehash.PHash}}, ID{domain, id}})
+			if err != nil {
+				_ = tx.Rollback() // the load error is the one worth reporting
+				return err
+			}
+		}
+	}
+	return tx.Commit()
+}
+
+// EncodeHashes returns every ID in the database with its hashes, stored in the
+// order DecodeHashes reads them: AHash, DHash, PHash. Hashes of any other kind
+// are left out.
+func (s *sqliteStorage) EncodeHashes() (SavedHashes, error) {
+	hashes := make(SavedHashes)
+	rows, err := s.db.QueryContext(context.Background(), `SELECT IDs.domain,IDs.id,Hashes.hash,Hashes.kind FROM Hashes JOIN id_hash ON id_hash.hashid = Hashes.hashid JOIN IDs ON IDs.idid = id_hash.idid ORDER BY IDs.domain,IDs.id,Hashes.kind,Hashes.hash;`)
+	if err != nil {
+		return hashes, err
+	}
+	defer rows.Close()
+
+	for rows.Next() {
+		var (
+			source Source
+			id     string
+			hash   int64
+			kind   goimagehash.Kind
+		)
+		if err = rows.Scan(&source, &id, &hash, &kind); err != nil {
+			return hashes, err
+		}
+		var slot int
+		switch kind {
+		case goimagehash.AHash:
+			slot = 0
+		case goimagehash.DHash:
+			slot = 1
+		case goimagehash.PHash:
+			slot = 2
+		default:
+			continue
+		}
+		if _, ok := hashes[source]; !ok {
+			hashes[source] = make(map[string][3]uint64)
+		}
+		saved := hashes[source][id]
+		saved[slot] = uint64(hash)
+		hashes[source][id] = saved
+	}
+	return hashes, rows.Err()
+}
+
+// lookupID returns the idid of id. found is false when id is not stored.
+func (s *sqliteStorage) lookupID(id ID) (idid int64, found bool, err error) {
+	err = s.db.QueryRow(`SELECT idid FROM IDs WHERE domain=? AND id=?;`, id.Domain, id.ID).Scan(&idid)
+	if errors.Is(err, sql.ErrNoRows) {
+		return 0, false, nil
+	}
+	if err != nil {
+		return 0, false, err
+	}
+	return idid, true, nil
+}
+
+// AssociateIDs links every hash of each OldID to the matching NewID, adding the
+// NewID if it is not stored yet. Pairs whose OldID is unknown only get their
+// NewID added. It panics on any database error.
+func (s *sqliteStorage) AssociateIDs(newIDs []NewIDs) {
+	for _, ids := range newIDs {
+		_, err := s.db.Exec(`INSERT INTO IDs (domain,id) VALUES (?,?) ON CONFLICT DO NOTHING;`, ids.NewID.Domain, ids.NewID.ID)
+		if err != nil {
+			panic(err)
+		}
+		newIDID, found, err := s.lookupID(ids.NewID)
+		if err != nil {
+			panic(err)
+		}
+		if !found {
+			panic("Unable to insert New ID into database")
+		}
+		oldIDID, found, err := s.lookupID(ids.OldID)
+		if err != nil {
+			panic(err)
+		}
+		if !found {
+			continue
+		}
+		// The WHERE clause also keeps SQLite from reading ON CONFLICT as a join constraint.
+		_, err = s.db.Exec(`INSERT INTO id_hash (hashid, idid) SELECT hashid,? FROM id_hash WHERE idid=? ON CONFLICT DO NOTHING;`, newIDID, oldIDID)
+		if err != nil {
+			panic(err)
+		}
+	}
+}
+
+// GetIDs returns every ID linked to at least one of the hashes id is linked
+// to, or nil when id is not stored. It panics on any database error.
+func (s *sqliteStorage) GetIDs(id ID) IDList {
+	idid, found, err := s.lookupID(id)
+	if err != nil {
+		panic(err)
+	}
+	if !found {
+		return nil
+	}
+	rows, err := s.db.Query(`SELECT DISTINCT IDs.domain,IDs.id FROM id_hash JOIN IDs ON id_hash.idid=IDs.idid WHERE id_hash.hashid IN (SELECT hashid FROM id_hash WHERE idid=?) ORDER BY IDs.domain,IDs.id;`, idid)
+	if err != nil {
+		panic(err)
+	}
+	defer rows.Close()
+
+	ids := make(IDList)
+	for rows.Next() {
+		var match ID
+		if err = rows.Scan(&match.Domain, &match.ID); err != nil {
+			panic(err)
+		}
+		ids[match.Domain] = append(ids[match.Domain], match.ID)
+	}
+	if err = rows.Err(); err != nil {
+		panic(err)
+	}
+	return ids
+}
+
+// NewSqliteStorage opens the SQLite database at path with the database/sql
+// driver registered as db and creates any missing tables and indexes.
+func NewSqliteStorage(db, path string) (HashStorage, error) {
+	// "file:" rather than "file://": SQLite reads whatever follows "//" up to
+	// the next "/" as the URI authority, which breaks relative paths.
+	sqlDB, err := sql.Open(db, fmt.Sprintf("file:%s?_pragma=cache_size(-200000)&_pragma=busy_timeout(500)&_pragma=hard_heap_limit(1073741824)&_pragma=journal_mode(wal)&_pragma=soft_heap_limit(314572800)", path))
+	if err != nil {
+		return nil, fmt.Errorf("opening sqlite database %q: %w", path, err)
+	}
+	// Limit the pool to one connection before the first statement so the
+	// PRAGMA below runs on the connection that later statements use.
+	sqlDB.SetMaxOpenConns(1)
+
+	sqlite := &sqliteStorage{db: sqlDB}
+	_, err = sqlDB.Exec(`
+PRAGMA foreign_keys=ON;
+CREATE TABLE IF NOT EXISTS Hashes(
+	hashid INTEGER PRIMARY KEY,
+	hash INT NOT NULL,
+	kind int NOT NULL,
+	UNIQUE(kind, hash)
+);
+
+CREATE TABLE IF NOT EXISTS IDs(
+	id TEXT NOT NULL,
+	domain TEXT NOT NULL,
+	idid INTEGER PRIMARY KEY,
+	UNIQUE (domain, id)
+);
+
+CREATE TABLE IF NOT EXISTS id_hash(
+	hashid INTEGER,
+	idid INTEGER,
+	FOREIGN KEY(hashid) REFERENCES Hashes(hashid),
+	FOREIGN KEY(idid) REFERENCES IDs(idid),
+	UNIQUE (hashid, idid)
+);
+`)
+	if err == nil {
+		err = sqlite.createIndexes()
+	}
+	if err != nil {
+		sqlDB.Close()
+		return nil, fmt.Errorf("initializing sqlite database %q: %w", path, err)
+	}
+	return sqlite, nil
+}
diff --git a/sqlite_cgo.go b/sqlite_cgo.go
new file mode 100644
index 0000000..36f4489
--- /dev/null
+++ b/sqlite_cgo.go
@@ -0,0 +1,7 @@
+//go:build cgo
+
+package ch
+
+import (
+	_ "github.com/mattn/go-sqlite3"
+)