Compare commits
No commits in common. "5cf25089daef97e425a6cbc42bc48fd91e4cba17" and "cc4e973bf914da719c3bf5c856d7b511a3f6b40e" have entirely different histories.
5cf25089da
...
cc4e973bf9
@ -1,6 +1,6 @@
|
|||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v5.0.0
|
rev: v4.6.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: trailing-whitespace
|
- id: trailing-whitespace
|
||||||
args: [--markdown-linebreak-ext=.gitignore]
|
args: [--markdown-linebreak-ext=.gitignore]
|
||||||
@ -17,12 +17,12 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: golangci-lint
|
- id: golangci-lint
|
||||||
- repo: https://github.com/asottile/setup-cfg-fmt
|
- repo: https://github.com/asottile/setup-cfg-fmt
|
||||||
rev: v2.7.0
|
rev: v2.5.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: setup-cfg-fmt
|
- id: setup-cfg-fmt
|
||||||
|
|
||||||
- repo: https://github.com/asottile/reorder-python-imports
|
- repo: https://github.com/asottile/reorder-python-imports
|
||||||
rev: v3.14.0
|
rev: v3.13.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: reorder-python-imports
|
- id: reorder-python-imports
|
||||||
args: [--py38-plus, --add-import, 'from __future__ import annotations']
|
args: [--py38-plus, --add-import, 'from __future__ import annotations']
|
||||||
@ -31,7 +31,7 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: add-trailing-comma
|
- id: add-trailing-comma
|
||||||
- repo: https://github.com/asottile/pyupgrade
|
- repo: https://github.com/asottile/pyupgrade
|
||||||
rev: v3.19.1
|
rev: v3.17.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: pyupgrade
|
- id: pyupgrade
|
||||||
args: [--py38-plus]
|
args: [--py38-plus]
|
||||||
|
9
CHDB.go
9
CHDB.go
@ -71,15 +71,14 @@ func (s CHDB) PathDownloaded(path string) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s CHDB) AddPath(path string) {
|
func (s CHDB) AddPath(path string) {
|
||||||
relPath, _ := filepath.Rel(s.comicvinePath, path)
|
path, _ = filepath.Rel(s.comicvinePath, path)
|
||||||
_, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", relPath)
|
_, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println(fmt.Errorf("Failed to insert %v into paths: %w", relPath, err))
|
log.Println(fmt.Errorf("Failed to insert %v into paths: %w", path, err))
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.deleteExisting {
|
if s.deleteExisting {
|
||||||
_ = os.Remove(path)
|
os.Remove(path)
|
||||||
_ = RmdirP(filepath.Dir(path))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,7 +154,6 @@ type Opts struct {
|
|||||||
path string
|
path string
|
||||||
thumbOnly bool
|
thumbOnly bool
|
||||||
hashDownloaded bool
|
hashDownloaded bool
|
||||||
keepDownloaded bool
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,7 +187,6 @@ func main() {
|
|||||||
flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine")))
|
flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine")))
|
||||||
flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine")
|
flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine")
|
||||||
flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images")
|
flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images")
|
||||||
flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded")
|
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
if opts.coverPath != "" {
|
if opts.coverPath != "" {
|
||||||
@ -785,18 +783,10 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
|
|||||||
// log.Println(path.Dest, "File has already been hashed, it may not be saved in the hashes file because we currently don't save any hashes if we've crashed")
|
// log.Println(path.Dest, "File has already been hashed, it may not be saved in the hashes file because we currently don't save any hashes if we've crashed")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
var (
|
file, err := os.OpenFile(path.Dest, os.O_RDWR, 0666)
|
||||||
file io.ReadCloser
|
|
||||||
err error
|
|
||||||
)
|
|
||||||
if path.Image == nil {
|
|
||||||
file, err = os.OpenFile(path.Dest, os.O_RDWR, 0666)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
file = io.NopCloser(bytes.NewBuffer(path.Image))
|
|
||||||
}
|
|
||||||
i, format, err := image.Decode(bufio.NewReader(file))
|
i, format, err := image.Decode(bufio.NewReader(file))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
file.Close()
|
file.Close()
|
||||||
@ -817,6 +807,7 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
|
|||||||
log.Println("Recieved quit")
|
log.Println("Recieved quit")
|
||||||
return
|
return
|
||||||
case server.hashingQueue <- im:
|
case server.hashingQueue <- im:
|
||||||
|
// log.Println("Sending:", im)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -893,7 +884,7 @@ func startServer(opts Opts) {
|
|||||||
|
|
||||||
log.Println("Init downloaders")
|
log.Println("Init downloaders")
|
||||||
dwg := sync.WaitGroup{}
|
dwg := sync.WaitGroup{}
|
||||||
finishedDownloadQueue := make(chan cv.Download, 10)
|
finishedDownloadQueue := make(chan cv.Download)
|
||||||
go downloadProcessor(chdb, opts, finishedDownloadQueue, server)
|
go downloadProcessor(chdb, opts, finishedDownloadQueue, server)
|
||||||
|
|
||||||
if opts.cv.downloadCovers {
|
if opts.cv.downloadCovers {
|
||||||
@ -902,7 +893,7 @@ func startServer(opts Opts) {
|
|||||||
if opts.cv.thumbOnly {
|
if opts.cv.thumbOnly {
|
||||||
imageTypes = append(imageTypes, "thumb_url")
|
imageTypes = append(imageTypes, "thumb_url")
|
||||||
}
|
}
|
||||||
cvdownloader := cv.NewCVDownloader(server.Context, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.keepDownloaded, opts.cv.hashDownloaded, finishedDownloadQueue)
|
cvdownloader := cv.NewCVDownloader(server.Context, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.hashDownloaded, finishedDownloadQueue)
|
||||||
go func() {
|
go func() {
|
||||||
defer dwg.Done()
|
defer dwg.Done()
|
||||||
cv.DownloadCovers(cvdownloader)
|
cv.DownloadCovers(cvdownloader)
|
||||||
|
80
cv/cv.go
80
cv/cv.go
@ -2,7 +2,6 @@ package cv
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
|
||||||
"cmp"
|
"cmp"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
@ -30,7 +29,6 @@ type Download struct {
|
|||||||
URL string
|
URL string
|
||||||
Dest string
|
Dest string
|
||||||
IssueID string
|
IssueID string
|
||||||
Image []byte
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Issue struct {
|
type Issue struct {
|
||||||
@ -69,7 +67,6 @@ type CVDownloader struct {
|
|||||||
ImagePath string
|
ImagePath string
|
||||||
ImageTypes []string
|
ImageTypes []string
|
||||||
SendExistingImages bool
|
SendExistingImages bool
|
||||||
KeepDownloadedImages bool
|
|
||||||
Context context.Context
|
Context context.Context
|
||||||
FinishedDownloadQueue chan Download
|
FinishedDownloadQueue chan Download
|
||||||
|
|
||||||
@ -153,6 +150,22 @@ func (c *CVDownloader) updateIssues() {
|
|||||||
query.Add("api_key", c.APIKey)
|
query.Add("api_key", c.APIKey)
|
||||||
base_url.RawQuery = query.Encode()
|
base_url.RawQuery = query.Encode()
|
||||||
c.totalResults = max(c.totalResults, 1)
|
c.totalResults = max(c.totalResults, 1)
|
||||||
|
// IDs := make([]int, 0, 1_000_000)
|
||||||
|
// deleteIndexes := make([]int, 0, 100)
|
||||||
|
// CV sucks remove duplicate IDs so that we can try to get all the comics
|
||||||
|
// for i, issueList := range ssues {
|
||||||
|
// for _, issue := range issueList.Results {
|
||||||
|
// if _, found := slices.BinarySearch(IDs, issue.ID); found {
|
||||||
|
// deleteIndexes = append(deleteIndexes, i)
|
||||||
|
// slices.Sort(deleteIndexes)
|
||||||
|
// }
|
||||||
|
// IDs = append(IDs, issue.ID)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// slices.Reverse(deleteIndexes)
|
||||||
|
// for _, i := range deleteIndexes {
|
||||||
|
// issues = slices.Delete(issues, i, min(i+1, len(issues)-1))
|
||||||
|
// }
|
||||||
failCount := 0
|
failCount := 0
|
||||||
prev := -1
|
prev := -1
|
||||||
offset := 0
|
offset := 0
|
||||||
@ -303,6 +316,7 @@ func (c *CVDownloader) start_downloader() {
|
|||||||
for i := range 5 {
|
for i := range 5 {
|
||||||
go func() {
|
go func() {
|
||||||
log.Println("starting downloader", i)
|
log.Println("starting downloader", i)
|
||||||
|
dir_created := make(map[string]bool)
|
||||||
for dl := range c.imageDownloads {
|
for dl := range c.imageDownloads {
|
||||||
if c.hasQuit() {
|
if c.hasQuit() {
|
||||||
c.imageWG.Done()
|
c.imageWG.Done()
|
||||||
@ -347,37 +361,19 @@ func (c *CVDownloader) start_downloader() {
|
|||||||
cleanup()
|
cleanup()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if !dir_created[dir] {
|
||||||
_ = os.MkdirAll(dir, 0o755)
|
_ = os.MkdirAll(dir, 0o755)
|
||||||
|
dir_created[dir] = true
|
||||||
if c.KeepDownloadedImages {
|
}
|
||||||
image, err := os.Create(dl.dest)
|
image, err := os.Create(dl.dest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println("Unable to create image file", dl.dest, err)
|
log.Println("Unable to create image file", dl.dest, err)
|
||||||
os.Remove(dl.dest)
|
os.Remove(dl.dest)
|
||||||
image.Close()
|
|
||||||
cleanup()
|
cleanup()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
log.Println("downloading", dl.dest)
|
log.Println("downloading", dl.dest)
|
||||||
_, err = io.Copy(image, resp.Body)
|
_, err = io.Copy(image, resp.Body)
|
||||||
image.Close()
|
|
||||||
if err != nil {
|
|
||||||
log.Println("Failed when downloading image", err)
|
|
||||||
os.Remove(dl.dest)
|
|
||||||
cleanup()
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
c.FinishedDownloadQueue <- Download{
|
|
||||||
URL: dl.url,
|
|
||||||
Dest: dl.dest,
|
|
||||||
IssueID: strconv.Itoa(dl.issueID),
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
image := &bytes.Buffer{}
|
|
||||||
log.Println("downloading", dl.dest)
|
|
||||||
_, err = io.Copy(image, resp.Body)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println("Failed when downloading image", err)
|
log.Println("Failed when downloading image", err)
|
||||||
cleanup()
|
cleanup()
|
||||||
@ -389,8 +385,6 @@ func (c *CVDownloader) start_downloader() {
|
|||||||
URL: dl.url,
|
URL: dl.url,
|
||||||
Dest: dl.dest,
|
Dest: dl.dest,
|
||||||
IssueID: strconv.Itoa(dl.issueID),
|
IssueID: strconv.Itoa(dl.issueID),
|
||||||
Image: image.Bytes(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
cleanup()
|
cleanup()
|
||||||
}
|
}
|
||||||
@ -485,7 +479,7 @@ func (c *CVDownloader) downloadImages() {
|
|||||||
beforeWait := time.Now()
|
beforeWait := time.Now()
|
||||||
c.imageWG.Wait()
|
c.imageWG.Wait()
|
||||||
waited := time.Since(beforeWait)
|
waited := time.Since(beforeWait)
|
||||||
// If we had to wait for the arbitrarily picked time of 7.4 seconds it means we had a backed up queue (slow hashing can also cause it to wait longer), lets wait to give the CV servers a break
|
// If we had to wait for the arbitrarily picked time of 7.4 seconds it means we had a backed up queue, lets wait to give the CV servers a break
|
||||||
if waited > time.Duration(7.4*float64(time.Second)) {
|
if waited > time.Duration(7.4*float64(time.Second)) {
|
||||||
t := 10 * time.Second
|
t := 10 * time.Second
|
||||||
log.Println("Waiting for", t, "at offset", list.Offset, "had to wait for", waited)
|
log.Println("Waiting for", t, "at offset", list.Offset, "had to wait for", waited)
|
||||||
@ -543,24 +537,7 @@ func (c *CVDownloader) hasQuit() bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *CVDownloader) cleanDirs() {
|
func NewCVDownloader(ctx context.Context, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
|
||||||
_ = filepath.WalkDir(c.ImagePath, func(path string, d fs.DirEntry, err error) error {
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if d.IsDir() {
|
|
||||||
path, _ = filepath.Abs(path)
|
|
||||||
err := ch.RmdirP(path)
|
|
||||||
// The error is only for the first path value. EG ch.RmdirP("/test/t") will only return the error for os.Remove("/test/t") not os.Remove("test")
|
|
||||||
if err == nil {
|
|
||||||
return filepath.SkipDir
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewCVDownloader(ctx context.Context, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
|
|
||||||
return &CVDownloader{
|
return &CVDownloader{
|
||||||
Context: ctx,
|
Context: ctx,
|
||||||
JSONPath: filepath.Join(workPath, "_json"),
|
JSONPath: filepath.Join(workPath, "_json"),
|
||||||
@ -571,7 +548,6 @@ func NewCVDownloader(ctx context.Context, chdb ch.CHDB, workPath, APIKey string,
|
|||||||
notFound: make(chan download, 100),
|
notFound: make(chan download, 100),
|
||||||
FinishedDownloadQueue: finishedDownloadQueue,
|
FinishedDownloadQueue: finishedDownloadQueue,
|
||||||
SendExistingImages: sendExistingImages,
|
SendExistingImages: sendExistingImages,
|
||||||
KeepDownloadedImages: keepDownloadedImages,
|
|
||||||
ImageTypes: imageTypes,
|
ImageTypes: imageTypes,
|
||||||
chdb: chdb,
|
chdb: chdb,
|
||||||
}
|
}
|
||||||
@ -582,9 +558,6 @@ func DownloadCovers(c *CVDownloader) {
|
|||||||
err error
|
err error
|
||||||
)
|
)
|
||||||
os.MkdirAll(c.JSONPath, 0o777)
|
os.MkdirAll(c.JSONPath, 0o777)
|
||||||
f, _ := os.Create(filepath.Join(c.ImagePath, ".keep"))
|
|
||||||
f.Close()
|
|
||||||
c.cleanDirs()
|
|
||||||
c.fileList, err = os.ReadDir(c.JSONPath)
|
c.fileList, err = os.ReadDir(c.JSONPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("Unable to open path for json files: %w", err))
|
panic(fmt.Errorf("Unable to open path for json files: %w", err))
|
||||||
@ -616,8 +589,9 @@ func DownloadCovers(c *CVDownloader) {
|
|||||||
|
|
||||||
log.Println("Number of issues", issueCount, " expected:", c.totalResults)
|
log.Println("Number of issues", issueCount, " expected:", c.totalResults)
|
||||||
|
|
||||||
close(c.downloadQueue) // sends only happen in c.updateIssues which has already been called
|
close(c.downloadQueue) // sends only happen in c.updateIssues
|
||||||
// We don't drain here as we want to process them
|
for range c.downloadQueue {
|
||||||
|
}
|
||||||
|
|
||||||
log.Println("Waiting for downloaders")
|
log.Println("Waiting for downloaders")
|
||||||
dwg.Wait()
|
dwg.Wait()
|
||||||
@ -628,9 +602,5 @@ func DownloadCovers(c *CVDownloader) {
|
|||||||
for range c.notFound {
|
for range c.notFound {
|
||||||
}
|
}
|
||||||
|
|
||||||
// We drain this at the end because we need to wait for the images to download
|
|
||||||
for range c.downloadQueue {
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Println("Completed downloading images")
|
log.Println("Completed downloading images")
|
||||||
}
|
}
|
||||||
|
2
go.mod
2
go.mod
@ -1,6 +1,6 @@
|
|||||||
module gitea.narnian.us/lordwelch/comic-hasher
|
module gitea.narnian.us/lordwelch/comic-hasher
|
||||||
|
|
||||||
go 1.22
|
go 1.23.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00
|
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00
|
||||||
|
Loading…
x
Reference in New Issue
Block a user