Compare commits

..

No commits in common. "5cf25089daef97e425a6cbc42bc48fd91e4cba17" and "cc4e973bf914da719c3bf5c856d7b511a3f6b40e" have entirely different histories.

5 changed files with 60 additions and 100 deletions

View File

@ -1,6 +1,6 @@
repos: repos:
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0 rev: v4.6.0
hooks: hooks:
- id: trailing-whitespace - id: trailing-whitespace
args: [--markdown-linebreak-ext=.gitignore] args: [--markdown-linebreak-ext=.gitignore]
@ -17,12 +17,12 @@ repos:
hooks: hooks:
- id: golangci-lint - id: golangci-lint
- repo: https://github.com/asottile/setup-cfg-fmt - repo: https://github.com/asottile/setup-cfg-fmt
rev: v2.7.0 rev: v2.5.0
hooks: hooks:
- id: setup-cfg-fmt - id: setup-cfg-fmt
- repo: https://github.com/asottile/reorder-python-imports - repo: https://github.com/asottile/reorder-python-imports
rev: v3.14.0 rev: v3.13.0
hooks: hooks:
- id: reorder-python-imports - id: reorder-python-imports
args: [--py38-plus, --add-import, 'from __future__ import annotations'] args: [--py38-plus, --add-import, 'from __future__ import annotations']
@ -31,7 +31,7 @@ repos:
hooks: hooks:
- id: add-trailing-comma - id: add-trailing-comma
- repo: https://github.com/asottile/pyupgrade - repo: https://github.com/asottile/pyupgrade
rev: v3.19.1 rev: v3.17.0
hooks: hooks:
- id: pyupgrade - id: pyupgrade
args: [--py38-plus] args: [--py38-plus]

View File

@ -71,15 +71,14 @@ func (s CHDB) PathDownloaded(path string) bool {
} }
func (s CHDB) AddPath(path string) { func (s CHDB) AddPath(path string) {
relPath, _ := filepath.Rel(s.comicvinePath, path) path, _ = filepath.Rel(s.comicvinePath, path)
_, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", relPath) _, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", path)
if err != nil { if err != nil {
log.Println(fmt.Errorf("Failed to insert %v into paths: %w", relPath, err)) log.Println(fmt.Errorf("Failed to insert %v into paths: %w", path, err))
} }
if s.deleteExisting { if s.deleteExisting {
_ = os.Remove(path) os.Remove(path)
_ = RmdirP(filepath.Dir(path))
} }
} }

View File

@ -154,7 +154,6 @@ type Opts struct {
path string path string
thumbOnly bool thumbOnly bool
hashDownloaded bool hashDownloaded bool
keepDownloaded bool
} }
} }
@ -188,7 +187,6 @@ func main() {
flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine"))) flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine")))
flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine") flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine")
flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images") flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images")
flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded")
flag.Parse() flag.Parse()
if opts.coverPath != "" { if opts.coverPath != "" {
@ -785,17 +783,9 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
// log.Println(path.Dest, "File has already been hashed, it may not be saved in the hashes file because we currently don't save any hashes if we've crashed") // log.Println(path.Dest, "File has already been hashed, it may not be saved in the hashes file because we currently don't save any hashes if we've crashed")
continue continue
} }
var ( file, err := os.OpenFile(path.Dest, os.O_RDWR, 0666)
file io.ReadCloser if err != nil {
err error panic(err)
)
if path.Image == nil {
file, err = os.OpenFile(path.Dest, os.O_RDWR, 0666)
if err != nil {
panic(err)
}
} else {
file = io.NopCloser(bytes.NewBuffer(path.Image))
} }
i, format, err := image.Decode(bufio.NewReader(file)) i, format, err := image.Decode(bufio.NewReader(file))
if err != nil { if err != nil {
@ -817,6 +807,7 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
log.Println("Recieved quit") log.Println("Recieved quit")
return return
case server.hashingQueue <- im: case server.hashingQueue <- im:
// log.Println("Sending:", im)
} }
} }
} }
@ -893,7 +884,7 @@ func startServer(opts Opts) {
log.Println("Init downloaders") log.Println("Init downloaders")
dwg := sync.WaitGroup{} dwg := sync.WaitGroup{}
finishedDownloadQueue := make(chan cv.Download, 10) finishedDownloadQueue := make(chan cv.Download)
go downloadProcessor(chdb, opts, finishedDownloadQueue, server) go downloadProcessor(chdb, opts, finishedDownloadQueue, server)
if opts.cv.downloadCovers { if opts.cv.downloadCovers {
@ -902,7 +893,7 @@ func startServer(opts Opts) {
if opts.cv.thumbOnly { if opts.cv.thumbOnly {
imageTypes = append(imageTypes, "thumb_url") imageTypes = append(imageTypes, "thumb_url")
} }
cvdownloader := cv.NewCVDownloader(server.Context, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.keepDownloaded, opts.cv.hashDownloaded, finishedDownloadQueue) cvdownloader := cv.NewCVDownloader(server.Context, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.hashDownloaded, finishedDownloadQueue)
go func() { go func() {
defer dwg.Done() defer dwg.Done()
cv.DownloadCovers(cvdownloader) cv.DownloadCovers(cvdownloader)

120
cv/cv.go
View File

@ -2,7 +2,6 @@ package cv
import ( import (
"bufio" "bufio"
"bytes"
"cmp" "cmp"
"context" "context"
"encoding/json" "encoding/json"
@ -30,7 +29,6 @@ type Download struct {
URL string URL string
Dest string Dest string
IssueID string IssueID string
Image []byte
} }
type Issue struct { type Issue struct {
@ -69,7 +67,6 @@ type CVDownloader struct {
ImagePath string ImagePath string
ImageTypes []string ImageTypes []string
SendExistingImages bool SendExistingImages bool
KeepDownloadedImages bool
Context context.Context Context context.Context
FinishedDownloadQueue chan Download FinishedDownloadQueue chan Download
@ -153,6 +150,22 @@ func (c *CVDownloader) updateIssues() {
query.Add("api_key", c.APIKey) query.Add("api_key", c.APIKey)
base_url.RawQuery = query.Encode() base_url.RawQuery = query.Encode()
c.totalResults = max(c.totalResults, 1) c.totalResults = max(c.totalResults, 1)
// IDs := make([]int, 0, 1_000_000)
// deleteIndexes := make([]int, 0, 100)
// CV sucks: remove duplicate IDs so that we can try to get all the comics
// for i, issueList := range issues {
// for _, issue := range issueList.Results {
// if _, found := slices.BinarySearch(IDs, issue.ID); found {
// deleteIndexes = append(deleteIndexes, i)
// slices.Sort(deleteIndexes)
// }
// IDs = append(IDs, issue.ID)
// }
// }
// slices.Reverse(deleteIndexes)
// for _, i := range deleteIndexes {
// issues = slices.Delete(issues, i, min(i+1, len(issues)-1))
// }
failCount := 0 failCount := 0
prev := -1 prev := -1
offset := 0 offset := 0
@ -303,6 +316,7 @@ func (c *CVDownloader) start_downloader() {
for i := range 5 { for i := range 5 {
go func() { go func() {
log.Println("starting downloader", i) log.Println("starting downloader", i)
dir_created := make(map[string]bool)
for dl := range c.imageDownloads { for dl := range c.imageDownloads {
if c.hasQuit() { if c.hasQuit() {
c.imageWG.Done() c.imageWG.Done()
@ -347,50 +361,30 @@ func (c *CVDownloader) start_downloader() {
cleanup() cleanup()
continue continue
} }
_ = os.MkdirAll(dir, 0o755) if !dir_created[dir] {
_ = os.MkdirAll(dir, 0o755)
dir_created[dir] = true
}
image, err := os.Create(dl.dest)
if err != nil {
log.Println("Unable to create image file", dl.dest, err)
os.Remove(dl.dest)
cleanup()
continue
}
log.Println("downloading", dl.dest)
_, err = io.Copy(image, resp.Body)
if err != nil {
log.Println("Failed when downloading image", err)
cleanup()
os.Remove(dl.dest)
continue
}
if c.KeepDownloadedImages { c.FinishedDownloadQueue <- Download{
image, err := os.Create(dl.dest) URL: dl.url,
if err != nil { Dest: dl.dest,
log.Println("Unable to create image file", dl.dest, err) IssueID: strconv.Itoa(dl.issueID),
os.Remove(dl.dest)
image.Close()
cleanup()
continue
}
log.Println("downloading", dl.dest)
_, err = io.Copy(image, resp.Body)
image.Close()
if err != nil {
log.Println("Failed when downloading image", err)
os.Remove(dl.dest)
cleanup()
continue
}
c.FinishedDownloadQueue <- Download{
URL: dl.url,
Dest: dl.dest,
IssueID: strconv.Itoa(dl.issueID),
}
} else {
image := &bytes.Buffer{}
log.Println("downloading", dl.dest)
_, err = io.Copy(image, resp.Body)
if err != nil {
log.Println("Failed when downloading image", err)
cleanup()
os.Remove(dl.dest)
continue
}
c.FinishedDownloadQueue <- Download{
URL: dl.url,
Dest: dl.dest,
IssueID: strconv.Itoa(dl.issueID),
Image: image.Bytes(),
}
} }
cleanup() cleanup()
} }
@ -485,7 +479,7 @@ func (c *CVDownloader) downloadImages() {
beforeWait := time.Now() beforeWait := time.Now()
c.imageWG.Wait() c.imageWG.Wait()
waited := time.Since(beforeWait) waited := time.Since(beforeWait)
// If we had to wait longer than the arbitrarily picked time of 7.4 seconds it means we had a backed-up queue (slow hashing can also cause it to wait longer); let's wait to give the CV servers a break // If we had to wait longer than the arbitrarily picked time of 7.4 seconds it means we had a backed-up queue; let's wait to give the CV servers a break
if waited > time.Duration(7.4*float64(time.Second)) { if waited > time.Duration(7.4*float64(time.Second)) {
t := 10 * time.Second t := 10 * time.Second
log.Println("Waiting for", t, "at offset", list.Offset, "had to wait for", waited) log.Println("Waiting for", t, "at offset", list.Offset, "had to wait for", waited)
@ -543,24 +537,7 @@ func (c *CVDownloader) hasQuit() bool {
} }
} }
func (c *CVDownloader) cleanDirs() { func NewCVDownloader(ctx context.Context, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
_ = filepath.WalkDir(c.ImagePath, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
if d.IsDir() {
path, _ = filepath.Abs(path)
err := ch.RmdirP(path)
// The error is only for the first path value, e.g. ch.RmdirP("/test/t") will only return the error for os.Remove("/test/t"), not os.Remove("/test")
if err == nil {
return filepath.SkipDir
}
}
return nil
})
}
func NewCVDownloader(ctx context.Context, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
return &CVDownloader{ return &CVDownloader{
Context: ctx, Context: ctx,
JSONPath: filepath.Join(workPath, "_json"), JSONPath: filepath.Join(workPath, "_json"),
@ -571,7 +548,6 @@ func NewCVDownloader(ctx context.Context, chdb ch.CHDB, workPath, APIKey string,
notFound: make(chan download, 100), notFound: make(chan download, 100),
FinishedDownloadQueue: finishedDownloadQueue, FinishedDownloadQueue: finishedDownloadQueue,
SendExistingImages: sendExistingImages, SendExistingImages: sendExistingImages,
KeepDownloadedImages: keepDownloadedImages,
ImageTypes: imageTypes, ImageTypes: imageTypes,
chdb: chdb, chdb: chdb,
} }
@ -582,9 +558,6 @@ func DownloadCovers(c *CVDownloader) {
err error err error
) )
os.MkdirAll(c.JSONPath, 0o777) os.MkdirAll(c.JSONPath, 0o777)
f, _ := os.Create(filepath.Join(c.ImagePath, ".keep"))
f.Close()
c.cleanDirs()
c.fileList, err = os.ReadDir(c.JSONPath) c.fileList, err = os.ReadDir(c.JSONPath)
if err != nil { if err != nil {
panic(fmt.Errorf("Unable to open path for json files: %w", err)) panic(fmt.Errorf("Unable to open path for json files: %w", err))
@ -616,8 +589,9 @@ func DownloadCovers(c *CVDownloader) {
log.Println("Number of issues", issueCount, " expected:", c.totalResults) log.Println("Number of issues", issueCount, " expected:", c.totalResults)
close(c.downloadQueue) // sends only happen in c.updateIssues which has already been called close(c.downloadQueue) // sends only happen in c.updateIssues
// We don't drain here as we want to process them for range c.downloadQueue {
}
log.Println("Waiting for downloaders") log.Println("Waiting for downloaders")
dwg.Wait() dwg.Wait()
@ -628,9 +602,5 @@ func DownloadCovers(c *CVDownloader) {
for range c.notFound { for range c.notFound {
} }
// We drain this at the end because we need to wait for the images to download
for range c.downloadQueue {
}
log.Println("Completed downloading images") log.Println("Completed downloading images")
} }

2
go.mod
View File

@ -1,6 +1,6 @@
module gitea.narnian.us/lordwelch/comic-hasher module gitea.narnian.us/lordwelch/comic-hasher
go 1.22 go 1.23.0
require ( require (
gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00 gitea.narnian.us/lordwelch/goimagehash v0.0.0-20240812025715-33ff96e45f00