Compare commits

...

2 Commits

Author SHA1 Message Date
Timmy Welch
260a13688a Wait every 200 downloads so CV doesn't get overloaded 2024-12-26 17:50:25 -08:00
Timmy Welch
e04469938d Fix downloading images that are not hashed 2024-12-26 16:11:10 -08:00
3 changed files with 18 additions and 10 deletions

View File

@@ -57,11 +57,11 @@ func (s CHDB) PathHashed(path string) bool {
} }
func (s CHDB) PathDownloaded(path string) bool { func (s CHDB) PathDownloaded(path string) bool {
path, _ = filepath.Rel(s.comicvinePath, path) relPath, _ := filepath.Rel(s.comicvinePath, path)
dbPath := "" dbPath := ""
_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath) _ = s.sql.QueryRow("SELECT path FROM paths where path=?", relPath).Scan(&dbPath)
if dbPath != path { if dbPath != relPath {
f, err := os.Open(filepath.Join(s.comicvinePath, path)) f, err := os.Open(path)
if err == nil { if err == nil {
defer f.Close() defer f.Close()
} }

View File

@@ -180,13 +180,13 @@ func main() {
flag.StringVar(&opts.hashesPath, "hashes", "", fmt.Sprintf("Path to optionally gziped hashes in msgpack or json format. You must disable embedded hashes to use this option (default %v)", filepath.Join(wd, "hashes.gz"))) flag.StringVar(&opts.hashesPath, "hashes", "", fmt.Sprintf("Path to optionally gziped hashes in msgpack or json format. You must disable embedded hashes to use this option (default %v)", filepath.Join(wd, "hashes.gz")))
flag.Var(&opts.format, "save-format", "Specify the format to export hashes to (json, msgpack)") flag.Var(&opts.format, "save-format", "Specify the format to export hashes to (json, msgpack)")
flag.Var(&opts.storageType, "storage-type", "Specify the storage type used internally to search hashes (sqlite,sqlite3,map,basicmap,vptree)") flag.Var(&opts.storageType, "storage-type", "Specify the storage type used internally to search hashes (sqlite,sqlite3,map,basicmap,vptree)")
flag.BoolVar(&opts.onlyHashNewIDs, "only-hash-new-ids", true, "Only hashes new covers from CV/local path (Note: If there are multiple covers for the same ID they may get queued at the same time and hashed on the first run, implies -cv-thumb-only if -delete-hashed-images is set)") flag.BoolVar(&opts.onlyHashNewIDs, "only-hash-new-ids", true, "Only hashes new covers from CV/local path (Note: If there are multiple covers for the same ID they may get queued at the same time and hashed on the first run, implies -cv-thumb-only if -delete-hashed-images is true or -cv-keep-downloaded is false)")
flag.BoolVar(&opts.deleteHashedImages, "delete-hashed-images", false, "Deletes downloaded images after hashing them, useful to save space, paths are recorded in ch.sqlite") flag.BoolVar(&opts.deleteHashedImages, "delete-hashed-images", false, "Deletes downloaded images after hashing them, useful to save space, paths are recorded in ch.sqlite")
flag.BoolVar(&opts.cv.downloadCovers, "cv-dl-covers", false, "Downloads all covers from ComicVine and adds them to the server") flag.BoolVar(&opts.cv.downloadCovers, "cv-dl-covers", false, "Downloads all covers from ComicVine and adds them to the server")
flag.StringVar(&opts.cv.APIKey, "cv-api-key", "", "API Key to use to access the ComicVine API") flag.StringVar(&opts.cv.APIKey, "cv-api-key", "", "API Key to use to access the ComicVine API")
flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine"))) flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine")))
flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine") flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine, when false sets -only-hash-new-ids=false")
flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images") flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images")
flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded") flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded")
flag.Parse() flag.Parse()
@@ -203,7 +203,7 @@ func main() {
log.Fatal("No ComicVine API Key provided") log.Fatal("No ComicVine API Key provided")
} }
} }
opts.cv.thumbOnly = opts.cv.thumbOnly || (opts.onlyHashNewIDs && opts.deleteHashedImages) opts.cv.thumbOnly = opts.cv.thumbOnly || (opts.onlyHashNewIDs && (opts.deleteHashedImages || !opts.cv.keepDownloaded))
opts.path, _ = filepath.Abs(opts.path) opts.path, _ = filepath.Abs(opts.path)
if opts.hashesPath == "" { if opts.hashesPath == "" {
opts.hashesPath = filepath.Join(opts.path, "hashes.gz") opts.hashesPath = filepath.Join(opts.path, "hashes.gz")
@@ -219,6 +219,10 @@ func main() {
opts.cv.path, _ = filepath.Abs(opts.cv.path) opts.cv.path, _ = filepath.Abs(opts.cv.path)
pretty.Log(opts) pretty.Log(opts)
if !opts.cv.keepDownloaded && opts.onlyHashNewIDs {
panic("You need to fix your -cv-keep-downloaded and -only-hash-new-ids flags")
}
startServer(opts) startServer(opts)
} }
@@ -839,8 +843,8 @@ func startServer(opts Opts) {
cancel: cancel, cancel: cancel,
signalQueue: make(chan os.Signal, 1), signalQueue: make(chan os.Signal, 1),
readerQueue: make(chan string, 100), readerQueue: make(chan string, 100),
hashingQueue: make(chan ch.Im), hashingQueue: make(chan ch.Im, 1),
mappingQueue: make(chan ch.ImageHash), mappingQueue: make(chan ch.ImageHash, 1),
mux: mux, mux: mux,
httpServer: &http.Server{ httpServer: &http.Server{
Addr: ":8080", Addr: ":8080",

View File

@@ -481,10 +481,10 @@ func (c *CVDownloader) downloadImages() {
if added > 200 { if added > 200 {
// On a clean single image type run each page would have 100 downloads of a single cover type but stuff happens so we only wait once we have sent 200 to the queue // On a clean single image type run each page would have 100 downloads of a single cover type but stuff happens so we only wait once we have sent 200 to the queue
log.Println("waiting for", added, "downloads at offset", list.Offset) log.Println("waiting for", added, "downloads at offset", list.Offset)
added = 0
beforeWait := time.Now() beforeWait := time.Now()
c.imageWG.Wait() c.imageWG.Wait()
waited := time.Since(beforeWait) waited := time.Since(beforeWait)
added = 0
// If we had to wait for the arbitrarily picked time of 7.4 seconds it means we had a backed-up queue (slow hashing can also cause it to wait longer), let's wait to give the CV servers a break // If we had to wait for the arbitrarily picked time of 7.4 seconds it means we had a backed-up queue (slow hashing can also cause it to wait longer), let's wait to give the CV servers a break
if waited > time.Duration(7.4*float64(time.Second)) { if waited > time.Duration(7.4*float64(time.Second)) {
t := 10 * time.Second t := 10 * time.Second
@@ -494,6 +494,10 @@ func (c *CVDownloader) downloadImages() {
return return
case <-time.After(t): case <-time.After(t):
} }
} else {
// Things are too fast; we can't depend on CV being slow to manage our download speed
// We sleep for 3 seconds so we don't overload CV
time.Sleep(3 * time.Second)
} }
} }
} }