Compare commits

...

2 Commits

Author SHA1 Message Date
Timmy Welch
260a13688a Wait every 200 downloads so CV doesn't get overloaded 2024-12-26 17:50:25 -08:00
Timmy Welch
e04469938d Fix downloading images that are not hashed 2024-12-26 16:11:10 -08:00
3 changed files with 18 additions and 10 deletions

View File

@@ -57,11 +57,11 @@ func (s CHDB) PathHashed(path string) bool {
} }
func (s CHDB) PathDownloaded(path string) bool { func (s CHDB) PathDownloaded(path string) bool {
path, _ = filepath.Rel(s.comicvinePath, path) relPath, _ := filepath.Rel(s.comicvinePath, path)
dbPath := "" dbPath := ""
_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath) _ = s.sql.QueryRow("SELECT path FROM paths where path=?", relPath).Scan(&dbPath)
if dbPath != path { if dbPath != relPath {
f, err := os.Open(filepath.Join(s.comicvinePath, path)) f, err := os.Open(path)
if err == nil { if err == nil {
defer f.Close() defer f.Close()
} }

View File

@@ -180,13 +180,13 @@ func main() {
flag.StringVar(&opts.hashesPath, "hashes", "", fmt.Sprintf("Path to optionally gziped hashes in msgpack or json format. You must disable embedded hashes to use this option (default %v)", filepath.Join(wd, "hashes.gz"))) flag.StringVar(&opts.hashesPath, "hashes", "", fmt.Sprintf("Path to optionally gziped hashes in msgpack or json format. You must disable embedded hashes to use this option (default %v)", filepath.Join(wd, "hashes.gz")))
flag.Var(&opts.format, "save-format", "Specify the format to export hashes to (json, msgpack)") flag.Var(&opts.format, "save-format", "Specify the format to export hashes to (json, msgpack)")
flag.Var(&opts.storageType, "storage-type", "Specify the storage type used internally to search hashes (sqlite,sqlite3,map,basicmap,vptree)") flag.Var(&opts.storageType, "storage-type", "Specify the storage type used internally to search hashes (sqlite,sqlite3,map,basicmap,vptree)")
flag.BoolVar(&opts.onlyHashNewIDs, "only-hash-new-ids", true, "Only hashes new covers from CV/local path (Note: If there are multiple covers for the same ID they may get queued at the same time and hashed on the first run, implies -cv-thumb-only if -delete-hashed-images is set)") flag.BoolVar(&opts.onlyHashNewIDs, "only-hash-new-ids", true, "Only hashes new covers from CV/local path (Note: If there are multiple covers for the same ID they may get queued at the same time and hashed on the first run, implies -cv-thumb-only if -delete-hashed-images is true or -cv-keep-downloaded is false)")
flag.BoolVar(&opts.deleteHashedImages, "delete-hashed-images", false, "Deletes downloaded images after hashing them, useful to save space, paths are recorded in ch.sqlite") flag.BoolVar(&opts.deleteHashedImages, "delete-hashed-images", false, "Deletes downloaded images after hashing them, useful to save space, paths are recorded in ch.sqlite")
flag.BoolVar(&opts.cv.downloadCovers, "cv-dl-covers", false, "Downloads all covers from ComicVine and adds them to the server") flag.BoolVar(&opts.cv.downloadCovers, "cv-dl-covers", false, "Downloads all covers from ComicVine and adds them to the server")
flag.StringVar(&opts.cv.APIKey, "cv-api-key", "", "API Key to use to access the ComicVine API") flag.StringVar(&opts.cv.APIKey, "cv-api-key", "", "API Key to use to access the ComicVine API")
flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine"))) flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine")))
flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine") flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine, when false sets -only-hash-new-ids=false")
flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images") flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images")
flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded") flag.BoolVar(&opts.cv.keepDownloaded, "cv-keep-downloaded", true, "Keep downloaded images. When set to false does not ever write to the filesystem, a crash or exiting can mean some images need to be re-downloaded")
flag.Parse() flag.Parse()
@@ -203,7 +203,7 @@ func main() {
log.Fatal("No ComicVine API Key provided") log.Fatal("No ComicVine API Key provided")
} }
} }
opts.cv.thumbOnly = opts.cv.thumbOnly || (opts.onlyHashNewIDs && opts.deleteHashedImages) opts.cv.thumbOnly = opts.cv.thumbOnly || (opts.onlyHashNewIDs && (opts.deleteHashedImages || !opts.cv.keepDownloaded))
opts.path, _ = filepath.Abs(opts.path) opts.path, _ = filepath.Abs(opts.path)
if opts.hashesPath == "" { if opts.hashesPath == "" {
opts.hashesPath = filepath.Join(opts.path, "hashes.gz") opts.hashesPath = filepath.Join(opts.path, "hashes.gz")
@@ -219,6 +219,10 @@ func main() {
opts.cv.path, _ = filepath.Abs(opts.cv.path) opts.cv.path, _ = filepath.Abs(opts.cv.path)
pretty.Log(opts) pretty.Log(opts)
if !opts.cv.keepDownloaded && opts.onlyHashNewIDs {
panic("You need to fix your -cv-keep-downloaded and -only-hash-new-ids flags")
}
startServer(opts) startServer(opts)
} }
@@ -839,8 +843,8 @@ func startServer(opts Opts) {
cancel: cancel, cancel: cancel,
signalQueue: make(chan os.Signal, 1), signalQueue: make(chan os.Signal, 1),
readerQueue: make(chan string, 100), readerQueue: make(chan string, 100),
hashingQueue: make(chan ch.Im), hashingQueue: make(chan ch.Im, 1),
mappingQueue: make(chan ch.ImageHash), mappingQueue: make(chan ch.ImageHash, 1),
mux: mux, mux: mux,
httpServer: &http.Server{ httpServer: &http.Server{
Addr: ":8080", Addr: ":8080",

View File

@@ -481,10 +481,10 @@ func (c *CVDownloader) downloadImages() {
if added > 200 { if added > 200 {
// On a clean single image type run each page would have 100 downloads of a single cover type but stuff happens so we only wait once we have sent 200 to the queue // On a clean single image type run each page would have 100 downloads of a single cover type but stuff happens so we only wait once we have sent 200 to the queue
log.Println("waiting for", added, "downloads at offset", list.Offset) log.Println("waiting for", added, "downloads at offset", list.Offset)
added = 0
beforeWait := time.Now() beforeWait := time.Now()
c.imageWG.Wait() c.imageWG.Wait()
waited := time.Since(beforeWait) waited := time.Since(beforeWait)
added = 0
// If we had to wait for the arbitrarily picked time of 7.4 seconds it means we had a backed-up queue (slow hashing can also cause it to wait longer), let's wait to give the CV servers a break // If we had to wait for the arbitrarily picked time of 7.4 seconds it means we had a backed-up queue (slow hashing can also cause it to wait longer), let's wait to give the CV servers a break
if waited > time.Duration(7.4*float64(time.Second)) { if waited > time.Duration(7.4*float64(time.Second)) {
t := 10 * time.Second t := 10 * time.Second
@@ -494,6 +494,10 @@ func (c *CVDownloader) downloadImages() {
return return
case <-time.After(t): case <-time.After(t):
} }
} else {
// Things are too fast; we can't depend on CV being slow to manage our download speed
// We sleep for 3 seconds so we don't overload CV
time.Sleep(3 * time.Second)
} }
} }
} }