From 5a93dacdad516dfe7444edec498a558a83f16ce0 Mon Sep 17 00:00:00 2001
From: Timmy Welch
Date: Wed, 5 Feb 2025 16:32:25 -0800
Subject: [PATCH] Fix checking for new covers

---
 CHDB.go                  | 27 +++++++++++++++++----------
 cmd/comic-hasher/main.go | 28 ++++++++++++++++++++++++----
 cv/cv.go                 | 37 +++++++++++++++++++++++++++++--------
 3 files changed, 70 insertions(+), 22 deletions(-)

diff --git a/CHDB.go b/CHDB.go
index 341cc72..5b39075 100644
--- a/CHDB.go
+++ b/CHDB.go
@@ -48,19 +48,26 @@ CREATE TABLE IF NOT EXISTS bad_urls(
 func (s CHDB) PathHashed(path string) bool {
 	path, _ = filepath.Rel(s.comicvinePath, path)
 	dbPath := ""
-	_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath)
-	if dbPath == path && s.deleteExisting {
-		os.Remove(filepath.Join(s.comicvinePath, path))
+	if s.deleteExisting {
+		_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath)
+
+		if dbPath == path {
+			os.Remove(filepath.Join(s.comicvinePath, path))
+		}
+		return dbPath == path
 	}
-	return dbPath == path
+	count := 0
+	_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", path).Scan(&count)
+	return count > 0
 }
 
 func (s CHDB) PathDownloaded(path string) bool {
 	relPath, _ := filepath.Rel(s.comicvinePath, path)
-	dbPath := ""
-	_ = s.sql.QueryRow("SELECT path FROM paths where path=?", relPath).Scan(&dbPath)
-	if dbPath != relPath {
+
+	count := 0
+	_ = s.sql.QueryRow("SELECT count(path) FROM paths where path=?", relPath).Scan(&count)
+	if count != 1 {
 		f, err := os.Open(path)
 		if err == nil {
 			defer f.Close()
@@ -84,9 +91,9 @@ func (s CHDB) AddPath(path string) {
 }
 
 func (s CHDB) CheckURL(url string) bool {
-	dbURL := ""
-	_ = s.sql.QueryRow("SELECT url FROM bad_urls where url=?", url).Scan(&dbURL)
-	return dbURL == url
+	count := 0
+	_ = s.sql.QueryRow("SELECT count(url) FROM bad_urls where url=?", url).Scan(&count)
+	return count > 0
 }
 
 func (s CHDB) AddURL(url string) {
diff --git a/cmd/comic-hasher/main.go b/cmd/comic-hasher/main.go
index c0478b1..6de4b97 100644
--- a/cmd/comic-hasher/main.go
+++ b/cmd/comic-hasher/main.go
@@ -497,6 +497,8 @@ func (s *Server) addCover(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, "Invalid Auth", http.StatusForbidden)
 		return
 	}
+	w.WriteHeader(http.StatusNotImplemented)
+	return
 	var (
 		values = r.URL.Query()
 		domain = strings.TrimSpace(values.Get("domain"))
@@ -700,6 +702,7 @@ func initializeStorage(opts Opts) (ch.HashStorage, error) {
 
 func loadHashes(opts Opts, decodeHashes func(format Format, hashes []byte) error) {
 	if opts.loadEmbeddedHashes && len(ch.Hashes) != 0 {
+		fmt.Println("Loading embedded hashes")
 		var err error
 		hashes := ch.Hashes
 		if gr, err := gzip.NewReader(bytes.NewReader(ch.Hashes)); err == nil {
@@ -720,6 +723,7 @@ func loadHashes(opts Opts, decodeHashes func(format Format, hashes []byte) error
 		}
 		fmt.Printf("Loaded embedded %s hashes\n", format)
 	} else {
+		fmt.Println("Loading saved hashes")
 		if f, err := os.Open(opts.hashesPath); err == nil {
 			var buf io.Reader = f
 			if gr, err := gzip.NewReader(buf); err == nil {
@@ -758,15 +762,26 @@ func saveHashes(opts Opts, encodeHashes func(format Format) ([]byte, error)) {
 	encodedHashes, err := encodeHashes(opts.format)
 	if err == nil {
 		if f, err := os.Create(opts.hashesPath); err == nil {
+			failed := false
 			gzw := gzip.NewWriter(f)
 			_, err := gzw.Write(encodedHashes)
 			if err != nil {
 				log.Println("Failed to write hashes", err)
-			} else {
+				failed = true
+			}
+			err = gzw.Close()
+			if err != nil {
+				log.Println("Failed to close gzip writer", err)
+				failed = true
+			}
+			err = f.Close()
+			if err != nil {
+				log.Println("Failed to close hashes file", err)
+				failed = true
+			}
+			if !failed {
 				log.Println("Successfully saved hashes")
 			}
-			gzw.Close()
-			f.Close()
 		} else {
 			log.Println("Unabled to save hashes", err)
 		}
@@ -809,7 +824,12 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser
 			bufPool.Put(path.Image)
 		}
 		if err != nil {
-			log.Println("Reading image failed", path.Dest, err)
+			if len(path.URL) > 0 {
+				log.Println("Reading image failed, adding to known bad urls:", path.URL, err)
+				chdb.AddURL(path.URL)
+			} else {
+				log.Println("Reading image failed", path.Dest, err)
+			}
 			continue // skip this image
 		}
 		chdb.AddPath(path.Dest) // Add to sqlite db and remove file if opts.deleteHashedImages is true
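Note on the saveHashes hunk above: gzip.Writer buffers compressed data, so a successful Write proves nothing until Close flushes the final block; the patch now checks both Close errors before reporting success. A minimal standalone sketch of the same pattern, where writeGzip and the file name are illustrative and not part of this patch:

```go
package main

import (
	"compress/gzip"
	"log"
	"os"
)

// writeGzip shows the close-error handling the patch adds to saveHashes:
// errors from gzw.Close and f.Close are checked before success is reported.
func writeGzip(path string, data []byte) error {
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	gzw := gzip.NewWriter(f)
	if _, err := gzw.Write(data); err != nil {
		gzw.Close()
		f.Close()
		return err
	}
	// Close flushes the remaining compressed block; ignoring its error can
	// leave a truncated archive while still logging a success message.
	if err := gzw.Close(); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}

func main() {
	if err := writeGzip("hashes.gz", []byte("example data")); err != nil {
		log.Println("Failed to save hashes:", err)
		return
	}
	log.Println("Successfully saved hashes")
}
```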
diff --git a/cv/cv.go b/cv/cv.go
index 457d4d0..8bc435f 100644
--- a/cv/cv.go
+++ b/cv/cv.go
@@ -129,7 +129,7 @@ func (c *CVDownloader) loadIssues(filename string) (*CVResult, error) {
 }
 
 func Get(ctx context.Context, url string) (*http.Response, error, func()) {
-	ctx, cancel := context.WithTimeout(ctx, time.Second*10)
+	ctx, cancel := context.WithTimeout(ctx, time.Second*20)
 	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
 	if err != nil {
 		return nil, err, cancel
@@ -180,7 +180,7 @@ func (c *CVDownloader) updateIssues() {
 		offset -= 100
 		return failCount < 15
 	}
-	for offset = 0; offset < c.totalResults; offset += 100 {
+	for offset = 0; offset <= c.totalResults; offset += 100 {
 		index := offset / 100
 		if c.hasQuit() {
 			return
@@ -192,11 +192,21 @@ func (c *CVDownloader) updateIssues() {
 			prev = -1
 			failCount = 0
 			// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
-			select {
-			case <-c.Context.Done():
-			case c.downloadQueue <- issue:
+			if c.totalResults == issue.Offset+issue.NumberOfPageResults {
+				if index != len(c.fileList)-1 {
+					log.Printf("Wrong index: expected %d got %d", len(c.fileList)-1, index)
+					return
+				}
+				log.Println("Deleting the last page to detect new comics")
+				os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
+				c.fileList = slices.Delete(c.fileList, index, index+1)
+			} else {
+				select {
+				case <-c.Context.Done():
+				case c.downloadQueue <- issue:
+				}
+				continue
 			}
-			continue
 		} else {
 			log.Println("Failed to read page at offset", offset, issue, err)
 			os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
@@ -218,7 +228,17 @@ func (c *CVDownloader) updateIssues() {
 			case <-c.Context.Done():
 			case c.downloadQueue <- issue:
 			}
-			continue
+			if c.totalResults == issue.Offset+issue.NumberOfPageResults {
+				if index != len(c.fileList)-1 {
+					log.Printf("Wrong index: expected %d got %d", len(c.fileList)-1, index)
+					return
+				}
+				log.Println("Deleting the last page to detect new comics")
+				os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
+				c.fileList = slices.Delete(c.fileList, index, index+1)
+			} else {
+				continue
+			}
 		} else {
 			log.Println("Failed to read page at offset", offset, issue, err)
 			os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
@@ -244,6 +264,7 @@ func (c *CVDownloader) updateIssues() {
 			if retry(URI.String(), err) {
 				continue
 			}
+			// Fail and let comic-hasher try the whole thing again later
 			return
 		}
 		if resp.StatusCode != 200 {
@@ -288,6 +309,7 @@ func (c *CVDownloader) updateIssues() {
 			return
 		case c.downloadQueue <- issue:
 		}
+		c.fileList = ch.Insert(c.fileList, fmt.Sprintf("cv-%v.json", offset))
 		log.Printf("Downloaded %s/cv-%v.json", c.JSONPath, offset)
 	}
 }
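Note on the updateIssues hunks above: a cached page is the final page of the paginated results exactly when issue.Offset plus issue.NumberOfPageResults equals c.totalResults, and only that page can gain entries when new covers are published, which is why it is deleted and re-downloaded. A standalone sketch of just that predicate, where the page struct is illustrative and stands in for the CVResult fields used above:

```go
package main

import "fmt"

// page mirrors the two result fields the patch's last-page check relies on.
type page struct {
	Offset              int
	NumberOfPageResults int
}

// isLastPage reports whether p is the final page of a paginated result set:
// its entries run right up to the total, so only it can grow when new
// comics are added.
func isLastPage(p page, totalResults int) bool {
	return p.Offset+p.NumberOfPageResults == totalResults
}

func main() {
	total := 250 // e.g. two full pages of 100 and a partial page of 50
	fmt.Println(isLastPage(page{0, 100}, total))  // false: a full middle page
	fmt.Println(isLastPage(page{200, 50}, total)) // true: the partial last page
	fmt.Println(isLastPage(page{200, 100}, 300))  // true: an exactly full last page
}
```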
@@ -611,7 +633,6 @@ func DownloadCovers(c *CVDownloader) {
 	if len(c.fileList) > 0 {
 		c.totalResults = getOffset(c.fileList[len(c.fileList)-1])
 	}
-	c.totalResults += 100
 	log.Println("Number of pages", len(c.fileList), "Expected Pages:", c.totalResults/100)
 	log.Println("Updating issues now")
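Note on the DownloadCovers hunk above: dropping c.totalResults += 100 works together with the new offset <= c.totalResults loop bound, since totalResults now equals the offset encoded in the newest cached file name and the loop still visits that final page. A hedged sketch of what getOffset presumably does, given the cv-%v.json naming used in updateIssues; its body is not part of this patch, so this is a guess at the helper, not its actual implementation:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// getOffset (hypothetical reconstruction) recovers the API offset from a
// cached page name such as "cv-300.json".
func getOffset(name string) int {
	s := strings.TrimSuffix(strings.TrimPrefix(name, "cv-"), ".json")
	n, _ := strconv.Atoi(s)
	return n
}

func main() {
	last := "cv-300.json"
	totalResults := getOffset(last)
	// With the old "+= 100" the bound overshot the cached pages; with
	// "offset <= totalResults" the page at offset 300 is still revisited,
	// triggering the delete-and-refetch path for the last page.
	for offset := 0; offset <= totalResults; offset += 100 {
		fmt.Println("visit page at offset", offset)
	}
}
```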