Make deleting files efficient
This commit is contained in:
parent
7ede0dee72
commit
cc4e973bf9
100
CHDB.go
Normal file
100
CHDB.go
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
package ch
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
_ "modernc.org/sqlite"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CHDB is a handle on the bookkeeping database used while hashing covers.
// It tracks two sets: the image paths that have been processed (table
// "paths") and the URLs that are known to be bad (table "bad_urls").
//
// Field order matters: the type is built with positional composite literals.
type CHDB struct {
	// comicvinePath is the base directory; paths are stored relative to it.
	comicvinePath string
	// sql is the open database handle all queries run against.
	sql *sql.DB
	// deleteExisting makes the path methods remove image files from disk
	// once they are known to the database.
	deleteExisting bool
}
|
||||||
|
|
||||||
|
func OpenCHDB(path string, comicvinePath string, deleteExisting bool) (CHDB, error) {
|
||||||
|
path, _ = filepath.Abs(path)
|
||||||
|
err := os.MkdirAll(filepath.Dir(path), 0o755)
|
||||||
|
if err != nil {
|
||||||
|
panic("Unable to create directory " + filepath.Dir(path))
|
||||||
|
}
|
||||||
|
println(fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path))
|
||||||
|
sql, err := sql.Open("sqlite", fmt.Sprintf("file://%s?&_pragma=busy_timeout(500)&_pragma=journal_mode(wal)", path))
|
||||||
|
if err != nil {
|
||||||
|
return CHDB{comicvinePath, sql, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
|
||||||
|
}
|
||||||
|
err = sql.Ping()
|
||||||
|
if err != nil {
|
||||||
|
return CHDB{comicvinePath, sql, deleteExisting}, fmt.Errorf("Failed to open database: %w", err)
|
||||||
|
}
|
||||||
|
_, err = sql.Exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS paths(
|
||||||
|
path STRING PRIMARY KEY
|
||||||
|
);
|
||||||
|
CREATE TABLE IF NOT EXISTS bad_urls(
|
||||||
|
url STRING PRIMARY KEY
|
||||||
|
);
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("Failed to create table: %w", err)
|
||||||
|
}
|
||||||
|
return CHDB{comicvinePath, sql, deleteExisting}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s CHDB) PathHashed(path string) bool {
|
||||||
|
path, _ = filepath.Rel(s.comicvinePath, path)
|
||||||
|
dbPath := ""
|
||||||
|
_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath)
|
||||||
|
|
||||||
|
if dbPath == path && s.deleteExisting {
|
||||||
|
os.Remove(filepath.Join(s.comicvinePath, path))
|
||||||
|
}
|
||||||
|
return dbPath == path
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s CHDB) PathDownloaded(path string) bool {
|
||||||
|
path, _ = filepath.Rel(s.comicvinePath, path)
|
||||||
|
dbPath := ""
|
||||||
|
_ = s.sql.QueryRow("SELECT path FROM paths where path=?", path).Scan(&dbPath)
|
||||||
|
if dbPath != path {
|
||||||
|
f, err := os.Open(filepath.Join(s.comicvinePath, path))
|
||||||
|
if err == nil {
|
||||||
|
defer f.Close()
|
||||||
|
}
|
||||||
|
return !os.IsNotExist(err)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s CHDB) AddPath(path string) {
|
||||||
|
path, _ = filepath.Rel(s.comicvinePath, path)
|
||||||
|
_, err := s.sql.Exec("INSERT INTO paths VALUES(?) ON CONFLICT DO NOTHING", path)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(fmt.Errorf("Failed to insert %v into paths: %w", path, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.deleteExisting {
|
||||||
|
os.Remove(path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s CHDB) CheckURL(url string) bool {
|
||||||
|
dbURL := ""
|
||||||
|
_ = s.sql.QueryRow("SELECT url FROM bad_urls where url=?", url).Scan(&dbURL)
|
||||||
|
return dbURL == url
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s CHDB) AddURL(url string) {
|
||||||
|
_, err := s.sql.Exec("INSERT INTO bad_urls VALUES(?) ON CONFLICT DO NOTHING", url)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(fmt.Errorf("Failed to insert %v into bad_urls: %w", url, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s CHDB) Close() error {
|
||||||
|
return s.sql.Close()
|
||||||
|
}
|
@ -145,7 +145,8 @@ type Opts struct {
|
|||||||
hashesPath string
|
hashesPath string
|
||||||
storageType Storage
|
storageType Storage
|
||||||
onlyHashNewIDs bool
|
onlyHashNewIDs bool
|
||||||
truncateHashedImages bool
|
deleteHashedImages bool
|
||||||
|
path string
|
||||||
|
|
||||||
cv struct {
|
cv struct {
|
||||||
downloadCovers bool
|
downloadCovers bool
|
||||||
@ -158,24 +159,32 @@ type Opts struct {
|
|||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
opts := Opts{format: Msgpack, storageType: BasicMap} // flag is weird
|
opts := Opts{format: Msgpack, storageType: BasicMap} // flag is weird
|
||||||
|
wd, err := os.Getwd()
|
||||||
|
fmt.Println(err)
|
||||||
|
if err != nil {
|
||||||
|
wd = "comic-hasher"
|
||||||
|
} else {
|
||||||
|
wd = filepath.Join(wd, "comic-hasher")
|
||||||
|
}
|
||||||
go func() {
|
go func() {
|
||||||
log.Println(http.ListenAndServe("localhost:6060", nil))
|
log.Println(http.ListenAndServe("localhost:6060", nil))
|
||||||
}()
|
}()
|
||||||
flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file")
|
flag.StringVar(&opts.cpuprofile, "cpuprofile", "", "Write cpu profile to file")
|
||||||
|
|
||||||
|
flag.StringVar(&opts.path, "path", wd, "Path for comic-hasher to store files")
|
||||||
flag.StringVar(&opts.coverPath, "cover-path", "", "Path to local covers to add to hash database. Must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif")
|
flag.StringVar(&opts.coverPath, "cover-path", "", "Path to local covers to add to hash database. Must be in the form '{cover-path}/{domain}/{id}/*' eg for --cover-path /covers it should look like /covers/comicvine.gamespot.com/10000/image.gif")
|
||||||
flag.StringVar(&opts.sqlitePath, "sqlite-path", "tmp.sqlite", "Path to sqlite database to use for matching hashes, substantialy reduces memory usage")
|
flag.StringVar(&opts.sqlitePath, "sqlite-path", "", fmt.Sprintf("Path to sqlite database to use for matching hashes, substantialy reduces memory usage (default %v)", filepath.Join(wd, "tmp.sqlite")))
|
||||||
flag.BoolVar(&opts.loadEmbeddedHashes, "use-embedded-hashes", true, "Use hashes embedded in the application as a starting point")
|
flag.BoolVar(&opts.loadEmbeddedHashes, "use-embedded-hashes", true, "Use hashes embedded in the application as a starting point")
|
||||||
flag.BoolVar(&opts.saveEmbeddedHashes, "save-embedded-hashes", false, "Save hashes even if we loaded the embedded hashes")
|
flag.BoolVar(&opts.saveEmbeddedHashes, "save-embedded-hashes", false, "Save hashes even if we loaded the embedded hashes")
|
||||||
flag.StringVar(&opts.hashesPath, "hashes", "hashes.gz", "Path to optionally gziped hashes in msgpack or json format. You must disable embedded hashes to use this option")
|
flag.StringVar(&opts.hashesPath, "hashes", "", fmt.Sprintf("Path to optionally gziped hashes in msgpack or json format. You must disable embedded hashes to use this option (default %v)", filepath.Join(wd, "hashes.gz")))
|
||||||
flag.Var(&opts.format, "save-format", "Specify the format to export hashes to (json, msgpack)")
|
flag.Var(&opts.format, "save-format", "Specify the format to export hashes to (json, msgpack)")
|
||||||
flag.Var(&opts.storageType, "storage-type", "Specify the storage type used internally to search hashes (sqlite,sqlite3,map,basicmap,vptree)")
|
flag.Var(&opts.storageType, "storage-type", "Specify the storage type used internally to search hashes (sqlite,sqlite3,map,basicmap,vptree)")
|
||||||
flag.BoolVar(&opts.onlyHashNewIDs, "only-hash-new-ids", true, "Only hashes new covers from CV/local path (Note: If there are multiple covers for the same ID they may get queued at the same time and hashed on the first run)")
|
flag.BoolVar(&opts.onlyHashNewIDs, "only-hash-new-ids", true, "Only hashes new covers from CV/local path (Note: If there are multiple covers for the same ID they may get queued at the same time and hashed on the first run, implies -cv-thumb-only if -delete-hashed-images is set)")
|
||||||
flag.BoolVar(&opts.truncateHashedImages, "trucate-hashed-images", true, "Truncates downloaded images after hashing them, useful to save space, implies -only-hash-new-ids")
|
flag.BoolVar(&opts.deleteHashedImages, "delete-hashed-images", false, "Deletes downloaded images after hashing them, useful to save space, paths are recorded in ch.sqlite")
|
||||||
|
|
||||||
flag.BoolVar(&opts.cv.downloadCovers, "cv-dl-covers", false, "Downloads all covers from ComicVine and adds them to the server")
|
flag.BoolVar(&opts.cv.downloadCovers, "cv-dl-covers", false, "Downloads all covers from ComicVine and adds them to the server")
|
||||||
flag.StringVar(&opts.cv.APIKey, "cv-api-key", "", "API Key to use to access the ComicVine API")
|
flag.StringVar(&opts.cv.APIKey, "cv-api-key", "", "API Key to use to access the ComicVine API")
|
||||||
flag.StringVar(&opts.cv.path, "cv-path", "", "Path to store ComicVine data in")
|
flag.StringVar(&opts.cv.path, "cv-path", "", fmt.Sprintf("Path to store ComicVine data in (default %v)", filepath.Join(wd, "comicvine")))
|
||||||
flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine")
|
flag.BoolVar(&opts.cv.thumbOnly, "cv-thumb-only", true, "Only downloads the thumbnail image from comicvine")
|
||||||
flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images")
|
flag.BoolVar(&opts.cv.hashDownloaded, "cv-hash-downloaded", true, "Hash already downloaded images")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
@ -186,17 +195,28 @@ func main() {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
opts.onlyHashNewIDs = opts.onlyHashNewIDs || opts.truncateHashedImages
|
// opts.onlyHashNewIDs = opts.onlyHashNewIDs || opts.deleteHashedImages
|
||||||
if opts.cv.downloadCovers {
|
if opts.cv.downloadCovers {
|
||||||
if opts.cv.APIKey == "" {
|
if opts.cv.APIKey == "" {
|
||||||
log.Fatal("No ComicVine API Key provided")
|
log.Fatal("No ComicVine API Key provided")
|
||||||
}
|
}
|
||||||
if opts.cv.path == "" {
|
|
||||||
log.Fatal("No path provided for ComicVine data")
|
|
||||||
}
|
}
|
||||||
|
opts.cv.thumbOnly = opts.cv.thumbOnly || (opts.onlyHashNewIDs && opts.deleteHashedImages)
|
||||||
|
opts.path, _ = filepath.Abs(opts.path)
|
||||||
|
if opts.hashesPath == "" {
|
||||||
|
opts.hashesPath = filepath.Join(opts.path, "hashes.gz")
|
||||||
|
}
|
||||||
|
opts.hashesPath, _ = filepath.Abs(opts.hashesPath)
|
||||||
|
if opts.sqlitePath == "" {
|
||||||
|
opts.sqlitePath = filepath.Join(opts.path, "tmp.sqlite")
|
||||||
}
|
}
|
||||||
opts.sqlitePath, _ = filepath.Abs(opts.sqlitePath)
|
opts.sqlitePath, _ = filepath.Abs(opts.sqlitePath)
|
||||||
log.Println(pretty.Formatter(opts))
|
if opts.cv.path == "" {
|
||||||
|
opts.cv.path = filepath.Join(opts.path, "comicvine")
|
||||||
|
}
|
||||||
|
opts.cv.path, _ = filepath.Abs(opts.cv.path)
|
||||||
|
pretty.Log(opts)
|
||||||
|
|
||||||
startServer(opts)
|
startServer(opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -505,7 +525,7 @@ func (s *Server) hasher(workerID int, done func(int)) {
|
|||||||
for image := range s.hashingQueue {
|
for image := range s.hashingQueue {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
if image.NewOnly && len(s.hashes.GetIDs(image.ID)) > 0 {
|
if image.NewOnly && len(s.hashes.GetIDs(image.ID)) > 0 {
|
||||||
fmt.Println("skipping", image)
|
log.Printf("Skipping existing hash with ID: %s found", image.ID)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
hash := ch.HashImage(image)
|
hash := ch.HashImage(image)
|
||||||
@ -749,7 +769,7 @@ func saveHashes(opts Opts, encodeHashes func(format Format) ([]byte, error)) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func downloadProcessor(opts Opts, imagePaths chan cv.Download, server Server) {
|
func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, server Server) {
|
||||||
defer func() {
|
defer func() {
|
||||||
log.Println("Download Processor completed")
|
log.Println("Download Processor completed")
|
||||||
}()
|
}()
|
||||||
@ -759,23 +779,22 @@ func downloadProcessor(opts Opts, imagePaths chan cv.Download, server Server) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
file, err := os.OpenFile(path.Dest, os.O_RDWR|os.O_CREATE, 0666)
|
if chdb.PathHashed(path.Dest) {
|
||||||
|
// log.Println(path.Dest, "File has already been hashed, it may not be saved in the hashes file because we currently don't save any hashes if we've crashed")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
file, err := os.OpenFile(path.Dest, os.O_RDWR, 0666)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
i, format, err := image.Decode(bufio.NewReader(file))
|
i, format, err := image.Decode(bufio.NewReader(file))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
file.Close()
|
file.Close()
|
||||||
|
log.Println("Reading image failed", path.Dest)
|
||||||
continue // skip this image
|
continue // skip this image
|
||||||
}
|
}
|
||||||
if opts.truncateHashedImages {
|
|
||||||
file.Seek(0, io.SeekStart)
|
|
||||||
err = file.Truncate(0)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Failed to truncate %#v: %v", path.Dest, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
file.Close()
|
file.Close()
|
||||||
|
chdb.AddPath(path.Dest) // Add to sqlite db and remove file if opts.deleteHashedImages is true
|
||||||
|
|
||||||
im := ch.Im{
|
im := ch.Im{
|
||||||
Im: i,
|
Im: i,
|
||||||
@ -788,7 +807,7 @@ func downloadProcessor(opts Opts, imagePaths chan cv.Download, server Server) {
|
|||||||
log.Println("Recieved quit")
|
log.Println("Recieved quit")
|
||||||
return
|
return
|
||||||
case server.hashingQueue <- im:
|
case server.hashingQueue <- im:
|
||||||
log.Println("Sending:", im)
|
// log.Println("Sending:", im)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -858,11 +877,15 @@ func startServer(opts Opts) {
|
|||||||
loadHashes(opts, server.DecodeHashes)
|
loadHashes(opts, server.DecodeHashes)
|
||||||
|
|
||||||
server.HashLocalImages(opts)
|
server.HashLocalImages(opts)
|
||||||
|
chdb, err := ch.OpenCHDB(filepath.Join(opts.path, "ch.sqlite"), opts.cv.path, opts.deleteHashedImages)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
log.Println("Init downloaders")
|
log.Println("Init downloaders")
|
||||||
dwg := sync.WaitGroup{}
|
dwg := sync.WaitGroup{}
|
||||||
finishedDownloadQueue := make(chan cv.Download)
|
finishedDownloadQueue := make(chan cv.Download)
|
||||||
go downloadProcessor(opts, finishedDownloadQueue, server)
|
go downloadProcessor(chdb, opts, finishedDownloadQueue, server)
|
||||||
|
|
||||||
if opts.cv.downloadCovers {
|
if opts.cv.downloadCovers {
|
||||||
dwg.Add(1)
|
dwg.Add(1)
|
||||||
@ -870,7 +893,7 @@ func startServer(opts Opts) {
|
|||||||
if opts.cv.thumbOnly {
|
if opts.cv.thumbOnly {
|
||||||
imageTypes = append(imageTypes, "thumb_url")
|
imageTypes = append(imageTypes, "thumb_url")
|
||||||
}
|
}
|
||||||
cvdownloader := cv.NewCVDownloader(server.Context, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.hashDownloaded, finishedDownloadQueue)
|
cvdownloader := cv.NewCVDownloader(server.Context, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.hashDownloaded, finishedDownloadQueue)
|
||||||
go func() {
|
go func() {
|
||||||
defer dwg.Done()
|
defer dwg.Done()
|
||||||
cv.DownloadCovers(cvdownloader)
|
cv.DownloadCovers(cvdownloader)
|
||||||
@ -921,6 +944,7 @@ func startServer(opts Opts) {
|
|||||||
close(finishedDownloadQueue)
|
close(finishedDownloadQueue)
|
||||||
for range finishedDownloadQueue {
|
for range finishedDownloadQueue {
|
||||||
}
|
}
|
||||||
|
_ = chdb.Close()
|
||||||
|
|
||||||
// server.EncodeHashes would normally need a read lock
|
// server.EncodeHashes would normally need a read lock
|
||||||
// the server has been stopped so it's not needed here
|
// the server has been stopped so it's not needed here
|
||||||
|
95
cv/cv.go
95
cv/cv.go
@ -21,6 +21,8 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"slices"
|
"slices"
|
||||||
|
|
||||||
|
ch "gitea.narnian.us/lordwelch/comic-hasher"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Download struct {
|
type Download struct {
|
||||||
@ -70,12 +72,11 @@ type CVDownloader struct {
|
|||||||
|
|
||||||
fileList []fs.DirEntry
|
fileList []fs.DirEntry
|
||||||
totalResults int
|
totalResults int
|
||||||
badURLs []string
|
|
||||||
bMut sync.Mutex
|
|
||||||
imageWG sync.WaitGroup
|
imageWG sync.WaitGroup
|
||||||
downloadQueue chan *CVResult
|
downloadQueue chan *CVResult
|
||||||
imageDownloads chan download
|
imageDownloads chan download
|
||||||
notFound chan download
|
notFound chan download
|
||||||
|
chdb ch.CHDB
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -83,28 +84,6 @@ var (
|
|||||||
ErrInvalidPage = errors.New("Invalid ComicVine Page")
|
ErrInvalidPage = errors.New("Invalid ComicVine Page")
|
||||||
)
|
)
|
||||||
|
|
||||||
func (c *CVDownloader) InsertBadURL(url string) {
|
|
||||||
c.bMut.Lock()
|
|
||||||
defer c.bMut.Unlock()
|
|
||||||
index, itemFound := slices.BinarySearch(c.badURLs, url)
|
|
||||||
if itemFound {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
c.badURLs = slices.Insert(c.badURLs, index, url)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *CVDownloader) InsertBadURLs(url ...string) {
|
|
||||||
c.bMut.Lock()
|
|
||||||
defer c.bMut.Unlock()
|
|
||||||
c.badURLs = append(c.badURLs, url...)
|
|
||||||
slices.Sort(c.badURLs)
|
|
||||||
}
|
|
||||||
func (c *CVDownloader) IsBadURL(url string) bool {
|
|
||||||
c.bMut.Lock()
|
|
||||||
defer c.bMut.Unlock()
|
|
||||||
_, itemFound := slices.BinarySearch(c.badURLs, url)
|
|
||||||
return itemFound
|
|
||||||
}
|
|
||||||
func (c *CVDownloader) readJson() ([]*CVResult, error) {
|
func (c *CVDownloader) readJson() ([]*CVResult, error) {
|
||||||
var issues []*CVResult
|
var issues []*CVResult
|
||||||
for _, file_entry := range c.fileList {
|
for _, file_entry := range c.fileList {
|
||||||
@ -272,7 +251,6 @@ func (c *CVDownloader) updateIssues() {
|
|||||||
}
|
}
|
||||||
resp, err, cancelDownloadCTX := Get(c.Context, URI.String())
|
resp, err, cancelDownloadCTX := Get(c.Context, URI.String())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
_ = resp.Body.Close()
|
|
||||||
cancelDownloadCTX()
|
cancelDownloadCTX()
|
||||||
if retry(URI.String(), err) {
|
if retry(URI.String(), err) {
|
||||||
continue
|
continue
|
||||||
@ -338,6 +316,7 @@ func (c *CVDownloader) start_downloader() {
|
|||||||
for i := range 5 {
|
for i := range 5 {
|
||||||
go func() {
|
go func() {
|
||||||
log.Println("starting downloader", i)
|
log.Println("starting downloader", i)
|
||||||
|
dir_created := make(map[string]bool)
|
||||||
for dl := range c.imageDownloads {
|
for dl := range c.imageDownloads {
|
||||||
if c.hasQuit() {
|
if c.hasQuit() {
|
||||||
c.imageWG.Done()
|
c.imageWG.Done()
|
||||||
@ -358,10 +337,11 @@ func (c *CVDownloader) start_downloader() {
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
dir := filepath.Dir(dl.dest)
|
||||||
resp, err, cancelDownload := Get(c.Context, dl.url)
|
resp, err, cancelDownload := Get(c.Context, dl.url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cancelDownload()
|
cancelDownload()
|
||||||
log.Println("Failed to download", dl.url, err)
|
log.Println("Failed to download", dl.volumeID, "/", dl.issueID, dl.url, err)
|
||||||
c.imageWG.Done()
|
c.imageWG.Done()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -381,6 +361,10 @@ func (c *CVDownloader) start_downloader() {
|
|||||||
cleanup()
|
cleanup()
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if !dir_created[dir] {
|
||||||
|
_ = os.MkdirAll(dir, 0o755)
|
||||||
|
dir_created[dir] = true
|
||||||
|
}
|
||||||
image, err := os.Create(dl.dest)
|
image, err := os.Create(dl.dest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println("Unable to create image file", dl.dest, err)
|
log.Println("Unable to create image file", dl.dest, err)
|
||||||
@ -408,42 +392,10 @@ func (c *CVDownloader) start_downloader() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *CVDownloader) loadBadURLs(path string) error {
|
|
||||||
bad_urls_file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("Unable to read bad_urls: %w", err)
|
|
||||||
}
|
|
||||||
bad_urls_bytes, err := io.ReadAll(bad_urls_file)
|
|
||||||
bad_urls_file.Close()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("Unable to read bad_urls: %w", err)
|
|
||||||
}
|
|
||||||
c.bMut.Lock()
|
|
||||||
c.badURLs = strings.Split(string(bad_urls_bytes), "\n")
|
|
||||||
c.bMut.Unlock()
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *CVDownloader) handleNotFound() {
|
func (c *CVDownloader) handleNotFound() {
|
||||||
err := c.loadBadURLs("bad_urls")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
file, err := os.OpenFile("bad_urls", os.O_RDWR|os.O_CREATE, 0666)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
defer file.Close()
|
|
||||||
_, err = file.Seek(0, io.SeekEnd)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
for failedDownload := range c.notFound {
|
for failedDownload := range c.notFound {
|
||||||
c.InsertBadURL(failedDownload.url)
|
c.chdb.AddURL(failedDownload.url)
|
||||||
log.Printf("Not found: volumeID: %d issueID: %d Offset: %d URL: %s\n", failedDownload.volumeID, failedDownload.issueID, failedDownload.offset, failedDownload.url)
|
log.Printf("Not found: volumeID: %d issueID: %d Offset: %d URL: %s\n", failedDownload.volumeID, failedDownload.issueID, failedDownload.offset, failedDownload.url)
|
||||||
file.Write([]byte(failedDownload.url))
|
|
||||||
file.Write([]byte("\n"))
|
|
||||||
file.Sync()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -456,7 +408,6 @@ func (c *CVDownloader) downloadImages() {
|
|||||||
|
|
||||||
go c.handleNotFound()
|
go c.handleNotFound()
|
||||||
added := 0
|
added := 0
|
||||||
dir_created := make(map[string]bool)
|
|
||||||
for list := range c.downloadQueue {
|
for list := range c.downloadQueue {
|
||||||
log.Printf("Checking downloads at offset %v\r", list.Offset)
|
log.Printf("Checking downloads at offset %v\r", list.Offset)
|
||||||
for _, issue := range list.Results {
|
for _, issue := range list.Results {
|
||||||
@ -472,7 +423,7 @@ func (c *CVDownloader) downloadImages() {
|
|||||||
if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
|
if len(c.ImageTypes) > 0 && !slices.Contains(c.ImageTypes, image.name) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if c.IsBadURL(image.url) {
|
if c.chdb.CheckURL(image.url) {
|
||||||
log.Printf("Skipping known bad url %s", image.url)
|
log.Printf("Skipping known bad url %s", image.url)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -491,13 +442,13 @@ func (c *CVDownloader) downloadImages() {
|
|||||||
if ext == "" || (len(ext) > 4 && !slices.Contains([]string{".avif", ".webp", ".tiff", ".heif"}, ext)) {
|
if ext == "" || (len(ext) > 4 && !slices.Contains([]string{".avif", ".webp", ".tiff", ".heif"}, ext)) {
|
||||||
ext = ".jpg"
|
ext = ".jpg"
|
||||||
}
|
}
|
||||||
path := filepath.Join(c.ImagePath, strconv.Itoa(issue.Volume.ID), strconv.Itoa(issue.ID), image.name+ext)
|
dir := filepath.Join(c.ImagePath, strconv.Itoa(issue.Volume.ID), strconv.Itoa(issue.ID))
|
||||||
|
path := filepath.Join(dir, image.name+ext)
|
||||||
|
|
||||||
image_file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
|
if c.chdb.PathDownloaded(path) {
|
||||||
if errors.Is(err, os.ErrExist) {
|
if _, err = os.Stat(path); c.SendExistingImages && err == nil {
|
||||||
if c.SendExistingImages {
|
|
||||||
// We don't add to the count of added as these should be processed immediately
|
// We don't add to the count of added as these should be processed immediately
|
||||||
|
log.Printf("Sending Existing image %v/%v %v", issue.Volume.ID, issue.ID, path)
|
||||||
c.imageWG.Add(1)
|
c.imageWG.Add(1)
|
||||||
c.imageDownloads <- download{
|
c.imageDownloads <- download{
|
||||||
url: image.url,
|
url: image.url,
|
||||||
@ -510,13 +461,8 @@ func (c *CVDownloader) downloadImages() {
|
|||||||
}
|
}
|
||||||
continue // If it exists assume it is fine, adding some basic verification might be a good idea later
|
continue // If it exists assume it is fine, adding some basic verification might be a good idea later
|
||||||
}
|
}
|
||||||
dir := filepath.Join(c.ImagePath, strconv.Itoa(issue.Volume.ID), strconv.Itoa(issue.ID))
|
|
||||||
if !dir_created[dir] {
|
|
||||||
os.MkdirAll(dir, 0o777)
|
|
||||||
dir_created[dir] = true
|
|
||||||
}
|
|
||||||
added++
|
added++
|
||||||
image_file.Close()
|
|
||||||
c.imageWG.Add(1)
|
c.imageWG.Add(1)
|
||||||
c.imageDownloads <- download{
|
c.imageDownloads <- download{
|
||||||
url: image.url,
|
url: image.url,
|
||||||
@ -564,7 +510,7 @@ list:
|
|||||||
if c.hasQuit() {
|
if c.hasQuit() {
|
||||||
return ErrQuit
|
return ErrQuit
|
||||||
}
|
}
|
||||||
if c.IsBadURL(url) {
|
if c.chdb.CheckURL(url) {
|
||||||
indexesToRemove = append(indexesToRemove, i)
|
indexesToRemove = append(indexesToRemove, i)
|
||||||
if err := os.Remove(filepath.Join(c.JSONPath, jsonFile.Name())); err != nil {
|
if err := os.Remove(filepath.Join(c.JSONPath, jsonFile.Name())); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -591,7 +537,7 @@ func (c *CVDownloader) hasQuit() bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewCVDownloader(ctx context.Context, workPath, APIKey string, imageTypes []string, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
|
func NewCVDownloader(ctx context.Context, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader {
|
||||||
return &CVDownloader{
|
return &CVDownloader{
|
||||||
Context: ctx,
|
Context: ctx,
|
||||||
JSONPath: filepath.Join(workPath, "_json"),
|
JSONPath: filepath.Join(workPath, "_json"),
|
||||||
@ -603,6 +549,7 @@ func NewCVDownloader(ctx context.Context, workPath, APIKey string, imageTypes []
|
|||||||
FinishedDownloadQueue: finishedDownloadQueue,
|
FinishedDownloadQueue: finishedDownloadQueue,
|
||||||
SendExistingImages: sendExistingImages,
|
SendExistingImages: sendExistingImages,
|
||||||
ImageTypes: imageTypes,
|
ImageTypes: imageTypes,
|
||||||
|
chdb: chdb,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user