From 033c68593bf65e330a2965d70988ad8e0a242bc7 Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Thu, 9 Jan 2025 02:07:36 -0800 Subject: [PATCH] Implement a buffer pool Downloads in quick succession could cause an OOM when the GC couldn't keep up --- cmd/comic-hasher/main.go | 21 ++++++++++++++++++--- cv/cv.go | 19 ++++++++++++------- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/cmd/comic-hasher/main.go b/cmd/comic-hasher/main.go index dba3474..300bd2a 100644 --- a/cmd/comic-hasher/main.go +++ b/cmd/comic-hasher/main.go @@ -74,6 +74,15 @@ var formatValues = map[string]Format{ "msgpack": Msgpack, } +var bufPool = &sync.Pool{ + New: func() any { + // The Pool's New function should generally only return pointer + // types, since a pointer can be put into the return interface + // value without an allocation: + return new(bytes.Buffer) + }, +} + func (f Format) String() string { if name, known := formatNames[f]; known { return name @@ -799,15 +808,21 @@ func downloadProcessor(chdb ch.CHDB, opts Opts, imagePaths chan cv.Download, ser panic(err) } } else { - file = io.NopCloser(bytes.NewBuffer(path.Image)) + file = io.NopCloser(path.Image) } i, format, err := image.Decode(bufio.NewReader(file)) if err != nil { file.Close() - log.Println("Reading image failed", path.Dest) + log.Println("Reading image failed", path.Dest, err) + if path.Image != nil { + bufPool.Put(path.Image) + } continue // skip this image } file.Close() + if path.Image != nil { + bufPool.Put(path.Image) + } chdb.AddPath(path.Dest) // Add to sqlite db and remove file if opts.deleteHashedImages is true im := ch.Im{ @@ -906,7 +921,7 @@ func startServer(opts Opts) { if opts.cv.thumbOnly { imageTypes = append(imageTypes, "thumb_url") } - cvdownloader := cv.NewCVDownloader(server.Context, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.keepDownloaded, opts.cv.hashDownloaded, finishedDownloadQueue) + cvdownloader := cv.NewCVDownloader(server.Context, bufPool, chdb, opts.cv.path, opts.cv.APIKey, imageTypes, opts.cv.keepDownloaded, opts.cv.hashDownloaded, finishedDownloadQueue) go func() { defer dwg.Done() cv.DownloadCovers(cvdownloader) diff --git a/cv/cv.go b/cv/cv.go index 863dd55..85f2841 100644 --- a/cv/cv.go +++ b/cv/cv.go @@ -30,7 +30,7 @@ type Download struct { URL string Dest string IssueID string - Image []byte + Image *bytes.Buffer } type Issue struct { @@ -80,6 +80,7 @@ type CVDownloader struct { imageDownloads chan download notFound chan download chdb ch.CHDB + bufPool *sync.Pool } var ( @@ -375,13 +376,16 @@ func (c *CVDownloader) start_downloader() { } } else { - image := &bytes.Buffer{} + image := c.bufPool.Get().(*bytes.Buffer) log.Println("downloading", dl.dest) _, err = io.Copy(image, resp.Body) if err != nil { log.Println("Failed when downloading image", err) cleanup() os.Remove(dl.dest) + if image != nil { + c.bufPool.Put(image) + } continue } @@ -389,7 +393,7 @@ func (c *CVDownloader) start_downloader() { URL: dl.url, Dest: dl.dest, IssueID: strconv.Itoa(dl.issueID), - Image: image.Bytes(), + Image: image, } } cleanup() @@ -564,15 +568,16 @@ func (c *CVDownloader) cleanDirs() { }) } -func NewCVDownloader(ctx context.Context, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader { +func NewCVDownloader(ctx context.Context, bufPool *sync.Pool, chdb ch.CHDB, workPath, APIKey string, imageTypes []string, keepDownloadedImages, sendExistingImages bool, finishedDownloadQueue chan Download) *CVDownloader { return &CVDownloader{ Context: ctx, JSONPath: filepath.Join(workPath, "_json"), ImagePath: filepath.Join(workPath, "_image"), APIKey: APIKey, - downloadQueue: make(chan *CVResult, 1000), - imageDownloads: make(chan download, 250), - notFound: make(chan download, 100), + downloadQueue: make(chan *CVResult, 1000), // This is just json it shouldn't take up much more than 122 MB + imageDownloads: make(chan download, 250), // These are just URLs should only take a few MB + notFound: make(chan download, 100), // Same here + bufPool: bufPool, // Only used if keepDownloadedImages is false to save space on byte buffers. The buffers get sent back via finishedDownloadQueue FinishedDownloadQueue: finishedDownloadQueue, SendExistingImages: sendExistingImages, KeepDownloadedImages: keepDownloadedImages,