Fix issues with reading CV files

This commit is contained in:
Timmy Welch 2025-01-11 15:26:35 -08:00
parent 8ce1ca3354
commit fe3f045c6e
2 changed files with 45 additions and 42 deletions

View File

@ -143,7 +143,15 @@ func (f *Storage) Set(s string) error {
type Encoder func(any) ([]byte, error) type Encoder func(any) ([]byte, error)
type Decoder func([]byte, interface{}) error type Decoder func([]byte, interface{}) error
// CVOpts groups the Comic Vine (CV) downloader options into a named type
// (this commit extracts it from an anonymous struct field inside Opts).
type CVOpts struct {
downloadCovers bool // presumably enables downloading cover images — confirm against flag parsing
APIKey string // Comic Vine API key — NOTE(review): semantics inferred from name; verify
path string // presumably the base path for CV data — TODO confirm against callers
thumbOnly bool // presumably restrict downloads to thumbnail images — confirm
originalOnly bool // presumably restrict downloads to original-size images — confirm
hashDownloaded bool // presumably hash images after download — confirm
keepDownloaded bool // presumably keep downloaded images (skip cleanup); likely feeds KeepDownloadedImages used in DownloadCovers
}
type Opts struct { type Opts struct {
cpuprofile string cpuprofile string
coverPath string coverPath string
@ -157,15 +165,7 @@ type Opts struct {
deleteHashedImages bool deleteHashedImages bool
path string path string
cv struct { cv CVOpts
downloadCovers bool
APIKey string
path string
thumbOnly bool
originalOnly bool
hashDownloaded bool
keepDownloaded bool
}
} }
func main() { func main() {

View File

@ -73,7 +73,7 @@ type CVDownloader struct {
Context context.Context Context context.Context
FinishedDownloadQueue chan Download FinishedDownloadQueue chan Download
fileList []fs.DirEntry fileList []string
totalResults int totalResults int
imageWG sync.WaitGroup imageWG sync.WaitGroup
downloadQueue chan *CVResult downloadQueue chan *CVResult
@ -90,11 +90,11 @@ var (
func (c *CVDownloader) readJson() ([]*CVResult, error) { func (c *CVDownloader) readJson() ([]*CVResult, error) {
var issues []*CVResult var issues []*CVResult
for _, file_entry := range c.fileList { for _, filename := range c.fileList {
if c.hasQuit() { if c.hasQuit() {
return nil, ErrQuit return nil, ErrQuit
} }
result, err := c.loadIssues(file_entry) result, err := c.loadIssues(filename)
if err != nil { if err != nil {
if err == ErrInvalidPage { if err == ErrInvalidPage {
continue continue
@ -107,9 +107,9 @@ func (c *CVDownloader) readJson() ([]*CVResult, error) {
} }
return issues, nil return issues, nil
} }
func (c *CVDownloader) loadIssues(file_entry fs.DirEntry) (*CVResult, error) { func (c *CVDownloader) loadIssues(filename string) (*CVResult, error) {
tmp := &CVResult{Results: make([]Issue, 0, 100)} tmp := &CVResult{Results: make([]Issue, 0, 100)}
file, err := os.Open(filepath.Join(c.JSONPath, file_entry.Name())) file, err := os.Open(filepath.Join(c.JSONPath, filename))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -122,7 +122,7 @@ func (c *CVDownloader) loadIssues(file_entry fs.DirEntry) (*CVResult, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
if getOffset(file_entry) != tmp.Offset { if getOffset(filename) != tmp.Offset {
return nil, ErrInvalidPage return nil, ErrInvalidPage
} }
return tmp, nil return tmp, nil
@ -138,8 +138,8 @@ func Get(ctx context.Context, url string) (*http.Response, error, func()) {
return resp, err, cancel return resp, err, cancel
} }
func getOffset(entry fs.DirEntry) int { func getOffset(name string) int {
i, _ := strconv.Atoi(entry.Name()[3 : len(entry.Name())-1-4]) i, _ := strconv.Atoi(name[3 : len(name)-1-4])
return i return i
} }
@ -181,12 +181,13 @@ func (c *CVDownloader) updateIssues() {
return failCount < 15 return failCount < 15
} }
for offset = 0; offset < c.totalResults; offset += 100 { for offset = 0; offset < c.totalResults; offset += 100 {
index := offset / 100
if c.hasQuit() { if c.hasQuit() {
return return
} }
if offset/100 < len(c.fileList) { if index < len(c.fileList) {
if getOffset(c.fileList[offset/100]) == offset { // If it's in order and it's not missing it should be here if getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
if issue, err := c.loadIssues(c.fileList[offset/100]); err == nil && issue != nil { if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
c.totalResults = max(c.totalResults, issue.NumberOfTotalResults) c.totalResults = max(c.totalResults, issue.NumberOfTotalResults)
prev = -1 prev = -1
failCount = 0 failCount = 0
@ -197,19 +198,19 @@ func (c *CVDownloader) updateIssues() {
} }
continue continue
} else { } else {
log.Println("Failed to read page at offset ", offset, err) log.Println("Failed to read page at offset", offset, issue, err)
os.Remove(filepath.Join(c.JSONPath, c.fileList[offset/100].Name())) os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, offset/100, (offset/100)+1) c.fileList = slices.Delete(c.fileList, index, index+1)
} }
} else {
log.Printf("Expected Offset %d got Offset %d", offset, getOffset(c.fileList[index]))
} }
log.Printf("Expected Offset %d got Offset %d", offset, getOffset(c.fileList[offset/100]))
} }
index, found := slices.BinarySearchFunc(c.fileList, offset, func(a fs.DirEntry, b int) int { index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
ai, _ := strconv.Atoi(a.Name()[3 : len(a.Name())-1-4]) return cmp.Compare(getOffset(a), b)
return cmp.Compare(ai, b)
}) })
if found { if found {
if issue, err := c.loadIssues(c.fileList[index]); err != nil && issue != nil { if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
prev = -1 prev = -1
failCount = 0 failCount = 0
// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns // When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
@ -219,8 +220,8 @@ func (c *CVDownloader) updateIssues() {
} }
continue continue
} else { } else {
log.Println("Failed to read page at offset ", offset, err) log.Println("Failed to read page at offset", offset, issue, err)
os.Remove(filepath.Join(c.JSONPath, c.fileList[index].Name())) os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
c.fileList = slices.Delete(c.fileList, index, (index)+1) c.fileList = slices.Delete(c.fileList, index, (index)+1)
} }
} }
@ -517,7 +518,7 @@ list:
list, err := c.loadIssues(jsonFile) list, err := c.loadIssues(jsonFile)
if err != nil { if err != nil {
indexesToRemove = append(indexesToRemove, i) indexesToRemove = append(indexesToRemove, i)
os.Remove(filepath.Join(c.JSONPath, jsonFile.Name())) os.Remove(filepath.Join(c.JSONPath, jsonFile))
continue continue
} }
for _, issue := range list.Results { for _, issue := range list.Results {
@ -527,7 +528,7 @@ list:
} }
if c.chdb.CheckURL(url) { if c.chdb.CheckURL(url) {
indexesToRemove = append(indexesToRemove, i) indexesToRemove = append(indexesToRemove, i)
if err := os.Remove(filepath.Join(c.JSONPath, jsonFile.Name())); err != nil { if err := os.Remove(filepath.Join(c.JSONPath, jsonFile)); err != nil {
return err return err
} }
// We've removed the entire page, lets see if the new url works // We've removed the entire page, lets see if the new url works
@ -591,24 +592,26 @@ func DownloadCovers(c *CVDownloader) {
var ( var (
err error err error
) )
log.Println("Reading json")
os.MkdirAll(c.JSONPath, 0o777) os.MkdirAll(c.JSONPath, 0o777)
f, _ := os.Create(filepath.Join(c.ImagePath, ".keep")) f, _ := os.Create(filepath.Join(c.ImagePath, ".keep"))
f.Close() f.Close()
c.cleanDirs() if !c.KeepDownloadedImages {
c.fileList, err = os.ReadDir(c.JSONPath) log.Println("Cleaning directories")
c.cleanDirs()
}
log.Println("Reading json")
var d *os.File
d, err = os.Open(c.JSONPath)
c.fileList, err = d.Readdirnames(-1)
if err != nil { if err != nil {
panic(fmt.Errorf("Unable to open path for json files: %w", err)) panic(fmt.Errorf("Unable to open path for json files: %w", err))
} }
slices.SortFunc(c.fileList, func(x, y fs.DirEntry) int { slices.SortFunc(c.fileList, func(x, y string) int {
xi, _ := strconv.Atoi(x.Name()[3 : len(x.Name())-1-4]) return cmp.Compare(getOffset(x), getOffset(y))
yi, _ := strconv.Atoi(y.Name()[3 : len(y.Name())-1-4])
return cmp.Compare(xi, yi)
}) })
if len(c.fileList) > 0 { if len(c.fileList) > 0 {
last_file := c.fileList[len(c.fileList)-1].Name() c.totalResults = getOffset(c.fileList[len(c.fileList)-1])
c.totalResults, _ = strconv.Atoi(last_file[3 : len(last_file)-1-4])
} }
c.totalResults += 100 c.totalResults += 100
log.Println("Number of pages", len(c.fileList), "Expected Pages:", c.totalResults/100) log.Println("Number of pages", len(c.fileList), "Expected Pages:", c.totalResults/100)