Fix issues with reading cv files
commit fe3f045c6e
parent 8ce1ca3354
@@ -143,7 +143,15 @@ func (f *Storage) Set(s string) error {
 
 type Encoder func(any) ([]byte, error)
 type Decoder func([]byte, interface{}) error
 
+type CVOpts struct {
+	downloadCovers bool
+	APIKey         string
+	path           string
+	thumbOnly      bool
+	originalOnly   bool
+	hashDownloaded bool
+	keepDownloaded bool
+}
 type Opts struct {
 	cpuprofile string
 	coverPath  string
@@ -157,15 +165,7 @@ type Opts struct {
 	deleteHashedImages bool
 	path               string
 
-	cv struct {
-		downloadCovers bool
-		APIKey         string
-		path           string
-		thumbOnly      bool
-		originalOnly   bool
-		hashDownloaded bool
-		keepDownloaded bool
-	}
+	cv CVOpts
 }
 
 func main() {
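Note: extracting the anonymous `cv` struct into the named CVOpts type lets the ComicVine options be passed around independently of Opts, and the new Encoder and Decoder function types match the signatures of json.Marshal and json.Unmarshal, so the standard library can be assigned to them directly. A minimal sketch of that wiring (the assignments are assumed usage, not part of this commit):

package main

import (
	"encoding/json"
	"fmt"
)

// Function types as introduced by this commit.
type Encoder func(any) ([]byte, error)
type Decoder func([]byte, interface{}) error

func main() {
	// encoding/json satisfies both types without adapters
	// (assumed usage; the commit only defines the types).
	var enc Encoder = json.Marshal
	var dec Decoder = json.Unmarshal

	b, _ := enc(map[string]int{"offset": 100})
	var out map[string]int
	_ = dec(b, &out)
	fmt.Println(string(b), out["offset"]) // {"offset":100} 100
}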
cv/cv.go (67 changed lines)
@@ -73,7 +73,7 @@ type CVDownloader struct {
 	Context               context.Context
 	FinishedDownloadQueue chan Download
 
-	fileList      []fs.DirEntry
+	fileList      []string
 	totalResults  int
 	imageWG       sync.WaitGroup
 	downloadQueue chan *CVResult
@@ -90,11 +90,11 @@ var (
 
 func (c *CVDownloader) readJson() ([]*CVResult, error) {
 	var issues []*CVResult
-	for _, file_entry := range c.fileList {
+	for _, filename := range c.fileList {
 		if c.hasQuit() {
 			return nil, ErrQuit
 		}
-		result, err := c.loadIssues(file_entry)
+		result, err := c.loadIssues(filename)
 		if err != nil {
 			if err == ErrInvalidPage {
 				continue
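readJson treats ErrInvalidPage as a per-file skip rather than a fatal error: a cached page whose contents don't match its filename is ignored and the loop moves on. A self-contained sketch of the pattern (load and the filenames are hypothetical stand-ins for loadIssues and the cache files):

package main

import (
	"errors"
	"fmt"
)

// ErrInvalidPage mirrors the sentinel returned by loadIssues.
var ErrInvalidPage = errors.New("invalid page")

// load is a hypothetical stand-in that fails validation for one file.
func load(name string) (string, error) {
	if name == "cv-9.json" {
		return "", ErrInvalidPage
	}
	return "page:" + name, nil
}

func main() {
	var pages []string
	for _, name := range []string{"cv-0.json", "cv-9.json", "cv-100.json"} {
		p, err := load(name)
		if err != nil {
			if err == ErrInvalidPage {
				continue // skip the bad page, keep reading the rest
			}
			panic(err)
		}
		pages = append(pages, p)
	}
	fmt.Println(pages) // [page:cv-0.json page:cv-100.json]
}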
@@ -107,9 +107,9 @@ func (c *CVDownloader) readJson() ([]*CVResult, error) {
 	}
 	return issues, nil
 }
-func (c *CVDownloader) loadIssues(file_entry fs.DirEntry) (*CVResult, error) {
+func (c *CVDownloader) loadIssues(filename string) (*CVResult, error) {
 	tmp := &CVResult{Results: make([]Issue, 0, 100)}
-	file, err := os.Open(filepath.Join(c.JSONPath, file_entry.Name()))
+	file, err := os.Open(filepath.Join(c.JSONPath, filename))
 	if err != nil {
 		return nil, err
 	}
@@ -122,7 +122,7 @@ func (c *CVDownloader) loadIssues(file_entry fs.DirEntry) (*CVResult, error) {
 	if err != nil {
 		return nil, err
 	}
-	if getOffset(file_entry) != tmp.Offset {
+	if getOffset(filename) != tmp.Offset {
 		return nil, ErrInvalidPage
 	}
 	return tmp, nil
@@ -138,8 +138,8 @@ func Get(ctx context.Context, url string) (*http.Response, error, func()) {
 	return resp, err, cancel
 }
 
-func getOffset(entry fs.DirEntry) int {
-	i, _ := strconv.Atoi(entry.Name()[3 : len(entry.Name())-1-4])
+func getOffset(name string) int {
+	i, _ := strconv.Atoi(name[3 : len(name)-1-4])
 	return i
 }
 
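The slice bounds in getOffset imply a fixed filename layout: a 3-byte prefix, the offset digits, then a 5-byte (1+4) ".json" suffix. Something like "cv-300.json" would fit; the exact prefix is an assumption here, only the slice arithmetic comes from the commit:

package main

import (
	"fmt"
	"strconv"
)

// getOffset as changed by this commit: parse the offset out of the
// filename, skipping a 3-byte prefix and the 5-byte ".json" suffix.
func getOffset(name string) int {
	i, _ := strconv.Atoi(name[3 : len(name)-1-4])
	return i
}

func main() {
	// "cv-300.json" is a hypothetical example of the assumed layout.
	fmt.Println(getOffset("cv-300.json")) // 300
}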
@@ -181,12 +181,13 @@ func (c *CVDownloader) updateIssues() {
 		return failCount < 15
 	}
 	for offset = 0; offset < c.totalResults; offset += 100 {
+		index := offset / 100
 		if c.hasQuit() {
 			return
 		}
-		if offset/100 < len(c.fileList) {
-			if getOffset(c.fileList[offset/100]) == offset { // If it's in order and it's not missing it should be here
-				if issue, err := c.loadIssues(c.fileList[offset/100]); err == nil && issue != nil {
+		if index < len(c.fileList) {
+			if getOffset(c.fileList[index]) == offset { // If it's in order and it's not missing it should be here
+				if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
 					c.totalResults = max(c.totalResults, issue.NumberOfTotalResults)
 					prev = -1
 					failCount = 0
@@ -197,19 +198,19 @@ func (c *CVDownloader) updateIssues() {
 				}
 				continue
 			} else {
-				log.Println("Failed to read page at offset ", offset, err)
-				os.Remove(filepath.Join(c.JSONPath, c.fileList[offset/100].Name()))
-				c.fileList = slices.Delete(c.fileList, offset/100, (offset/100)+1)
+				log.Println("Failed to read page at offset", offset, issue, err)
+				os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
+				c.fileList = slices.Delete(c.fileList, index, index+1)
 			}
+		} else {
+			log.Printf("Expected Offset %d got Offset %d", offset, getOffset(c.fileList[index]))
 		}
-		log.Printf("Expected Offset %d got Offset %d", offset, getOffset(c.fileList[offset/100]))
 		}
-		index, found := slices.BinarySearchFunc(c.fileList, offset, func(a fs.DirEntry, b int) int {
-			ai, _ := strconv.Atoi(a.Name()[3 : len(a.Name())-1-4])
-			return cmp.Compare(ai, b)
+		index, found := slices.BinarySearchFunc(c.fileList, offset, func(a string, b int) int {
+			return cmp.Compare(getOffset(a), b)
 		})
 		if found {
-			if issue, err := c.loadIssues(c.fileList[index]); err != nil && issue != nil {
+			if issue, err := c.loadIssues(c.fileList[index]); err == nil && issue != nil {
 				prev = -1
 				failCount = 0
 				// When canceled one of these will randomly be chosen, c.downloadQueue won't be closed until after this function returns
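Two fixes land in this hunk: the comparator for slices.BinarySearchFunc reuses getOffset now that fileList holds strings, and the cache-hit check is un-inverted so a page counts as loaded when err == nil rather than when loading failed. slices.BinarySearchFunc allows the search target (an int offset) to have a different type from the slice element. A standalone sketch of the search, reusing the hypothetical cv-*.json naming from above:

package main

import (
	"cmp"
	"fmt"
	"slices"
	"strconv"
)

// getOffset as defined in cv/cv.go after this commit.
func getOffset(name string) int {
	i, _ := strconv.Atoi(name[3 : len(name)-1-4])
	return i
}

func main() {
	// Hypothetical page files, sorted by offset as DownloadCovers does.
	files := []string{"cv-0.json", "cv-100.json", "cv-300.json"}
	// The target type (int) differs from the element type (string);
	// the comparator bridges the two.
	index, found := slices.BinarySearchFunc(files, 200, func(a string, b int) int {
		return cmp.Compare(getOffset(a), b)
	})
	fmt.Println(index, found) // 2 false: offset 200 is absent; 2 is its insertion point
}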
@@ -219,8 +220,8 @@ func (c *CVDownloader) updateIssues() {
 			}
 			continue
 		} else {
-			log.Println("Failed to read page at offset ", offset, err)
-			os.Remove(filepath.Join(c.JSONPath, c.fileList[index].Name()))
+			log.Println("Failed to read page at offset", offset, issue, err)
+			os.Remove(filepath.Join(c.JSONPath, c.fileList[index]))
 			c.fileList = slices.Delete(c.fileList, index, (index)+1)
 		}
 	}
@@ -517,7 +518,7 @@ list:
 			list, err := c.loadIssues(jsonFile)
 			if err != nil {
 				indexesToRemove = append(indexesToRemove, i)
-				os.Remove(filepath.Join(c.JSONPath, jsonFile.Name()))
+				os.Remove(filepath.Join(c.JSONPath, jsonFile))
 				continue
 			}
 			for _, issue := range list.Results {
@@ -527,7 +528,7 @@ list:
 				}
 				if c.chdb.CheckURL(url) {
 					indexesToRemove = append(indexesToRemove, i)
-					if err := os.Remove(filepath.Join(c.JSONPath, jsonFile.Name())); err != nil {
+					if err := os.Remove(filepath.Join(c.JSONPath, jsonFile)); err != nil {
 						return err
 					}
 					// We've removed the entire page, lets see if the new url works
@@ -591,24 +592,26 @@ func DownloadCovers(c *CVDownloader) {
 	var (
 		err error
 	)
-	log.Println("Reading json")
 	os.MkdirAll(c.JSONPath, 0o777)
 	f, _ := os.Create(filepath.Join(c.ImagePath, ".keep"))
 	f.Close()
-	c.cleanDirs()
-	c.fileList, err = os.ReadDir(c.JSONPath)
+	if !c.KeepDownloadedImages {
+		log.Println("Cleaning directories")
+		c.cleanDirs()
+	}
+	log.Println("Reading json")
+	var d *os.File
+	d, err = os.Open(c.JSONPath)
+	c.fileList, err = d.Readdirnames(-1)
 	if err != nil {
 		panic(fmt.Errorf("Unable to open path for json files: %w", err))
 	}
 
-	slices.SortFunc(c.fileList, func(x, y fs.DirEntry) int {
-		xi, _ := strconv.Atoi(x.Name()[3 : len(x.Name())-1-4])
-		yi, _ := strconv.Atoi(y.Name()[3 : len(y.Name())-1-4])
-		return cmp.Compare(xi, yi)
+	slices.SortFunc(c.fileList, func(x, y string) int {
+		return cmp.Compare(getOffset(x), getOffset(y))
 	})
 	if len(c.fileList) > 0 {
-		last_file := c.fileList[len(c.fileList)-1].Name()
-		c.totalResults, _ = strconv.Atoi(last_file[3 : len(last_file)-1-4])
+		c.totalResults = getOffset(c.fileList[len(c.fileList)-1])
 	}
 	c.totalResults += 100
 	log.Println("Number of pages", len(c.fileList), "Expected Pages:", c.totalResults/100)
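DownloadCovers now lists the JSON directory with File.Readdirnames instead of os.ReadDir. Unlike os.ReadDir, Readdirnames does not sort its results, and a lexical sort would misorder the files anyway ("cv-1000.json" sorts before "cv-200.json"), hence the explicit numeric SortFunc by getOffset. A standalone sketch of the listing-and-sorting step (the directory name is a placeholder):

package main

import (
	"cmp"
	"fmt"
	"os"
	"slices"
	"strconv"
)

// getOffset as defined in cv/cv.go after this commit.
func getOffset(name string) int {
	i, _ := strconv.Atoi(name[3 : len(name)-1-4])
	return i
}

func main() {
	// "cv_json" is a placeholder path for this sketch.
	d, err := os.Open("cv_json")
	if err != nil {
		panic(fmt.Errorf("Unable to open path for json files: %w", err))
	}
	defer d.Close()

	// Readdirnames(-1) returns every name in directory order (unsorted),
	// so sort numerically by the offset embedded in each filename.
	names, err := d.Readdirnames(-1)
	if err != nil {
		panic(err)
	}
	slices.SortFunc(names, func(x, y string) int {
		return cmp.Compare(getOffset(x), getOffset(y))
	})
	fmt.Println(names)
}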