diff --git a/cmd.go b/cmd.go
index 09ee9ef..dcf92f8 100644
--- a/cmd.go
+++ b/cmd.go
@@ -1,4 +1,5 @@
 //go:build !gokrazy
+
 package main
 
 import (
@@ -14,7 +15,12 @@ func main() {
 		os.Exit(0)
 	}
 
-	createDirs()
-
-	downlod()
+	createDirs()
+	lastUpdate := getLatestDate()
+	if shouldUpdate(lastUpdate) {
+		download(lastUpdate)
+		log.Println("update complete, waiting 1h to check again")
+	} else {
+		log.Println("It's too early, waiting 1h to check again")
+	}
 }
diff --git a/gokrazy.go b/gokrazy.go
index 62ad6f7..279a7b2 100644
--- a/gokrazy.go
+++ b/gokrazy.go
@@ -1,4 +1,5 @@
 //go:build gokrazy
+
 package main
 
 import (
@@ -7,12 +8,10 @@ import (
 	"os"
 	"time"
 
-	"github.com/gokrazy/gokrazy"
 	"github.com/antelman107/net-wait-go/wait"
+	"github.com/gokrazy/gokrazy"
 )
 
-
-
 func main() {
 	gokrazy.WaitForClock()
 	if !wait.New(wait.WithDeadline(time.Minute), wait.WithWait(time.Second), wait.WithBreak(time.Second)).Do([]string{"www.comics.org:443"}) {
@@ -26,8 +25,9 @@ func main() {
 	}
 	for {
 		createDirs()
-		if shouldUpdate() {
-			downlod()
+		lastUpdate := getLatestDate()
+		if shouldUpdate(lastUpdate) {
+			download(lastUpdate)
 			log.Println("update complete, waiting 1h to check again")
 		} else {
 			log.Println("It's too early, waiting 1h to check again")
diff --git a/main.go b/main.go
index 9fc4545..be1ee4c 100644
--- a/main.go
+++ b/main.go
@@ -3,8 +3,10 @@ package main
 import (
 	"archive/zip"
 	"context"
+	"errors"
 	"flag"
 	"io"
+	"io/ioutil"
 	"log"
 	"net/http"
 	"net/http/cookiejar"
@@ -31,11 +33,18 @@ func Fatal(a ...any) {
 	os.Exit(125)
 }
 
-func getForm(selector string, resp *http.Response) (action string, form url.Values) {
-	var body io.Reader = resp.Body
+func getReaderFromHTTP(resp *http.Response) io.ReadCloser {
+	var body io.ReadCloser = resp.Body
 	if resp.Header.Get("Content-Encoding") == "br" {
-		body = brotli.NewReader(resp.Body)
+		body = struct {
+			io.Reader
+			io.Closer
+		}{brotli.NewReader(resp.Body), resp.Body}
 	}
+	return body
+}
+
+func getForm(selector string, body io.Reader) (action string, form url.Values) {
 	doc, err := goquery.NewDocumentFromReader(body)
 	if err != nil {
 		Fatal(err)
@@ -52,6 +61,23 @@ func getForm(selector string, resp *http.Response) (action string, form url.Valu
 	return action, form
 }
 
+func getDate(body io.Reader) (time.Time, error) {
+	doc, err := goquery.NewDocumentFromReader(body)
+	if err != nil {
+		Fatal(err)
+	}
+
+	if sel := doc.Find("#sizing_base > div.body_content > ul:nth-child(13) > li:nth-child(3) > span"); sel != nil && len(sel.Nodes) == 1 {
+		date, err := time.Parse("2006-01-02 15:04:05", sel.Text())
+		if err != nil {
+			return date, err
+		}
+		return time.Date(date.Year(), date.Month(), date.Day(), 0, 0, 0, 0, date.Location()), nil
+	}
+	Fatal(doc.Html())
+	return time.UnixMicro(0), errors.New("Failed to find date")
+}
+
 func (cl *client) do(ctx context.Context, method string, url string, body io.Reader) (*http.Response, error) {
 	req, _ := http.NewRequestWithContext(ctx, method, url, body)
 	req.Header.Set("Referer", cl.referrer)
@@ -89,9 +115,9 @@ func Redirections(resp *http.Response) (history []*http.Request) {
 
 func printResp(resp *http.Response) {
 	for _, req := range Redirections(resp) {
-	log.Println("URL:", req.URL)
-	pretty.Println("Sent Headers:", req.Header)
-}
+		log.Println("URL:", req.URL)
+		pretty.Println("Sent Headers:", req.Header)
+	}
 	log.Println("Status:", resp.Status)
 	pretty.Println(resp.Header)
 }
@@ -99,13 +125,14 @@ func printResp(resp *http.Response) {
 
 
 var (
-	sqlite_filename string
+	sqlite_filename    string
 	sqlite_br_filename string
-	destination *string = flag.String("destination", "/perm/www/items", "destination directory for sqlite files")
-	workDir *string = flag.String("work", "/perm/workDir", "working directory for sqlite files")
+	destination        *string = flag.String("destination", "/perm/www/items", "destination directory for sqlite files")
+	workDir            *string = flag.String("work", "/perm/workDir", "working directory for sqlite files")
 )
 
-func downlod() {
+func download(current time.Time) {
+	current = time.Date(current.Year(), current.Month(), current.Day(), 0, 0, 0, 0, current.Location())
 	u, _ := url.Parse("https://www.comics.org/accounts/login/?next=/download/")
 	cl := client{&http.Client{}, ""}
 	cl.Jar, _ = cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List})
@@ -122,11 +149,12 @@ func downlod() {
 	}
 	var (
 		action string
-		form url.Values
+		form   url.Values
 	)
+	body := getReaderFromHTTP(resp)
 
-	action, form = getForm(`div.body_content > form[action*="/accounts/login/"]`, resp)
-	resp.Body.Close()
+	action, form = getForm(`div.body_content > form[action*="/accounts/login/"]`, body)
+	body.Close()
 	form.Set("username", os.Getenv("GCD_USERNAME"))
 	form.Set("password", os.Getenv("GCD_PASSWORD"))
 	form.Del("")
@@ -142,7 +170,23 @@ func downlod() {
 	if resp.StatusCode != http.StatusOK {
 		Fatal(resp.Status)
 	}
-	action, form = getForm(`div.body_content > form.download`, resp)
+
+	by, err := ioutil.ReadAll(getReaderFromHTTP(resp))
+	if err != nil {
+		Fatal(err)
+	}
+	body2 := strings.NewReader(string(by))
+	date, err := getDate(body2)
+	if err != nil {
+		Fatal(err)
+	}
+	if !date.After(current) {
+		log.Print("No new data found")
+		return
+	}
+
+	body2.Seek(0, io.SeekStart)
+	action, form = getForm(`div.body_content > form.download`, body2)
 	resp.Body.Close()
 	form.Del("mysqldump")
 	form.Del("name-value")
@@ -175,6 +219,7 @@ func downlod() {
 	}
 	log.Println("Download Complete")
 
+	// open sqlite file
 	size, err := file.Seek(0, io.SeekEnd)
 	if err != nil {
 		Fatal(err)
@@ -189,14 +234,9 @@ func downlod() {
 		Fatal(err)
 	}
 
+	// determine names and create destination files
 	sqlite_filename = zipFile.File[0].Name
-	sqlite_br_filename = zipFile.File[0].Name+".br"
-
-	sqlite, err :=zipFile.File[0].Open()
-	if err != nil {
-		Fatal(err)
-	}
-	defer sqlite.Close()
+	sqlite_br_filename = zipFile.File[0].Name + ".br"
 	sqlite_file, err := os.Create(filepath.Join(*workDir, sqlite_filename))
 	if err != nil {
 		Fatal(err)
@@ -210,17 +250,26 @@ func downlod() {
 	}
 	defer sqlite_br_file.Close()
 
+	sqlite, err := zipFile.File[0].Open()
+	if err != nil {
+		Fatal(err)
+	}
+	defer sqlite.Close()
+
+	// read from zip to file in addition to the brotli encoder below
 	sqlite_reader := io.TeeReader(sqlite, sqlite_file)
 
 	br := brotli.NewWriterLevel(sqlite_br_file, 5)
 
 	log.Printf("starting compression of %s to %s\n", sqlite_filename, sqlite_br_filename)
 
+	// Brotli encodes sqlite file. TeeReader ensures uncompressed copy is also available
 	_, err = io.Copy(br, sqlite_reader)
 	if err != nil {
 		Fatal(err)
 	}
 
+	// rename files to final location
 	err = os.Rename(filepath.Join(*workDir, sqlite_filename), filepath.Join(*destination, sqlite_filename))
 	if err != nil {
 		Fatal(err)
@@ -230,6 +279,7 @@ func downlod() {
 		Fatal(err)
 	}
 
+	// remove temporary zip
 	err = os.Remove(filepath.Join(*workDir, "temp.zip"))
 	if err != nil {
 		Fatal(err)
@@ -249,7 +299,7 @@ func createDirs() {
 	}
 }
 
-func shouldUpdate() bool {
+func getLatestDate() time.Time {
 	dir, err := os.Open(*destination)
 	if err != nil {
 		Fatal(err)
@@ -267,12 +317,16 @@ func shouldUpdate() bool {
 	}
 	sort.Strings(names2)
 	if len(names2) < 1 {
-		return true
+		return time.UnixMicro(0)
 	}
 	latest := names2[len(names2)-1]
 	lastUpdate, _ := time.Parse("2006-01-02.db", latest)
-	timeTillUpdate := lastUpdate.Sub(time.Now().UTC().AddDate(0,0,-7))
-	if lastUpdate.Before(time.Now().UTC().AddDate(0,0,-7)) {
+	return lastUpdate
+}
+
+func shouldUpdate(lastUpdate time.Time) bool {
+	timeTillUpdate := lastUpdate.Sub(time.Now().UTC().AddDate(0, 0, -7))
+	if lastUpdate.Before(time.Now().UTC().AddDate(0, 0, -7)) {
 		return true
 	}
 	log.Printf("can't update for another %v", timeTillUpdate)
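Note (not part of the patch): below is a minimal, standalone sketch of the io.TeeReader + brotli.NewWriterLevel pattern that download() uses to write the plain sqlite file and its .br copy in a single pass over the zip entry. It assumes the brotli package is github.com/andybalholm/brotli; the file names (example.db, copy.db, copy.db.br) are placeholders, not values from the patch.

package main

import (
	"io"
	"log"
	"os"

	"github.com/andybalholm/brotli"
)

// writePlainAndBrotli copies src to plainPath unchanged and, in the same pass,
// writes a brotli-compressed copy to brPath.
func writePlainAndBrotli(src io.Reader, plainPath, brPath string) error {
	plain, err := os.Create(plainPath)
	if err != nil {
		return err
	}
	defer plain.Close()

	brFile, err := os.Create(brPath)
	if err != nil {
		return err
	}
	defer brFile.Close()

	// Every byte io.Copy pulls from src is mirrored into plain by the TeeReader,
	// so one read produces both outputs.
	tee := io.TeeReader(src, plain)

	// Level 5 matches the compression level chosen in download().
	bw := brotli.NewWriterLevel(brFile, 5)
	if _, err := io.Copy(bw, tee); err != nil {
		return err
	}
	// Close flushes the remaining brotli stream; skipping it truncates the .br output.
	return bw.Close()
}

func main() {
	// "example.db" is a hypothetical input file for illustration only.
	in, err := os.Open("example.db")
	if err != nil {
		log.Fatal(err)
	}
	defer in.Close()

	if err := writePlainAndBrotli(in, "copy.db", "copy.db.br"); err != nil {
		log.Fatal(err)
	}
}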