package main

import (
	"archive/zip"
	"context"
	"errors"
	"flag"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/andybalholm/brotli"
	"github.com/kr/pretty"
	"golang.org/x/net/publicsuffix"
)

// Header values attached to every outgoing request.
//
// NOTE(review): "gzip" and "deflate" are advertised here, but
// getReaderFromHTTP only decodes "br". Because Accept-Encoding is set
// explicitly, net/http will not transparently decompress gzip responses.
// Confirm the server only ever answers with br or identity.
const (
	browserUserAgent      = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
	browserAccept         = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
	browserAcceptEncoding = "gzip, deflate, br"
)

// client wraps an http.Client and remembers the URL of the most recent
// request so that it can be sent as the Referer of the next one.
type client struct {
	*http.Client
	referrer string
}

// Fatal logs its arguments and terminates the process with exit status 125.
// Deferred functions do not run after it is called.
func Fatal(a ...any) {
	log.Print(a...)
	os.Exit(125)
}

// getReaderFromHTTP returns a reader over the response body. When the
// response declares Content-Encoding "br" the body is wrapped in a brotli
// decoder; closing the returned value always closes the underlying body.
func getReaderFromHTTP(resp *http.Response) io.ReadCloser {
	var body io.ReadCloser = resp.Body
	if resp.Header.Get("Content-Encoding") == "br" {
		body = struct {
			io.Reader
			io.Closer
		}{brotli.NewReader(resp.Body), resp.Body}
	}
	return body
}

// getForm parses body as HTML and locates the single element matching
// selector. It returns that element's "action" attribute and one form value
// per descendant <input>, keyed by the input's name (falling back to its id).
// The process exits via Fatal, dumping the document, unless exactly one
// element matches.
func getForm(selector string, body io.Reader) (action string, form url.Values) {
	doc, err := goquery.NewDocumentFromReader(body)
	if err != nil {
		Fatal(err)
	}
	sel := doc.Find(selector)
	if sel == nil || len(sel.Nodes) != 1 {
		Fatal(doc.Html())
	}
	form = url.Values{}
	action = sel.AttrOr("action", "")
	sel.Find("input").Each(func(_ int, s *goquery.Selection) {
		form.Add(s.AttrOr("name", s.AttrOr("id", "")), s.AttrOr("value", ""))
	})
	return action, form
}

// getDate parses body as HTML, reads the timestamp from a fixed position in
// the page and returns it truncated to midnight. A timestamp that cannot be
// parsed is returned as an error; a page without exactly one matching element
// terminates the process via Fatal after dumping the document.
func getDate(body io.Reader) (time.Time, error) {
	doc, err := goquery.NewDocumentFromReader(body)
	if err != nil {
		Fatal(err)
	}
	sel := doc.Find("#sizing_base > div.body_content > ul:nth-child(13) > li:nth-child(3) > span")
	if sel != nil && len(sel.Nodes) == 1 {
		// Surrounding whitespace in the markup must not break parsing.
		date, err := time.Parse("2006-01-02 15:04:05", strings.TrimSpace(sel.Text()))
		if err != nil {
			return date, err
		}
		return time.Date(date.Year(), date.Month(), date.Day(), 0, 0, 0, 0, date.Location()), nil
	}
	Fatal(doc.Html())
	return time.UnixMicro(0), errors.New("Failed to find date")
}

// newRequest builds a request carrying the browser-like headers and the
// current referrer. Unlike a bare http.NewRequestWithContext call with the
// error dropped, a malformed URL or method is reported to the caller instead
// of causing a nil-pointer panic.
func (cl *client) newRequest(ctx context.Context, method, rawURL string, body io.Reader) (*http.Request, error) {
	req, err := http.NewRequestWithContext(ctx, method, rawURL, body)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Referer", cl.referrer)
	req.Header.Set("User-Agent", browserUserAgent)
	req.Header.Set("Accept", browserAccept)
	req.Header.Set("Accept-Encoding", browserAcceptEncoding)
	return req, nil
}

// do issues a request with the given method and records url as the referrer
// for the following request. It returns an error without sending anything if
// the request cannot be constructed.
func (cl *client) do(ctx context.Context, method string, url string, body io.Reader) (*http.Response, error) {
	req, err := cl.newRequest(ctx, method, url, body)
	if err != nil {
		return nil, err
	}
	resp, err := cl.Do(req)
	cl.referrer = url
	return resp, err
}

// Post issues a POST request with the given Content-Type and records url as
// the referrer for the following request. It returns an error without sending
// anything if the request cannot be constructed.
func (cl *client) Post(ctx context.Context, url, contentType string, body io.Reader) (resp *http.Response, err error) {
	req, err := cl.newRequest(ctx, "POST", url, body)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", contentType)
	resp, err = cl.Do(req)
	cl.referrer = url
	return resp, err
}

// Redirections returns the chain of requests that led to resp, oldest first.
// A nil response yields an empty slice.
func Redirections(resp *http.Response) (history []*http.Request) {
	for resp != nil && resp.Request != nil {
		req := resp.Request
		history = append(history, req)
		resp = req.Response
	}
	// The chain was collected newest-first; reverse it in place.
	for l, r := 0, len(history)-1; l < r; l, r = l+1, r-1 {
		history[l], history[r] = history[r], history[l]
	}
	return history
}

// printResp logs every request in the redirect chain of resp followed by the
// final status and response headers. It is a debugging aid and does nothing
// for a nil response.
func printResp(resp *http.Response) {
	if resp == nil {
		return
	}
	for _, req := range Redirections(resp) {
		log.Println("URL:", req.URL)
		pretty.Println("Sent Headers:", req.Header)
	}
	log.Println("Status:", resp.Status)
	pretty.Println(resp.Header)
}

var (
	// sqlite_filename and sqlite_br_filename hold the base names of the
	// most recently extracted database and its brotli-compressed copy.
	sqlite_filename    string
	sqlite_br_filename string

	destination *string = flag.String("destination", "/perm/www/items", "destination directory for sqlite files")
	workDir     *string = flag.String("work", "/perm/workDir", "working directory for sqlite files")
)

// resolveAction resolves a form's action attribute against base and returns
// the absolute URL. The action is parsed as a URL reference so that a query
// string in it is preserved rather than escaped into the path.
func resolveAction(base *url.URL, action string) string {
	target, err := base.Parse(action)
	if err != nil {
		Fatal(err)
	}
	return target.String()
}

// download logs in using the GCD_USERNAME and GCD_PASSWORD environment
// variables, and, if the dump date shown on the download page is after
// current (truncated to midnight), downloads the zipped sqlite dump into the
// work directory, extracts its first entry alongside a brotli-compressed
// copy, and moves both into the destination directory. Any failure
// terminates the process via Fatal.
func download(current time.Time) {
	current = time.Date(current.Year(), current.Month(), current.Day(), 0, 0, 0, 0, current.Location())

	u, err := url.Parse("https://www.comics.org/accounts/login/?next=/download/")
	if err != nil {
		Fatal(err)
	}
	jar, err := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List})
	if err != nil {
		Fatal(err)
	}
	cl := client{Client: &http.Client{Jar: jar}}

	// Fetch the login page to obtain the form and its hidden inputs.
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()
	resp, err := cl.do(ctx, "GET", u.String(), nil)
	if err != nil {
		Fatal(err)
	}
	// printResp(resp)
	if resp.StatusCode != http.StatusOK {
		Fatal(resp.Status)
	}
	body := getReaderFromHTTP(resp)
	action, form := getForm(`div.body_content > form[action*="/accounts/login/"]`, body)
	body.Close()

	form.Set("username", os.Getenv("GCD_USERNAME"))
	form.Set("password", os.Getenv("GCD_PASSWORD"))
	form.Del("") // inputs with neither name nor id

	// Submit the credentials; the response is expected to be the download page.
	ctx, cancel = context.WithTimeout(context.Background(), time.Minute)
	defer cancel()
	resp, err = cl.Post(ctx, resolveAction(u, action), "application/x-www-form-urlencoded", strings.NewReader(form.Encode()))
	if err != nil {
		Fatal(err)
	}
	// printResp(resp)
	if resp.StatusCode != http.StatusOK {
		Fatal(resp.Status)
	}
	loginBody := getReaderFromHTTP(resp)
	page, err := ioutil.ReadAll(loginBody)
	// Close immediately so the body is released on the early return below too.
	loginBody.Close()
	if err != nil {
		Fatal(err)
	}
	html := string(page)

	date, err := getDate(strings.NewReader(html))
	if err != nil {
		Fatal(err)
	}
	if !date.After(current) {
		log.Print("No new data found")
		return
	}

	action, form = getForm(`div.body_content > form.download`, strings.NewReader(html))
	form.Del("mysqldump")
	form.Del("name-value")
	form.Del("")
	form.Set("accept_license", "on")
	form.Set("purpose", "non-commercial")

	// Request the archive itself; allow much longer for the transfer.
	ctx, cancel = context.WithTimeout(context.Background(), time.Minute*15)
	defer cancel()
	resp, err = cl.Post(ctx, resolveAction(u, action), "application/x-www-form-urlencoded", strings.NewReader(form.Encode()))
	if err != nil {
		Fatal(err)
	}
	// printResp(resp)
	if resp.StatusCode != http.StatusOK {
		Fatal(resp.Status)
	}
	// Decode through the same helper as the HTML pages so that a
	// brotli-encoded response does not end up on disk as a corrupt zip.
	archive := getReaderFromHTTP(resp)

	zipPath := filepath.Join(*workDir, "temp.zip")
	file, err := os.Create(zipPath)
	if err != nil {
		Fatal(err)
	}
	log.Println("Downloading sqlite file now")
	size, err := io.Copy(file, archive)
	archive.Close()
	if err != nil {
		Fatal(err)
	}
	log.Println("Download Complete")

	// open sqlite file
	zipFile, err := zip.NewReader(file, size)
	if err != nil {
		Fatal(err)
	}
	if len(zipFile.File) == 0 {
		Fatal("downloaded archive contains no files")
	}
	entry := zipFile.File[0]
	if entry.FileInfo().IsDir() {
		Fatal("first archive entry is a directory: ", entry.Name)
	}

	// determine names and create destination files
	// The entry name comes from the remote archive, so only its final path
	// element is used to keep every output inside the configured directories.
	name := filepath.Base(entry.Name)
	if name == "." || name == ".." || name == string(filepath.Separator) {
		Fatal("archive entry has no usable file name: ", entry.Name)
	}
	sqlite_filename = name
	sqlite_br_filename = name + ".br"
	workDB := filepath.Join(*workDir, sqlite_filename)
	workBR := filepath.Join(*workDir, sqlite_br_filename)

	dbFile, err := os.Create(workDB)
	if err != nil {
		Fatal(err)
	}
	brFile, err := os.Create(workBR)
	if err != nil {
		Fatal(err)
	}
	sqlite, err := entry.Open()
	if err != nil {
		Fatal(err)
	}

	// read from zip to file in addition to the brotli encoder below
	tee := io.TeeReader(sqlite, dbFile)
	br := brotli.NewWriterLevel(brFile, 5)
	log.Printf("starting compression of %s to %s\n", sqlite_filename, sqlite_br_filename)
	// Brotli encodes sqlite file. TeeReader ensures uncompressed copy is also available
	if _, err = io.Copy(br, tee); err != nil {
		Fatal(err)
	}

	// Finalize every stream before the files are moved so that the brotli
	// trailer is on disk and flush errors are not silently dropped. No
	// defers are used: every failure path exits the process via Fatal.
	if err = br.Close(); err != nil {
		Fatal(err)
	}
	if err = sqlite.Close(); err != nil {
		Fatal(err)
	}
	if err = brFile.Close(); err != nil {
		Fatal(err)
	}
	if err = dbFile.Close(); err != nil {
		Fatal(err)
	}
	if err = file.Close(); err != nil {
		Fatal(err)
	}

	// rename files to final location
	if err = os.Rename(workDB, filepath.Join(*destination, sqlite_filename)); err != nil {
		Fatal(err)
	}
	if err = os.Rename(workBR, filepath.Join(*destination, sqlite_br_filename)); err != nil {
		Fatal(err)
	}

	// remove temporary zip
	if err = os.Remove(zipPath); err != nil {
		Fatal(err)
	}
	log.Println("complete")
}

// createDirs ensures the work and destination directories exist, exiting via
// Fatal if either cannot be created.
func createDirs() {
	if err := os.MkdirAll(*workDir, 0o777); err != nil {
		Fatal(err)
	}
	if err := os.MkdirAll(*destination, 0o777); err != nil {
		Fatal(err)
	}
}

// getLatestDate scans the destination directory for files named like
// "2006-01-02.db" and returns the most recent date found. It returns the Unix
// epoch when no such file exists, and exits via Fatal if the directory cannot
// be read.
func getLatestDate() time.Time {
	const layout = "2006-01-02.db"

	dir, err := os.Open(*destination)
	if err != nil {
		Fatal(err)
	}
	defer dir.Close()

	names, err := dir.Readdirnames(0)
	if err != nil {
		Fatal(err)
	}

	var candidates []string
	for _, name := range names {
		if len(name) == len(layout) {
			candidates = append(candidates, name)
		}
	}
	sort.Strings(candidates)

	// ISO dates sort chronologically, so the last name that actually parses
	// is the newest; unrelated files of the same length are skipped.
	for i := len(candidates) - 1; i >= 0; i-- {
		if lastUpdate, err := time.Parse(layout, candidates[i]); err == nil {
			return lastUpdate
		}
	}
	return time.UnixMicro(0)
}

// shouldUpdate reports whether lastUpdate is more than seven days in the
// past. When it is not, the remaining wait is logged.
func shouldUpdate(lastUpdate time.Time) bool {
	cutoff := time.Now().UTC().AddDate(0, 0, -7)
	if lastUpdate.Before(cutoff) {
		return true
	}
	log.Printf("can't update for another %v", lastUpdate.Sub(cutoff))
	return false
}