From 99f527d07a116813cb651030e9b47e1321619a8f Mon Sep 17 00:00:00 2001
From: Timmy Welch
Date: Mon, 16 Oct 2023 01:01:08 -0700
Subject: [PATCH] Initial Commit

---
 go.mod  |  16 +++
 go.sum  |  45 +++++++
 main.go | 386 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 447 insertions(+)
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 main.go

diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..4221039
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,16 @@
+module gitea.narnian.us/lordwelch/gcd_downloader
+
+go 1.21.1
+
+require (
+	github.com/PuerkitoBio/goquery v1.8.1
+	github.com/andybalholm/brotli v1.0.6
+	github.com/kr/pretty v0.3.1
+	golang.org/x/net v0.7.0
+)
+
+require (
+	github.com/andybalholm/cascadia v1.3.1 // indirect
+	github.com/kr/text v0.2.0 // indirect
+	github.com/rogpeppe/go-internal v1.9.0 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..8305b12
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,45 @@
+github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
+github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
+github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sxfOI=
+github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
+github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
+github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..a7ad08a
--- /dev/null
+++ b/main.go
@@ -0,0 +1,386 @@
+// Command gcd_downloader logs in to comics.org, downloads the SQLite dump
+// offered on its download page and publishes it, together with a
+// brotli-compressed copy, in the destination directory. A new download is
+// attempted only when the newest published file is more than seven days old.
+package main
+
+import (
+	"archive/zip"
+	"compress/gzip"
+	"compress/zlib"
+	"context"
+	"flag"
+	"io"
+	"log"
+	"net/http"
+	"net/http/cookiejar"
+	"net/url"
+	"os"
+	"path/filepath"
+	"slices"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/andybalholm/brotli"
+	"github.com/kr/pretty"
+	"golang.org/x/net/publicsuffix"
+)
+
+const (
+	// loginURL is the page every session starts from.
+	loginURL = "https://www.comics.org/accounts/login/?next=/download/"
+
+	// Browser-like headers sent with every request.
+	userAgent      = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
+	acceptHeader   = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"
+	acceptEncoding = "gzip, deflate, br"
+
+	// dbNameLayout is the time layout that published database names follow.
+	dbNameLayout = "2006-01-02.db"
+)
+
+var (
+	destination = flag.String("destination", "/perm/www/items", "destination directory for sqlite files")
+	workDir     = flag.String("workDir", "/perm/workDir", "working directory for sqlite files")
+)
+
+// client wraps http.Client so that every request carries browser-like headers
+// and a Referer naming the previously requested URL.
+type client struct {
+	*http.Client
+	referrer string
+}
+
+// Fatal logs its arguments and terminates the process with exit status 125.
+// Deferred calls of the caller do not run.
+func Fatal(a ...any) {
+	log.Print(a...)
+	os.Exit(125)
+}
+
+// decodeBody returns a reader that undoes the Content-Encoding of resp.
+//
+// The client sets Accept-Encoding itself, which switches off net/http's
+// transparent gzip handling, so every advertised encoding is decoded here.
+func decodeBody(resp *http.Response) (io.Reader, error) {
+	switch strings.ToLower(strings.TrimSpace(resp.Header.Get("Content-Encoding"))) {
+	case "br":
+		return brotli.NewReader(resp.Body), nil
+	case "gzip":
+		zr, err := gzip.NewReader(resp.Body)
+		if err != nil {
+			return nil, err
+		}
+		return zr, nil
+	case "deflate":
+		// HTTP "deflate" is the zlib container (RFC 9110, section 8.4.1.2).
+		zr, err := zlib.NewReader(resp.Body)
+		if err != nil {
+			return nil, err
+		}
+		return zr, nil
+	default:
+		return resp.Body, nil
+	}
+}
+
+// getForm parses the HTML body of resp and returns the action attribute and
+// the input fields of the single form matched by selector. Each field is keyed
+// by its name attribute, falling back to its id. The process exits when the
+// body cannot be parsed or selector does not match exactly one element; in the
+// latter case the page is logged to help diagnose the mismatch. The caller
+// remains responsible for closing resp.Body.
+func getForm(selector string, resp *http.Response) (action string, form url.Values) {
+	body, err := decodeBody(resp)
+	if err != nil {
+		Fatal(err)
+	}
+	doc, err := goquery.NewDocumentFromReader(body)
+	if err != nil {
+		Fatal(err)
+	}
+	sel := doc.Find(selector)
+	if sel.Length() != 1 {
+		Fatal(doc.Html())
+	}
+	form = url.Values{}
+	sel.Find("input").Each(func(_ int, s *goquery.Selection) {
+		form.Add(s.AttrOr("name", s.AttrOr("id", "")), s.AttrOr("value", ""))
+	})
+	return sel.AttrOr("action", ""), form
+}
+
+// send builds a request with the browser-like headers, executes it and records
+// rawURL as the Referer of the next request. Content-Type is only sent when
+// contentType is non-empty.
+func (cl *client) send(ctx context.Context, method, rawURL, contentType string, body io.Reader) (*http.Response, error) {
+	req, err := http.NewRequestWithContext(ctx, method, rawURL, body)
+	if err != nil {
+		return nil, err
+	}
+	if cl.referrer != "" {
+		req.Header.Set("Referer", cl.referrer)
+	}
+	req.Header.Set("User-Agent", userAgent)
+	req.Header.Set("Accept", acceptHeader)
+	req.Header.Set("Accept-Encoding", acceptEncoding)
+	if contentType != "" {
+		req.Header.Set("Content-Type", contentType)
+	}
+	resp, err := cl.Do(req)
+	cl.referrer = rawURL
+	return resp, err
+}
+
+// do performs a request without a Content-Type header.
+func (cl *client) do(ctx context.Context, method, rawURL string, body io.Reader) (*http.Response, error) {
+	return cl.send(ctx, method, rawURL, "", body)
+}
+
+// Post sends body to rawURL with the given Content-Type. It shadows
+// http.Client.Post so that the browser-like headers are always applied.
+func (cl *client) Post(ctx context.Context, rawURL, contentType string, body io.Reader) (*http.Response, error) {
+	return cl.send(ctx, http.MethodPost, rawURL, contentType, body)
+}
+
+// submit posts form to action, resolved against base, and returns the
+// response. Any failure, including a status other than 200, ends the process.
+func (cl *client) submit(ctx context.Context, base *url.URL, action string, form url.Values) *http.Response {
+	// Parse keeps a query string in action intact, which placing action in
+	// url.URL.Path would percent-escape.
+	target, err := base.Parse(action)
+	if err != nil {
+		Fatal(err)
+	}
+	resp, err := cl.Post(ctx, target.String(), "application/x-www-form-urlencoded", strings.NewReader(form.Encode()))
+	if err != nil {
+		Fatal(err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		Fatal(resp.Status)
+	}
+	return resp
+}
+
+// Redirections returns the requests that led to resp, oldest first.
+func Redirections(resp *http.Response) (history []*http.Request) {
+	for resp != nil && resp.Request != nil {
+		history = append(history, resp.Request)
+		resp = resp.Request.Response
+	}
+	slices.Reverse(history)
+	return history
+}
+
+// printResp logs every request of the redirect chain with its headers,
+// followed by the status and headers of resp. It is a debugging aid.
+func printResp(resp *http.Response) {
+	for _, req := range Redirections(resp) {
+		log.Println("URL:", req.URL)
+		pretty.Println("Sent Headers:", req.Header)
+	}
+
+	log.Println("Status:", resp.Status)
+	pretty.Println(resp.Header)
+}
+
+// download logs in with the GCD_USERNAME and GCD_PASSWORD credentials,
+// requests the SQLite dump and publishes it. Any failure ends the process.
+func download() {
+	base, err := url.Parse(loginURL)
+	if err != nil {
+		Fatal(err)
+	}
+	jar, err := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List})
+	if err != nil {
+		Fatal(err)
+	}
+	cl := client{Client: &http.Client{Jar: jar}}
+
+	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+	defer cancel()
+	resp, err := cl.do(ctx, http.MethodGet, base.String(), nil)
+	if err != nil {
+		Fatal(err)
+	}
+	if resp.StatusCode != http.StatusOK {
+		Fatal(resp.Status)
+	}
+	action, form := getForm(`div.body_content > form[action*="/accounts/login/"]`, resp)
+	resp.Body.Close()
+	form.Set("username", os.Getenv("GCD_USERNAME"))
+	form.Set("password", os.Getenv("GCD_PASSWORD"))
+	form.Del("") // inputs that have neither a name nor an id
+
+	ctx, cancel = context.WithTimeout(context.Background(), time.Minute)
+	defer cancel()
+	resp = cl.submit(ctx, base, action, form)
+	action, form = getForm(`div.body_content > form.download`, resp)
+	resp.Body.Close()
+	form.Del("mysqldump")
+	form.Del("name-value")
+	form.Del("")
+	form.Set("accept_license", "on")
+	form.Set("purpose", "non-commercial")
+
+	// The download itself gets a longer deadline than the form pages.
+	ctx, cancel = context.WithTimeout(context.Background(), 15*time.Minute)
+	defer cancel()
+	resp = cl.submit(ctx, base, action, form)
+	defer resp.Body.Close()
+
+	zipPath := filepath.Join(*workDir, "temp.zip")
+	saveArchive(zipPath, resp)
+	publish(zipPath)
+	if err := os.Remove(zipPath); err != nil {
+		Fatal(err)
+	}
+
+	log.Println("complete")
+}
+
+// saveArchive streams the decoded body of resp into the file at path.
+func saveArchive(path string, resp *http.Response) {
+	body, err := decodeBody(resp)
+	if err != nil {
+		Fatal(err)
+	}
+	file, err := os.Create(path)
+	if err != nil {
+		Fatal(err)
+	}
+
+	log.Println("Downloading sqlite file now")
+	if _, err = io.Copy(file, body); err != nil {
+		Fatal(err)
+	}
+	// Close reports write errors that surface late; check it before the
+	// archive is read back.
+	if err = file.Close(); err != nil {
+		Fatal(err)
+	}
+	log.Println("Download Complete")
+}
+
+// publish extracts the first entry of the zip archive at zipPath into the work
+// directory, writes a brotli-compressed copy beside it and moves both files
+// into the destination directory.
+func publish(zipPath string) {
+	archive, err := zip.OpenReader(zipPath)
+	if err != nil {
+		Fatal(err)
+	}
+	defer archive.Close()
+
+	if len(archive.File) == 0 {
+		Fatal("downloaded archive contains no files")
+	}
+	entry := archive.File[0]
+	// Entry names come from the server; keep only the last path element so
+	// that nothing is written outside the work and destination directories.
+	name := filepath.Base(entry.Name)
+	if entry.FileInfo().IsDir() || name == "." || name == ".." {
+		Fatal("unexpected first archive entry: ", entry.Name)
+	}
+	brName := name + ".br"
+
+	src, err := entry.Open()
+	if err != nil {
+		Fatal(err)
+	}
+	defer src.Close()
+
+	dbFile, err := os.Create(filepath.Join(*workDir, name))
+	if err != nil {
+		Fatal(err)
+	}
+	brFile, err := os.Create(filepath.Join(*workDir, brName))
+	if err != nil {
+		Fatal(err)
+	}
+	bw := brotli.NewWriterLevel(brFile, 5)
+
+	log.Printf("starting compression of %s to %s\n", name, brName)
+	// A single pass over the entry feeds both the plain and the compressed file.
+	if _, err = io.Copy(io.MultiWriter(dbFile, bw), src); err != nil {
+		Fatal(err)
+	}
+	// Closing the brotli writer emits the end of the stream; the files are
+	// closed before the rename so that write errors are not missed.
+	if err = bw.Close(); err != nil {
+		Fatal(err)
+	}
+	if err = brFile.Close(); err != nil {
+		Fatal(err)
+	}
+	if err = dbFile.Close(); err != nil {
+		Fatal(err)
+	}
+
+	for _, n := range []string{name, brName} {
+		if err = os.Rename(filepath.Join(*workDir, n), filepath.Join(*destination, n)); err != nil {
+			Fatal(err)
+		}
+	}
+}
+
+// createDirs makes sure the work and destination directories exist.
+func createDirs() {
+	for _, dir := range []string{*workDir, *destination} {
+		if err := os.MkdirAll(dir, 0o777); err != nil {
+			Fatal(err)
+		}
+	}
+}
+
+// shouldUpdate reports whether the destination directory holds no database or
+// only databases whose file-name date is more than seven days in the past.
+func shouldUpdate() bool {
+	// os.ReadDir closes the directory handle itself, so none leaks on the
+	// hourly checks.
+	entries, err := os.ReadDir(*destination)
+	if err != nil {
+		Fatal(err)
+	}
+	var latest time.Time
+	for _, e := range entries {
+		// Names that do not match the layout, such as the .br copies, are
+		// not databases and are skipped.
+		t, err := time.Parse(dbNameLayout, e.Name())
+		if err != nil {
+			continue
+		}
+		if t.After(latest) {
+			latest = t
+		}
+	}
+	if latest.IsZero() {
+		return true
+	}
+	cutoff := time.Now().UTC().AddDate(0, 0, -7)
+	if latest.Before(cutoff) {
+		return true
+	}
+	log.Printf("can't update for another %v", latest.Sub(cutoff))
+	return false
+}
+
+// main checks once an hour whether a new database is due and downloads it.
+func main() {
+	flag.Parse()
+	if os.Getenv("GCD_USERNAME") == "" || os.Getenv("GCD_PASSWORD") == "" {
+		log.Println("Username and password not provided")
+		os.Exit(0)
+	}
+	for {
+		createDirs()
+		if shouldUpdate() {
+			download()
+			log.Println("update complete, waiting 1h to check again")
+		} else {
+			log.Println("It's too early, waiting 1h to check again")
+		}
+		time.Sleep(time.Hour)
+	}
+}