package main

import (
	"archive/tar"
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"html/template"
	"io"
	"io/ioutil"
	"log"
	"math"
	"mime"
	"net"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"time"

	"git.narnian.us/lordwelch/gloader/bundled"
	"github.com/cavaliercoder/grab"
	"github.com/lordwelch/pathvalidate"
	"github.com/wI2L/jettison"
	"golang.org/x/net/publicsuffix"
)

var (
	// DefaultMaxActiveDownloads is the limit Start applies when
	// Downloader.MaxActiveDownloads is smaller than 1.
	DefaultMaxActiveDownloads = 4

	// ErrUnsupportedScheme is a sentinel error; nothing visible in this file
	// returns it.
	ErrUnsupportedScheme = errors.New("unsupported scheme")
)

// Priority orders requests inside a RequestQueue; a smaller value sorts first.
type Priority int

// Status is the lifecycle state of a Request.
type Status int

// Priority values, most urgent first.
const (
	Highest Priority = iota
	High
	Medium
	Low
)

// Status values. The numeric order is part of the type and must not change.
const (
	Queued Status = iota
	Complete
	Stopped
	Paused
	Downloading
	Error
	Canceled
)

// Downloader owns the download queue, the history and the HTTP server that
// exposes them.
//
// NOTE(review): the HTTP handlers read and mutate Downloads/History from the
// server's goroutines while the download loop does the same from its own
// goroutine, without any synchronisation. That looks like a data race; it is
// left as-is here because fixing it needs a locking design for the whole type.
type Downloader struct {
	DataDir            string
	DownloadDir        string
	CompleteDir        string
	InfoDir            string
	MaxActiveDownloads int
	Server             *http.Server
	Downloads          RequestQueue
	History            RequestQueue
	NewRequest         chan Request
	requestDone        chan *Request
	OnComplete         func(d *Downloader, r Request)
	OnAdd              func(d *Downloader, r Request)
	OnDelete           func(d *Downloader, r Request)
}

// Request describes one download: where it comes from, where it goes and how
// far along it is. Response, Err and Jar are runtime-only and never serialised.
type Request struct {
	URL           string         `json:"url"`
	Cookies       []http.Cookie  `json:"cookies"`
	ForceDownload bool           `json:"forceDownload"`
	Status        Status         `json:"status"`
	Priority      Priority       `json:"priority"`
	FilePath      string         `json:"filepath"`
	Filename      string         `json:"filename"`
	Subdir        string         `json:"subdir"`
	TempPath      string         `json:"tempPath"`
	Response      *grab.Response `json:"-"`
	Error         string         `json:"error"`
	Err           error          `json:"-"`
	Completed     time.Time      `json:"completed"`
	Started       time.Time      `json:"started"`
	Jar           http.CookieJar `json:"-"`
	Progress      string         `json:"progress,omitempty"`
	grab          *grab.Client
}

// RequestQueue is a sortable list of requests. DateSort and URLSort enable the
// optional tie-breakers used by Less.
type RequestQueue struct {
	Queue    []*Request
	URLSort  bool
	DateSort bool
}

// MarshalJSON encodes the priority as its name; unknown values encode as
// "Medium".
func (p Priority) MarshalJSON() ([]byte, error) {
	var v string
	switch p {
	case Low:
		v = "Low"
	case High:
		v = "High"
	case Highest:
		v = "Highest"
	default: // Medium and anything out of range
		v = "Medium"
	}
	return json.Marshal(v)
}

// UnmarshalJSON accepts either a JSON number (used verbatim) or a
// case-insensitive priority name. Anything else decodes as Medium; the method
// never returns an error.
func (p *Priority) UnmarshalJSON(b []byte) error {
	var v int
	if err := json.Unmarshal(b, &v); err == nil {
		*p = Priority(v)
		return nil
	}

	var s string
	// Deliberately lenient: on failure s stays empty and we fall back to Medium.
	_ = json.Unmarshal(b, &s)
	switch strings.ToLower(s) {
	case "low":
		*p = Low
	case "high":
		*p = High
	case "highest":
		*p = Highest
	default:
		*p = Medium
	}
	return nil
}

// UnmarshalJSON accepts a case-insensitive status name. Unknown or non-string
// input decodes as Queued; the method never returns an error.
func (s *Status) UnmarshalJSON(b []byte) error {
	var v string
	// Deliberately lenient: on failure v stays empty and we fall back to Queued.
	_ = json.Unmarshal(b, &v)
	switch strings.ToLower(v) {
	case "complete", "completed":
		*s = Complete
	case "stop", "stopped":
		*s = Stopped
	case "pause", "paused":
		*s = Paused
	case "download", "downloading":
		*s = Downloading
	case "error":
		*s = Error
	case "cancel", "canceled":
		*s = Canceled
	default: // includes "queue" and "queued"
		*s = Queued
	}
	return nil
}

// MarshalJSON encodes the status as the string returned by String.
func (s Status) MarshalJSON() ([]byte, error) {
	return json.Marshal(s.String())
}

// String returns the status name; unknown values read as "Queued".
func (s Status) String() string {
	switch s {
	case Complete:
		return "Complete"
	case Stopped:
		return "Stopped"
	case Paused:
		return "Paused"
	case Downloading:
		return "Downloading"
	case Error:
		return "Error"
	case Canceled:
		return "Canceled"
	default:
		return "Queued"
	}
}

// Less implements sort.Interface. Keys are compared lexicographically so the
// relation is a proper strict weak ordering:
//
//  1. forced downloads first,
//  2. then lower Priority value,
//  3. then (if DateSort) earlier Completed time,
//  4. then (if URLSort) smaller URL.
func (rq RequestQueue) Less(i, j int) bool {
	a, b := rq.Queue[i], rq.Queue[j]
	if a.ForceDownload != b.ForceDownload {
		return a.ForceDownload
	}
	if a.Priority != b.Priority {
		return a.Priority < b.Priority
	}
	if rq.DateSort && !a.Completed.Equal(b.Completed) {
		return a.Completed.Before(b.Completed)
	}
	if rq.URLSort {
		return a.URL < b.URL
	}
	return false
}

// Len implements sort.Interface.
func (rq RequestQueue) Len() int {
	return len(rq.Queue)
}

// Swap implements sort.Interface.
func (rq RequestQueue) Swap(i, j int) {
	rq.Queue[i], rq.Queue[j] = rq.Queue[j], rq.Queue[i]
}

// Pop removes and returns the request at index i, preserving the order of the
// remaining entries. It panics if i is out of range.
func (rq *RequestQueue) Pop(i int) *Request {
	r := rq.Queue[i]
	last := len(rq.Queue) - 1
	copy(rq.Queue[i:], rq.Queue[i+1:])
	rq.Queue[last] = nil // drop the stale pointer so it can be collected
	rq.Queue = rq.Queue[:last]
	return r
}

// find returns the first request with the given URL, or nil.
func (rq *RequestQueue) find(url string) *Request {
	for _, req := range rq.Queue {
		if req.URL == url {
			return req
		}
	}
	return nil
}

// remove deletes and returns the first request with the given URL, or returns
// nil when there is none.
func (rq *RequestQueue) remove(url string) *Request {
	for i, req := range rq.Queue {
		if req.URL == url {
			return rq.Pop(i)
		}
	}
	return nil
}

// updateStatus refreshes the Progress string (a percentage with two decimals)
// of every request that has a response attached.
func (rq *RequestQueue) updateStatus() {
	for _, req := range rq.Queue {
		if req.Response != nil {
			req.Progress = fmt.Sprintf("%.2f", math.Abs(req.Response.Progress()*100))
		}
	}
}

// setError records err on the request and marks it Error. A nil err clears
// any previous error and marks the request Paused.
func (r *Request) setError(err error) {
	if err == nil {
		r.Status = Paused
		r.Err = nil
		r.Error = ""
		return
	}
	r.Status = Error
	r.Err = err
	r.Error = err.Error()
}

// Delete cancels the transfer (if any) and removes both the temporary and the
// final file. A cancellation result of context.Canceled and a missing final
// file are not treated as errors. When several steps fail, the last error is
// returned.
func (r Request) Delete() error {
	var ret error
	if r.Response != nil {
		if err := r.Response.Cancel(); err != nil && !errors.Is(err, context.Canceled) {
			ret = err
		}
	}
	// Best effort: the temp file may never have been created.
	_ = os.Remove(r.TempPath)
	// os.Remove returns a *PathError, so the sentinel must be matched with
	// os.IsNotExist rather than compared with ==.
	if err := os.Remove(r.FilePath); err != nil && !os.IsNotExist(err) {
		ret = err
	}
	return ret
}

// newCookieJar returns an empty cookie jar that uses the public suffix list.
func newCookieJar() http.CookieJar {
	c, _ := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List})
	return c
}

// Start fills in defaults, creates the data directories, registers the HTTP
// handlers, launches the download loop and then serves HTTP on the given
// network/address. It blocks until the server stops and panics if the
// listener cannot be created.
func (d *Downloader) Start(network, address string) {
	mux := http.NewServeMux()

	if d.NewRequest == nil {
		d.NewRequest = make(chan Request, 64)
	}
	if d.requestDone == nil {
		d.requestDone = make(chan *Request, 64)
	}
	if d.MaxActiveDownloads < 1 {
		d.MaxActiveDownloads = DefaultMaxActiveDownloads
	}
	if d.Server == nil {
		d.Server = &http.Server{
			Addr:         address,
			Handler:      mux,
			ReadTimeout:  2 * time.Minute,
			WriteTimeout: 2 * time.Minute,
		}
	} else if d.Server.Handler == nil {
		// A caller-supplied server without a handler would otherwise serve
		// http.DefaultServeMux and none of the routes registered below.
		d.Server.Handler = mux
	}
	if d.DataDir == "" {
		d.DataDir = "/perm/gloader"
	}
	if d.DownloadDir == "" {
		d.DownloadDir = path.Join(d.DataDir, "Download")
	}
	if d.CompleteDir == "" {
		d.CompleteDir = path.Join(d.DataDir, "Complete")
	}

	for _, dir := range []string{d.DataDir, d.DownloadDir, d.CompleteDir} {
		log.Println(dir)
	}
	for _, dir := range []string{d.DataDir, d.DownloadDir, d.CompleteDir} {
		if err := os.MkdirAll(dir, 0777); err != nil {
			log.Printf("creating directory %q: %v", dir, err)
		}
	}

	listener, err := net.Listen(network, address)
	if err != nil {
		panic(err)
	}

	log.Println("adding http handlers")
	d.initStatus(mux)
	mux.HandleFunc("/add", d.restAddDownload)
	mux.HandleFunc("/queued", d.restStatus(true))
	mux.HandleFunc("/completed", d.restStatus(false))
	mux.HandleFunc("/set", d.restSetDownloadStatus)
	mux.HandleFunc("/delete", d.restDelete)
	mux.Handle("/get/", http.StripPrefix("/get", http.HandlerFunc(d.get)))

	log.Println("starting main go routine")
	go d.download()

	log.Println("serving http server")
	if err := d.Server.Serve(listener); err != nil && err != http.ErrServerClosed {
		log.Println("http server stopped:", err)
	}
}

// httpMethodNotAllowed writes a plain-text 405 response listing the allowed
// methods and logs the same message.
func httpMethodNotAllowed(w http.ResponseWriter, method ...string) {
	allowed := strings.Join(method, ", ")
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	w.Header().Set("X-Content-Type-Options", "nosniff")
	w.Header().Add("Allow", allowed)
	w.WriteHeader(http.StatusMethodNotAllowed)
	fmt.Fprintf(w, "HTTP Error 405 – Method Not Allowed\nOnly %s method(s) is allowed\n", allowed)
	log.Printf("HTTP Error 405 – Method Not Allowed\nOnly %s method(s) is allowed\n", allowed)
}

// get serves completed downloads. GET returns the single file named by the
// URL path; POST takes a JSON array of {Subdir, Filename} objects and streams
// the named files back as one tar archive. Paths are cleaned and stripped of
// leading dots and slashes before being joined onto CompleteDir.
func (d *Downloader) get(w http.ResponseWriter, r *http.Request) {
	switch r.Method {
	case http.MethodGet:
		file := filepath.Join(d.CompleteDir, strings.TrimLeft(filepath.Clean(r.URL.Path), "./"))
		http.ServeFile(w, r, file)

	case http.MethodPost:
		var downloads []struct {
			Subdir   string
			Filename string
		}
		if err := json.NewDecoder(r.Body).Decode(&downloads); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		filenames := make([]string, 0, len(downloads))
		for _, name := range downloads {
			subdir := strings.TrimLeft(filepath.Clean(name.Subdir), "./")
			filename := strings.TrimLeft(filepath.Clean(name.Filename), "./")
			filenames = append(filenames, filepath.Join(d.CompleteDir, subdir, filename))
		}
		tarFiles(w, filenames...)

	default:
		httpMethodNotAllowed(w, http.MethodPost, http.MethodGet)
	}
}

// tarFiles streams the given files to w as a tar attachment. Content-Length
// is precomputed with DirSize. Entries are stored under their base name only.
// Once streaming has begun an error can no longer be reported to the client,
// so it is logged and the archive is cut short.
func tarFiles(w http.ResponseWriter, files ...string) {
	size, err := DirSize(files...)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Add("content-length", fmt.Sprintf("%v", size))
	w.Header().Add("content-disposition", fmt.Sprintf("attachment; filename=\"downloads_%v.tar\"", time.Now().Format("01-02-2006_15-04-05")))
	w.Header().Add("content-type", "application/x-tar")

	tw := tar.NewWriter(w)
	defer tw.Close()

	for _, filename := range files {
		info, err := os.Stat(filename)
		if err != nil {
			log.Printf("tar: %v", err)
			return
		}
		err = tw.WriteHeader(&tar.Header{
			Name: info.Name(),
			Mode: 0777,
			Size: info.Size(),
		})
		if err != nil {
			log.Printf("tar: writing header for %q: %v", filename, err)
			return
		}
		file, err := os.OpenFile(filename, os.O_RDONLY, 0o666)
		if err != nil {
			log.Printf("tar: %v", err)
			return
		}
		_, err = io.Copy(tw, file)
		file.Close()
		if err != nil {
			log.Printf("tar: copying %q: %v", filename, err)
			return
		}
	}
}

// DirSize returns the size in bytes of a tar archive holding the given files:
// per file one 512-byte header plus the content rounded up to a multiple of
// 512, followed by a 1024-byte trailer. It returns 0 and the error if any
// file cannot be stat'ed.
func DirSize(files ...string) (int64, error) {
	const block = 512

	var size int64
	for _, filename := range files {
		info, err := os.Stat(filename)
		if err != nil {
			return 0, err
		}
		size += block + (info.Size()+block-1)/block*block
	}
	return size + 2*block, nil
}

// restDelete handles DELETE /delete. The body is a JSON object
// {"Requests": [{"URL": ...}], "History": bool}; every listed URL found in
// the chosen queue is removed, its files are deleted and OnDelete is called.
// URLs that are not in the queue are skipped. The removed requests are
// returned as JSON.
func (d *Downloader) restDelete(w http.ResponseWriter, r *http.Request) {
	var (
		downloads struct {
			Requests []struct {
				URL string
			}
			History bool
		}
		ret []*Request
	)
	if r.Method != http.MethodDelete {
		httpMethodNotAllowed(w, http.MethodDelete)
		return
	}
	if err := json.NewDecoder(r.Body).Decode(&downloads); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	sort.Slice(downloads.Requests, func(i, j int) bool {
		return downloads.Requests[i].URL < downloads.Requests[j].URL
	})

	queue := &d.Downloads
	if downloads.History {
		queue = &d.History
	}
	for _, i := range downloads.Requests {
		req := queue.remove(i.URL)
		if req == nil {
			// Unknown URL: nothing to delete.
			continue
		}
		ret = append(ret, req)
		if err := req.Delete(); err != nil {
			log.Printf("deleting %v: %v", req.URL, err)
		}
		if d.OnDelete != nil {
			d.OnDelete(d, *req)
		}
	}
	d.syncDownloads()

	v, err := jettison.MarshalOpts(ret, jettison.DenyList([]string{"cookies"}))
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Add("Content-Type", "application/json; charset=utf-8")
	w.WriteHeader(http.StatusOK)
	w.Write(v)
}

// restSetDownloadStatus handles POST /set. The body is a JSON array of
// {URL, Status, Priority}; each matching queued request gets the new status
// and priority and has its error cleared. Unknown URLs are skipped.
func (d *Downloader) restSetDownloadStatus(w http.ResponseWriter, r *http.Request) {
	var downloads []struct {
		URL      string
		Status   Status
		Priority Priority
	}
	if r.Method != http.MethodPost {
		httpMethodNotAllowed(w, http.MethodPost)
		return
	}
	if err := json.NewDecoder(r.Body).Decode(&downloads); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	for _, i := range downloads {
		req := d.Downloads.find(i.URL)
		if req == nil {
			log.Println("cannot set status, unknown url", i.URL)
			continue
		}
		req.Priority = i.Priority
		req.Status = i.Status
		req.Err = nil
		req.Error = ""
	}
	d.syncDownloads()
}

// restStatus returns a handler that reports the active queue (q == true) or
// the history (q == false) as JSON. It answers OPTIONS with CORS headers and
// rejects everything except GET.
func (d *Downloader) restStatus(q bool) func(http.ResponseWriter, *http.Request) {
	return func(w http.ResponseWriter, r *http.Request) {
		w.Header().Add("Access-Control-Allow-Origin", "*")
		w.Header().Add("Access-Control-Allow-Methods", "GET")
		w.Header().Add("Access-Control-Allow-Headers", "X-PINGOTHER, Content-Type")
		if r.Method == http.MethodOptions {
			w.Header().Add("Allow", "GET")
			w.WriteHeader(http.StatusNoContent)
			return
		}
		if r.Method != http.MethodGet {
			httpMethodNotAllowed(w, http.MethodGet)
			return
		}

		queue := &d.History
		if q {
			queue = &d.Downloads
		}
		queue.updateStatus()

		v, err := jettison.MarshalOpts(queue.Queue, jettison.DenyList([]string{"cookies"}))
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Add("Content-Type", "application/json; charset=utf-8")
		w.WriteHeader(http.StatusOK)
		w.Write(v)
	}
}

// restAddDownload handles POST /add. The body is a JSON array of requests;
// each one is stripped of the fields a client may not set and then handed to
// the download loop through NewRequest.
func (d *Downloader) restAddDownload(w http.ResponseWriter, r *http.Request) {
	var requests []Request
	if r.Method != http.MethodPost {
		httpMethodNotAllowed(w, http.MethodPost)
		return
	}
	// TODO fail only on individual requests
	if err := json.NewDecoder(r.Body).Decode(&requests); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	for _, req := range requests {
		req.TempPath = "" // not allowed via REST API
		req.FilePath = "" // not allowed via REST API
		if req.Status != Paused {
			req.Status = Queued
		}
		log.Println("adding request", req.URL)
		d.NewRequest <- req
	}
	w.WriteHeader(http.StatusOK)
}

// getNameFromHEAD issues a HEAD request for r.URL and returns the filename
// from the Content-Disposition header, falling back to the base name of the
// final (post-redirect) URL path. It returns "" when the request fails or the
// status is not 2xx.
func (d Downloader) getNameFromHEAD(r Request) string {
	r.insertCookies()

	tr := &http.Transport{
		DialContext: (&net.Dialer{
			Timeout:   5 * time.Second,
			KeepAlive: 30 * time.Second,
		}).DialContext,
		TLSHandshakeTimeout:   5 * time.Second,
		ResponseHeaderTimeout: 5 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
	}
	// The transport is private to this call; release its connections.
	defer tr.CloseIdleConnections()
	ht := &http.Client{
		Jar:       r.Jar,
		Timeout:   30 * time.Second,
		Transport: tr,
	}

	re, err := ht.Head(r.URL)
	if err != nil {
		return ""
	}
	// Close on every path, including the non-2xx early return below.
	defer re.Body.Close()
	if re.StatusCode < 200 || re.StatusCode > 299 {
		return ""
	}

	if _, p, err := mime.ParseMediaType(re.Header.Get("Content-Disposition")); err == nil {
		if f, ok := p["filename"]; ok {
			return f
		}
	}
	return path.Base(re.Request.URL.Path)
}

// getFilename checks the provided filepath
// if not set uses the content-disposition from a head request
// if not set uses the basename of the url
// and sanitizes the filename using github.com/lordwelch/pathvalidate
func (d *Downloader) getFilename(r *Request) {
	log.Println("Determining filename")
	r.Filename = filepath.Clean(r.Filename)
	if r.Filename == "." { // filepath.Clean("") yields "."
		log.Println("filename is empty, testing head request")
		r.Filename = d.getNameFromHEAD(*r)
		log.Println("path from head request:", r.Filename)
		if r.Filename == "" {
			// A URL that does not parse leaves the name empty for the
			// sanitizer instead of dereferencing a nil *url.URL.
			if u, err := url.Parse(r.URL); err == nil {
				// URL paths are slash-separated on every OS, hence path.Base.
				r.Filename, _ = url.PathUnescape(path.Base(u.Path))
			}
		}
	}
	r.Filename, _ = pathvalidate.SanitizeFilename(r.Filename, '_')
	// TODO: an earlier draft joined the name onto CompleteDir here and forced
	// absolute paths back into CompleteDir; the final path is now computed by
	// addRequest instead.
	log.Println("result path:", r.Filename)
}

// getNewFilename returns a path below dir for name that does not exist yet,
// inserting ".1", ".2", … before the extension until a free name is found.
// If the existence check fails for a reason other than "not found", the
// current candidate is returned and the error is logged; the download will
// surface the underlying problem when it tries to use the path.
func getNewFilename(dir, name string) string {
	log.Println("getfilename", dir, name)
	ext := filepath.Ext(name)
	base := strings.TrimSuffix(name, ext)

	candidate := filepath.Join(dir, name)
	log.Println("stat", candidate)
	_, err := os.Stat(candidate)
	for index := 1; err == nil; index++ {
		name = strings.TrimRight(base+"."+strconv.Itoa(index)+ext, ".")
		candidate = filepath.Join(dir, name)
		log.Println("stat", candidate)
		_, err = os.Stat(candidate)
	}
	if !os.IsNotExist(err) {
		log.Printf("checking %q: %v", candidate, err)
	}
	return candidate
}

// getTempFilename makes sure the request has a temporary file in DownloadDir
// and reserves the final path with an empty placeholder so that a later
// request with the same name gets a different one from getNewFilename.
// If the temporary file cannot be created the error is recorded on the
// request and TempPath stays empty.
func (d Downloader) getTempFilename(r *Request) {
	if r.TempPath == "" {
		f, err := ioutil.TempFile(d.DownloadDir, filepath.Base(r.Filename))
		if err != nil {
			log.Printf("request for %v failed: %v", r.URL, err)
			r.setError(err)
			return
		}
		r.TempPath = f.Name()
		f.Close()
	}

	if err := os.MkdirAll(filepath.Dir(r.FilePath), 0o777); err != nil {
		log.Printf("creating directory for %q: %v", r.FilePath, err)
	}
	// O_EXCL: an existing file (e.g. on restart of the same request) is left
	// untouched; failure to reserve is not fatal for the download.
	f, err := os.OpenFile(r.FilePath, os.O_CREATE|os.O_EXCL, 0o666)
	if err != nil {
		return
	}
	f.Close()
}

// SearchDownloads returns the index of the queued request with URL u, or -1.
func (d Downloader) SearchDownloads(u string) int {
	for i, req := range d.Downloads.Queue {
		if req.URL == u {
			return i
		}
	}
	return -1
}

// SearchHistory returns the index of the history entry with URL u, or -1.
func (d Downloader) SearchHistory(u string) int {
	for i, req := range d.History.Queue {
		if req.URL == u {
			return i
		}
	}
	return -1
}

// FindRequest looks u up in the queue first and the history second and
// returns the matching request, or nil.
func (d Downloader) FindRequest(u string) *Request {
	if i := d.SearchDownloads(u); i >= 0 {
		return d.Downloads.Queue[i]
	}
	if i := d.SearchHistory(u); i >= 0 {
		return d.History.Queue[i]
	}
	return nil
}

// addRequest resolves the filename for r and appends it to the queue unless
// its URL is already known (queued or in history). A freshly added request is
// started right away when it is runnable and a download slot is free.
func (d *Downloader) addRequest(r *Request) {
	log.Println("adding download for", r.URL)
	req := d.FindRequest(r.URL)
	d.getFilename(r)

	if req != nil { // url already added
		log.Println("URL is already added", r.URL)
		// TODO: decide whether an existing file with the same name should be
		// validated/compared instead of silently ignoring the duplicate URL.
		return
	}

	// Subdir comes from the client: clean it the same way the /get handler
	// does so it can neither escape CompleteDir nor diverge from the path the
	// file is later served under.
	r.Subdir = strings.TrimLeft(filepath.Clean(r.Subdir), "./")
	r.FilePath = getNewFilename(d.CompleteDir, filepath.Join(r.Subdir, r.Filename))
	d.Downloads.Queue = append(d.Downloads.Queue, r)

	runnable := r.Status == Queued || r.Status == Downloading
	if runnable && len(d.getRunningDownloads()) < d.MaxActiveDownloads {
		d.startDownload(d.Downloads.Len() - 1)
	}
}

// TODO: func (d *Downloader) validate(r Request)

// startDownload begins (or resumes) the transfer for the request at queue
// index i and spawns a goroutine that reports on requestDone when the
// transfer ends. A request whose transfer is still in flight is left alone.
func (d *Downloader) startDownload(i int) {
	r := d.Downloads.Queue[i]

	if r.Response != nil && !r.Response.IsComplete() {
		// Already transferring; starting again would need a second request
		// for the same file.
		r.Status = Downloading
		return
	}

	r.insertCookies()
	d.getTempFilename(r)
	if r.TempPath == "" {
		// getTempFilename has recorded the error on the request.
		return
	}
	log.Println("starting download for", r.URL, "to", r.TempPath)

	req, err := grab.NewRequest(r.TempPath, r.URL)
	if err != nil {
		r.setError(err)
		return
	}

	r.Status = Downloading
	if r.grab == nil {
		r.grab = &grab.Client{
			HTTPClient: &http.Client{
				Jar: r.Jar,
				Transport: &http.Transport{
					DialContext: (&net.Dialer{
						Timeout:   10 * time.Second,
						KeepAlive: 30 * time.Second,
					}).DialContext,
					TLSHandshakeTimeout:   5 * time.Second,
					ResponseHeaderTimeout: 5 * time.Second,
					ExpectContinueTimeout: 1 * time.Second,
				},
			},
		}
	}

	resp := r.grab.Do(req)
	r.Response = resp
	r.Started = time.Now()

	// Wait on the local resp, not r.Response: the field may be cleared or
	// replaced by the download loop while this goroutine is blocked.
	go func() {
		log.Println("wait for download")
		log.Println(resp.IsComplete())
		resp.Wait()
		log.Println("download completed for", r.URL)
		d.requestDone <- r
	}()
}

// getRunningDownloads returns the queued requests that are in state
// Downloading and have a response attached.
func (d Downloader) getRunningDownloads() []*Request {
	running := make([]*Request, 0, d.MaxActiveDownloads)
	for _, req := range d.Downloads.Queue {
		if req.Status == Downloading && req.Response != nil {
			running = append(running, req)
		}
	}
	return running
}

// syncDownloads reconciles every queued request with its status: it starts
// runnable requests while slots are free, cancels the transfers of stopped,
// paused and canceled requests, records transfer errors, and finally moves
// completed and canceled requests to the history.
func (d *Downloader) syncDownloads() {
	sort.Stable(d.Downloads)

	running := len(d.getRunningDownloads())
	start := func(i int, req *Request) {
		if running >= d.MaxActiveDownloads {
			return
		}
		d.startDownload(i)
		if req.Status == Downloading && req.Response != nil {
			running++
		}
	}

	for i, req := range d.Downloads.Queue {
		switch req.Status {
		case Queued:
			start(i, req)

		case Downloading:
			if req.Response == nil {
				start(i, req)
			}

		case Stopped, Paused, Canceled:
			if req.Response != nil {
				err := req.Response.Cancel()
				if err != nil && !errors.Is(err, context.Canceled) {
					req.setError(err)
				}
				req.Response = nil
			}

		case Error:
			if req.Response != nil && req.Err == nil {
				// Cancel first so the transfer is finished and Err cannot
				// block. Prefer the transfer's own error; fall back to a
				// real cancellation failure.
				cancelErr := req.Response.Cancel()
				err := req.Response.Err()
				if err == nil && cancelErr != nil && !errors.Is(cancelErr, context.Canceled) {
					err = cancelErr
				}
				req.setError(err)
			}
		}
	}

	// Clean completed/canceled downloads
	for i := 0; i < d.Downloads.Len(); i++ {
		if s := d.Downloads.Queue[i].Status; s == Complete || s == Canceled {
			d.History.Queue = append(d.History.Queue, d.Downloads.Pop(i))
			i-- // the next element has shifted into slot i
		}
	}
	d.Downloads.updateStatus()
}

// requestCompleted finalises a request whose transfer goroutine has reported
// in. On success the file is moved from TempPath to FilePath and the request
// moves from the queue to the history; on failure the error is recorded and
// the request stays queued. Notifications for a transfer that has since been
// detached (stop/cancel) or replaced (restart) are ignored.
func (d *Downloader) requestCompleted(r *Request) {
	if r.Response == nil || !r.Response.IsComplete() {
		log.Println("ignoring stale completion for", r.URL)
		return
	}
	if err := r.Response.Err(); err != nil {
		r.setError(err)
		log.Println("fucking error:", r.Err)
		return
	}

	log.Println(r.TempPath, "!=", r.FilePath)
	if r.TempPath != r.FilePath {
		log.Println("renaming download to the completed dir")
		if err := os.Rename(r.TempPath, r.FilePath); err != nil {
			r.setError(err)
			log.Printf("renaming %q to %q: %v", r.TempPath, r.FilePath, err)
			return
		}
	}

	log.Println("removing from downloads")
	d.Downloads.remove(r.URL)
	r.Status = Complete
	r.Completed = time.Now()
	d.History.Queue = append(d.History.Queue, r)
}

// download is the main loop: it syncs the queue every ten seconds, adds
// incoming requests and finalises finished transfers. It never returns.
func (d *Downloader) download() {
	// A ticker (rather than time.After inside the select) keeps the sync
	// period fixed even when other events arrive more often than every 10s.
	ticker := time.NewTicker(10 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			d.syncDownloads()

		case r := <-d.NewRequest:
			r.Progress = "" // not allowed when adding
			d.addRequest(&r)
			if d.OnAdd != nil {
				d.OnAdd(d, r)
			}

		case r := <-d.requestDone:
			log.Println("finishing request for", r.URL)
			d.requestCompleted(r)
			if d.OnComplete != nil {
				d.OnComplete(d, *r)
			}
		}
	}
}

// insertCookies creates the request's cookie jar if needed and loads every
// entry of r.Cookies into it, keyed by the cookie's own domain and path and
// the scheme of r.URL (empty if the URL does not parse).
func (r *Request) insertCookies() {
	if r.Jar == nil {
		r.Jar = newCookieJar()
	}
	var scheme string
	if u, err := url.Parse(r.URL); err == nil {
		scheme = u.Scheme
	}
	for i, v := range r.Cookies {
		target := &url.URL{
			Scheme: scheme,
			Path:   v.Path,
			Host:   v.Domain,
		}
		r.Jar.SetCookies(target, []*http.Cookie{&r.Cookies[i]})
	}
}

// initStatus registers the bundled static assets and the two HTML pages
// ("/" for the queue, "/history" for the history) on mux.
func (d *Downloader) initStatus(mux *http.ServeMux) {
	assets := []string{
		"bootstrap-table-auto-refresh.min.js",
		"bootstrap-table-sticky-header.css",
		"bootstrap-table-sticky-header.min.js",
		"bootstrap-table.min.css",
		"bootstrap-table.min.js",
		"bootstrap.min.css",
		"bootstrap.min.js",
		"fontawesome.css",
		"jquery-3.2.1.min.js",
		"popper.min.js",
		"fa-solid-900.woff2",
		"downloads.js",
		"favicon.ico",
	}
	for _, fn := range assets {
		mux.Handle("/"+fn, bundled.HTTPHandlerFunc(fn))
	}
	mux.HandleFunc("/", d.httpQueue(false))
	mux.HandleFunc("/history", d.httpQueue(true))
}

// httpQueue parses the bundled templates once and returns a handler that
// renders the queue page. The page is rendered into a buffer first so that a
// template error produces a clean 500 instead of a half-written page.
func (d *Downloader) httpQueue(history bool) func(w http.ResponseWriter, r *http.Request) {
	commonTmpls := template.New("root")
	commonTmpls = template.Must(commonTmpls.New("header").Parse(bundled.Asset("header.tmpl")))
	commonTmpls = template.Must(commonTmpls.New("footer").Parse(bundled.Asset("footer.tmpl")))
	queueTmpl := template.Must(template.Must(commonTmpls.Clone()).New("queueTmpl").Parse(bundled.Asset("queue.tmpl")))

	return func(w http.ResponseWriter, r *http.Request) {
		var buf bytes.Buffer
		d.Downloads.updateStatus()
		data := struct {
			Queue     []*Request
			History   []*Request
			Hostname  string
			IsHistory bool
		}{
			Queue:     d.Downloads.Queue,
			History:   d.History.Queue,
			Hostname:  "gloader",
			IsHistory: history,
		}
		if err := queueTmpl.Execute(&buf, data); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		io.Copy(w, &buf)
	}
}