// healthy queries the health endpoint at http://localhost:7733/health.json
// and reports the outcome as an error value.
//
// It returns nil only when the request completes within 5 seconds, the
// response status is 200 OK, the body decodes as JSON, and the decoded
// "first_error" field is empty. Otherwise it returns, in order of checking:
// the request/transport error, an error describing the unexpected HTTP
// status (including the response body text), the body read error, the JSON
// decoding error, or an error whose message is the reported first_error.
func healthy() error {
	req, err := http.NewRequest("GET", "http://localhost:7733/health.json", nil)
	if err != nil {
		return err
	}
	// Bound the whole exchange (connect, headers and body read) so that a
	// hung endpoint cannot block the caller indefinitely.
	ctx, canc := context.WithTimeout(context.Background(), 5*time.Second)
	defer canc()
	req = req.WithContext(ctx)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	// The caller owns the response body and must close it on every return
	// path; otherwise the underlying connection is not released.
	defer resp.Body.Close()

	if got, want := resp.StatusCode, http.StatusOK; got != want {
		// Best effort: the body is only used to enrich the error message,
		// so a read failure here is deliberately ignored.
		b, _ := ioutil.ReadAll(resp.Body)
		return fmt.Errorf("%v: got HTTP %v (%s), want HTTP status %v",
			req.URL.String(),
			resp.Status,
			string(b),
			want)
	}
	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	var reply struct {
		FirstError string `json:"first_error"`
	}
	if err := json.Unmarshal(b, &reply); err != nil {
		return err
	}

	if reply.FirstError != "" {
		return errors.New(reply.FirstError)
	}

	return nil
}
c.Release(); err != nil { - return err + + unhealthyCycles := 0 + for { + select { + case <-time.After(time.Until(c.Config().RenewAfter)): + // fallthrough and renew the DHCP lease + continue ObtainOrRenew + + case <-time.After(1 * time.Minute): + if err := healthy(); err == nil { + unhealthyCycles = 0 + continue // wait another minute + } else { + unhealthyCycles++ + log.Printf("router unhealthy (cycle %d of 5): %v", unhealthyCycles, err) + if unhealthyCycles < 5 { + continue // wait until unhealthy for longer + } + // fallthrough + } + // Still not healthy? Drop DHCP lease and start from scratch. + log.Printf("unhealthy for 5 cycles, starting over without lease") + c.Ack = nil + + case <-usr2: + log.Printf("SIGUSR2 received, sending DHCPRELEASE") + if err := c.Release(); err != nil { + return err + } + // Ensure dhcp4 does start from scratch next time + // by deleting the DHCPACK file: + if err := os.Remove(ackFn); err != nil && !os.IsNotExist(err) { + return err + } + os.Exit(125) // quit supervision by gokrazy } - os.Exit(125) // quit supervision by gokrazy } } return c.Err() // permanent error