dhcp4: start from scratch after 5 minutes of continued unhealthiness

fixes #58
This commit is contained in:
Michael Stapelberg 2020-09-14 08:57:44 +02:00
parent 876f8e320f
commit fddfe80222

View File

@ -17,11 +17,14 @@
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io/ioutil"
"net"
"net/http"
"os"
"os/signal"
"path/filepath"
@ -45,6 +48,44 @@ var (
stateDir = flag.String("state_dir", "/perm/dhcp4", "directory in which to store lease data (wire/lease.json) and last ACK (wire/ack)")
)
// healthy queries the health endpoint at http://localhost:7733/health.json
// and reports whether it considers the system healthy.
//
// It returns nil when the endpoint answers with HTTP 200 and a JSON document
// whose "first_error" field is empty. Otherwise it returns a non-nil error:
//   - the transport error if the request cannot be completed within 5 seconds,
//   - a descriptive error containing the status and body for non-200 replies,
//   - the JSON decoding error if the body is not valid JSON,
//   - or an error carrying the reported "first_error" text.
func healthy() error {
	req, err := http.NewRequest("GET", "http://localhost:7733/health.json", nil)
	if err != nil {
		return err
	}
	// Bound the whole exchange (connect, headers and body) so that a hung
	// health endpoint cannot stall the caller indefinitely.
	ctx, canc := context.WithTimeout(context.Background(), 5*time.Second)
	defer canc()
	req = req.WithContext(ctx)
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	// The caller of Do must close the body on every return path; otherwise
	// the underlying connection (and its file descriptor) may be leaked,
	// e.g. when reading the body fails midway.
	defer resp.Body.Close()
	if got, want := resp.StatusCode, http.StatusOK; got != want {
		// Best effort: the body is only used to enrich the error message,
		// so a read failure here is deliberately ignored.
		b, _ := ioutil.ReadAll(resp.Body)
		return fmt.Errorf("%v: got HTTP %v (%s), want HTTP status %v",
			req.URL.String(),
			resp.Status,
			string(b),
			want)
	}
	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	// Only the first_error field is of interest; all other fields in the
	// reply are ignored by the decoder.
	var reply struct {
		FirstError string `json:"first_error"`
	}
	if err := json.Unmarshal(b, &reply); err != nil {
		return err
	}
	if reply.FirstError != "" {
		return errors.New(reply.FirstError)
	}
	return nil
}
func logic() error {
leasePath := filepath.Join(*stateDir, "wire/lease.json")
if err := os.MkdirAll(filepath.Dir(leasePath), 0755); err != nil {
@ -91,6 +132,7 @@ func logic() error {
Min: 10 * time.Second,
Max: 1 * time.Minute,
}
ObtainOrRenew:
for c.ObtainOrRenew() {
if err := c.Err(); err != nil {
dur := backoff.Duration()
@ -121,15 +163,42 @@ func logic() error {
if err := notify.Process("/user/netconfigd", syscall.SIGUSR1); err != nil {
log.Printf("notifying netconfig: %v", err)
}
select {
case <-time.After(time.Until(c.Config().RenewAfter)):
// fallthrough and renew the DHCP lease
case <-usr2:
log.Printf("SIGUSR2 received, sending DHCPRELEASE")
if err := c.Release(); err != nil {
return err
unhealthyCycles := 0
for {
select {
case <-time.After(time.Until(c.Config().RenewAfter)):
// fallthrough and renew the DHCP lease
continue ObtainOrRenew
case <-time.After(1 * time.Minute):
if err := healthy(); err == nil {
unhealthyCycles = 0
continue // wait another minute
} else {
unhealthyCycles++
log.Printf("router unhealthy (cycle %d of 5): %v", unhealthyCycles, err)
if unhealthyCycles < 5 {
continue // wait until unhealthy for longer
}
// fallthrough
}
// Still not healthy? Drop DHCP lease and start from scratch.
log.Printf("unhealthy for 5 cycles, starting over without lease")
c.Ack = nil
case <-usr2:
log.Printf("SIGUSR2 received, sending DHCPRELEASE")
if err := c.Release(); err != nil {
return err
}
// Ensure dhcp4 does start from scratch next time
// by deleting the DHCPACK file:
if err := os.Remove(ackFn); err != nil && !os.IsNotExist(err) {
return err
}
os.Exit(125) // quit supervision by gokrazy
}
os.Exit(125) // quit supervision by gokrazy
}
}
return c.Err() // permanent error