fix: roll back configuration if reload fails

This commit is contained in:
mzz2017 2023-03-02 22:25:51 +08:00
parent 0433f8d5da
commit c15c0ee410
3 changed files with 54 additions and 12 deletions

View File

@ -2,6 +2,7 @@ package cmd
import (
"fmt"
"github.com/mohae/deepcopy"
"github.com/okzk/sdnotify"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
@ -120,33 +121,46 @@ loop:
// Load new config.
log.Warnln("[Reload] Load new config")
conf, includes, err := readConfig(cfgFile)
newConf, includes, err := readConfig(cfgFile)
if err != nil {
log.WithFields(logrus.Fields{
"err": err,
}).Errorln("Failed to reload")
}).Errorln("[Reload] Failed to reload")
sdnotify.Ready()
continue
}
log.Infof("Include config files: [%v]", strings.Join(includes, ", "))
// New logger.
log = logger.NewLogger(conf.Global.LogLevel, disableTimestamp)
log = logger.NewLogger(newConf.Global.LogLevel, disableTimestamp)
logrus.SetLevel(log.Level)
// New control plane.
obj := c.EjectBpf()
log.Warnln("[Reload] Load new control plane")
newC, err := newControlPlane(log, obj, conf)
newC, err := newControlPlane(log, obj, newConf)
if err != nil {
log.WithFields(logrus.Fields{
"err": err,
}).Errorln("Failed to reload")
sdnotify.Ready()
continue
}).Errorln("[Reload] Failed to reload; try to roll back configuration")
// Load last config back.
newC, err = newControlPlane(log, obj, conf)
if err != nil {
sdnotify.Stopping()
obj.Close()
c.Close()
log.WithFields(logrus.Fields{
"err": err,
}).Fatalln("[Reload] Failed to roll back configuration")
}
log.Warnln("[Reload] Last reload failed; rolled back configuration")
} else {
log.Warnln("[Reload] Stopped old control plane")
}
log.Warnln("[Reload] Stopped old control plane")
// Inject bpf objects into the new control plane life-cycle.
newC.InjectBpf(obj)
c.Close()
c = newC
conf = newConf
reloading = true
default:
break loop
@ -160,6 +174,9 @@ loop:
}
func newControlPlane(log *logrus.Logger, bpf interface{}, conf *config.Config) (c *control.ControlPlane, err error) {
// Deep copy to prevent modification.
conf = deepcopy.Copy(conf).(*config.Config)
/// Get tag -> nodeList mapping.
tagToNodeList := map[string][]string{}
if len(conf.Node) > 0 {

View File

@ -155,6 +155,8 @@ func NewControlPlane(
)
defer func() {
if err != nil {
// Flip back.
core.Flip()
_ = core.Close()
}
}()
@ -358,6 +360,9 @@ func NewControlPlane(
// EjectBpf delegates to the core's EjectBpf and returns the *bpfObjects it
// hands back.
func (c *ControlPlane) EjectBpf() *bpfObjects {
	ejected := c.core.EjectBpf()
	return ejected
}
// InjectBpf passes bpf straight through to the core's InjectBpf.
func (c *ControlPlane) InjectBpf(bpf *bpfObjects) {
	core := c.core
	core.InjectBpf(bpf)
}
func (c *ControlPlane) dnsUpstreamReadyCallback(raw *url.URL, dnsUpstream *dns.Upstream) (err error) {
// Waiting for ready.

View File

@ -35,8 +35,9 @@ type controlPlaneCore struct {
kernelVersion *internal.Version
flip int
isReload bool
flip int
isReload bool
bpfEjected bool
}
func newControlPlaneCore(log *logrus.Logger,
@ -48,9 +49,13 @@ func newControlPlaneCore(log *logrus.Logger,
if isReload {
coreFlip = coreFlip&1 ^ 1
}
var deferFuncs []func() error
if !isReload {
deferFuncs = append(deferFuncs, bpf.Close)
}
return &controlPlaneCore{
log: log,
deferFuncs: []func() error{bpf.Close},
deferFuncs: deferFuncs,
bpf: bpf,
outboundId2Name: outboundId2Name,
kernelVersion: kernelVersion,
@ -59,6 +64,9 @@ func newControlPlaneCore(log *logrus.Logger,
}
}
// Flip toggles the package-level coreFlip: it keeps only the lowest bit of the
// current value and inverts that bit, so the result is always 0 or 1. The
// receiver's own fields are not read or written.
func (c *controlPlaneCore) Flip() {
	lowBit := coreFlip & 1
	coreFlip = lowBit ^ 1
}
func (c *controlPlaneCore) Close() (err error) {
// Invoke defer funcs in reverse order.
for i := len(c.deferFuncs) - 1; i >= 0; i-- {
@ -513,6 +521,18 @@ func (c *controlPlaneCore) BatchUpdateDomainRouting(cache *DnsCache) error {
// EjectBpf removes the bpf objects from this core's close life-cycle and
// returns them, so that a later Close of the core does not close them.
//
// The leading entry of deferFuncs is dropped only when the core was not created
// for a reload and has not been ejected before: newControlPlaneCore registers
// bpf.Close as the first defer func only for non-reload cores, so slicing
// unconditionally would discard an unrelated defer func (or panic on an empty
// slice) for reload cores, and would discard a second entry on repeated calls.
func (c *controlPlaneCore) EjectBpf() *bpfObjects {
	if !c.bpfEjected && !c.isReload {
		c.deferFuncs = c.deferFuncs[1:]
	}
	// Remember the ejection so repeated calls are no-ops on deferFuncs and so
	// InjectBpf knows there is something to put back.
	c.bpfEjected = true
	return c.bpf
}
// InjectBpf puts bpf back into this core's close life-cycle after a previous
// EjectBpf. If the core is currently marked as ejected, the mark is cleared and
// bpf.Close is prepended to deferFuncs; since Close walks deferFuncs in reverse
// order, bpf.Close then runs last. If the core is not marked as ejected the
// call does nothing.
//
// Only deferFuncs and bpfEjected are updated; the core's own bpf field is left
// untouched.
func (c *controlPlaneCore) InjectBpf(bpf *bpfObjects) {
	if c.bpfEjected {
		c.bpfEjected = false
		c.deferFuncs = append([]func() error{bpf.Close}, c.deferFuncs...)
	}
}