dae/control/netns_utils.go

400 lines
11 KiB
Go
Raw Normal View History

package control
import (
"fmt"
"net"
"os"
"path"
"runtime"
"sync"
"sync/atomic"
"github.com/daeuniverse/dae/common/consts"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)
// Names of the network namespace and the veth pair managed by DaeNetns.
const (
// NsName is the name of the named network namespace created by setupNetns
// and removed (from /run/netns) by DeleteNamedNetns.
NsName = "daens"
// HostVethName is the veth end that is created and brought up in the host namespace.
HostVethName = "dae0"
// NsVethName is the veth peer that setupNetns moves into the NsName namespace.
NsVethName = "dae0peer"
)
// Package-level singleton state.
var (
// daeNetns is the shared instance returned by GetDaeNetns; it is nil until
// InitDaeNetns has been called.
daeNetns *DaeNetns
// once guards the single allocation of daeNetns in InitDaeNetns.
once sync.Once
)
// DaeNetns holds the state for the dedicated "daens" network namespace and
// the veth pair that connects it to the host namespace.
type DaeNetns struct {
// log receives the trace and warning messages emitted during setup.
log *logrus.Logger
// setupDone is set to true once setup() has completed without error.
setupDone atomic.Bool
// mu is held by Setup while setup() runs.
mu sync.Mutex
// dae0 is the link looked up as HostVethName; dae0peer is the link looked
// up as NsVethName (re-fetched after it has been moved into daens).
dae0, dae0peer netlink.Link
// hostNs is the namespace handle captured by netns.Get() in setup();
// daeNs is the handle returned by netns.NewNamed(NsName).
hostNs, daeNs netns.NsHandle
}
// InitDaeNetns allocates the package-level DaeNetns on the first call and
// installs log as its logger. The instance is created only once; the logger
// is replaced on every call.
func InitDaeNetns(log *logrus.Logger) {
	once.Do(func() { daeNetns = new(DaeNetns) })
	daeNetns.log = log
}
// GetDaeNetns returns the package-level DaeNetns instance. The result is nil
// if InitDaeNetns has not been called yet.
func GetDaeNetns() *DaeNetns {
return daeNetns
}
// NetnsID asks netlink for the namespace ID that corresponds to the daens
// namespace handle and returns it together with any lookup error.
func (ns *DaeNetns) NetnsID() (int, error) {
	fd := int(ns.daeNs)
	return netlink.GetNetNsIdByFd(fd)
}
// Dae0 returns the stored link for the host-side veth (HostVethName). The
// field is populated by setupVeth; before that the zero value is returned.
func (ns *DaeNetns) Dae0() netlink.Link {
return ns.dae0
}
// Dae0Peer returns the stored link for the veth peer (NsVethName). The field
// is populated by setupVeth and re-fetched by setupNetns after the peer has
// been moved into daens; before that the zero value is returned.
func (ns *DaeNetns) Dae0Peer() netlink.Link {
return ns.dae0peer
}
// Setup performs the one-time namespace setup. Calls made after a successful
// setup return nil immediately; a failed setup is not recorded, so a later
// call runs setup() again.
func (ns *DaeNetns) Setup() (err error) {
	// Lock-free check: nothing to do once a previous call has succeeded.
	if ns.setupDone.Load() {
		return nil
	}

	ns.mu.Lock()
	defer ns.mu.Unlock()

	// Check again under the lock: another caller may have finished meanwhile.
	if !ns.setupDone.Load() {
		if err = ns.setup(); err == nil {
			ns.setupDone.Store(true)
		}
	}
	return err
}
// Close removes the named namespace and the host-side veth link. Both
// removals are attempted regardless of each other's outcome, their errors are
// discarded, and the returned error is always nil.
func (ns *DaeNetns) Close() (err error) {
	_ = DeleteNamedNetns(NsName)
	_ = DeleteLink(HostVethName)
	return nil
}
// With runs f while the calling goroutine's OS thread is switched into the
// dae network namespace, then switches the thread back to the host namespace.
//
// The namespace is set up on first use via ns.Setup. The goroutine is locked
// to its OS thread for the duration of the call because the namespace switch
// made by netns.Set applies to the current thread.
//
// It returns a wrapped error if setup fails, if the switch into daens fails,
// or if f returns an error. If f succeeds but switching back to the host
// namespace fails, that failure is returned instead of being dropped.
func (ns *DaeNetns) With(f func() error) (err error) {
	// Set up the receiver itself rather than the package-level instance, so
	// that ns.daeNs / ns.hostNs used below are guaranteed to be initialized.
	if err = ns.Setup(); err != nil {
		return fmt.Errorf("failed to setup dae netns: %w", err)
	}

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	if err = netns.Set(ns.daeNs); err != nil {
		return fmt.Errorf("failed to switch to daens: %w", err)
	}
	defer func() {
		// Report a failed restore unless an earlier error is already being
		// returned; the earlier error is the more useful one to the caller.
		if restoreErr := netns.Set(ns.hostNs); restoreErr != nil && err == nil {
			err = fmt.Errorf("failed to switch back to host netns: %w", restoreErr)
		}
	}()

	if err = f(); err != nil {
		return fmt.Errorf("failed to run func in dae netns: %w", err)
	}
	return nil
}
// setup captures the current (host) namespace handle and then runs every
// setup step in order, stopping at the first one that fails. The goroutine is
// locked to its OS thread for the duration, and the thread is switched back
// to the host namespace on return.
func (ns *DaeNetns) setup() (err error) {
	ns.log.Trace("setting up dae netns")

	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	ns.hostNs, err = netns.Get()
	if err != nil {
		return fmt.Errorf("failed to get host netns: %v", err)
	}
	defer netns.Set(ns.hostNs)

	// Order matters: later steps use the links and namespace created earlier.
	steps := []func() error{
		ns.setupVeth,
		ns.setupNetns,
		ns.setupSysctl,
		ns.setupIPv4Datapath,
		ns.setupIPv6Datapath,
		ns.setupRoutingPolicy,
	}
	for _, step := range steps {
		if err = step(); err != nil {
			return err
		}
	}
	return nil
}
// setupRoutingPolicy switches into daens and installs, for IPv4 and IPv6:
//
//	ip route add local default dev lo table 2023
//	ip rule add fwmark <TproxyMark>/<TproxyMark> table 2023
//
// An IPv4 failure aborts with a wrapped error. An IPv6 failure is only logged
// as a warning and setup continues. The thread is switched back to the host
// namespace on return.
func (ns *DaeNetns) setupRoutingPolicy() (err error) {
	if err = netns.Set(ns.daeNs); err != nil {
		return fmt.Errorf("failed to switch to daens: %v", err)
	}
	defer netns.Set(ns.hostNs)

	const table = 2023

	// localDefault builds a "local default dev lo" route for the address
	// family whose addresses are addrLen bytes long.
	localDefault := func(addrLen int) netlink.Route {
		return netlink.Route{
			Scope:     unix.RT_SCOPE_HOST,
			LinkIndex: consts.LoopbackIfIndex,
			Dst: &net.IPNet{
				IP:   make(net.IP, addrLen),
				Mask: net.CIDRMask(0, 8*addrLen),
			},
			Table: table,
			Type:  unix.RTN_LOCAL,
		}
	}
	for _, addrLen := range []int{net.IPv4len, net.IPv6len} {
		route := localDefault(addrLen)
		if err = netlink.RouteAdd(&route); err == nil {
			continue
		}
		if addrLen == net.IPv6len {
			// IPv6 is optional; keep going without it.
			ns.log.Warnln("IpRouteAdd: Bad IPv6 support. Perhaps your machine disabled IPv6.")
			continue
		}
		return fmt.Errorf("IpRouteAdd: %w", err)
	}

	// fwmarkRule builds the rule that sends packets carrying the tproxy mark
	// to the table above, for the given address family.
	fwmarkRule := func(family int) netlink.Rule {
		return netlink.Rule{
			SuppressIfgroup:   -1,
			SuppressPrefixlen: -1,
			Priority:          -1,
			Goto:              -1,
			Flow:              -1,
			Family:            family,
			Table:             table,
			Mark:              int(consts.TproxyMark),
			Mask:              int(consts.TproxyMark),
		}
	}
	for _, family := range []int{unix.AF_INET, unix.AF_INET6} {
		rule := fwmarkRule(family)
		if err = netlink.RuleAdd(&rule); err == nil {
			continue
		}
		if family == unix.AF_INET6 {
			// IPv6 is optional; keep going without it.
			ns.log.Warnln("IpRuleAdd: Bad IPv6 support. Perhaps your machine disabled IPv6 (need CONFIG_IPV6_MULTIPLE_TABLES).")
			continue
		}
		return fmt.Errorf("IpRuleAdd: %w", err)
	}
	return nil
}
// setupVeth (re)creates the dae0 <-> dae0peer veth pair in the current
// namespace, records both links on ns and brings dae0 up.
func (ns *DaeNetns) setupVeth() (err error) {
	// Drop any existing dae0 first; the result of the removal is ignored.
	_ = DeleteLink(HostVethName)

	// ip l a dae0 type veth peer name dae0peer
	pair := &netlink.Veth{
		LinkAttrs: netlink.LinkAttrs{
			Name:   HostVethName,
			TxQLen: 1000,
		},
		PeerName: NsVethName,
	}
	if err = netlink.LinkAdd(pair); err != nil {
		return fmt.Errorf("failed to add veth pair: %v", err)
	}

	ns.dae0, err = netlink.LinkByName(HostVethName)
	if err != nil {
		return fmt.Errorf("failed to get link dae0: %v", err)
	}
	ns.dae0peer, err = netlink.LinkByName(NsVethName)
	if err != nil {
		return fmt.Errorf("failed to get link dae0peer: %v", err)
	}

	// ip l s dae0 up
	if err = netlink.LinkSetUp(ns.dae0); err != nil {
		return fmt.Errorf("failed to set link dae0 up: %v", err)
	}
	return nil
}
// setupNetns (re)creates the named namespace, moves dae0peer into it, and
// brings dae0peer and lo up inside it. The thread is switched back to the
// host namespace on return.
func (ns *DaeNetns) setupNetns() (err error) {
	// Drop any existing namespace of that name; the result is ignored.
	_ = DeleteNamedNetns(NsName)

	// ip netns a daens
	ns.daeNs, err = netns.NewNamed(NsName)
	if err != nil {
		return fmt.Errorf("failed to create netns: %v", err)
	}

	// NewNamed() leaves the thread inside the new namespace; go back to the
	// host namespace, where dae0peer still lives, to move it.
	if err = netns.Set(ns.hostNs); err != nil {
		return fmt.Errorf("failed to switch to host netns: %v", err)
	}

	// ip l s dae0peer netns daens
	if err = netlink.LinkSetNsFd(ns.dae0peer, int(ns.daeNs)); err != nil {
		return fmt.Errorf("failed to move dae0peer to daens: %v", err)
	}

	// Everything below runs inside daens.
	if err = netns.Set(ns.daeNs); err != nil {
		return fmt.Errorf("failed to switch to daens: %v", err)
	}
	defer netns.Set(ns.hostNs)

	// (ip net e daens) ip l s dae0peer up
	if err = netlink.LinkSetUp(ns.dae0peer); err != nil {
		return fmt.Errorf("failed to set link dae0peer up: %v", err)
	}

	// Look dae0peer up again so the stored link carries the latest mac address.
	ns.dae0peer, err = netlink.LinkByName(NsVethName)
	if err != nil {
		return fmt.Errorf("failed to get link dae0peer: %v", err)
	}

	var loopback netlink.Link
	loopback, err = netlink.LinkByName("lo")
	if err != nil {
		return fmt.Errorf("failed to get link lo: %v", err)
	}

	// (ip net e daens) ip l s lo up
	if err = netlink.LinkSetUp(loopback); err != nil {
		return fmt.Errorf("failed to set link lo up: %v", err)
	}
	return nil
}
// setupSysctl applies the sysctl settings for dae0 in the current namespace
// and then, inside daens, tries to enable the early-demux settings. Failures
// of the dae0 settings abort setup; the remaining settings are best-effort.
// The thread is switched back to the host namespace on return.
func (ns *DaeNetns) setupSysctl() (err error) {
	disableIPv6Key := fmt.Sprintf("net.ipv6.conf.%s.disable_ipv6", HostVethName)
	forwardingKey := fmt.Sprintf("net.ipv6.conf.%s.forwarding", HostVethName)

	// sysctl net.ipv6.conf.dae0.disable_ipv6=0
	if err = sysctl.Set(disableIPv6Key, "0", true); err != nil {
		return fmt.Errorf("failed to set disable_ipv6 for dae0: %v", err)
	}
	// sysctl net.ipv6.conf.dae0.forwarding=1
	if err = sysctl.Set(forwardingKey, "1", true); err != nil {
		return fmt.Errorf("failed to set forwarding for dae0: %v", err)
	}
	// sysctl net.ipv6.conf.all.forwarding=1
	SetForwarding("all", "1")

	if err = netns.Set(ns.daeNs); err != nil {
		return fmt.Errorf("failed to switch to daens: %v", err)
	}
	defer netns.Set(ns.hostNs)

	// *_early_demux is not mandatory, but enabling it is recommended for
	// better performance, so errors here are ignored.
	for _, key := range []string{
		"net.ipv4.tcp_early_demux",
		"net.ipv4.ip_early_demux",
	} {
		_ = sysctl.Set(key, "1", false)
	}
	return nil
}
// setupIPv4Datapath configures IPv4 inside daens: it assigns 169.254.0.11 to
// dae0peer, adds a link-scope route to 169.254.0.1, adds a default route via
// 169.254.0.1, and installs a permanent neighbor entry that maps 169.254.0.1
// to dae0's hardware address. The thread is switched back to the host
// namespace on return.
//
// It returns a wrapped error from the first step that fails.
func (ns *DaeNetns) setupIPv4Datapath() (err error) {
	const gatewayV4 = "169.254.0.1"

	if err = netns.Set(ns.daeNs); err != nil {
		return fmt.Errorf("failed to switch to daens: %w", err)
	}
	defer netns.Set(ns.hostNs)

	// (ip net e daens) ip a a 169.254.0.11 dev dae0peer
	// Although transparent UDP socket doesn't use this IP, it's still needed to make proper L3 header
	ip, ipNet, err := net.ParseCIDR("169.254.0.11/32")
	// The error must be checked before ipNet is touched: ParseCIDR returns a
	// nil *net.IPNet on failure.
	if err != nil {
		return fmt.Errorf("failed to parse ip 169.254.0.11: %w", err)
	}
	ipNet.IP = ip
	if err = netlink.AddrAdd(ns.dae0peer, &netlink.Addr{IPNet: ipNet}); err != nil {
		return fmt.Errorf("failed to add v4 addr to dae0peer: %w", err)
	}

	// (ip net e daens) ip r a 169.254.0.1 dev dae0peer
	// 169.254.0.1 is the link-local address used for ARP caching
	if err = netlink.RouteAdd(&netlink.Route{
		LinkIndex: ns.dae0peer.Attrs().Index,
		Dst:       &net.IPNet{IP: net.ParseIP(gatewayV4), Mask: net.CIDRMask(32, 32)},
		Gw:        nil,
		Scope:     netlink.SCOPE_LINK,
	}); err != nil {
		return fmt.Errorf("failed to add v4 route1 to dae0peer: %w", err)
	}

	// (ip net e daens) ip r a default via 169.254.0.1 dev dae0peer
	if err = netlink.RouteAdd(&netlink.Route{
		LinkIndex: ns.dae0peer.Attrs().Index,
		Dst:       &net.IPNet{IP: net.IPv4(0, 0, 0, 0), Mask: net.CIDRMask(0, 32)},
		Gw:        net.ParseIP(gatewayV4),
	}); err != nil {
		return fmt.Errorf("failed to add v4 route2 to dae0peer: %w", err)
	}

	// (ip net e daens) ip n r 169.254.0.1 dev dae0peer lladdr $mac_dae0 nud permanent
	if err = netlink.NeighSet(&netlink.Neigh{
		IP:           net.ParseIP(gatewayV4),
		HardwareAddr: ns.dae0.Attrs().HardwareAddr,
		LinkIndex:    ns.dae0peer.Attrs().Index,
		State:        netlink.NUD_PERMANENT,
	}); err != nil {
		return fmt.Errorf("failed to add neigh to dae0peer: %w", err)
	}
	return nil
}
// setupIPv6Datapath assigns fe80::ecee:eeff:feee:eeee/128 to dae0 in the
// current namespace, then switches into daens to add a default route via that
// address on dae0peer and a permanent neighbor entry mapping it to dae0's
// hardware address. The thread is switched back to the host namespace on
// return.
func (ns *DaeNetns) setupIPv6Datapath() (err error) {
	// The link-local address used for L2 NDP addressing.
	const gatewayV6 = "fe80::ecee:eeff:feee:eeee"

	// ip -6 a a fe80::ecee:eeff:feee:eeee/128 dev dae0 scope link
	hostAddr := &netlink.Addr{
		IPNet: &net.IPNet{
			IP:   net.ParseIP(gatewayV6),
			Mask: net.CIDRMask(128, 128),
		},
	}
	if err = netlink.AddrAdd(ns.dae0, hostAddr); err != nil {
		return fmt.Errorf("failed to add v6 addr to dae0: %v", err)
	}

	if err = netns.Set(ns.daeNs); err != nil {
		return fmt.Errorf("failed to switch to daens: %v", err)
	}
	defer netns.Set(ns.hostNs)

	// (ip net e daens) ip -6 r a default via fe80::ecee:eeff:feee:eeee dev dae0peer
	defaultRoute := &netlink.Route{
		LinkIndex: ns.dae0peer.Attrs().Index,
		Dst:       &net.IPNet{IP: net.IPv6zero, Mask: net.CIDRMask(0, 128)},
		Gw:        net.ParseIP(gatewayV6),
	}
	if err = netlink.RouteAdd(defaultRoute); err != nil {
		return fmt.Errorf("failed to add v6 route to dae0peer: %v", err)
	}

	// (ip net e daens) ip n r fe80::ecee:eeff:feee:eeee dev dae0peer lladdr $mac_dae0 nud permanent
	gatewayNeigh := &netlink.Neigh{
		IP:           net.ParseIP(gatewayV6),
		HardwareAddr: ns.dae0.Attrs().HardwareAddr,
		LinkIndex:    ns.dae0peer.Attrs().Index,
		State:        netlink.NUD_PERMANENT,
	}
	if err = netlink.NeighSet(gatewayNeigh); err != nil {
		return fmt.Errorf("failed to add neigh to dae0peer: %v", err)
	}
	return nil
}
// DeleteNamedNetns detaches whatever is mounted at /run/netns/<name> and
// removes that path. The unmount result is ignored; the returned error is the
// one from removing the path.
func DeleteNamedNetns(name string) error {
	mountPoint := path.Join("/run/netns", name)
	_ = unix.Unmount(mountPoint, unix.MNT_DETACH|unix.MNT_FORCE)
	return os.Remove(mountPoint)
}
// DeleteLink looks up the link called name in the current namespace and
// deletes it. It returns the lookup error if the link cannot be found,
// otherwise the result of the deletion.
func DeleteLink(name string) error {
	target, lookupErr := netlink.LinkByName(name)
	if lookupErr != nil {
		return lookupErr
	}
	return netlink.LinkDel(target)
}