2024-01-11 20:33:16 +07:00
package control
import (
"fmt"
"net"
"os"
"path"
"runtime"
"sync"
"sync/atomic"
2024-03-01 17:27:02 +07:00
"github.com/daeuniverse/dae/common/consts"
2024-01-11 20:33:16 +07:00
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)
// Names of the kernel objects managed by this file.
const (
	// NsName is the name of the named network namespace that is created
	// (and deleted) under /run/netns.
	NsName = "daens"

	// HostVethName is the name of the veth end that is created and brought
	// up before the peer is moved into the namespace.
	HostVethName = "dae0"

	// NsVethName is the name of the veth peer that is moved into the
	// namespace named by NsName.
	NsVethName = "dae0peer"
)
var (
daeNetns * DaeNetns
2024-01-29 17:56:11 +07:00
once sync . Once
2024-01-11 20:33:16 +07:00
)
type DaeNetns struct {
2024-01-23 20:11:44 +07:00
log * logrus . Logger
2024-01-11 20:33:16 +07:00
setupDone atomic . Bool
mu sync . Mutex
dae0 , dae0peer netlink . Link
hostNs , daeNs netns . NsHandle
}
2024-01-23 20:11:44 +07:00
func InitDaeNetns ( log * logrus . Logger ) {
2024-01-29 17:56:11 +07:00
once . Do ( func ( ) {
daeNetns = & DaeNetns { }
} )
daeNetns . log = log
2024-01-11 20:33:16 +07:00
}
// GetDaeNetns returns the package-level DaeNetns. The result is nil until
// InitDaeNetns has been called.
func GetDaeNetns() *DaeNetns {
	return daeNetns
}
2024-03-01 17:27:02 +07:00
// NetnsID asks netlink for the namespace ID associated with the daens
// handle and returns it together with any lookup error.
func (ns *DaeNetns) NetnsID() (int, error) {
	fd := int(ns.daeNs)
	return netlink.GetNetNsIdByFd(fd)
}
// Dae0 returns the stored link for the dae0 veth end. The value is only
// populated after setupVeth has run.
func (ns *DaeNetns) Dae0() netlink.Link {
	return ns.dae0
}
// Dae0Peer returns the stored link for the dae0peer veth end. The value is
// only populated after setupVeth has run and is refreshed by setupNetns.
func (ns *DaeNetns) Dae0Peer() netlink.Link {
	return ns.dae0peer
}
2024-01-11 20:33:16 +07:00
// Setup runs the one-time namespace setup. The setupDone flag is checked
// once without the lock and once more after acquiring mu, so setup() is
// skipped once a previous call has succeeded. A failed setup leaves the
// flag unset, so a later call tries again.
//
// It returns the error from setup(), or nil when setup succeeded or had
// already been done.
func (ns *DaeNetns) Setup() (err error) {
	// Fast path: nothing to do, no need to take the lock.
	if ns.setupDone.Load() {
		return nil
	}

	ns.mu.Lock()
	defer ns.mu.Unlock()

	// Re-check under the lock: another caller may have finished meanwhile.
	if !ns.setupDone.Load() {
		if err = ns.setup(); err == nil {
			ns.setupDone.Store(true)
		}
	}
	return err
}
// Close removes the named namespace and the host-side veth link. Both
// deletions are best-effort: their errors are discarded and Close always
// returns nil.
//
// NOTE(review): setupDone is not reset and the hostNs/daeNs handles are not
// closed here — confirm whether Setup is expected to work again after Close.
func (ns *DaeNetns) Close() (err error) {
	_ = DeleteNamedNetns(NsName)
	_ = DeleteLink(HostVethName)
	return nil
}
// With runs f while the calling OS thread is switched into the daens
// network namespace, then switches the thread back to the host namespace.
//
// The namespace is set up on demand via ns.Setup(). The goroutine is locked
// to its OS thread for the duration of the call because the namespace switch
// applies to the thread, not to the goroutine.
//
// It returns a wrapped error when setup fails, when switching into daens
// fails, or when f itself returns an error; the underlying error stays
// reachable through errors.Is / errors.As.
func (ns *DaeNetns) With(f func() error) (err error) {
	// Use the receiver rather than the package-level instance so the
	// namespace that gets set up is the one whose handles are used below.
	if err = ns.Setup(); err != nil {
		return fmt.Errorf("failed to setup dae netns: %w", err)
	}

	runtime.LockOSThread()
	if err = netns.Set(ns.daeNs); err != nil {
		// The switch did not happen, so the thread is safe to release.
		runtime.UnlockOSThread()
		return fmt.Errorf("failed to switch to daens: %w", err)
	}
	defer func() {
		if restoreErr := netns.Set(ns.hostNs); restoreErr != nil {
			// The thread is still inside daens. Keep the goroutine locked to
			// it so the runtime does not hand this thread to unrelated
			// goroutines; the thread is terminated when the goroutine exits.
			ns.log.Errorf("failed to switch back to host netns: %v", restoreErr)
			return
		}
		runtime.UnlockOSThread()
	}()

	if err = f(); err != nil {
		return fmt.Errorf("failed to run func in dae netns: %w", err)
	}
	return nil
}
func ( ns * DaeNetns ) setup ( ) ( err error ) {
2024-01-23 20:11:44 +07:00
ns . log . Trace ( "setting up dae netns" )
2024-01-11 20:33:16 +07:00
runtime . LockOSThread ( )
defer runtime . UnlockOSThread ( )
if ns . hostNs , err = netns . Get ( ) ; err != nil {
return fmt . Errorf ( "failed to get host netns: %v" , err )
}
defer netns . Set ( ns . hostNs )
if err = ns . setupVeth ( ) ; err != nil {
return
}
2024-03-01 17:27:02 +07:00
if err = ns . setupNetns ( ) ; err != nil {
2024-01-11 20:33:16 +07:00
return
}
2024-03-01 17:27:02 +07:00
if err = ns . setupSysctl ( ) ; err != nil {
2024-01-11 20:33:16 +07:00
return
}
if err = ns . setupIPv4Datapath ( ) ; err != nil {
return
}
if err = ns . setupIPv6Datapath ( ) ; err != nil {
return
}
2024-03-01 17:27:02 +07:00
if err = ns . setupRoutingPolicy ( ) ; err != nil {
return
}
2024-01-11 20:33:16 +07:00
return
}
2024-03-01 17:27:02 +07:00
func ( ns * DaeNetns ) setupRoutingPolicy ( ) ( err error ) {
if err = netns . Set ( ns . daeNs ) ; err != nil {
return fmt . Errorf ( "failed to switch to daens: %v" , err )
}
defer netns . Set ( ns . hostNs )
/// Insert ip rule / ip route.
var table = 2023
/ * * ip table
ip route add local default dev lo table 2023
ip - 6 route add local default dev lo table 2023
* /
routes := [ ] netlink . Route { {
Scope : unix . RT_SCOPE_HOST ,
LinkIndex : consts . LoopbackIfIndex ,
Dst : & net . IPNet {
IP : [ ] byte { 0 , 0 , 0 , 0 } ,
Mask : net . CIDRMask ( 0 , 32 ) ,
} ,
Table : table ,
Type : unix . RTN_LOCAL ,
} , {
Scope : unix . RT_SCOPE_HOST ,
LinkIndex : consts . LoopbackIfIndex ,
Dst : & net . IPNet {
IP : [ ] byte { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
Mask : net . CIDRMask ( 0 , 128 ) ,
} ,
Table : table ,
Type : unix . RTN_LOCAL ,
} }
for _ , route := range routes {
if err = netlink . RouteAdd ( & route ) ; err != nil {
if len ( route . Dst . IP ) == net . IPv6len {
// ipv6
ns . log . Warnln ( "IpRouteAdd: Bad IPv6 support. Perhaps your machine disabled IPv6." )
continue
}
return fmt . Errorf ( "IpRouteAdd: %w" , err )
}
}
/ * * ip rule
ip rule add fwmark 0x8000000 / 0x8000000 table 2023
ip - 6 rule add fwmark 0x8000000 / 0x8000000 table 2023
* /
rules := [ ] netlink . Rule { {
SuppressIfgroup : - 1 ,
SuppressPrefixlen : - 1 ,
Priority : - 1 ,
Goto : - 1 ,
Flow : - 1 ,
Family : unix . AF_INET ,
Table : table ,
Mark : int ( consts . TproxyMark ) ,
Mask : int ( consts . TproxyMark ) ,
} , {
SuppressIfgroup : - 1 ,
SuppressPrefixlen : - 1 ,
Priority : - 1 ,
Goto : - 1 ,
Flow : - 1 ,
Family : unix . AF_INET6 ,
Table : table ,
Mark : int ( consts . TproxyMark ) ,
Mask : int ( consts . TproxyMark ) ,
} }
for _ , rule := range rules {
if err = netlink . RuleAdd ( & rule ) ; err != nil {
if rule . Family == unix . AF_INET6 {
// ipv6
ns . log . Warnln ( "IpRuleAdd: Bad IPv6 support. Perhaps your machine disabled IPv6 (need CONFIG_IPV6_MULTIPLE_TABLES)." )
continue
}
return fmt . Errorf ( "IpRuleAdd: %w" , err )
}
}
return nil
}
2024-01-11 20:33:16 +07:00
func ( ns * DaeNetns ) setupVeth ( ) ( err error ) {
// ip l a dae0 type veth peer name dae0peer
DeleteLink ( HostVethName )
if err = netlink . LinkAdd ( & netlink . Veth {
LinkAttrs : netlink . LinkAttrs {
2024-03-01 17:27:02 +07:00
Name : HostVethName ,
TxQLen : 1000 ,
2024-01-11 20:33:16 +07:00
} ,
PeerName : NsVethName ,
} ) ; err != nil {
return fmt . Errorf ( "failed to add veth pair: %v" , err )
}
if ns . dae0 , err = netlink . LinkByName ( HostVethName ) ; err != nil {
return fmt . Errorf ( "failed to get link dae0: %v" , err )
}
if ns . dae0peer , err = netlink . LinkByName ( NsVethName ) ; err != nil {
return fmt . Errorf ( "failed to get link dae0peer: %v" , err )
}
// ip l s dae0 up
if err = netlink . LinkSetUp ( ns . dae0 ) ; err != nil {
return fmt . Errorf ( "failed to set link dae0 up: %v" , err )
}
return
}
func ( ns * DaeNetns ) setupNetns ( ) ( err error ) {
// ip netns a daens
DeleteNamedNetns ( NsName )
ns . daeNs , err = netns . NewNamed ( NsName )
if err != nil {
return fmt . Errorf ( "failed to create netns: %v" , err )
}
// NewNamed() will switch to the new netns, switch back to host netns
if err = netns . Set ( ns . hostNs ) ; err != nil {
return fmt . Errorf ( "failed to switch to host netns: %v" , err )
}
// ip l s dae0peer netns daens
if err = netlink . LinkSetNsFd ( ns . dae0peer , int ( ns . daeNs ) ) ; err != nil {
return fmt . Errorf ( "failed to move dae0peer to daens: %v" , err )
}
if err = netns . Set ( ns . daeNs ) ; err != nil {
return fmt . Errorf ( "failed to switch to daens: %v" , err )
}
defer netns . Set ( ns . hostNs )
// (ip net e daens) ip l s dae0peer up
if err = netlink . LinkSetUp ( ns . dae0peer ) ; err != nil {
return fmt . Errorf ( "failed to set link dae0peer up: %v" , err )
}
2024-03-01 17:27:02 +07:00
// re-fetch dae0peer to make sure we have the latest mac address
if ns . dae0peer , err = netlink . LinkByName ( NsVethName ) ; err != nil {
return fmt . Errorf ( "failed to get link dae0peer: %v" , err )
}
lo , err := netlink . LinkByName ( "lo" )
if err != nil {
return fmt . Errorf ( "failed to get link lo: %v" , err )
}
// (ip net e daens) ip l s lo up
if err = netlink . LinkSetUp ( lo ) ; err != nil {
return fmt . Errorf ( "failed to set link lo up: %v" , err )
}
return
}
func ( ns * DaeNetns ) setupSysctl ( ) ( err error ) {
// sysctl net.ipv6.conf.dae0.disable_ipv6=0
2024-06-11 20:33:21 +07:00
if err = sysctl . Keyf ( "net.ipv6.conf.%s.disable_ipv6" , HostVethName ) . Set ( "0" , true ) ; err != nil {
2024-03-01 17:27:02 +07:00
return fmt . Errorf ( "failed to set disable_ipv6 for dae0: %v" , err )
}
// sysctl net.ipv6.conf.dae0.forwarding=1
2024-06-11 20:33:21 +07:00
if err = sysctl . Keyf ( "net.ipv6.conf.%s.forwarding" , HostVethName ) . Set ( "1" , true ) ; err != nil {
2024-03-01 17:27:02 +07:00
return fmt . Errorf ( "failed to set forwarding for dae0: %v" , err )
}
2024-03-08 22:28:40 +07:00
if err = netns . Set ( ns . daeNs ) ; err != nil {
return fmt . Errorf ( "failed to switch to daens: %v" , err )
}
defer netns . Set ( ns . hostNs )
2024-05-03 18:11:28 +07:00
// *_early_demux is not mandatory, but it's recommended to enable it for better performance
2024-06-11 20:33:21 +07:00
sysctl . Keyf ( "net.ipv4.tcp_early_demux" ) . Set ( "1" , false )
sysctl . Keyf ( "net.ipv4.ip_early_demux" ) . Set ( "1" , false )
2024-05-03 18:11:28 +07:00
// (ip net e daens) sysctl net.ipv4.conf.dae0peer.accept_local=1
// This is to prevent kernel from dropping skb due to "martian source" check: https://elixir.bootlin.com/linux/v6.6/source/net/ipv4/fib_frontend.c#L381
2024-06-11 20:33:21 +07:00
if err = sysctl . Keyf ( "net.ipv4.conf.%s.accept_local" , NsVethName ) . Set ( "1" , false ) ; err != nil {
2024-05-03 18:11:28 +07:00
return fmt . Errorf ( "failed to set accept_local for dae0peer: %v" , err )
}
2024-03-01 17:27:02 +07:00
return
}
func ( ns * DaeNetns ) setupIPv4Datapath ( ) ( err error ) {
if err = netns . Set ( ns . daeNs ) ; err != nil {
return fmt . Errorf ( "failed to switch to daens: %v" , err )
}
defer netns . Set ( ns . hostNs )
2024-01-11 20:33:16 +07:00
// (ip net e daens) ip a a 169.254.0.11 dev dae0peer
// Although transparent UDP socket doesn't use this IP, it's still needed to make proper L3 header
ip , ipNet , err := net . ParseCIDR ( "169.254.0.11/32" )
ipNet . IP = ip
if err != nil {
return fmt . Errorf ( "failed to parse ip 169.254.0.11: %v" , err )
}
if err = netlink . AddrAdd ( ns . dae0peer , & netlink . Addr { IPNet : ipNet } ) ; err != nil {
return fmt . Errorf ( "failed to add v4 addr to dae0peer: %v" , err )
}
// (ip net e daens) ip r a 169.254.0.1 dev dae0peer
// 169.254.0.1 is the link-local address used for ARP caching
if err = netlink . RouteAdd ( & netlink . Route {
LinkIndex : ns . dae0peer . Attrs ( ) . Index ,
Dst : & net . IPNet { IP : net . ParseIP ( "169.254.0.1" ) , Mask : net . CIDRMask ( 32 , 32 ) } ,
Gw : nil ,
Scope : netlink . SCOPE_LINK ,
} ) ; err != nil {
return fmt . Errorf ( "failed to add v4 route1 to dae0peer: %v" , err )
}
// (ip net e daens) ip r a default via 169.254.0.1 dev dae0peer
if err = netlink . RouteAdd ( & netlink . Route {
LinkIndex : ns . dae0peer . Attrs ( ) . Index ,
Dst : & net . IPNet { IP : net . IPv4 ( 0 , 0 , 0 , 0 ) , Mask : net . CIDRMask ( 0 , 32 ) } ,
Gw : net . ParseIP ( "169.254.0.1" ) ,
} ) ; err != nil {
return fmt . Errorf ( "failed to add v4 route2 to dae0peer: %v" , err )
}
// (ip net e daens) ip n r 169.254.0.1 dev dae0peer lladdr $mac_dae0 nud permanent
2024-03-01 17:27:02 +07:00
if err = netlink . NeighSet ( & netlink . Neigh {
IP : net . ParseIP ( "169.254.0.1" ) ,
HardwareAddr : ns . dae0 . Attrs ( ) . HardwareAddr ,
LinkIndex : ns . dae0peer . Attrs ( ) . Index ,
State : netlink . NUD_PERMANENT ,
} ) ; err != nil {
return fmt . Errorf ( "failed to add neigh to dae0peer: %v" , err )
}
2024-01-11 20:33:16 +07:00
return
}
func ( ns * DaeNetns ) setupIPv6Datapath ( ) ( err error ) {
// ip -6 a a fe80::ecee:eeff:feee:eeee/128 dev dae0 scope link
// fe80::ecee:eeff:feee:eeee/128 is the link-local address used for L2 NDP addressing
if err = netlink . AddrAdd ( ns . dae0 , & netlink . Addr {
IPNet : & net . IPNet {
IP : net . ParseIP ( "fe80::ecee:eeff:feee:eeee" ) ,
Mask : net . CIDRMask ( 128 , 128 ) ,
} ,
} ) ; err != nil {
return fmt . Errorf ( "failed to add v6 addr to dae0: %v" , err )
}
if err = netns . Set ( ns . daeNs ) ; err != nil {
return fmt . Errorf ( "failed to switch to daens: %v" , err )
}
defer netns . Set ( ns . hostNs )
// (ip net e daens) ip -6 r a default via fe80::ecee:eeff:feee:eeee dev dae0peer
if err = netlink . RouteAdd ( & netlink . Route {
LinkIndex : ns . dae0peer . Attrs ( ) . Index ,
Dst : & net . IPNet { IP : net . IPv6zero , Mask : net . CIDRMask ( 0 , 128 ) } ,
Gw : net . ParseIP ( "fe80::ecee:eeff:feee:eeee" ) ,
} ) ; err != nil {
return fmt . Errorf ( "failed to add v6 route to dae0peer: %v" , err )
}
2024-03-01 17:27:02 +07:00
// (ip net e daens) ip n r fe80::ecee:eeff:feee:eeee dev dae0peer lladdr $mac_dae0 nud permanent
2024-01-11 20:33:16 +07:00
if err = netlink . NeighSet ( & netlink . Neigh {
2024-03-01 17:27:02 +07:00
IP : net . ParseIP ( "fe80::ecee:eeff:feee:eeee" ) ,
2024-01-11 20:33:16 +07:00
HardwareAddr : ns . dae0 . Attrs ( ) . HardwareAddr ,
LinkIndex : ns . dae0peer . Attrs ( ) . Index ,
State : netlink . NUD_PERMANENT ,
} ) ; err != nil {
return fmt . Errorf ( "failed to add neigh to dae0peer: %v" , err )
}
return
}
// DeleteNamedNetns detaches the mount at /run/netns/<name> and removes the
// file. The unmount result is ignored; the error from removing the file is
// returned.
func DeleteNamedNetns(name string) error {
	nsPath := path.Join("/run/netns", name)
	_ = unix.Unmount(nsPath, unix.MNT_DETACH|unix.MNT_FORCE)
	return os.Remove(nsPath)
}
// DeleteLink looks up the link called name and deletes it. It returns the
// lookup error when the link cannot be found, otherwise the result of the
// deletion.
func DeleteLink(name string) error {
	link, err := netlink.LinkByName(name)
	if err != nil {
		return err
	}
	return netlink.LinkDel(link)
}