feat(bpf): implement stack bypass (#458)

Co-authored-by: Sumire (菫) <151038614+sumire88@users.noreply.github.com>
This commit is contained in:
/gray 2024-03-01 18:27:02 +08:00 committed by GitHub
parent bb3a07cd15
commit 6f1db5ee1f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 479 additions and 511 deletions

View File

@ -40,7 +40,7 @@ jobs:
strategy:
fail-fast: false
matrix:
kernel: [ '5.10-v0.3', '5.15-v0.3', '6.3-main', 'bpf-next-20231030.012704' ]
kernel: [ '5.10-20240201.165956', '5.15-20240201.165956', '6.1-20240201.165956', 'bpf-next-20240204.012837' ]
timeout-minutes: 10
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
@ -52,7 +52,7 @@ jobs:
path: dae
- name: Provision LVH VMs
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
test-name: dae-test
image-version: ${{ matrix.kernel }}
@ -66,7 +66,7 @@ jobs:
apt install -y unzip
- name: Setup network
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -77,7 +77,7 @@ jobs:
docker run -td --name dae --privileged --network dae -v /host:/host -v /sys:/sys ubuntu:22.04 bash
- name: Setup v2ray server
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -125,7 +125,7 @@ jobs:
echo '{"v":"2","ps":"test","add":"v2ray","port":"23333","id":"b004539e-0d7b-7996-c378-fb040e42de70","aid":"0","net":"tcp","tls":"","type":"none","path":"","host":"v2ray"}' > vmess.json
- name: Setup dae server
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -165,11 +165,11 @@ jobs:
chmod 600 ./conf.dae
nohup docker exec dae /host/dae/dae run -c /host/conf.dae &> dae.log &
sleep 10s
sleep 5s
cat dae.log
- name: Check WAN IPv4 TCP
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -180,7 +180,7 @@ jobs:
cat /host/v2ray.access.log | grep -q 'accepted tcp:1.1.1.1:443'
- name: Check WAN IPv4 UDP
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -191,7 +191,7 @@ jobs:
cat /host/v2ray.access.log | grep -q 'accepted udp:1.1.1.1:53'
- name: Check WAN IPv6 TCP
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -202,7 +202,41 @@ jobs:
cat /host/dae.log | grep -F -- '-> [2606:4700:4700::1111]:443'
- name: Check WAN IPv6 UDP
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
set -ex
docker exec dae dig @2606:4700:4700::1111 one.one.one.one
cat /host/dae.log | grep -F -- '-> [2606:4700:4700::1111]:53'
- name: Setup WAN UDP port conflict
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
set -ex
docker restart -t0 dae v2ray
nohup docker exec v2ray v2ray -c /host/v2ray.json &> v2ray.log &
nohup docker exec dae /host/dae/dae run -c /host/conf.dae &> dae.log &
sleep 5s
nohup docker exec dae nc -lu 53 &> nc.log &
- name: Check WAN IPv4 UDP with port conflict
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
set -ex
docker exec dae dig @1.1.1.1 one.one.one.one
cat /host/dae.log | grep -F -- '-> 1.1.1.1:53'
cat /host/v2ray.access.log | grep -q 'accepted udp:1.1.1.1:53'
- name: Check WAN IPv6 UDP with port conflict
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -212,7 +246,7 @@ jobs:
cat /host/dae.log | grep -F -- '-> [2606:4700:4700::1111]:53'
- name: Setup LAN
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -261,7 +295,7 @@ jobs:
log_level: trace
lan_interface: dae-veth-peer
wan_interface: auto
wan_interface: dae-veth-peer,eth0
allow_insecure: false
auto_config_kernel_parameter: true
}
@ -284,11 +318,11 @@ jobs:
chmod 600 ./conf.dae
nohup docker exec dae /host/dae/dae run -c /host/conf.dae &> dae.log &
sleep 10s
sleep 5s
cat dae.log
- name: Check LAN IPv4 TCP
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -299,7 +333,7 @@ jobs:
cat /host/v2ray.access.log | grep -q 'accepted tcp:1.0.0.1:80'
- name: Check LAN IPv4 UDP
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -310,7 +344,7 @@ jobs:
cat /host/v2ray.access.log | grep -q 'accepted udp:8.8.4.4:53'
- name: Check LAN IPv6 TCP
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
@ -321,7 +355,47 @@ jobs:
cat /host/dae.log | grep -F -- '-> [2606:4700:4700::1001]:80'
- name: Check LAN IPv6 UDP
uses: cilium/little-vm-helper@908ab1ff8a596a03cd5221a1f8602dc44c3f906d # v0.0.12
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
set -ex
docker exec dae ip net e dae dig @2001:4860:4860::8844 one.one.one.one
cat /host/dae.log | grep -F -- '-> [2001:4860:4860::8844]:53'
- name: Setup LAN UDP port conflict
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
set -ex
docker restart -t0 dae v2ray
docker exec dae rm -f /var/run/netns/dae
docker exec dae bash /host/lan.bash
docker exec dae sysctl net.ipv4.conf.dae-veth-peer.send_redirects=0
docker exec dae sysctl net.ipv6.conf.dae-veth-peer.forwarding=1
nohup docker exec v2ray v2ray -c /host/v2ray.json &> v2ray.log &
nohup docker exec dae /host/dae/dae run -c /host/conf.dae &> dae.log &
sleep 5s
nohup docker exec dae nc -lu 53 &> nc.log &
- name: Check LAN IPv4 UDP with port conflict
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |
set -ex
docker exec dae ip net e dae dig @8.8.4.4 one.one.one.one
cat /host/dae.log | grep -F -- '-> 8.8.4.4:53'
cat /host/v2ray.access.log | grep -q 'accepted udp:8.8.4.4:53'
- name: Check LAN IPv6 UDP with port conflict
uses: cilium/little-vm-helper@9d758b756305e83718a51b792a5aeabd022a39ec # v0.0.16
with:
provision: 'false'
cmd: |

View File

@ -135,9 +135,12 @@ func Run(log *logrus.Logger, conf *config.Config, externGeoDataDirs []string) (e
_ = os.WriteFile(PidFilePath, []byte(strconv.Itoa(os.Getpid())), 0644)
}
}()
if listener, err = c.ListenAndServe(readyChan, conf.Global.TproxyPort); err != nil {
log.Errorln("ListenAndServe:", err)
}
control.GetDaeNetns().With(func() error {
if listener, err = c.ListenAndServe(readyChan, conf.Global.TproxyPort); err != nil {
log.Errorln("ListenAndServe:", err)
}
return err
})
sigs <- nil
}()
reloading := false

View File

@ -35,10 +35,11 @@ type Global struct {
DialMode string `mapstructure:"dial_mode" default:"domain"`
DisableWaitingNetwork bool `mapstructure:"disable_waiting_network" default:"false"`
AutoConfigKernelParameter bool `mapstructure:"auto_config_kernel_parameter" default:"false"`
AutoConfigFirewallRule bool `mapstructure:"auto_config_firewall_rule" default:"false"`
SniffingTimeout time.Duration `mapstructure:"sniffing_timeout" default:"100ms"`
TlsImplementation string `mapstructure:"tls_implementation" default:"tls"`
UtlsImitate string `mapstructure:"utls_imitate" default:"chrome_auto"`
// DEPRECATED: not used as of https://github.com/daeuniverse/dae/pull/458
AutoConfigFirewallRule bool `mapstructure:"auto_config_firewall_rule" default:"false"`
SniffingTimeout time.Duration `mapstructure:"sniffing_timeout" default:"100ms"`
TlsImplementation string `mapstructure:"tls_implementation" default:"tls"`
UtlsImitate string `mapstructure:"utls_imitate" default:"chrome_auto"`
}
type Utls struct {

View File

@ -192,7 +192,12 @@ func (p *AnyfromPool) GetOrCreate(lAddr string, ttl time.Duration) (conn *Anyfro
},
KeepAlive: 0,
}
pc, err := d.ListenPacket(context.Background(), "udp", lAddr)
var err error
var pc net.PacketConn
GetDaeNetns().With(func() error {
pc, err = d.ListenPacket(context.Background(), "udp", lAddr)
return nil
})
if err != nil {
return nil, true, err
}

View File

@ -217,13 +217,24 @@ func fullLoadBpfObjects(
opts *loadBpfOptions,
) (err error) {
retryLoadBpf:
netnsID, err := GetDaeNetns().NetnsID()
if err != nil {
return fmt.Errorf("failed to get netns id: %w", err)
}
constants := map[string]interface{}{
"PARAM": struct {
tproxyPort uint32
controlPlanePid uint32
dae0Ifindex uint32
dae0NetnsId uint32
dae0peerMac [6]byte
padding [2]byte
}{
tproxyPort: uint32(opts.BigEndianTproxyPort),
controlPlanePid: uint32(os.Getpid()),
dae0Ifindex: uint32(GetDaeNetns().Dae0().Attrs().Index),
dae0NetnsId: uint32(netnsID),
dae0peerMac: [6]byte(GetDaeNetns().Dae0Peer().Attrs().HardwareAddr),
},
}
if err = loadBpfObjectsWithConstants(bpf, opts.CollectionOptions, constants); err != nil {

View File

@ -128,6 +128,15 @@ func NewControlPlane(
if err = rlimit.RemoveMemlock(); err != nil {
return nil, fmt.Errorf("rlimit.RemoveMemlock:%v", err)
}
InitDaeNetns(log)
if err = InitSysctlManager(log); err != nil {
return nil, err
}
if err = GetDaeNetns().Setup(); err != nil {
return nil, fmt.Errorf("failed to setup dae netns: %w", err)
}
pinPath := filepath.Join(consts.BpfPinRoot, consts.AppName)
if err = os.MkdirAll(pinPath, 0755); err != nil && !os.IsExist(err) {
if os.IsNotExist(err) {
@ -194,20 +203,6 @@ func NewControlPlane(
}
}()
if len(global.LanInterface) > 0 || len(global.WanInterface) > 0 {
if err = core.setupRoutingPolicy(); err != nil {
return nil, err
}
if global.AutoConfigFirewallRule {
if ok := core.addAcceptInputMark(); ok {
core.deferFuncs = append(core.deferFuncs, func() error {
core.delAcceptInputMark()
return nil
})
}
}
}
/// Bind to links. Binding should be advance of dialerGroups to avoid un-routable old connection.
// Bind to LAN
if len(global.LanInterface) > 0 {
@ -232,6 +227,10 @@ func NewControlPlane(
}
}
}
// Bind to dae0 and dae0peer
if err = core.bindDaens(); err != nil {
return nil, fmt.Errorf("bindDaens: %w", err)
}
/// DialerGroups (outbounds).
if global.AllowInsecure {
@ -471,11 +470,6 @@ func NewControlPlane(
}
go dnsUpstream.InitUpstreams()
InitDaeNetns(log)
if err = InitSysctlManager(log); err != nil {
return nil, err
}
close(plane.ready)
return plane, nil
}

View File

@ -9,12 +9,9 @@ import (
"context"
"errors"
"fmt"
"net"
"net/netip"
"os"
"os/exec"
"regexp"
"strings"
"sync"
"github.com/cilium/ebpf"
@ -194,175 +191,6 @@ func (c *controlPlaneCore) delQdisc(ifname string) error {
return nil
}
// TODO: Support more than firewalld and fw4: need more user feedback.
var nftInputChains = [][3]string{
{"inet", "firewalld", "filter_INPUT"},
{"inet", "fw4", "input"},
}
func (c *controlPlaneCore) addAcceptInputMark() (ok bool) {
for _, rule := range nftInputChains {
if err := exec.Command("nft", "insert rule "+strings.Join(rule[:], " ")+" mark & "+consts.TproxyMarkString+" == "+consts.TproxyMarkString+" accept").Run(); err == nil {
ok = true
}
}
return ok
}
func (c *controlPlaneCore) delAcceptInputMark() (ok bool) {
for _, rule := range nftInputChains {
output, err := exec.Command("nft", "--handle", "--numeric", "list", "chain", rule[0], rule[1], rule[2]).Output()
if err != nil {
continue
}
lines := strings.Split(string(output), "\n")
regex := regexp.MustCompile("meta mark & " + consts.TproxyMarkString + " == " + consts.TproxyMarkString + " accept # handle ([0-9]+)")
for _, line := range lines {
matches := regex.FindStringSubmatch(line)
if len(matches) >= 2 {
handle := matches[1]
if err = exec.Command("nft", "delete rule "+strings.Join(rule[:], " ")+" handle "+handle).Run(); err == nil {
ok = true
}
break
}
}
}
return ok
}
func (c *controlPlaneCore) setupRoutingPolicy() (err error) {
/// Insert ip rule / ip route.
var table = 2023 + c.flip
/** ip table
ip route add local default dev lo table 2023
ip -6 route add local default dev lo table 2023
*/
routes := []netlink.Route{{
Scope: unix.RT_SCOPE_HOST,
LinkIndex: consts.LoopbackIfIndex,
Dst: &net.IPNet{
IP: []byte{0, 0, 0, 0},
Mask: net.CIDRMask(0, 32),
},
Table: table,
Type: unix.RTN_LOCAL,
}, {
Scope: unix.RT_SCOPE_HOST,
LinkIndex: consts.LoopbackIfIndex,
Dst: &net.IPNet{
IP: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
Mask: net.CIDRMask(0, 128),
},
Table: table,
Type: unix.RTN_LOCAL,
}}
var routeBadIpv6 bool
cleanRoutes := func() error {
var errs error
for _, route := range routes {
if e := netlink.RouteDel(&route); e != nil {
if len(route.Dst.IP) == net.IPv6len && routeBadIpv6 {
// Not clean for bad ipv6.
continue
}
if errs != nil {
errs = fmt.Errorf("%w; %v", errs, e)
} else {
errs = e
}
}
}
if errs != nil {
return fmt.Errorf("IpRouteDel(lo): %w", errs)
}
return nil
}
tryRouteAddAgain:
for _, route := range routes {
if err = netlink.RouteAdd(&route); err != nil {
if os.IsExist(err) {
_ = cleanRoutes()
goto tryRouteAddAgain
}
if len(route.Dst.IP) == net.IPv6len {
// ipv6
c.log.Warnln("IpRouteAdd: Bad IPv6 support. Perhaps your machine disabled IPv6.")
routeBadIpv6 = true
continue
}
return fmt.Errorf("IpRouteAdd: %w", err)
}
}
c.deferFuncs = append(c.deferFuncs, cleanRoutes)
/** ip rule
ip rule add fwmark 0x8000000/0x8000000 table 2023
ip -6 rule add fwmark 0x8000000/0x8000000 table 2023
*/
rules := []netlink.Rule{{
SuppressIfgroup: -1,
SuppressPrefixlen: -1,
Priority: -1,
Goto: -1,
Flow: -1,
Family: unix.AF_INET,
Table: table,
Mark: int(consts.TproxyMark),
Mask: int(consts.TproxyMark),
}, {
SuppressIfgroup: -1,
SuppressPrefixlen: -1,
Priority: -1,
Goto: -1,
Flow: -1,
Family: unix.AF_INET6,
Table: table,
Mark: int(consts.TproxyMark),
Mask: int(consts.TproxyMark),
}}
var ruleBadIpv6 bool
cleanRules := func() error {
var errs error
for _, rule := range rules {
if rule.Family == unix.AF_INET6 && ruleBadIpv6 {
// Not clean for bad ipv6.
continue
}
if e := netlink.RuleDel(&rule); e != nil {
if errs != nil {
errs = fmt.Errorf("%w; %v", errs, e)
} else {
errs = e
}
}
}
if errs != nil {
return fmt.Errorf("IpRuleDel: %w", errs)
}
return nil
}
tryRuleAddAgain:
for _, rule := range rules {
if err = netlink.RuleAdd(&rule); err != nil {
if os.IsExist(err) {
_ = cleanRules()
goto tryRuleAddAgain
}
if rule.Family == unix.AF_INET6 {
// ipv6
c.log.Warnln("IpRuleAdd: Bad IPv6 support. Perhaps your machine disabled IPv6 (need CONFIG_IPV6_MULTIPLE_TABLES).")
ruleBadIpv6 = true
continue
}
return fmt.Errorf("IpRuleAdd: %w", err)
}
}
c.deferFuncs = append(c.deferFuncs, cleanRules)
return nil
}
func (c *controlPlaneCore) addLinkCb(_ifname string, rtmType uint16, cb func()) error {
ch := make(chan netlink.LinkUpdate)
done := make(chan struct{})
@ -555,11 +383,6 @@ func (c *controlPlaneCore) setupSkPidMonitor() error {
}
func (c *controlPlaneCore) bindWan(ifname string, autoConfigKernelParameter bool) error {
if autoConfigKernelParameter {
if err := sysctl.Set(fmt.Sprintf("net.ipv4.conf.%v.accept_local", ifname), "1", false); err != nil {
return err
}
}
return c._bindWan(ifname)
}
@ -626,36 +449,84 @@ func (c *controlPlaneCore) _bindWan(ifname string) error {
return nil
})
filterIngress := &netlink.BpfFilter{
return nil
}
func (c *controlPlaneCore) bindDaens() (err error) {
daens := GetDaeNetns()
// tproxy_dae0peer_ingress@eth0 at dae netns
daens.With(func() error {
return c.addQdisc(daens.Dae0Peer().Attrs().Name)
})
filterDae0peerIngress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
LinkIndex: daens.Dae0Peer().Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: netlink.MakeHandle(0x2023, 0b010+uint16(c.flip)),
Handle: netlink.MakeHandle(0x2022, 0b010+uint16(c.flip)),
Protocol: unix.ETH_P_ALL,
Priority: 1,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyWanIngress.FD(),
Name: consts.AppName + "_wan_ingress",
Fd: c.bpf.bpfPrograms.TproxyDae0peerIngress.FD(),
Name: consts.AppName + "_dae0peer_ingress",
DirectAction: true,
}
_ = netlink.FilterDel(filterIngress)
daens.With(func() error {
return netlink.FilterDel(filterDae0peerIngress)
})
// Remove and add.
if !c.isReload {
// Clean up thoroughly.
filterIngressFlipped := deepcopy.Copy(filterIngress).(*netlink.BpfFilter)
filterIngressFlipped := deepcopy.Copy(filterDae0peerIngress).(*netlink.BpfFilter)
filterIngressFlipped.FilterAttrs.Handle ^= 1
_ = netlink.FilterDel(filterIngressFlipped)
daens.With(func() error {
return netlink.FilterDel(filterDae0peerIngress)
})
}
if err := netlink.FilterAdd(filterIngress); err != nil {
if err = daens.With(func() error {
return netlink.FilterAdd(filterDae0peerIngress)
}); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := netlink.FilterDel(filterIngress); err != nil {
return fmt.Errorf("FilterDel(%v:%v): %w", ifname, filterIngress.Name, err)
daens.With(func() error {
return netlink.FilterDel(filterDae0peerIngress)
})
return nil
})
// tproxy_dae0_ingress@dae0 at host netns
c.addQdisc(daens.Dae0().Attrs().Name)
filterDae0Ingress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: daens.Dae0().Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: netlink.MakeHandle(0x2022, 0b010+uint16(c.flip)),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyDae0Ingress.FD(),
Name: consts.AppName + "_dae0_ingress",
DirectAction: true,
}
_ = netlink.FilterDel(filterDae0Ingress)
// Remove and add.
if !c.isReload {
// Clean up thoroughly.
filterEgressFlipped := deepcopy.Copy(filterDae0Ingress).(*netlink.BpfFilter)
filterEgressFlipped.FilterAttrs.Handle ^= 1
_ = netlink.FilterDel(filterEgressFlipped)
}
if err := netlink.FilterAdd(filterDae0Ingress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter egress: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := netlink.FilterDel(filterDae0Ingress); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("FilterDel(%v:%v): %w", daens.Dae0().Attrs().Name, filterDae0Ingress.Name, err)
}
return nil
})
return nil
return
}
// BatchUpdateDomainRouting update bpf map domain_routing. Since one IP may have multiple domains, this function should

View File

@ -39,6 +39,9 @@
#define IPV6_DST_OFF(link_h_len) (link_h_len + offsetof(struct ipv6hdr, daddr))
#define IPV6_SRC_OFF(link_h_len) (link_h_len + offsetof(struct ipv6hdr, saddr))
#define PACKET_HOST 0
#define PACKET_OTHERHOST 3
#define NOWHERE_IFINDEX 0
#define LOOPBACK_IFINDEX 1
@ -108,9 +111,6 @@ struct {
__uint(max_entries, 2);
} listen_socket_map SEC(".maps");
/// TODO: Remove items from the dst_map by conntrack.
// Dest map:
union ip6 {
__u8 u6_addr8[16];
__be16 u6_addr16[8];
@ -118,6 +118,26 @@ union ip6 {
__be64 u6_addr64[2];
};
struct redirect_tuple {
union ip6 sip;
union ip6 dip;
__u8 l4proto;
};
struct redirect_entry {
__u32 ifindex;
__u8 smac[6];
__u8 dmac[6];
__u8 from_wan;
};
struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, struct redirect_tuple);
__type(value, struct redirect_entry);
__uint(max_entries, 65536);
} redirect_track SEC(".maps");
struct ip_port {
union ip6 ip;
__be16 port;
@ -149,6 +169,10 @@ struct tuples {
struct dae_param {
__u32 tproxy_port;
__u32 control_plane_pid;
__u32 dae0_ifindex;
__u32 dae_netns_id;
__u8 dae0peer_mac[6];
__u8 padding[2];
};
static volatile const struct dae_param PARAM = {};
@ -812,15 +836,14 @@ static __always_inline __u32 get_link_h_len(__u32 ifindex,
}
static __always_inline int
assign_socket_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple,
__u32 len, bool established) {
lookup_and_assign_tcp_established(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, __u32 len)
{
int ret = -1;
struct bpf_sock *sk = bpf_skc_lookup_tcp(skb, tuple, len, BPF_F_CURRENT_NETNS, 0);
if (!sk)
return -1;
if (established &&
(sk->state == BPF_TCP_LISTEN || sk->state == BPF_TCP_TIME_WAIT)) {
if (sk->state == BPF_TCP_LISTEN || sk->state == BPF_TCP_TIME_WAIT) {
goto release;
}
@ -831,9 +854,14 @@ release:
}
static __always_inline int
assign_socket_udp(struct __sk_buff *skb,
struct bpf_sock_tuple *tuple, __u32 len) {
struct bpf_sock *sk = bpf_sk_lookup_udp(skb, tuple, len, BPF_F_CURRENT_NETNS, 0);
assign_listener(struct __sk_buff *skb, __u8 l4proto)
{
struct bpf_sock *sk;
if (l4proto == IPPROTO_TCP)
sk = bpf_map_lookup_elem(&listen_socket_map, &zero_key);
else
sk = bpf_map_lookup_elem(&listen_socket_map, &one_key);
if (!sk)
return -1;
@ -843,11 +871,39 @@ assign_socket_udp(struct __sk_buff *skb,
}
static __always_inline int
assign_socket(struct __sk_buff *skb, struct bpf_sock_tuple *tuple,
__u32 len, __u8 nexthdr, bool established) {
if (nexthdr == IPPROTO_TCP)
return assign_socket_tcp(skb, tuple, len, established);
return assign_socket_udp(skb, tuple, len);
redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len,
struct tuples *tuples, __u8 l4proto,
struct ethhdr *ethh, __u8 from_wan) {
/* Redirect from L3 dev to L2 dev, e.g. wg0 -> veth */
if (!link_h_len) {
__u16 l3proto = skb->protocol;
bpf_skb_change_head(skb, sizeof(struct ethhdr), 0);
bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto),
&l3proto, sizeof(l3proto), 0);
}
bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
(void *)&PARAM.dae0peer_mac, sizeof(ethh->h_dest), 0);
struct redirect_tuple redirect_tuple = {};
if (skb->protocol == bpf_htons(ETH_P_IP)) {
redirect_tuple.sip.u6_addr32[3] = tuples->five.sip.u6_addr32[3];
redirect_tuple.dip.u6_addr32[3] = tuples->five.dip.u6_addr32[3];
} else {
__builtin_memcpy(&redirect_tuple.sip, &tuples->five.sip, IPV6_BYTE_LENGTH);
__builtin_memcpy(&redirect_tuple.dip, &tuples->five.dip, IPV6_BYTE_LENGTH);
}
redirect_tuple.l4proto = l4proto;
struct redirect_entry redirect_entry = {};
redirect_entry.ifindex = skb->ifindex;
redirect_entry.from_wan = from_wan;
__builtin_memcpy(redirect_entry.smac, ethh->h_source, sizeof(ethh->h_source));
__builtin_memcpy(redirect_entry.dmac, ethh->h_dest, sizeof(ethh->h_dest));
bpf_map_update_elem(&redirect_track, &redirect_tuple, &redirect_entry, BPF_ANY);
skb->cb[0] = TPROXY_MARK;
return bpf_redirect(PARAM.dae0_ifindex, 0);
}
SEC("tc/ingress")
@ -893,7 +949,6 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
struct bpf_sock_tuple tuple = {0};
__u32 tuple_size;
struct bpf_sock *sk;
bool is_old_conn = false;
__u32 flag[8];
void *l4hdr;
@ -917,11 +972,11 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
goto new_connection;
}
sk = bpf_skc_lookup_tcp(skb, &tuple, tuple_size, BPF_F_CURRENT_NETNS, 0);
sk = bpf_skc_lookup_tcp(skb, &tuple, tuple_size, PARAM.dae_netns_id, 0);
if (sk) {
if (sk->state != BPF_TCP_LISTEN) {
is_old_conn = true;
goto assign;
bpf_sk_release(sk);
goto control_plane;
}
bpf_sk_release(sk);
}
@ -1015,45 +1070,8 @@ new_connection:
}
// Assign to control plane.
if (l4proto == IPPROTO_TCP) {
// TCP.
sk = bpf_map_lookup_elem(&listen_socket_map, &zero_key);
if (!sk || sk->state != BPF_TCP_LISTEN) {
bpf_printk("accpet tcp tproxy not listen");
goto sk_accept;
}
} else {
// UDP.
sk = bpf_map_lookup_elem(&listen_socket_map, &one_key);
if (!sk) {
bpf_printk("accpet udp tproxy not listen");
goto sk_accept;
}
}
assign:
skb->mark = TPROXY_MARK;
ret = bpf_sk_assign(skb, sk, 0);
bpf_sk_release(sk);
if (ret) {
if (is_old_conn && ret == -ESOCKTNOSUPPORT) {
bpf_printk("bpf_sk_assign: %d, perhaps you have other TPROXY programs "
"(such as v2ray) running?",
ret);
return TC_ACT_OK;
} else {
bpf_printk("bpf_sk_assign: %d", ret);
}
return TC_ACT_SHOT;
}
return TC_ACT_OK;
sk_accept:
if (sk) {
bpf_sk_release(sk);
}
control_plane:
return redirect_to_control_plane(skb, link_h_len, &tuples, l4proto, &ethh, 0);
direct:
return TC_ACT_OK;
@ -1112,9 +1130,6 @@ static __always_inline bool pid_is_control_plane(struct __sk_buff *skb,
}
}
__u8 special_mac_to_tproxy[6] = {2, 0, 2, 3, 0, 0};
__u8 special_mac_from_tproxy[6] = {2, 0, 2, 3, 0, 1};
// Routing and redirect the packet back.
// We cannot modify the dest address here. So we cooperate with wan_ingress.
SEC("tc/wan_egress")
@ -1273,18 +1288,6 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
&routing_result, BPF_ANY);
}
// Write mac.
if ((ret =
bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
ethh.h_source, sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
}
if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source),
special_mac_to_tproxy,
sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
};
} else if (l4proto == IPPROTO_UDP) {
// Routing. It decides if we redirect traffic to control plane.
@ -1364,39 +1367,13 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
return TC_ACT_SHOT;
}
// Write mac.
if ((ret =
bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
ethh.h_source, sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
}
if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source),
special_mac_to_tproxy,
sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
};
}
// // Print packet in hex for debugging (checksum or something else).
// if ((l4proto == IPPROTO_TCP ? tcph.dest : udph.dest) == bpf_htons(8443)) {
// bpf_printk("PRINT OUTPUT PACKET");
// for (__u32 i = 0; i < skb->len && i < 500; i++) {
// __u8 t = 0;
// bpf_skb_load_bytes(skb, i, &t, 1);
// bpf_printk("%02x", t);
// }
// }
// Redirect from egress to ingress.
if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) {
bpf_printk("Shot bpf_redirect: %d", ret);
return TC_ACT_SHOT;
}
return TC_ACT_REDIRECT;
return redirect_to_control_plane(skb, link_h_len, &tuples, l4proto, &ethh, 1);
}
SEC("tc/wan_ingress")
int tproxy_wan_ingress(struct __sk_buff *skb) {
SEC("tc/dae0peer_ingress")
int tproxy_dae0peer_ingress(struct __sk_buff *skb) {
struct ethhdr ethh;
struct iphdr iph;
struct ipv6hdr ipv6h;
@ -1405,10 +1382,12 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
struct udphdr udph;
__u8 ihl;
__u8 l4proto;
__u32 link_h_len;
if (get_link_h_len(skb->ifindex, &link_h_len)) {
return TC_ACT_OK;
__u32 link_h_len = 14;
if (skb->cb[0] != TPROXY_MARK) {
return TC_ACT_SHOT;
}
int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h,
&tcph, &udph, &ihl, &l4proto);
if (ret) {
@ -1420,95 +1399,86 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
struct tuples tuples;
get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto);
// bpf_printk("bpf_ntohs(*(__u16 *)&ethh.h_source[4]): %u",
// bpf_ntohs(*(__u16 *)&ethh.h_source[4]));
// Tproxy related.
__u16 tproxy_typ = bpf_ntohs(*(__u16 *)&ethh.h_source[4]);
if (*(__u32 *)&ethh.h_source[0] != bpf_htonl(0x02000203) || tproxy_typ > 1) {
// Check for security. Reject packets that is UDP and sent to tproxy port.
__be16 tproxy_port = PARAM.tproxy_port;
if (!tproxy_port) {
goto accept;
}
if (unlikely(tproxy_port == tuples.five.dport)) {
struct bpf_sock_tuple tuple = {0};
__u32 tuple_size;
if (skb->protocol == bpf_htons(ETH_P_IP)) {
tuple.ipv4.daddr = tuples.five.dip.u6_addr32[3];
tuple.ipv4.dport = tuples.five.dport;
tuple_size = sizeof(tuple.ipv4);
} else {
__builtin_memcpy(tuple.ipv6.daddr, &tuples.five.dip, IPV6_BYTE_LENGTH);
tuple.ipv6.dport = tuples.five.dport;
tuple_size = sizeof(tuple.ipv6);
}
struct bpf_sock *sk =
bpf_sk_lookup_udp(skb, &tuple, tuple_size, BPF_F_CURRENT_NETNS, 0);
if (sk) {
// Scope is host.
bpf_sk_release(sk);
return TC_ACT_SHOT;
}
}
accept:
return TC_ACT_PIPE;
}
// Should send the packet to tproxy.
skb->mark = TPROXY_MARK;
struct bpf_sock_tuple tuple = {};
__u32 tuple_size = skb->protocol == bpf_htons(ETH_P_IP) ?
sizeof(tuple.ipv4) : sizeof(tuple.ipv6);
bpf_skb_change_type(skb, PACKET_HOST);
/* First look for established socket.
* This is done for TCP only, otherwise bpf_sk_lookup_udp would find
* previously created transparent socket for UDP, which is not what we want.
* */
if (l4proto == IPPROTO_TCP) {
__u32 tuple_size;
struct bpf_sock_tuple tuple = {};
if (skb->protocol == bpf_htons(ETH_P_IP)) {
tuple.ipv4.saddr = tuples.five.sip.u6_addr32[3];
tuple.ipv4.sport = tuples.five.sport;
tuple.ipv4.daddr = tuples.five.dip.u6_addr32[3];
tuple.ipv4.sport = tuples.five.sport;
tuple.ipv4.dport = tuples.five.dport;
tuple_size = sizeof(tuple.ipv4);
} else {
__builtin_memcpy(tuple.ipv6.saddr, &tuples.five.sip, IPV6_BYTE_LENGTH);
__builtin_memcpy(tuple.ipv6.daddr, &tuples.five.dip, IPV6_BYTE_LENGTH);
tuple.ipv6.sport = tuples.five.sport;
tuple.ipv6.dport = tuples.five.dport;
tuple_size = sizeof(tuple.ipv6);
}
ret = assign_socket(skb, &tuple, tuple_size, l4proto, true);
if (ret == 0) {
if (lookup_and_assign_tcp_established(skb, &tuple, tuple_size) == 0) {
return TC_ACT_OK;
}
}
/* Then look for tproxy listening socket */
__be16 tproxy_port = PARAM.tproxy_port;
if (!tproxy_port) {
return TC_ACT_OK;
}
if (skb->protocol == bpf_htons(ETH_P_IP)) {
tuple.ipv4.saddr = 0;
tuple.ipv4.daddr = tuples.five.sip.u6_addr32[3];
tuple.ipv4.sport = 0;
tuple.ipv4.dport = tproxy_port;
} else {
__builtin_memset(tuple.ipv6.saddr, 0, IPV6_BYTE_LENGTH);
__builtin_memcpy(tuple.ipv6.daddr, &tuples.five.sip, IPV6_BYTE_LENGTH);
tuple.ipv6.sport = 0;
tuple.ipv6.dport = tproxy_port;
}
ret = assign_socket(skb, &tuple, tuple_size, l4proto, false);
if (ret == 0) {
if (assign_listener(skb, l4proto) == 0) {
return TC_ACT_OK;
}
return TC_ACT_SHOT;
}
SEC("tc/dae0_ingress")
int tproxy_dae0_ingress(struct __sk_buff *skb) {
struct ethhdr ethh;
struct iphdr iph;
struct ipv6hdr ipv6h;
struct icmp6hdr icmp6h;
struct tcphdr tcph;
struct udphdr udph;
__u8 ihl;
__u8 l4proto;
__u32 link_h_len = 14;
if (parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h,
&tcph, &udph, &ihl, &l4proto)) {
return TC_ACT_OK;
}
struct tuples tuples;
get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto);
// reverse the tuple!
struct redirect_tuple redirect_tuple = {};
if (skb->protocol == bpf_htons(ETH_P_IP)) {
redirect_tuple.sip.u6_addr32[3] = tuples.five.dip.u6_addr32[3];
redirect_tuple.dip.u6_addr32[3] = tuples.five.sip.u6_addr32[3];
} else {
__builtin_memcpy(&redirect_tuple.sip, &tuples.five.dip, IPV6_BYTE_LENGTH);
__builtin_memcpy(&redirect_tuple.dip, &tuples.five.sip, IPV6_BYTE_LENGTH);
}
redirect_tuple.l4proto = l4proto;
struct redirect_entry *redirect_entry = bpf_map_lookup_elem(&redirect_track, &redirect_tuple);
if (!redirect_entry)
return TC_ACT_OK;
bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source),
redirect_entry->dmac, sizeof(redirect_entry->dmac), 0);
bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
redirect_entry->smac, sizeof(redirect_entry->smac), 0);
__u32 type = redirect_entry->from_wan ? PACKET_HOST : PACKET_OTHERHOST;
bpf_skb_change_type(skb, type);
__u64 flags = redirect_entry->from_wan ? BPF_F_INGRESS : 0;
return bpf_redirect(redirect_entry->ifindex, flags);
}
static int __always_inline _update_map_elem_by_cookie(const __u64 cookie) {
if (unlikely(!cookie)) {
bpf_printk("zero cookie");

View File

@ -1,7 +1,6 @@
package control
import (
"bytes"
"fmt"
"net"
"os"
@ -10,6 +9,7 @@ import (
"sync"
"sync/atomic"
"github.com/daeuniverse/dae/common/consts"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"github.com/vishvananda/netns"
@ -48,6 +48,18 @@ func GetDaeNetns() *DaeNetns {
return daeNetns
}
func (ns *DaeNetns) NetnsID() (int, error) {
return netlink.GetNetNsIdByFd(int(ns.daeNs))
}
func (ns *DaeNetns) Dae0() netlink.Link {
return ns.dae0
}
func (ns *DaeNetns) Dae0Peer() netlink.Link {
return ns.dae0peer
}
func (ns *DaeNetns) Setup() (err error) {
if ns.setupDone.Load() {
return
@ -104,10 +116,10 @@ func (ns *DaeNetns) setup() (err error) {
if err = ns.setupVeth(); err != nil {
return
}
if err = ns.setupSysctl(); err != nil {
if err = ns.setupNetns(); err != nil {
return
}
if err = ns.setupNetns(); err != nil {
if err = ns.setupSysctl(); err != nil {
return
}
if err = ns.setupIPv4Datapath(); err != nil {
@ -116,16 +128,100 @@ func (ns *DaeNetns) setup() (err error) {
if err = ns.setupIPv6Datapath(); err != nil {
return
}
go ns.monitorDae0LinkAddr()
if err = ns.setupRoutingPolicy(); err != nil {
return
}
return
}
func (ns *DaeNetns) setupRoutingPolicy() (err error) {
if err = netns.Set(ns.daeNs); err != nil {
return fmt.Errorf("failed to switch to daens: %v", err)
}
defer netns.Set(ns.hostNs)
/// Insert ip rule / ip route.
var table = 2023
/** ip table
ip route add local default dev lo table 2023
ip -6 route add local default dev lo table 2023
*/
routes := []netlink.Route{{
Scope: unix.RT_SCOPE_HOST,
LinkIndex: consts.LoopbackIfIndex,
Dst: &net.IPNet{
IP: []byte{0, 0, 0, 0},
Mask: net.CIDRMask(0, 32),
},
Table: table,
Type: unix.RTN_LOCAL,
}, {
Scope: unix.RT_SCOPE_HOST,
LinkIndex: consts.LoopbackIfIndex,
Dst: &net.IPNet{
IP: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
Mask: net.CIDRMask(0, 128),
},
Table: table,
Type: unix.RTN_LOCAL,
}}
for _, route := range routes {
if err = netlink.RouteAdd(&route); err != nil {
if len(route.Dst.IP) == net.IPv6len {
// ipv6
ns.log.Warnln("IpRouteAdd: Bad IPv6 support. Perhaps your machine disabled IPv6.")
continue
}
return fmt.Errorf("IpRouteAdd: %w", err)
}
}
/** ip rule
ip rule add fwmark 0x8000000/0x8000000 table 2023
ip -6 rule add fwmark 0x8000000/0x8000000 table 2023
*/
rules := []netlink.Rule{{
SuppressIfgroup: -1,
SuppressPrefixlen: -1,
Priority: -1,
Goto: -1,
Flow: -1,
Family: unix.AF_INET,
Table: table,
Mark: int(consts.TproxyMark),
Mask: int(consts.TproxyMark),
}, {
SuppressIfgroup: -1,
SuppressPrefixlen: -1,
Priority: -1,
Goto: -1,
Flow: -1,
Family: unix.AF_INET6,
Table: table,
Mark: int(consts.TproxyMark),
Mask: int(consts.TproxyMark),
}}
for _, rule := range rules {
if err = netlink.RuleAdd(&rule); err != nil {
if rule.Family == unix.AF_INET6 {
// ipv6
ns.log.Warnln("IpRuleAdd: Bad IPv6 support. Perhaps your machine disabled IPv6 (need CONFIG_IPV6_MULTIPLE_TABLES).")
continue
}
return fmt.Errorf("IpRuleAdd: %w", err)
}
}
return nil
}
func (ns *DaeNetns) setupVeth() (err error) {
// ip l a dae0 type veth peer name dae0peer
DeleteLink(HostVethName)
if err = netlink.LinkAdd(&netlink.Veth{
LinkAttrs: netlink.LinkAttrs{
Name: HostVethName,
Name: HostVethName,
TxQLen: 1000,
},
PeerName: NsVethName,
}); err != nil {
@ -144,38 +240,6 @@ func (ns *DaeNetns) setupVeth() (err error) {
return
}
func (ns *DaeNetns) setupSysctl() (err error) {
// sysctl net.ipv4.conf.dae0.rp_filter=0
if err = sysctl.Set(fmt.Sprintf("net.ipv4.conf.%s.rp_filter", HostVethName), "0", true); err != nil {
return fmt.Errorf("failed to set rp_filter for dae0: %v", err)
}
// sysctl net.ipv4.conf.all.rp_filter=0
if err = sysctl.Set("net.ipv4.conf.all.rp_filter", "0", true); err != nil {
return fmt.Errorf("failed to set rp_filter for all: %v", err)
}
// sysctl net.ipv4.conf.dae0.arp_filter=0
if err = sysctl.Set(fmt.Sprintf("net.ipv4.conf.%s.arp_filter", HostVethName), "0", true); err != nil {
return fmt.Errorf("failed to set arp_filter for dae0: %v", err)
}
// sysctl net.ipv4.conf.all.arp_filter=0
if err = sysctl.Set("net.ipv4.conf.all.arp_filter", "0", true); err != nil {
return fmt.Errorf("failed to set arp_filter for all: %v", err)
}
// sysctl net.ipv4.conf.dae0.accept_local=1
if err = sysctl.Set(fmt.Sprintf("net.ipv4.conf.%s.accept_local", HostVethName), "1", true); err != nil {
return fmt.Errorf("failed to set accept_local for dae0: %v", err)
}
// sysctl net.ipv6.conf.dae0.disable_ipv6=0
if err = sysctl.Set(fmt.Sprintf("net.ipv6.conf.%s.disable_ipv6", HostVethName), "0", true); err != nil {
return fmt.Errorf("failed to set disable_ipv6 for dae0: %v", err)
}
// sysctl net.ipv6.conf.dae0.forwarding=1
SetForwarding(HostVethName, "1")
// sysctl net.ipv6.conf.all.forwarding=1
SetForwarding("all", "1")
return
}
func (ns *DaeNetns) setupNetns() (err error) {
// ip netns a daens
DeleteNamedNetns(NsName)
@ -191,6 +255,41 @@ func (ns *DaeNetns) setupNetns() (err error) {
if err = netlink.LinkSetNsFd(ns.dae0peer, int(ns.daeNs)); err != nil {
return fmt.Errorf("failed to move dae0peer to daens: %v", err)
}
if err = netns.Set(ns.daeNs); err != nil {
return fmt.Errorf("failed to switch to daens: %v", err)
}
defer netns.Set(ns.hostNs)
// (ip net e daens) ip l s dae0peer up
if err = netlink.LinkSetUp(ns.dae0peer); err != nil {
return fmt.Errorf("failed to set link dae0peer up: %v", err)
}
// re-fetch dae0peer to make sure we have the latest mac address
if ns.dae0peer, err = netlink.LinkByName(NsVethName); err != nil {
return fmt.Errorf("failed to get link dae0peer: %v", err)
}
lo, err := netlink.LinkByName("lo")
if err != nil {
return fmt.Errorf("failed to get link lo: %v", err)
}
// (ip net e daens) ip l s lo up
if err = netlink.LinkSetUp(lo); err != nil {
return fmt.Errorf("failed to set link lo up: %v", err)
}
return
}
func (ns *DaeNetns) setupSysctl() (err error) {
// sysctl net.ipv6.conf.dae0.disable_ipv6=0
if err = sysctl.Set(fmt.Sprintf("net.ipv6.conf.%s.disable_ipv6", HostVethName), "0", true); err != nil {
return fmt.Errorf("failed to set disable_ipv6 for dae0: %v", err)
}
// sysctl net.ipv6.conf.dae0.forwarding=1
if err = sysctl.Set(fmt.Sprintf("net.ipv6.conf.%s.forwarding", HostVethName), "1", true); err != nil {
return fmt.Errorf("failed to set forwarding for dae0: %v", err)
}
// sysctl net.ipv6.conf.all.forwarding=1
SetForwarding("all", "1")
return
}
@ -200,10 +299,6 @@ func (ns *DaeNetns) setupIPv4Datapath() (err error) {
}
defer netns.Set(ns.hostNs)
// (ip net e daens) ip l s dae0peer up
if err = netlink.LinkSetUp(ns.dae0peer); err != nil {
return fmt.Errorf("failed to set link dae0peer up: %v", err)
}
// (ip net e daens) ip a a 169.254.0.11 dev dae0peer
// Although transparent UDP socket doesn't use this IP, it's still needed to make proper L3 header
ip, ipNet, err := net.ParseCIDR("169.254.0.11/32")
@ -233,6 +328,14 @@ func (ns *DaeNetns) setupIPv4Datapath() (err error) {
return fmt.Errorf("failed to add v4 route2 to dae0peer: %v", err)
}
// (ip net e daens) ip n r 169.254.0.1 dev dae0peer lladdr $mac_dae0 nud permanent
if err = netlink.NeighSet(&netlink.Neigh{
IP: net.ParseIP("169.254.0.1"),
HardwareAddr: ns.dae0.Attrs().HardwareAddr,
LinkIndex: ns.dae0peer.Attrs().Index,
State: netlink.NUD_PERMANENT,
}); err != nil {
return fmt.Errorf("failed to add neigh to dae0peer: %v", err)
}
return
}
@ -261,21 +364,9 @@ func (ns *DaeNetns) setupIPv6Datapath() (err error) {
}); err != nil {
return fmt.Errorf("failed to add v6 route to dae0peer: %v", err)
}
return
}
// updateNeigh() isn't named as setupNeigh() because it requires runtime.LockOSThread()
func (ns *DaeNetns) updateNeigh() (err error) {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
if err = netns.Set(ns.daeNs); err != nil {
return fmt.Errorf("failed to switch to daens: %v", err)
}
defer netns.Set(ns.hostNs)
// (ip net e daens) ip n r fe80::ecee:eeff:feee:eeee dev dae0peer lladdr $mac_dae0 nud permanent
if err = netlink.NeighSet(&netlink.Neigh{
IP: net.ParseIP("169.254.0.1"),
IP: net.ParseIP("fe80::ecee:eeff:feee:eeee"),
HardwareAddr: ns.dae0.Attrs().HardwareAddr,
LinkIndex: ns.dae0peer.Attrs().Index,
State: netlink.NUD_PERMANENT,
@ -285,30 +376,6 @@ func (ns *DaeNetns) updateNeigh() (err error) {
return
}
func (ns *DaeNetns) monitorDae0LinkAddr() {
ch := make(chan netlink.LinkUpdate)
done := make(chan struct{})
defer close(done)
err := netlink.LinkSubscribe(ch, done)
if err != nil {
ns.log.Errorf("failed to subscribe link updates: %v", err)
}
if ns.dae0, err = netlink.LinkByName(HostVethName); err != nil {
ns.log.Errorf("failed to get link dae0: %v", err)
}
if err = ns.updateNeigh(); err != nil {
ns.log.Errorf("failed to update neigh: %v", err)
}
for msg := range ch {
if msg.Link.Attrs().Name == HostVethName && !bytes.Equal(msg.Link.Attrs().HardwareAddr, ns.dae0.Attrs().HardwareAddr) {
ns.log.WithField("old addr", ns.dae0.Attrs().HardwareAddr).WithField("new addr", msg.Link.Attrs().HardwareAddr).Info("dae0 link addr changed")
ns.dae0 = msg.Link
ns.updateNeigh()
}
}
}
func DeleteNamedNetns(name string) error {
namedPath := path.Join("/run/netns", name)
unix.Unmount(namedPath, unix.MNT_DETACH|unix.MNT_FORCE)

View File

@ -6,11 +6,9 @@
package control
import (
"errors"
"fmt"
"net"
"net/netip"
"syscall"
"time"
"github.com/daeuniverse/dae/common"
@ -50,23 +48,7 @@ func ChooseNatTimeout(data []byte, sniffDns bool) (dmsg *dnsmessage.Msg, timeout
// sendPkt uses bind first, and fallback to send hdr if addr is in use.
func sendPkt(log *logrus.Logger, data []byte, from netip.AddrPort, realTo, to netip.AddrPort, lConn *net.UDPConn) (err error) {
transparentTimeout := AnyfromTimeout
if from.Port() == 53 {
// Add port 53 (udp) to whitelist to avoid conflicts with the potential local dns server.
transparentTimeout = 0
}
uConn, _, err := DefaultAnyfromPool.GetOrCreate(from.String(), transparentTimeout)
if err != nil && errors.Is(err, syscall.EADDRINUSE) {
log.WithField("from", from).
WithField("to", to).
WithField("realTo", realTo).
Trace("Port in use, fallback to use netns.")
err = GetDaeNetns().With(func() (err error) {
uConn, _, err = DefaultAnyfromPool.GetOrCreate(from.String(), AnyfromTimeout)
return err
})
}
uConn, _, err := DefaultAnyfromPool.GetOrCreate(from.String(), AnyfromTimeout)
if err != nil {
return
}

View File

@ -162,7 +162,6 @@ global {
log_level: info
allow_insecure: false
auto_config_kernel_parameter: true
auto_config_firewall_rule: true
}
subscription {

View File

@ -37,6 +37,8 @@ The proxy mechanism of dae is akin to other programs. However, when binding to t
In terms of benchmarking, dae's proxy performance slightly surpasses that of other proxy programs, but the difference is not significant.
As of [PR:implement stack bypass](https://github.com/daeuniverse/dae/pull/458), the hijack datapath has been changed to bypass stack for better performance and less stack influence (e.g. netfilter, systemd-sysctl). Please refer to the PR description for better understanding.
### Direct Connection Mechanism
Conventionally, traffic splitting involves passing traffic through a proxy program, navigating the splitting module, and then determining whether to use a proxy or establish a direct connection. This process requires parsing, processing, and copying traffic through the network stack, delivering it to the proxy program, and subsequently copying, processing, and encapsulating it through the network stack before sending it out. This consumes substantial resources. Particularly in scenarios like BitTorrent downloads, even if a direct connection is set, it still consumes numerous connections, ports, memory, and CPU resources. It might even impact NAT type in gaming situations due to the proxy program's inadequate handling, resulting in connection errors.

View File

@ -156,7 +156,6 @@ global {
log_level: info
allow_insecure: false
auto_config_kernel_parameter: true
auto_config_firewall_rule: true
}
subscription {

View File

@ -34,11 +34,6 @@ global {
# https://github.com/daeuniverse/dae/blob/main/docs/en/user-guide/kernel-parameters.md to see what will dae do.
auto_config_kernel_parameter: true
# Automatically configure firewall rules like firewalld and fw4.
# firewalld: nft 'insert rule inet firewalld filter_INPUT mark 0x08000000 accept'
# fw4: nft 'insert rule inet fw4 input mark 0x08000000 accept'
auto_config_firewall_rule: true
##### Node connectivity check.
# Host of URL should have both IPv4 and IPv6 if you have double stack in local.

4
go.mod
View File

@ -12,6 +12,7 @@ require (
github.com/daeuniverse/dae-config-dist/go/dae_config v0.0.0-20230604120805-1c27619b592d
github.com/daeuniverse/outbound v0.0.0-20240101085641-7932e7df927d
github.com/daeuniverse/softwind v0.0.0-20231230065827-eed67f20d2c1
github.com/fsnotify/fsnotify v1.7.0
github.com/json-iterator/go v1.1.12
github.com/miekg/dns v1.1.55
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826
@ -21,6 +22,7 @@ require (
github.com/spf13/cobra v1.7.0
github.com/v2rayA/ahocorasick-domain v0.0.0-20231231085011-99ceb8ef3208
github.com/vishvananda/netlink v1.1.0
github.com/vishvananda/netns v0.0.4
github.com/x-cray/logrus-prefixed-formatter v0.5.2
golang.org/x/crypto v0.12.0
golang.org/x/exp v0.0.0-20230728194245-b0cb94b80691
@ -54,7 +56,6 @@ require (
github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 // indirect
github.com/dgryski/go-rc2 v0.0.0-20150621095337-8a9021637152 // indirect
github.com/eknkc/basex v1.0.1 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
@ -68,7 +69,6 @@ require (
github.com/refraction-networking/utls v1.4.3 // indirect
github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/vishvananda/netns v0.0.4 // indirect
gitlab.com/yawning/chacha20.git v0.0.0-20230427033715-7877545b1b37 // indirect
golang.org/x/term v0.11.0 // indirect
golang.org/x/text v0.12.0 // indirect

5
go.sum
View File

@ -8,8 +8,6 @@ github.com/bits-and-blooms/bitset v1.8.0 h1:FD+XqgOZDUxxZ8hzoBFuV9+cGWY9CslN6d5M
github.com/bits-and-blooms/bitset v1.8.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/bits-and-blooms/bloom/v3 v3.5.0 h1:AKDvi1V3xJCmSR6QhcBfHbCN4Vf8FfxeWkMNQfmAGhY=
github.com/bits-and-blooms/bloom/v3 v3.5.0/go.mod h1:Y8vrn7nk1tPIlmLtW2ZPV+W7StdVMor6bC1xgpjMZFs=
github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y=
github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs=
github.com/cilium/ebpf v0.12.3 h1:8ht6F9MquybnY97at+VDZb3eQQr8ev79RueWeVaEcG4=
github.com/cilium/ebpf v0.12.3/go.mod h1:TctK1ivibvI3znr66ljgi4hqOT8EYQjz1KWBfb1UVgM=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@ -38,7 +36,6 @@ github.com/eknkc/basex v1.0.1/go.mod h1:k/F/exNEHFdbs3ZHuasoP2E7zeWwZblG84Y7Z59v
github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA=
github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
@ -200,8 +197,6 @@ golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.1-0.20231108175955-e4099bfacb8c h1:3kC/TjQ+xzIblQv39bCOyRk8fbEeJcDHwbyxPUU2BpA=
golang.org/x/sys v0.14.1-0.20231108175955-e4099bfacb8c/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=