refactor: use cgroupv2 instead of ftrace to get better compatibility

This commit is contained in:
mzz2017 2023-02-02 21:22:18 +08:00
parent af88d51449
commit 6562866147
24 changed files with 686 additions and 643 deletions

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "component/control/kern/headers"]
path = component/control/kern/headers
url = https://github.com/v2rayA/dae_bpf_headers

View File

@ -22,7 +22,7 @@ else
VERSION ?= unstable-$(date).r$(count).$(commit)
endif
.PHONY: clean-ebpf ebpf dae
.PHONY: clean-ebpf bpf_objects ebpf dae
dae: ebpf
go build -o $(OUTPUT) -trimpath -ldflags "-s -w -X github.com/v2rayA/dae/cmd.Version=$(VERSION)" .
@ -31,13 +31,18 @@ clean-ebpf:
rm -f component/control/bpf_bpf*.go && \
rm -f component/control/bpf_bpf*.o
bpf_objects:
if [ ! -f component/control/bpf_objects_wan_lan.go ]; then \
go run github.com/v2rayA/dae/cmd/internal/generate_bpf_objects/dummy -o component/control/bpf_objects_wan_lan.go; \
fi
# $BPF_CLANG is used in go:generate invocations.
ebpf: export BPF_CLANG := $(CLANG)
ebpf: export BPF_STRIP := $(STRIP)
ebpf: export BPF_CFLAGS := $(CFLAGS)
ebpf: export BPF_TARGET := $(TARGET)
ebpf: clean-ebpf
ebpf: clean-ebpf bpf_objects
unset GOOS && \
unset GOARCH && \
unset GOARM && \
go generate ./component/control/...
go generate ./component/control/control.go

View File

@ -39,47 +39,14 @@ This feature requires the kernel version of machine on which dae install >= 5.2.
Note that if you bind dae to LAN only, dae only provide network service for traffic from LAN, and not impact local programs.
**Bind to WAN: >= 5.5**
**Bind to WAN: >= 5.7**
You need bind dae to WAN interface, if you want dae to provide network service for local programs.
This feature requires kernel version of the machine >= 5.5.
This feature requires kernel version of the machine >= 5.2.
Note that if you bind dae to WAN only, dae only provide network service for local programs and not impact traffic coming in from other interfaces.
### Kernel Configuration Item
Usually, mainstream desktop distributions have these items turned on. But on embedded Linux distributions like OpenWRT, Armbian, etc, in order to reduce kernel size, some items are turned off by default. You need to re-compile the kernel and turn them on.
Use following commands to check the kernel configuration items on your machine.
```shell
zcat /proc/config.gz || cat /boot/config || cat /boot/config-$(uname -r)
```
**Bind to LAN**
```
CONFIG_DEBUG_INFO_BTF
```
**Bind to WAN**:
```
CONFIG_DEBUG_INFO_BTF
CONFIG_FUNCTION_TRACER
CONFIG_FUNCTION_GRAPH_TRACER
CONFIG_STACK_TRACER
CONFIG_DYNAMIC_FTRACE
```
Check them using command like:
```shell
(zcat /proc/config.gz || cat /boot/config || cat /boot/config-$(uname -r)) | grep -E '(CONFIG_DEBUG_INFO_BTF|CONFIG_STACK_TRACER|CONFIG_FUNCTION_TRACER|CONFIG_FUNCTION_GRAPH_TRACER|CONFIG_STACK_TRACER|CONFIG_DYNAMIC_FTRACE)='
```
## TODO
1. Check dns upstream and source loop (whether upstream is also a client of us) and remind the user to add sip rule.

View File

@ -0,0 +1,37 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) since 2023, mzz2017 <mzz@tuta.io>
*/
package main
import (
"flag"
"fmt"
"os"
"path/filepath"
)
func main() {
var output string
flag.StringVar(&output, "o", "", "Place the output into <file>.")
flag.Parse()
if output == "" {
fmt.Println("Please provide flag \"-o <file>\"")
os.Exit(1)
}
output, err := filepath.Abs(output)
if err != nil {
fmt.Printf("Failed to get absolute path of \"%v\": %v", output, err)
os.Exit(1)
}
// Trick: write a dummy bpfObjectsLan{} and bpfObjectsWan{} before call control package.
if err := os.WriteFile(output, []byte(`package control
type bpfObjectsLan struct{}
type bpfObjectsWan struct{}`), 0644); err != nil {
fmt.Printf("Failed to write \"%v\": %v", output, err)
os.Exit(1)
}
fmt.Printf("Generated dummy %v\n", output)
}

View File

@ -0,0 +1,27 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) since 2023, mzz2017 <mzz@tuta.io>
*/
package main
import (
"flag"
"fmt"
"github.com/v2rayA/dae/component/control"
"os"
)
func main() {
var output string
flag.StringVar(&output, "o", "", "Place the output into <file>.")
flag.Parse()
if output == "" {
fmt.Println("Please provide flag \"-o <file>\"")
os.Exit(1)
}
fmt.Printf("Generating %v\n", output)
control.GenerateObjects(output)
fmt.Printf("Generated %v\n", output)
}

View File

@ -64,8 +64,6 @@ func Run() (err error) {
}
// New ControlPlane.
bindLan := len(param.Global.LanInterface) != 0
bindWan := len(param.Global.WanInterface) != 0
t, err := control.NewControlPlane(
log,
nodeList,
@ -74,25 +72,13 @@ func Run() (err error) {
param.Global.DnsUpstream,
param.Global.CheckUrl,
param.Global.CheckInterval,
bindLan,
bindWan,
param.Global.LanInterface,
param.Global.WanInterface,
)
if err != nil {
return err
}
// Bind to links.
for _, ifname := range param.Global.LanInterface {
if err = t.BindLan(ifname); err != nil {
return fmt.Errorf("BindLan: %v: %w", ifname, err)
}
}
for _, ifname := range param.Global.WanInterface {
if err = t.BindWan(ifname); err != nil {
return fmt.Errorf("BindWan: %v: %w", ifname, err)
}
}
// Serve tproxy TCP/UDP server util signals.
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGKILL, syscall.SIGILL)

View File

@ -77,7 +77,7 @@ func (i OutboundIndex) String() string {
}
const (
MaxRoutingLen = 96
MaxMatchSetLen = 32 * 3
)
type L4ProtoType uint8
@ -88,13 +88,14 @@ const (
L4ProtoType_TCP_UDP L4ProtoType = 3
)
type IpVersion uint8
type IpVersionType uint8
const (
IpVersion_4 IpVersion = 1
IpVersion_6 IpVersion = 2
IpVersion_X IpVersion = 3
IpVersion_4 IpVersionType = 1
IpVersion_6 IpVersionType = 2
IpVersion_X IpVersionType = 3
)
var BasicFeatureVersion = internal.Version{5, 2, 0}
var FtraceFeatureVersion = internal.Version{5, 5, 0}
var CgGetPidFeatureVersion = internal.Version{5, 7, 0}

1
component/control/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
bpf_objects_wan_lan.go

View File

@ -6,7 +6,9 @@
package control
import (
"bufio"
"encoding/binary"
"errors"
"fmt"
"github.com/cilium/ebpf"
"github.com/v2rayA/dae/common"
@ -98,43 +100,6 @@ func BatchUpdate(m *ebpf.Map, keys interface{}, values interface{}, opts *ebpf.B
}
}
type bpfObjectsLan struct {
// FIXME: Consider to update me if any program added.
//bpfPrograms
TproxyEgress *ebpf.Program `ebpf:"tproxy_egress"`
TproxyIngress *ebpf.Program `ebpf:"tproxy_ingress"`
bpfMaps
}
type bpfObjectsWan struct {
// FIXME: Consider to update me if any program added.
//bpfPrograms
Inet6Bind *ebpf.Program `ebpf:"inet6_bind"`
InetAutobind *ebpf.Program `ebpf:"inet_autobind"`
InetBind *ebpf.Program `ebpf:"inet_bind"`
InetRelease *ebpf.Program `ebpf:"inet_release"`
InetSendPrepare *ebpf.Program `ebpf:"inet_send_prepare"`
TcpConnect *ebpf.Program `ebpf:"tcp_connect"`
TproxyWanEgress *ebpf.Program `ebpf:"tproxy_wan_egress"`
TproxyWanIngress *ebpf.Program `ebpf:"tproxy_wan_ingress"`
bpfMaps
}
func IsNotSupportFtraceError(err error) bool {
// FATA[0001] loading objects: field Inet6Bind: program
// inet6_bind: load program: invalid argument
return strings.HasSuffix(err.Error(), os.ErrInvalid.Error()) && strings.Contains(err.Error(),
"field Inet6Bind: program") // FIXME: Consider to update me if any program added.
}
func IsNoBtfError(err error) bool {
// FATA[0001] loading objects: field Inet6Bind: program inet6_bind:
// apply CO-RE relocations: load kernel spec: no BTF found for kernel version 5.15.90: not supported
return strings.Contains(err.Error(), "no BTF found for kernel version")
}
func AssignBpfObjects(to *bpfObjects, from interface{}) {
vTo := reflect.Indirect(reflect.ValueOf(to))
vFrom := reflect.Indirect(reflect.ValueOf(from))
@ -160,3 +125,25 @@ func AssignBpfObjects(to *bpfObjects, from interface{}) {
fieldTo.Set(fieldFrom)
}
}
// detectCgroupPath returns the first-found mount point of type cgroup2
// and stores it in the cgroupPath global variable.
// Copied from https://github.com/cilium/ebpf/blob/v0.10.0/examples/cgroup_skb/main.go
func detectCgroupPath() (string, error) {
f, err := os.Open("/proc/mounts")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
// example fields: cgroup2 /sys/fs/cgroup/unified cgroup2 rw,nosuid,nodev,noexec,relatime 0 0
fields := strings.Split(scanner.Text(), " ")
if len(fields) >= 3 && fields[2] == "cgroup2" {
return fields[1], nil
}
}
return "", errors.New("cgroup2 not mounted")
}

View File

@ -5,5 +5,10 @@
package control
// $BPF_CLANG and $BPF_CFLAGS are set by the Makefile.
// $BPF_CLANG, $BPF_STRIP, $BPF_CFLAGS, $BPF_TARGET are set by the Makefile.
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -strip $BPF_STRIP -cflags $BPF_CFLAGS -target $BPF_TARGET bpf kern/tproxy.c -- -I./headers
// Separate bpfObjectsLan and bpfObjectsWan from bpfObjects.
//go:generate go clean -cache
//go:generate go run github.com/v2rayA/dae/cmd/internal/generate_bpf_objects -o bpf_objects_wan_lan.go
//go:generate go fmt bpf_objects_wan_lan.go

View File

@ -10,7 +10,6 @@ import (
"errors"
"fmt"
"github.com/cilium/ebpf"
ciliumLink "github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/rlimit"
"github.com/mzz2017/softwind/pool"
"github.com/sirupsen/logrus"
@ -22,7 +21,6 @@ import (
"github.com/v2rayA/dae/config"
"github.com/v2rayA/dae/pkg/config_parser"
internal "github.com/v2rayA/dae/pkg/ebpf_internal"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
"net"
"net/netip"
@ -36,14 +34,12 @@ import (
)
type ControlPlane struct {
log *logrus.Logger
kernelVersion *internal.Version
*ControlPlaneCore
deferFuncs []func() error
// TODO: add mutex?
outbounds []*outbound.DialerGroup
outboundName2Id map[string]uint8
bpf *bpfObjects
SimulatedLpmTries [][]netip.Prefix
SimulatedDomainSet []DomainSet
@ -53,8 +49,6 @@ type ControlPlane struct {
mutex sync.Mutex
dnsCache map[string]*dnsCache
dnsUpstream netip.AddrPort
deferFuncs []func() error
}
func NewControlPlane(
@ -65,8 +59,8 @@ func NewControlPlane(
dnsUpstream string,
checkUrl string,
checkInterval time.Duration,
bindLan bool,
bindWan bool,
lanInterface []string,
wanInterface []string,
) (c *ControlPlane, err error) {
kernelVersion, e := internal.KernelVersion()
if e != nil {
@ -75,11 +69,6 @@ func NewControlPlane(
if kernelVersion.Less(consts.BasicFeatureVersion) {
return nil, fmt.Errorf("your kernel version %v does not satisfy basic requirement; expect >=%v", c.kernelVersion.String(), consts.BasicFeatureVersion.String())
}
if bindWan && kernelVersion.Less(consts.FtraceFeatureVersion) {
// Not support ftrace (fentry/fexit).
// PID bypass needs it.
return nil, fmt.Errorf("your kernel version %v does not support bind to WAN; expect >=%v; remove wan_interface in config file and try again", c.kernelVersion.String(), consts.FtraceFeatureVersion.String())
}
// Allow the current process to lock memory for eBPF resources.
if err = rlimit.RemoveMemlock(); err != nil {
@ -89,18 +78,22 @@ func NewControlPlane(
os.MkdirAll(pinPath, 0755)
// Load pre-compiled programs and maps into the kernel.
log.Infof("Loading eBPF programs and maps into the kernel")
var bpf bpfObjects
var ProgramOptions ebpf.ProgramOptions
if log.IsLevelEnabled(logrus.TraceLevel) {
ProgramOptions = ebpf.ProgramOptions{
LogLevel: ebpf.LogLevelStats,
LogLevel: ebpf.LogLevelInstruction | ebpf.LogLevelStats,
}
}
var obj interface{} = &bpf // Bind both LAN and WAN.
if bindLan && !bindWan {
// Trick. Replace the beams with rotten timbers.
// Trick. Replace the beams with rotten timbers to reduce the loading.
var obj interface{} = &bpf // Bind to both LAN and WAN.
if len(lanInterface) > 0 && len(wanInterface) == 0 {
// Only bind LAN.
obj = &bpfObjectsLan{}
} else if !bindLan && bindWan {
} else if len(wanInterface) == 0 && len(wanInterface) > 0 {
// Only bind to WAN.
// Trick. Replace the beams with rotten timbers.
obj = &bpfObjectsWan{}
}
@ -126,20 +119,14 @@ retryLoadBpf:
// Get detailed log from ebpf.internal.(*VerifierError)
if log.IsLevelEnabled(logrus.TraceLevel) {
if v := reflect.Indirect(reflect.ValueOf(errors.Unwrap(errors.Unwrap(err)))); v.Kind() == reflect.Struct {
if log := v.FieldByName("Log"); log.IsValid() {
if strSlice, ok := log.Interface().([]string); ok {
err = fmt.Errorf("%v", strings.Join(strSlice, "\n"))
if _log := v.FieldByName("Log"); _log.IsValid() {
if strSlice, ok := _log.Interface().([]string); ok {
log.Traceln(strings.Join(strSlice, "\n"))
}
}
}
}
err := fmt.Errorf("loading objects: %w", err)
if IsNotSupportFtraceError(err) {
err = fmt.Errorf("%w: Maybe your kernel has no ftrace support. Make sure the kernel config items are on: CONFIG_FUNCTION_TRACER, CONFIG_FUNCTION_GRAPH_TRACER, CONFIG_STACK_TRACER, CONFIG_DYNAMIC_FTRACE", err)
} else if IsNoBtfError(err) {
err = fmt.Errorf("%w: Make sure the kernel config item is on: CONFIG_DEBUG_INFO_BTF", err)
}
return nil, err
return nil, fmt.Errorf("loading objects: %w", err)
}
if _, ok := obj.(*bpfObjects); !ok {
// Reverse takeover.
@ -174,6 +161,25 @@ retryLoadBpf:
return nil, err
}
core := &ControlPlaneCore{
log: log,
deferFuncs: []func() error{bpf.Close},
bpf: &bpf,
kernelVersion: &kernelVersion,
}
// Bind to links. Binding should be advance of dialerGroups to avoid un-routable old connection.
for _, ifname := range lanInterface {
if err = core.BindLan(ifname); err != nil {
return nil, fmt.Errorf("BindLan: %v: %w", ifname, err)
}
}
for _, ifname := range wanInterface {
if err = core.BindWan(ifname); err != nil {
return nil, fmt.Errorf("BindWan: %v: %w", ifname, err)
}
}
// DialerGroups (outbounds).
option := &dialer.GlobalOption{
Log: log,
@ -270,282 +276,19 @@ retryLoadBpf:
}
return &ControlPlane{
log: log,
kernelVersion: &kernelVersion,
ControlPlaneCore: core,
deferFuncs: nil,
outbounds: outbounds,
outboundName2Id: outboundName2Id,
bpf: &bpf,
SimulatedLpmTries: builder.SimulatedLpmTries,
SimulatedDomainSet: builder.SimulatedDomainSet,
Final: routingA.Final,
mutex: sync.Mutex{},
dnsCache: make(map[string]*dnsCache),
dnsUpstream: dnsAddrPort,
deferFuncs: []func() error{bpf.Close},
}, nil
}
func (c *ControlPlane) BindLan(ifname string) error {
link, err := netlink.LinkByName(ifname)
if err != nil {
return err
}
// Insert an elem into IfindexIpsMap.
// TODO: We should monitor IP change of the link.
ipnets, err := netlink.AddrList(link, netlink.FAMILY_ALL)
if err != nil {
return err
}
// TODO: If we monitor IP change of the link, we should remove code below.
if len(ipnets) == 0 {
return fmt.Errorf("interface %v has no ip", ifname)
}
var linkIp bpfIfIp
for _, ipnet := range ipnets {
ip, ok := netip.AddrFromSlice(ipnet.IP)
if !ok {
continue
}
if ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
continue
}
if (ip.Is6() && linkIp.HasIp6) ||
(ip.Is4() && linkIp.HasIp4) {
continue
}
ip6format := ip.As16()
if ip.Is4() {
linkIp.HasIp4 = true
linkIp.Ip4 = common.Ipv6ByteSliceToUint32Array(ip6format[:])
} else {
linkIp.HasIp6 = true
linkIp.Ip6 = common.Ipv6ByteSliceToUint32Array(ip6format[:])
}
if linkIp.HasIp4 && linkIp.HasIp6 {
break
}
}
if err := c.bpf.IfindexTproxyIpMap.Update(uint32(link.Attrs().Index), linkIp, ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
// FIXME: not only this link ip.
if linkIp.HasIp4 {
if err := c.bpf.HostIpLpm.Update(_bpfLpmKey{
PrefixLen: 128,
Data: linkIp.Ip4,
}, uint32(1), ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
}
if linkIp.HasIp6 {
if err := c.bpf.HostIpLpm.Update(_bpfLpmKey{
PrefixLen: 128,
Data: linkIp.Ip6,
}, uint32(1), ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
}
// Insert qdisc and filters.
qdisc := &netlink.GenericQdisc{
QdiscAttrs: netlink.QdiscAttrs{
LinkIndex: link.Attrs().Index,
Handle: netlink.MakeHandle(0xffff, 0),
Parent: netlink.HANDLE_CLSACT,
},
QdiscType: "clsact",
}
if err := netlink.QdiscAdd(qdisc); err != nil {
if os.IsExist(err) {
_ = netlink.QdiscDel(qdisc)
err = netlink.QdiscAdd(qdisc)
}
if err != nil {
return fmt.Errorf("cannot add clsact qdisc: %w", err)
}
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := netlink.QdiscDel(qdisc); err != nil {
return fmt.Errorf("QdiscDel: %w", err)
}
return nil
})
filterIngress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: netlink.MakeHandle(0, 1),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyIngress.FD(),
Name: consts.AppName + "_ingress",
DirectAction: true,
}
if err := netlink.FilterAdd(filterIngress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
}
filterEgress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_EGRESS,
Handle: netlink.MakeHandle(0, 1),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyEgress.FD(),
Name: consts.AppName + "_egress",
DirectAction: true,
}
if err := netlink.FilterAdd(filterEgress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
}
return nil
}
func (c *ControlPlane) BindWan(ifname string) error {
link, err := netlink.LinkByName(ifname)
if err != nil {
return err
}
// Set-up SrcPidMapper.
// Attach programs to support pname routing.
// ipv4 tcp/udp: send
inetSendPrepare, err := ciliumLink.AttachTracing(ciliumLink.TracingOptions{
Program: c.bpf.InetSendPrepare,
})
if err != nil {
return fmt.Errorf("AttachTracing InetSendPrepare: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := inetSendPrepare.Close(); err != nil {
return fmt.Errorf("inetSendPrepare.Close(): %w", err)
}
return nil
})
// ipv4 tcp/udp: listen
inetBind, err := ciliumLink.AttachTracing(ciliumLink.TracingOptions{
Program: c.bpf.InetBind,
})
if err != nil {
return fmt.Errorf("AttachTracing InetBind: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := inetBind.Close(); err != nil {
return fmt.Errorf("inetBind.Close(): %w", err)
}
return nil
})
// ipv4 udp: sendto/sendmsg
inetAutoBind, err := ciliumLink.AttachTracing(ciliumLink.TracingOptions{
Program: c.bpf.InetAutobind,
})
if err != nil {
return fmt.Errorf("AttachTracing InetAutobind: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := inetAutoBind.Close(); err != nil {
return fmt.Errorf("inetAutoBind.Close(): %w", err)
}
return nil
})
// ipv4 tcp: connect
tcpConnect, err := ciliumLink.AttachTracing(ciliumLink.TracingOptions{
Program: c.bpf.TcpConnect,
})
if err != nil {
return fmt.Errorf("AttachTracing TcpConnect: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := tcpConnect.Close(); err != nil {
return fmt.Errorf("inetStreamConnect.Close(): %w", err)
}
return nil
})
// ipv6 tcp/udp: listen
inet6Bind, err := ciliumLink.AttachTracing(ciliumLink.TracingOptions{
Program: c.bpf.Inet6Bind,
})
if err != nil {
return fmt.Errorf("AttachTracing Inet6Bind: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := inet6Bind.Close(); err != nil {
return fmt.Errorf("inet6Bind.Close(): %w", err)
}
return nil
})
// Insert qdisc and tc filters.
qdisc := &netlink.GenericQdisc{
QdiscAttrs: netlink.QdiscAttrs{
LinkIndex: link.Attrs().Index,
Handle: netlink.MakeHandle(0xffff, 0),
Parent: netlink.HANDLE_CLSACT,
},
QdiscType: "clsact",
}
if err := netlink.QdiscAdd(qdisc); err != nil {
if os.IsExist(err) {
_ = netlink.QdiscDel(qdisc)
err = netlink.QdiscAdd(qdisc)
}
if err != nil {
return fmt.Errorf("cannot add clsact qdisc: %w", err)
}
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := netlink.QdiscDel(qdisc); err != nil {
return fmt.Errorf("QdiscDel: %w", err)
}
return nil
})
filterEgress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_EGRESS,
Handle: netlink.MakeHandle(0, 1),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyWanEgress.FD(),
Name: consts.AppName + "_egress",
DirectAction: true,
}
if err := netlink.FilterAdd(filterEgress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter egress: %w", err)
}
filterIngress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: netlink.MakeHandle(0, 1),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyWanIngress.FD(),
Name: consts.AppName + "_ingress",
DirectAction: true,
}
if err := netlink.FilterAdd(filterIngress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
}
return nil
}
func (c *ControlPlane) ListenAndServe(port uint16) (err error) {
// Listen.
listener, err := net.Listen("tcp", "0.0.0.0:"+strconv.Itoa(int(port)))
@ -632,5 +375,5 @@ func (c *ControlPlane) Close() (err error) {
}
}
}
return err
return c.ControlPlaneCore.Close()
}

View File

@ -0,0 +1,275 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) since 2023, mzz2017 <mzz@tuta.io>
*/
package control
import (
"fmt"
"github.com/cilium/ebpf"
ciliumLink "github.com/cilium/ebpf/link"
"github.com/sirupsen/logrus"
"github.com/v2rayA/dae/common"
"github.com/v2rayA/dae/common/consts"
internal "github.com/v2rayA/dae/pkg/ebpf_internal"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
"net/netip"
"os"
)
type ControlPlaneCore struct {
log *logrus.Logger
deferFuncs []func() error
bpf *bpfObjects
kernelVersion *internal.Version
}
func (c *ControlPlaneCore) Close() (err error) {
// Invoke defer funcs in reverse order.
for i := len(c.deferFuncs) - 1; i >= 0; i-- {
if e := c.deferFuncs[i](); e != nil {
// Combine errors.
if err != nil {
err = fmt.Errorf("%w; %v", err, e)
} else {
err = e
}
}
}
return err
}
func (c *ControlPlaneCore) BindLan(ifname string) error {
c.log.Infof("Bind to LAN: %v", ifname)
link, err := netlink.LinkByName(ifname)
if err != nil {
return err
}
// Insert an elem into IfindexIpsMap.
// TODO: We should monitor IP change of the link.
ipnets, err := netlink.AddrList(link, netlink.FAMILY_ALL)
if err != nil {
return err
}
// TODO: If we monitor IP change of the link, we should remove code below.
if len(ipnets) == 0 {
return fmt.Errorf("interface %v has no ip", ifname)
}
var linkIp bpfIfIp
for _, ipnet := range ipnets {
ip, ok := netip.AddrFromSlice(ipnet.IP)
if !ok {
continue
}
if ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
continue
}
if (ip.Is6() && linkIp.HasIp6) ||
(ip.Is4() && linkIp.HasIp4) {
continue
}
ip6format := ip.As16()
if ip.Is4() {
linkIp.HasIp4 = true
linkIp.Ip4 = common.Ipv6ByteSliceToUint32Array(ip6format[:])
} else {
linkIp.HasIp6 = true
linkIp.Ip6 = common.Ipv6ByteSliceToUint32Array(ip6format[:])
}
if linkIp.HasIp4 && linkIp.HasIp6 {
break
}
}
if err := c.bpf.IfindexTproxyIpMap.Update(uint32(link.Attrs().Index), linkIp, ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
// FIXME: not only this link ip.
if linkIp.HasIp4 {
if err := c.bpf.HostIpLpm.Update(_bpfLpmKey{
PrefixLen: 128,
Data: linkIp.Ip4,
}, uint32(1), ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
}
if linkIp.HasIp6 {
if err := c.bpf.HostIpLpm.Update(_bpfLpmKey{
PrefixLen: 128,
Data: linkIp.Ip6,
}, uint32(1), ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
}
// Insert qdisc and filters.
qdisc := &netlink.GenericQdisc{
QdiscAttrs: netlink.QdiscAttrs{
LinkIndex: link.Attrs().Index,
Handle: netlink.MakeHandle(0xffff, 0),
Parent: netlink.HANDLE_CLSACT,
},
QdiscType: "clsact",
}
if err := netlink.QdiscAdd(qdisc); err != nil {
if os.IsExist(err) {
_ = netlink.QdiscDel(qdisc)
err = netlink.QdiscAdd(qdisc)
}
if err != nil {
return fmt.Errorf("cannot add clsact qdisc: %w", err)
}
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := netlink.QdiscDel(qdisc); err != nil {
return fmt.Errorf("QdiscDel: %w", err)
}
return nil
})
filterIngress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: netlink.MakeHandle(0, 1),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyLanIngress.FD(),
Name: consts.AppName + "_ingress",
DirectAction: true,
}
if err := netlink.FilterAdd(filterIngress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
}
filterEgress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_EGRESS,
Handle: netlink.MakeHandle(0, 1),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyLanEgress.FD(),
Name: consts.AppName + "_egress",
DirectAction: true,
}
if err := netlink.FilterAdd(filterEgress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
}
return nil
}
func (c *ControlPlaneCore) BindWan(ifname string) error {
if c.kernelVersion.Less(consts.BasicFeatureVersion) {
return fmt.Errorf("your kernel version %v does not support bind to WAN; expect >=%v; remove wan_interface in config file and try again", c.kernelVersion.String(), consts.CgGetPidFeatureVersion.String())
}
c.log.Infof("Bind to WAN: %v", ifname)
link, err := netlink.LinkByName(ifname)
if err != nil {
return err
}
/// Set-up SrcPidMapper.
/// Attach programs to support pname routing.
// Get the first-mounted cgroupv2 path.
cgroupPath, err := detectCgroupPath()
if err != nil {
return err
}
// Bind cg programs
type cgProg struct {
Name string
Prog *ebpf.Program
Attach ebpf.AttachType
}
cgProgs := []cgProg{
{Prog: c.bpf.TproxyWanCgSockCreate, Attach: ebpf.AttachCGroupInetSockCreate},
{Prog: c.bpf.TproxyWanCgSockRelease, Attach: ebpf.AttachCgroupInetSockRelease},
//{Prog: c.bpf.TproxyWanCgConnect4, Attach: ebpf.AttachCGroupInet4Connect},
//{Prog: c.bpf.TproxyWanCgConnect6, Attach: ebpf.AttachCGroupInet6Connect},
//{Prog: c.bpf.TproxyWanCgSendmsg4, Attach: ebpf.AttachCGroupUDP4Sendmsg},
//{Prog: c.bpf.TproxyWanCgSendmsg6, Attach: ebpf.AttachCGroupUDP6Sendmsg},
}
for _, prog := range cgProgs {
attached, err := ciliumLink.AttachCgroup(ciliumLink.CgroupOptions{
Path: cgroupPath,
Attach: prog.Attach,
Program: prog.Prog,
})
if err != nil {
return fmt.Errorf("AttachTracing: %v: %w", prog.Prog.String(), err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := attached.Close(); err != nil {
return fmt.Errorf("inet6Bind.Close(): %w", err)
}
return nil
})
}
/// Set-up WAN ingress/egress TC programs.
// Insert qdisc.
qdisc := &netlink.GenericQdisc{
QdiscAttrs: netlink.QdiscAttrs{
LinkIndex: link.Attrs().Index,
Handle: netlink.MakeHandle(0xffff, 0),
Parent: netlink.HANDLE_CLSACT,
},
QdiscType: "clsact",
}
if err := netlink.QdiscAdd(qdisc); err != nil {
if os.IsExist(err) {
_ = netlink.QdiscDel(qdisc)
err = netlink.QdiscAdd(qdisc)
}
if err != nil {
return fmt.Errorf("cannot add clsact qdisc: %w", err)
}
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := netlink.QdiscDel(qdisc); err != nil {
return fmt.Errorf("QdiscDel: %w", err)
}
return nil
})
// Insert TC filters
filterEgress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_EGRESS,
Handle: netlink.MakeHandle(0, 1),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyWanEgress.FD(),
Name: consts.AppName + "_egress",
DirectAction: true,
}
if err := netlink.FilterAdd(filterEgress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter egress: %w", err)
}
filterIngress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_INGRESS,
Handle: netlink.MakeHandle(0, 1),
Protocol: unix.ETH_P_ALL,
Priority: 0,
},
Fd: c.bpf.bpfPrograms.TproxyWanIngress.FD(),
Name: consts.AppName + "_ingress",
DirectAction: true,
}
if err := netlink.FilterAdd(filterIngress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
}
return nil
}

View File

@ -17,7 +17,7 @@ import (
)
type dnsCache struct {
DomainBitmap [consts.MaxRoutingLen / 32]uint32
DomainBitmap [consts.MaxMatchSetLen / 32]uint32
Answers []dnsmessage.Resource
Deadline time.Time
}

View File

@ -11,7 +11,7 @@ import (
"strings"
)
func (c *ControlPlane) MatchDomainBitmap(domain string) (bitmap [consts.MaxRoutingLen / 32]uint32) {
func (c *ControlPlane) MatchDomainBitmap(domain string) (bitmap [consts.MaxMatchSetLen / 32]uint32) {
// TODO: high performance implementation.
for _, s := range c.SimulatedDomainSet {
for _, d := range s.Domains {

@ -1 +0,0 @@
Subproject commit 372c3cc61d2d907b89ebdfb7bec180a09cd28169

View File

@ -3,17 +3,23 @@
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) since 2022, v2rayA Organization <team@v2raya.org>
*/
#include "headers/if_ether_defs.h"
#include "headers/pkt_cls_defs.h"
#include "headers/socket_defs.h"
#include "headers/vmlinux.h"
#include <asm-generic/errno-base.h>
#include <errno.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <stdbool.h>
#include <sys/socket.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
// #define __DEBUG_ROUTING
// #define __PRINT_ROUTING_RESULT
@ -58,11 +64,6 @@
#define OUTBOUND_LOGICAL_AND 0xFF
#define OUTBOUND_LOGICAL_MASK 0xFE
/* Current network namespace */
enum {
BPF_F_CURRENT_NETNS = (-1L),
};
enum {
DisableL4ChecksumPolicy_EnableL4Checksum,
DisableL4ChecksumPolicy_Restore,
@ -276,12 +277,12 @@ struct pid_pname {
struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, struct ip_port_proto);
__type(key, __u64);
__type(value, struct pid_pname);
__uint(max_entries, MAX_SRC_PID_PNAME_MAPPING_NUM);
/// NOTICE: No persistence.
// __uint(pinning, LIBBPF_PIN_BY_NAME);
} src_pid_map SEC(".maps");
__uint(pinning, LIBBPF_PIN_BY_NAME);
} cookie_pid_map SEC(".maps");
// Functions:
@ -872,28 +873,26 @@ routing(const __u32 flag[6], const void *l4_hdr, const __be32 saddr[4],
} else {
__builtin_memcpy(daddr, _daddr, IPV6_BYTE_LENGTH);
}
struct lpm_key lpm_key_saddr, lpm_key_daddr, lpm_key_mac, *lpm_key;
lpm_key_saddr.trie_key.prefixlen = IPV6_BYTE_LENGTH * 8;
lpm_key_daddr.trie_key.prefixlen = IPV6_BYTE_LENGTH * 8;
lpm_key_mac.trie_key.prefixlen = IPV6_BYTE_LENGTH * 8;
__builtin_memcpy(lpm_key_saddr.data, saddr, IPV6_BYTE_LENGTH);
__builtin_memcpy(lpm_key_daddr.data, daddr, IPV6_BYTE_LENGTH);
__builtin_memcpy(lpm_key_mac.data, mac, IPV6_BYTE_LENGTH);
struct lpm_key lpm_key_instance, *lpm_key;
lpm_key_instance.trie_key.prefixlen = IPV6_BYTE_LENGTH * 8;
__builtin_memcpy(lpm_key_instance.data, daddr, IPV6_BYTE_LENGTH);
// bpf_printk("mac: %pI6", mac);
key = MatchType_IpSet;
if ((ret =
bpf_map_update_elem(&lpm_key_map, &key, &lpm_key_daddr, BPF_ANY))) {
if ((ret = bpf_map_update_elem(&lpm_key_map, &key, &lpm_key_instance,
BPF_ANY))) {
return ret;
};
__builtin_memcpy(lpm_key_instance.data, saddr, IPV6_BYTE_LENGTH);
key = MatchType_SourceIpSet;
if ((ret =
bpf_map_update_elem(&lpm_key_map, &key, &lpm_key_saddr, BPF_ANY))) {
if ((ret = bpf_map_update_elem(&lpm_key_map, &key, &lpm_key_instance,
BPF_ANY))) {
return ret;
};
if (!_is_wan) {
__builtin_memcpy(lpm_key_instance.data, mac, IPV6_BYTE_LENGTH);
key = MatchType_Mac;
if ((ret =
bpf_map_update_elem(&lpm_key_map, &key, &lpm_key_mac, BPF_ANY))) {
if ((ret = bpf_map_update_elem(&lpm_key_map, &key, &lpm_key_instance,
BPF_ANY))) {
return ret;
};
}
@ -1052,7 +1051,7 @@ routing(const __u32 flag[6], const void *l4_hdr, const __be32 saddr[4],
// Do DNAT.
SEC("tc/ingress")
int tproxy_ingress(struct __sk_buff *skb) {
int tproxy_lan_ingress(struct __sk_buff *skb) {
struct ethhdr ethh;
struct iphdr iph;
struct ipv6hdr ipv6h;
@ -1277,6 +1276,57 @@ int tproxy_ingress(struct __sk_buff *skb) {
return TC_ACT_OK;
}
// Cookie will change after the first packet, so we just use it for
// handshake.
static __always_inline bool pid_is_control_plane(struct __sk_buff *skb,
struct pid_pname **p) {
struct pid_pname *pid_pname;
__u64 cookie = bpf_get_socket_cookie(skb);
pid_pname = bpf_map_lookup_elem(&cookie_pid_map, &cookie);
if (pid_pname) {
if (p) {
// Assign.
*p = pid_pname;
}
// Get tproxy pid and compare if they are equal.
__u32 *pid_tproxy;
if (!(pid_tproxy =
bpf_map_lookup_elem(&param_map, &control_plane_pid_key))) {
bpf_printk("control_plane_pid is not set.");
return false;
}
return pid_pname->pid == *pid_tproxy;
} else {
if (p) {
*p = NULL;
}
if ((skb->mark & 0x80) == 0x80) {
bpf_printk("No pid_pname found. But it should not happen");
/*
if (l4proto == IPPROTO_TCP) {
if (tcph.syn && !tcph.ack) {
bpf_printk("No pid_pname found. But it should not happen: local:%u "
"(%u)[%llu]",
bpf_ntohs(sport), l4proto, cookie);
} else {
bpf_printk("No pid_pname found. But it should not happen: (Old "
"Connection): local:%u "
"(%u)[%llu]",
bpf_ntohs(sport), l4proto, cookie);
}
} else {
bpf_printk("No pid_pname found. But it should not happen: local:%u "
"(%u)[%llu]",
bpf_ntohs(sport), l4proto, cookie);
}
*/
return true;
}
return false;
}
}
/**
FIXME: We can do packet modification as early as possible (for example, at
lwt point) to avoid weird checksum offload problems by docker, etc. They do
@ -1289,7 +1339,7 @@ int tproxy_ingress(struct __sk_buff *skb) {
*/
// Do SNAT.
SEC("tc/egress")
int tproxy_egress(struct __sk_buff *skb) {
int tproxy_lan_egress(struct __sk_buff *skb) {
struct ethhdr ethh;
struct iphdr iph;
struct ipv6hdr ipv6h;
@ -1350,6 +1400,7 @@ int tproxy_egress(struct __sk_buff *skb) {
// Lookup original dest.
struct ip_port key_dst;
__builtin_memset(&key_dst, 0, sizeof(key_dst));
__builtin_memcpy(key_dst.ip, daddr, IPV6_BYTE_LENGTH);
key_dst.port = tcph.dest;
struct ip_port_outbound *original_dst =
@ -1390,8 +1441,10 @@ int tproxy_egress(struct __sk_buff *skb) {
// Get source ip/port from our packet header.
// Decap header to get fullcone tuple.
decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src,
sizeof(ori_src));
if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src,
sizeof(ori_src)))) {
return TC_ACT_SHOT;
}
// Rewrite udp src ip
if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, src_ip, ori_src.ip,
@ -1435,39 +1488,6 @@ int tproxy_egress(struct __sk_buff *skb) {
return TC_ACT_OK;
}
// This function will modify the content of src_key.
static __always_inline struct pid_pname *
lookup_src_pid_map(__u8 ipversion, struct ip_port_proto *src_key) {
// Lookup twice or third. First for unspecific address, second for interface
// address.
// Lookup pid in src_pid_map.
struct pid_pname *pid_pname;
if ((pid_pname = bpf_map_lookup_elem(&src_pid_map, src_key))) {
return pid_pname;
}
// Second look-up.
// Set to unspecific address.
if (ipversion == 6) {
__builtin_memset(src_key, 0, sizeof(struct ip_port_proto));
} else {
src_key->ip[3] = 0;
}
if ((pid_pname = bpf_map_lookup_elem(&src_pid_map, src_key))) {
return pid_pname;
}
if (ipversion == 6) {
return NULL;
}
// Third look-up for IPv4 packet.
// Lookup IPv6 unspecific address.
// https://github.com/torvalds/linux/blob/62fb9874f5da54fdb243003b386128037319b219/net/ipv4/af_inet.c#L475
src_key->ip[2] = 0;
return bpf_map_lookup_elem(&src_pid_map, src_key);
}
__u8 special_mac_to_tproxy[6] = {2, 0, 2, 3, 0, 0};
__u8 special_mac_from_tproxy[6] = {2, 0, 2, 3, 0, 1};
@ -1563,40 +1583,11 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
} else {
// Normal packets.
// Prepare key.
struct ip_port_proto src_key;
src_key.proto = l4proto;
__builtin_memcpy(src_key.ip, saddr, IPV6_BYTE_LENGTH);
src_key.port = sport;
struct pid_pname *pid_pname = lookup_src_pid_map(ipversion, &src_key);
if (pid_pname) {
// Get tproxy pid and compare if they are equal.
__u32 *pid_tproxy;
if (!(pid_tproxy =
bpf_map_lookup_elem(&param_map, &control_plane_pid_key))) {
bpf_printk("control_plane_pid is not set.");
return TC_ACT_SHOT;
}
if (pid_pname->pid == *pid_tproxy) {
// Control plane to direct.
// bpf_printk("Control plane to direct.");
return TC_ACT_OK;
}
} else {
if ((skb->mark & 0x80) == 0x80) {
bpf_printk("No pid_pname found. But it should not happen: %pI6:%u (%u)",
saddr, bpf_ntohs(sport), l4proto);
return TC_ACT_OK;
}
}
// Not from tproxy; from other processes.
if (l4proto == IPPROTO_TCP) {
// Backup for further use.
tcp_state_syn = tcph.syn && !tcph.ack;
struct ip_port key_src;
__builtin_memset(&key_src, 0, sizeof(key_src));
// Use daddr as key in WAN because tproxy (control plane) also lookups the
// map element using income client ip (that is daddr).
__builtin_memcpy(key_src.ip, daddr, IPV6_BYTE_LENGTH);
@ -1611,6 +1602,11 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
} else {
flag[1] = IpVersionType_4;
}
struct pid_pname *pid_pname;
if (pid_is_control_plane(skb, &pid_pname)) {
// From control plane. Direct.
return TC_ACT_OK;
}
if (pid_pname) {
__builtin_memcpy(&flag[2], pid_pname->pname, TASK_COMM_LEN);
}
@ -1654,6 +1650,7 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
if (unlikely(tcp_state_syn)) {
struct ip_port_outbound value_dst;
__builtin_memset(&value_dst, 0, sizeof(value_dst));
__builtin_memcpy(value_dst.ip, daddr, IPV6_BYTE_LENGTH);
value_dst.port = tcph.dest;
value_dst.outbound = outbound;
@ -1683,6 +1680,7 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
} else if (l4proto == IPPROTO_UDP) {
// Backup for further use.
struct ip_port_outbound new_hdr;
__builtin_memset(&new_hdr, 0, sizeof(new_hdr));
__builtin_memcpy(new_hdr.ip, daddr, IPV6_BYTE_LENGTH);
new_hdr.port = udph.dest;
@ -1693,6 +1691,11 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
} else {
flag[1] = IpVersionType_4;
}
struct pid_pname *pid_pname;
if (pid_is_control_plane(skb, &pid_pname)) {
// From control plane. Direct.
return TC_ACT_OK;
}
if (pid_pname) {
__builtin_memcpy(&flag[2], pid_pname->pname, TASK_COMM_LEN);
}
@ -1824,9 +1827,10 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
if (l4proto == IPPROTO_TCP) {
// Lookup original dest as sip and sport.
struct ip_port key_dst;
__builtin_memset(&key_dst, 0, sizeof(key_dst));
// Use daddr as key in WAN because tproxy (control plane) also lookups the
// map element using income client ip (that is daddr).
__builtin_memcpy(key_dst.ip, daddr, sizeof(key_dst.ip));
__builtin_memcpy(key_dst.ip, daddr, IPV6_BYTE_LENGTH);
key_dst.port = tcph.dest;
struct ip_port_outbound *original_dst =
bpf_map_lookup_elem(&tcp_dst_map, &key_dst);
@ -1858,8 +1862,10 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
// Get source ip/port from our packet header.
// Decap header to get fullcone tuple.
decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src,
sizeof(ori_src));
if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len,
&ori_src, sizeof(ori_src)))) {
return TC_ACT_SHOT;
}
// Rewrite udp src ip
if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, saddr, ori_src.ip,
@ -1894,9 +1900,9 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
// Should send the packet to tproxy.
// Get tproxy ip and port.
__be32 tproxy_ip[4];
// saddr should be tproxy ip.
__builtin_memcpy(tproxy_ip, saddr, sizeof(tproxy_ip));
__be32 *tproxy_ip = saddr;
// __builtin_memcpy(tproxy_ip, saddr, sizeof(tproxy_ip));
__be16 *tproxy_port = bpf_map_lookup_elem(&param_map, &tproxy_port_key);
if (!tproxy_port) {
return TC_ACT_OK;
@ -1930,59 +1936,13 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
return TC_ACT_OK;
}
static int __always_inline build_key_by_sk(struct sock *sk,
struct ip_port_proto *src_key) {
// Build key.
__builtin_memset(src_key, 0, sizeof(struct ip_port_proto));
__u16 sk_type = BPF_CORE_READ(sk, sk_type);
if (sk_type == SOCK_STREAM) {
src_key->proto = IPPROTO_TCP;
// bpf_printk("TCP bind");
} else if (sk_type == SOCK_DGRAM) {
src_key->proto = IPPROTO_UDP;
// bpf_printk("UDP bind");
} else if (sk_type == SOCK_RAW) {
__u16 sk_proto = BPF_CORE_READ(sk, sk_protocol);
if (sk_proto == IPPROTO_TCP) {
src_key->proto = IPPROTO_TCP;
// bpf_printk("RAW TCP bind");
} else if (sk_proto == IPPROTO_TCP) {
src_key->proto = IPPROTO_UDP;
// bpf_printk("RAW UDP bind");
} else {
return -ERANGE;
}
} else {
return -ERANGE;
static int __always_inline update_map_elem_by_cookie(const __u64 cookie) {
if (!cookie) {
bpf_printk("zero cookie");
return -EINVAL;
}
struct inet_sock *inet = (struct inet_sock *)sk;
unsigned short family = BPF_CORE_READ(sk, __sk_common.skc_family);
if (family == AF_INET) {
src_key->ip[2] = bpf_htonl(0x0000ffff);
src_key->ip[3] = BPF_CORE_READ(inet, inet_saddr);
} else if (family == AF_INET6) {
BPF_CORE_READ_INTO(&src_key->ip, inet, pinet6, saddr.in6_u.u6_addr32);
} else {
if (family == AF_UNSPEC) {
bpf_printk("oh shit AF_UNSPEC");
}
return -ERANGE;
}
src_key->port = BPF_CORE_READ(inet, inet_sport);
return 0;
}
static int __always_inline update_map_elem_by_sk(struct sock *sk) {
int ret;
// Build key.
struct ip_port_proto src_key;
if ((ret = build_key_by_sk(sk, &src_key))) {
return ret;
}
// Build value.
struct pid_pname val;
__builtin_memset(&val, 0, sizeof(struct pid_pname));
@ -1993,97 +1953,53 @@ static int __always_inline update_map_elem_by_sk(struct sock *sk) {
}
// Update map.
/// TODO: We can use BPF_NOEXIST here to improve the performance.
/// But will the socket be released after processes dead abnormally?
if ((ret = bpf_map_update_elem(&src_pid_map, &src_key, &val, BPF_ANY))) {
if ((ret =
bpf_map_update_elem(&cookie_pid_map, &cookie, &val, BPF_NOEXIST))) {
// bpf_printk("setup_mapping_from_sk: failed update map: %d", ret);
return ret;
}
// bpf_printk("setup_mapping_from_sk: %pI6:%u (%d)", src_key.ip,
// bpf_ntohs(src_key.port), src_key.proto);
// bpf_printk("setup_mapping_from_sk: -> %s (%d)", val.pname, val.pid);
bpf_printk("setup_mapping: %llu -> %s (%d)", cookie, val.pname, val.pid);
return 0;
}
// Remove sip, sport to pid, pname mapping.
// kernel 5.5+
// IPv4/IPv6 TCP/UDP send.
SEC("fexit/inet_release")
int BPF_PROG(inet_release, struct sock *sk, int ret) {
if (unlikely(ret)) {
return 0;
// Create cookie to pid, pname mapping.
SEC("cgroup/sock_create")
int tproxy_wan_cg_sock_create(struct bpf_sock *sk) {
update_map_elem_by_cookie(bpf_get_socket_cookie(sk));
return 1;
}
// Remove cookie to pid, pname mapping.
SEC("cgroup/sock_release")
int tproxy_wan_cg_sock_release(struct bpf_sock *sk) {
__u64 cookie = bpf_get_socket_cookie(sk);
if (!cookie) {
bpf_printk("zero cookie");
return 1;
}
// Build key.
struct ip_port_proto src_key;
if ((ret = build_key_by_sk(sk, &src_key))) {
return 0;
}
if ((ret = bpf_map_delete_elem(&src_pid_map, &src_key))) {
// bpf_printk("setup_mapping_from_sk: failed update map: %d", ret);
return 0;
}
return 0;
bpf_map_delete_elem(&cookie_pid_map, &cookie);
return 1;
}
// Get sip, sport to pid, pname mapping.
// kernel 5.5+
// IPv4/IPv6 TCP/UDP send.
SEC("fexit/inet_send_prepare")
int BPF_PROG(inet_send_prepare, struct sock *sk, int ret) {
if (unlikely(ret)) {
return 0;
}
update_map_elem_by_sk(sk);
return 0;
}
// SEC("cgroup/connect4")
// int tproxy_wan_cg_connect4(struct bpf_sock_addr *ctx) {
// update_map_elem_by_cookie(bpf_get_socket_cookie(ctx));
// return 1;
// }
// SEC("cgroup/connect6")
// int tproxy_wan_cg_connect6(struct bpf_sock_addr *ctx) {
// update_map_elem_by_cookie(bpf_get_socket_cookie(ctx));
// return 1;
// }
// SEC("cgroup/sendmsg4")
// int tproxy_wan_cg_sendmsg4(struct bpf_sock_addr *ctx) {
// update_map_elem_by_cookie(bpf_get_socket_cookie(ctx));
// return 1;
// }
// SEC("cgroup/sendmsg6")
// int tproxy_wan_cg_sendmsg6(struct bpf_sock_addr *ctx) {
// update_map_elem_by_cookie(bpf_get_socket_cookie(ctx));
// return 1;
// }
// Get sip, sport to pid, pname mapping.
// kernel 5.5+
// IPv4 TCP/UDP listen.
SEC("fexit/inet_bind")
int BPF_PROG(inet_bind, struct socket *sock, struct sockaddr *uaddr,
int addr_len, int ret) {
if (ret) {
return 0;
}
update_map_elem_by_sk(sock->sk);
return 0;
}
// Get sip, sport to pid, pname mapping.
// kernel 5.5+
// IPv4 TCP connect.
// We use fentry because it "Build a SYN and send it off".
// https://github.com/torvalds/linux/blob/62fb9874f5da54fdb243003b386128037319b219/net/ipv4/tcp_output.c#L3820
SEC("fentry/tcp_connect")
int BPF_PROG(tcp_connect, struct sock *sk) {
update_map_elem_by_sk(sk);
return 0;
}
// Get sip, sport to pid, pname mapping.
// kernel 5.5+
// IPv4 UDP sendto/sendmsg.
SEC("fexit/inet_autobind")
int BPF_PROG(inet_autobind, struct sock *sk, int ret) {
if (ret) {
return 0;
}
update_map_elem_by_sk(sk);
return 0;
}
// Get sip, sport to pid, pname mapping.
// kernel 5.5+
// IPv6 TCP/UDP listen.
SEC("fexit/inet6_bind")
int BPF_PROG(inet6_bind, struct socket *sock, struct sockaddr *uaddr,
int addr_len, int ret) {
if (ret) {
return 0;
}
update_map_elem_by_sk(sock->sk);
return 0;
}
SEC("license") const char __license[] = "Dual BSD/GPL";
SEC("license") const char __license[] = "Dual BSD/GPL";

View File

@ -0,0 +1,75 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) since 2023, mzz2017 <mzz@tuta.io>
*/
package control
import (
_ "embed"
"fmt"
"os"
"path/filepath"
"reflect"
"strings"
"text/template"
)
type ProgField struct {
Name string
Ebpf string
}
//go:embed objects.tmpl
var tmpl []byte
func generate(output string) error {
var lanProgFields []ProgField
var wanProgFields []ProgField
tBpfProg := reflect.ValueOf(bpfObjects{}).FieldByName("bpfPrograms").Type()
for i := 0; i < tBpfProg.NumField(); i++ {
structField := tBpfProg.Field(i)
switch {
case strings.HasPrefix(structField.Name, "TproxyLan"):
lanProgFields = append(lanProgFields, ProgField{
Name: structField.Name,
Ebpf: structField.Tag.Get("ebpf"),
})
case strings.HasPrefix(structField.Name, "TproxyWan"):
wanProgFields = append(wanProgFields, ProgField{
Name: structField.Name,
Ebpf: structField.Tag.Get("ebpf"),
})
default:
return fmt.Errorf("unexpected prefix, should be TproxyWan or TproxyLan: %v", structField.Name)
}
}
t, err := template.New("").Parse(string(tmpl))
if err != nil {
return err
}
f, err := os.OpenFile(output, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
if err != nil {
return err
}
defer f.Close()
abs, err := filepath.Abs(output)
if err != nil {
return err
}
fmt.Printf("Write to %v\n", abs)
if err = t.Execute(f, map[string]interface{}{
"WanProgFields": wanProgFields,
"LanProgFields": lanProgFields,
}); err != nil {
return err
}
return nil
}
func GenerateObjects(output string) {
if err := generate(output); err != nil {
os.Exit(1)
}
}

View File

@ -0,0 +1,21 @@
// Code is generated; DO NOT EDIT.
package control
import "github.com/cilium/ebpf"
type bpfObjectsLan struct {
{{- range .LanProgFields }}
{{ .Name }} *ebpf.Program `ebpf:"{{ .Ebpf }}"`
{{- end }}
bpfMaps
}
type bpfObjectsWan struct {
{{- range .WanProgFields }}
{{ .Name }} *ebpf.Program `ebpf:"{{ .Ebpf }}"`
{{- end }}
bpfMaps
}

View File

@ -179,7 +179,7 @@ func (b *RoutingMatcherBuilder) AddL4Proto(f *config_parser.Function, values con
})
}
func (b *RoutingMatcherBuilder) AddIpVersion(f *config_parser.Function, values consts.IpVersion, outbound string) {
func (b *RoutingMatcherBuilder) AddIpVersion(f *config_parser.Function, values consts.IpVersionType, outbound string) {
if b.err != nil {
return
}

View File

@ -135,7 +135,7 @@ func (c *ControlPlane) handlePkt(data []byte, lConn *net.UDPConn, lAddrPort neti
if c.log.IsLevelEnabled(logrus.DebugLevel) && len(dnsMessage.Questions) > 0 {
q := dnsMessage.Questions[0]
c.log.Debugf("UDP(DNS) %v <-[%v]-> %v: %v %v",
RefineSourceToShow(lAddrPort, dest.Addr()), outbound.Name, RefineAddrPortToShow(dest), q.Name, q.Type,
RefineSourceToShow(lAddrPort, addrHdr.Dest.Addr()), outbound.Name, RefineAddrPortToShow(dest), q.Name, q.Type,
)
}
}

View File

@ -36,7 +36,8 @@ func newDirect(fullCone bool) proxy.Dialer {
func (d *direct) Dial(network, addr string) (c net.Conn, err error) {
switch network {
case "tcp":
return d.netDialer.Dial(network, addr)
conn, err := d.netDialer.Dial(network, addr)
return conn, err
case "udp":
if d.fullCone {
conn, err := net.ListenUDP(network, nil)

View File

@ -25,7 +25,7 @@ type MatcherBuilder interface {
AddSourceIp(f *config_parser.Function, values []netip.Prefix, outbound string)
AddSourcePort(f *config_parser.Function, values [][2]uint16, outbound string)
AddL4Proto(f *config_parser.Function, values consts.L4ProtoType, outbound string)
AddIpVersion(f *config_parser.Function, values consts.IpVersion, outbound string)
AddIpVersion(f *config_parser.Function, values consts.IpVersionType, outbound string)
AddSourceMac(f *config_parser.Function, values [][6]byte, outbound string)
AddProcessName(f *config_parser.Function, values [][consts.TaskCommLen]byte, outbound string)
AddFinal(outbound string)
@ -144,7 +144,7 @@ func ApplyMatcherBuilder(log *logrus.Logger, builder MatcherBuilder, rules []*co
}
builder.AddL4Proto(f, l4protoType, outbound)
case consts.Function_IpVersion:
var ipVersion consts.IpVersion
var ipVersion consts.IpVersionType
for _, v := range paramValueGroup {
switch v {
case "4":
@ -195,7 +195,7 @@ func (d *DefaultMatcherBuilder) AddSourcePort(f *config_parser.Function, values
}
func (d *DefaultMatcherBuilder) AddL4Proto(f *config_parser.Function, values consts.L4ProtoType, outbound string) {
}
func (d *DefaultMatcherBuilder) AddIpVersion(f *config_parser.Function, values consts.IpVersion, outbound string) {
func (d *DefaultMatcherBuilder) AddIpVersion(f *config_parser.Function, values consts.IpVersionType, outbound string) {
}
func (d *DefaultMatcherBuilder) AddSourceMac(f *config_parser.Function, values [][6]byte, outbound string) {
}

View File

@ -17,7 +17,7 @@ sudo tc filter del dev $wan ingress
sudo tc filter del dev $wan egress
sudo tc filter add dev $lan ingress bpf direct-action obj foo.o sec tc/ingress
sudo tc filter add dev $lan egress bpf direct-action obj foo.o sec tc/egress
# sudo tc filter add dev $wan ingress bpf direct-action obj foo.o sec tc/wan_ingress
# sudo tc filter add dev $wan egress bpf direct-action obj foo.o sec tc/wan_egress
sudo tc filter add dev $wan ingress bpf direct-action obj foo.o sec tc/wan_ingress
sudo tc filter add dev $wan egress bpf direct-action obj foo.o sec tc/wan_egress
exit 0

View File

@ -49,7 +49,7 @@ func UnmarshalGeoIp(log *logrus.Logger, filepath, code string) (*GeoIP, error) {
return nil, err
}
return nil, fmt.Errorf("country code %v not found in %v", code, filepath)
return nil, fmt.Errorf("code %v not found in %v", code, filepath)
}
func UnmarshalGeoSite(log *logrus.Logger, filepath, code string) (*GeoSite, error) {
@ -63,7 +63,7 @@ func UnmarshalGeoSite(log *logrus.Logger, filepath, code string) (*GeoSite, erro
return &geosite, nil
case errCodeNotFound:
return nil, fmt.Errorf("list %V not found in %v", code, filepath)
return nil, fmt.Errorf("code %v not found in %v", code, filepath)
case errFailedToReadBytes, errFailedToReadExpectedLenBytes,
errInvalidGeodataFile, errInvalidGeodataVarintLength:
@ -86,5 +86,5 @@ func UnmarshalGeoSite(log *logrus.Logger, filepath, code string) (*GeoSite, erro
return nil, err
}
return nil, fmt.Errorf("list %v not found in %v", code, filepath)
return nil, fmt.Errorf("code %v not found in %v", code, filepath)
}