patch/optimize(bpf): improve wan tcp hijack datapath performance (#481)

Co-authored-by: Sumire (菫) <151038614+sumire88@users.noreply.github.com>
This commit is contained in:
/gray 2024-03-31 13:03:20 +08:00 committed by GitHub
parent 36fa05b105
commit b6c3f69bf3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 160 additions and 20 deletions

View File

@ -153,6 +153,7 @@ var (
SkAssignFeatureVersion = internal.Version{5, 7, 0} SkAssignFeatureVersion = internal.Version{5, 7, 0}
ChecksumFeatureVersion = internal.Version{5, 8, 0} ChecksumFeatureVersion = internal.Version{5, 8, 0}
ProgTypeSkLookupFeatureVersion = internal.Version{5, 9, 0} ProgTypeSkLookupFeatureVersion = internal.Version{5, 9, 0}
SockmapFeatureVersion = internal.Version{5, 10, 0}
UserspaceBatchUpdateLpmTrieFeatureVersion = internal.Version{5, 13, 0} UserspaceBatchUpdateLpmTrieFeatureVersion = internal.Version{5, 13, 0}
HelperBpfGetFuncIpVersionFeatureVersion = internal.Version{5, 15, 0} HelperBpfGetFuncIpVersionFeatureVersion = internal.Version{5, 15, 0}
) )

View File

@ -106,7 +106,7 @@ func NewControlPlane(
kernelVersion.String(), kernelVersion.String(),
requirement.String()) requirement.String())
} }
if requirement := consts.CgSocketCookieFeatureVersion; len(global.WanInterface) > 0 && kernelVersion.Less(requirement) { if requirement := consts.SockmapFeatureVersion; len(global.WanInterface) > 0 && kernelVersion.Less(requirement) {
return nil, fmt.Errorf("your kernel version %v does not support bind to WAN; expect >=%v; remove wan_interface in config file and try again", return nil, fmt.Errorf("your kernel version %v does not support bind to WAN; expect >=%v; remove wan_interface in config file and try again",
kernelVersion.String(), kernelVersion.String(),
requirement.String()) requirement.String())
@ -221,6 +221,9 @@ func NewControlPlane(
if err = core.setupSkPidMonitor(); err != nil { if err = core.setupSkPidMonitor(); err != nil {
log.WithError(err).Warnln("cgroup2 is not enabled; pname routing cannot be used") log.WithError(err).Warnln("cgroup2 is not enabled; pname routing cannot be used")
} }
if err = core.setupLocalTcpFastRedirect(); err != nil {
log.WithError(err).Warnln("failed to setup local tcp fast redirect")
}
for _, ifname := range global.WanInterface { for _, ifname := range global.WanInterface {
if err = core.bindWan(ifname, global.AutoConfigKernelParameter); err != nil { if err = core.bindWan(ifname, global.AutoConfigKernelParameter); err != nil {
return nil, fmt.Errorf("bindWan: %v: %w", ifname, err) return nil, fmt.Errorf("bindWan: %v: %w", ifname, err)

View File

@ -15,6 +15,7 @@ import (
"sync" "sync"
"github.com/cilium/ebpf" "github.com/cilium/ebpf"
"github.com/cilium/ebpf/link"
ciliumLink "github.com/cilium/ebpf/link" ciliumLink "github.com/cilium/ebpf/link"
"github.com/daeuniverse/dae/common" "github.com/daeuniverse/dae/common"
"github.com/daeuniverse/dae/common/consts" "github.com/daeuniverse/dae/common/consts"
@ -382,6 +383,32 @@ func (c *controlPlaneCore) setupSkPidMonitor() error {
return nil return nil
} }
// setupLocalTcpFastRedirect attaches the two eBPF programs used by the local
// TCP fast-redirect path:
//
//  1. LocalTcpSockops is attached as a sock_ops program to the cgroup returned
//     by detectCgroupPath. The resulting link's Close method is appended to
//     c.deferFuncs.
//  2. SkMsgFastRedirect is attached as the sk_msg verdict program of the
//     FastSock map.
//
// An error from detectCgroupPath is returned unchanged; an attach failure is
// returned wrapped with the name of the step that failed.
func (c *controlPlaneCore) setupLocalTcpFastRedirect() (err error) {
	var cgroupPath string
	if cgroupPath, err = detectCgroupPath(); err != nil {
		return err
	}

	sockOpsOpts := link.CgroupOptions{
		Path:    cgroupPath,
		Program: c.bpf.LocalTcpSockops, // todo@gray: rename
		Attach:  ebpf.AttachCGroupSockOps,
	}
	sockOpsLink, err := link.AttachCgroup(sockOpsOpts)
	if err != nil {
		return fmt.Errorf("AttachCgroupSockOps: %w", err)
	}
	// Hand the link's Close over to the core's deferred function list
	// (presumably run on teardown — confirm against the caller).
	c.deferFuncs = append(c.deferFuncs, sockOpsLink.Close)

	skMsgOpts := link.RawAttachProgramOptions{
		Target:  c.bpf.FastSock.FD(),
		Program: c.bpf.SkMsgFastRedirect,
		Attach:  ebpf.AttachSkMsgVerdict,
	}
	if err = link.RawAttachProgram(skMsgOpts); err != nil {
		return fmt.Errorf("AttachSkMsgVerdict: %w", err)
	}
	return nil
}
// bindWan delegates to c._bindWan(ifname) and returns its error.
// The autoConfigKernelParameter argument is accepted but not used in this body.
func (c *controlPlaneCore) bindWan(ifname string, autoConfigKernelParameter bool) error { func (c *controlPlaneCore) bindWan(ifname string, autoConfigKernelParameter bool) error {
return c._bindWan(ifname) return c._bindWan(ifname)
} }

View File

@ -193,6 +193,17 @@ struct {
__uint(pinning, LIBBPF_PIN_BY_NAME); __uint(pinning, LIBBPF_PIN_BY_NAME);
} routing_tuples_map SEC(".maps"); } routing_tuples_map SEC(".maps");
/* Sockets in the fast_sock map are used for fast redirection by the
 * sk_msg/fast_redirect program. Sockets are automatically deleted from the
 * map once closed, so we don't need to worry about stale entries.
 *
 * Entries are inserted by the sockops program (local_tcp_sockops), keyed by
 * the socket's own local -> remote TCP tuple (struct tuples_key).
 */
struct {
__uint(type, BPF_MAP_TYPE_SOCKHASH);
__type(key, struct tuples_key);
__type(value, __u64);
__uint(max_entries, 65535);
} fast_sock SEC(".maps");
// Link to type: // Link to type:
#define LinkType_None 0 #define LinkType_None 0
#define LinkType_Ethernet 1 #define LinkType_Ethernet 1
@ -1158,22 +1169,7 @@ int tproxy_wan_egress(struct __sk_buff *skb)
get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto); get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto);
// We should know if this packet is from tproxy.
// We do not need to check the source ip because we have skipped packets not
// from localhost.
__be16 tproxy_port = PARAM.tproxy_port;
if (!tproxy_port)
return TC_ACT_OK;
bool tproxy_response = tproxy_port == tuples.five.sport;
if (tproxy_response) {
// WAN response won't reach here, must be a LAN response.
return TC_ACT_PIPE;
}
// Normal packets. // Normal packets.
if (l4proto == IPPROTO_TCP) { if (l4proto == IPPROTO_TCP) {
// Backup for further use. // Backup for further use.
tcp_state_syn = tcph.syn && !tcph.ack; tcp_state_syn = tcph.syn && !tcph.ack;
@ -1611,4 +1607,117 @@ int tproxy_wan_cg_sendmsg6(struct bpf_sock_addr *ctx)
return 1; return 1;
} }
/* local_tcp_sockops registers freshly established local TCP sockets in the
 * fast_sock map so that sk_msg_fast_redirect can later look them up.
 *
 * The socket is described by a tuples_key built from its own point of view
 * (source = local address/port, destination = remote address/port). IPv4
 * addresses are stored in IPv4-mapped form (::ffff:a.b.c.d). Sockets of any
 * other address family are ignored.
 *
 * Only two callbacks are acted upon:
 *   - BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB (dae sockets): routing_tuples_map
 *     is consulted with the reversed tuple (remote -> local).
 *   - BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB (local client sockets):
 *     routing_tuples_map is consulted with the tuple as is.
 * In both cases the socket is inserted into fast_sock under its own
 * (local -> remote) tuple, and only if a routing result with a non-zero pid
 * was found.
 *
 * Always returns 0.
 */
SEC("sockops")
int local_tcp_sockops(struct bpf_sock_ops *skops)
{
	struct task_struct *cur_task =
		(struct task_struct *)bpf_get_current_task();
	__u32 cur_pid = BPF_CORE_READ(cur_task, pid);

	/* Only local TCP connection has non-zero pids. */
	if (!cur_pid)
		return 0;

	/* Key for this socket: local endpoint -> remote endpoint. */
	struct tuples_key self_key = {};

	switch (skops->family) {
	case AF_INET:
		/* IPv4-mapped IPv6 representation. */
		self_key.sip.u6_addr32[2] = bpf_htonl(0x0000ffff);
		self_key.sip.u6_addr32[3] = skops->local_ip4;
		self_key.dip.u6_addr32[2] = bpf_htonl(0x0000ffff);
		self_key.dip.u6_addr32[3] = skops->remote_ip4;
		break;
	case AF_INET6:
		self_key.sip.u6_addr32[0] = skops->local_ip6[0];
		self_key.sip.u6_addr32[1] = skops->local_ip6[1];
		self_key.sip.u6_addr32[2] = skops->local_ip6[2];
		self_key.sip.u6_addr32[3] = skops->local_ip6[3];
		self_key.dip.u6_addr32[0] = skops->remote_ip6[0];
		self_key.dip.u6_addr32[1] = skops->remote_ip6[1];
		self_key.dip.u6_addr32[2] = skops->remote_ip6[2];
		self_key.dip.u6_addr32[3] = skops->remote_ip6[3];
		break;
	default:
		return 0;
	}
	self_key.l4proto = IPPROTO_TCP;
	/* Both ports are reduced to 16-bit values comparable with the ports
	 * stored in routing_tuples_map keys.
	 */
	self_key.sport = bpf_htonl(skops->local_port) >> 16;
	self_key.dport = skops->remote_port >> 16;

	struct routing_result *route;

	if (skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) {
		/* dae sockets: the routing entry is keyed from the peer's
		 * point of view, so look it up with the reversed tuple.
		 */
		struct tuples_key peer_key = {};

		peer_key.l4proto = IPPROTO_TCP;
		peer_key.sport = self_key.dport;
		peer_key.dport = self_key.sport;
		__builtin_memcpy(&peer_key.sip, &self_key.dip, IPV6_BYTE_LENGTH);
		__builtin_memcpy(&peer_key.dip, &self_key.sip, IPV6_BYTE_LENGTH);

		route = bpf_map_lookup_elem(&routing_tuples_map, &peer_key);
	} else if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) {
		/* local client sockets */
		route = bpf_map_lookup_elem(&routing_tuples_map, &self_key);
	} else {
		return 0;
	}

	/* Skip connections without a routing result or without a pid. */
	if (!route || !route->pid)
		return 0;

	if (bpf_sock_hash_update(skops, &fast_sock, &self_key, BPF_ANY) == 0)
		bpf_printk("fast_sock added: %pI4:%lu -> %pI4:%lu",
			   &self_key.sip.u6_addr32[3], bpf_ntohs(self_key.sport),
			   &self_key.dip.u6_addr32[3], bpf_ntohs(self_key.dport));

	return 0;
}
/* sk_msg_fast_redirect tries to hand an outgoing message directly to the
 * peer socket registered in fast_sock.
 *
 * The lookup key is the sending socket's tuple reversed (source = remote
 * address/port, destination = local address/port), i.e. the tuple under
 * which the other end of the connection would have stored itself. IPv4
 * addresses use the IPv4-mapped form (::ffff:a.b.c.d). Messages of any other
 * address family are passed through untouched.
 *
 * bpf_msg_redirect_hash is called with BPF_F_INGRESS. The program returns
 * SK_PASS in every case, whether or not the redirect helper succeeded.
 */
SEC("sk_msg/fast_redirect")
int sk_msg_fast_redirect(struct sk_msg_md *msg)
{
	/* Key of the peer socket: remote endpoint -> local endpoint. */
	struct tuples_key peer_key = {};

	switch (msg->family) {
	case AF_INET:
		peer_key.sip.u6_addr32[2] = bpf_htonl(0x0000ffff);
		peer_key.sip.u6_addr32[3] = msg->remote_ip4;
		peer_key.dip.u6_addr32[2] = bpf_htonl(0x0000ffff);
		peer_key.dip.u6_addr32[3] = msg->local_ip4;
		break;
	case AF_INET6:
		peer_key.sip.u6_addr32[0] = msg->remote_ip6[0];
		peer_key.sip.u6_addr32[1] = msg->remote_ip6[1];
		peer_key.sip.u6_addr32[2] = msg->remote_ip6[2];
		peer_key.sip.u6_addr32[3] = msg->remote_ip6[3];
		peer_key.dip.u6_addr32[0] = msg->local_ip6[0];
		peer_key.dip.u6_addr32[1] = msg->local_ip6[1];
		peer_key.dip.u6_addr32[2] = msg->local_ip6[2];
		peer_key.dip.u6_addr32[3] = msg->local_ip6[3];
		break;
	default:
		return SK_PASS;
	}
	peer_key.l4proto = IPPROTO_TCP;
	peer_key.sport = msg->remote_port >> 16;
	peer_key.dport = bpf_htonl(msg->local_port) >> 16;

	/* Trace only when the helper reports a successful redirect. */
	if (bpf_msg_redirect_hash(msg, &fast_sock, &peer_key, BPF_F_INGRESS) == SK_PASS)
		bpf_printk("tcp fast redirect: %pI4:%lu -> %pI4:%lu",
			   &peer_key.sip.u6_addr32[3], bpf_ntohs(peer_key.sport),
			   &peer_key.dip.u6_addr32[3], bpf_ntohs(peer_key.dport));

	return SK_PASS;
}
SEC("license") const char __license[] = "Dual BSD/GPL"; SEC("license") const char __license[] = "Dual BSD/GPL";

View File

@ -19,11 +19,11 @@ This feature requires the kernel version of machine on which dae install >= 5.8.
Note that if you bind dae to LAN only, dae only provides network service for traffic from LAN and does not affect local programs. Note that if you bind dae to LAN only, dae only provides network service for traffic from LAN and does not affect local programs.
`Bind to WAN: >= 5.8` `Bind to WAN: >= 5.10`
You need to bind dae to the WAN interface if you want dae to provide network service for local programs. You need to bind dae to the WAN interface if you want dae to provide network service for local programs.
This feature requires the machine's kernel version to be >= 5.8. This feature requires the machine's kernel version to be >= 5.10.
Note that if you bind dae to WAN only, dae only provides network service for local programs and does not affect traffic coming in from other interfaces. Note that if you bind dae to WAN only, dae only provides network service for local programs and does not affect traffic coming in from other interfaces.

View File

@ -17,11 +17,11 @@
如果你只在 `lan_interface` 中填写了接口,而未在 `wan_interface` 中填写内容,那么本地程序将无法被代理。如果你期望代理本地程序,需要在 `wan_interface` 中填写 `auto` 或是手动输入 WAN 接口。 如果你只在 `lan_interface` 中填写了接口,而未在 `wan_interface` 中填写内容,那么本地程序将无法被代理。如果你期望代理本地程序,需要在 `wan_interface` 中填写 `auto` 或是手动输入 WAN 接口。
`绑定到 WAN 接口: >= 5.8` `绑定到 WAN 接口: >= 5.10`
如果你想为本地程序提供代理服务,需要把 dae 绑定到 WAN 接口上。 如果你想为本地程序提供代理服务,需要把 dae 绑定到 WAN 接口上。
该特性要求 dae 所在的设备的内核版本 >= 5.8 该特性要求 dae 所在的设备的内核版本 >= 5.10
如果你只在 `wan_interface` 中填写了接口或 `auto`,而未在 `lan_interface` 中填写内容,那么从局域网中传来的流量将无法被代理。如果你想同时代理本机和局域网流量,请同时填写 `wan_interface` 和 `lan_interface` 如果你只在 `wan_interface` 中填写了接口或 `auto`,而未在 `lan_interface` 中填写内容,那么从局域网中传来的流量将无法被代理。如果你想同时代理本机和局域网流量,请同时填写 `wan_interface` 和 `lan_interface`