mirror of
https://github.com/daeuniverse/dae.git
synced 2024-12-23 01:04:40 +07:00
patch/optimize(bpf): improve wan tcp hijack datapath performance (#481)
Co-authored-by: Sumire (菫) <151038614+sumire88@users.noreply.github.com>
This commit is contained in:
parent
36fa05b105
commit
b6c3f69bf3
@ -153,6 +153,7 @@ var (
|
||||
SkAssignFeatureVersion = internal.Version{5, 7, 0}
|
||||
ChecksumFeatureVersion = internal.Version{5, 8, 0}
|
||||
ProgTypeSkLookupFeatureVersion = internal.Version{5, 9, 0}
|
||||
SockmapFeatureVersion = internal.Version{5, 10, 0}
|
||||
UserspaceBatchUpdateLpmTrieFeatureVersion = internal.Version{5, 13, 0}
|
||||
HelperBpfGetFuncIpVersionFeatureVersion = internal.Version{5, 15, 0}
|
||||
)
|
||||
|
@ -106,7 +106,7 @@ func NewControlPlane(
|
||||
kernelVersion.String(),
|
||||
requirement.String())
|
||||
}
|
||||
if requirement := consts.CgSocketCookieFeatureVersion; len(global.WanInterface) > 0 && kernelVersion.Less(requirement) {
|
||||
if requirement := consts.SockmapFeatureVersion; len(global.WanInterface) > 0 && kernelVersion.Less(requirement) {
|
||||
return nil, fmt.Errorf("your kernel version %v does not support bind to WAN; expect >=%v; remove wan_interface in config file and try again",
|
||||
kernelVersion.String(),
|
||||
requirement.String())
|
||||
@ -221,6 +221,9 @@ func NewControlPlane(
|
||||
if err = core.setupSkPidMonitor(); err != nil {
|
||||
log.WithError(err).Warnln("cgroup2 is not enabled; pname routing cannot be used")
|
||||
}
|
||||
if err = core.setupLocalTcpFastRedirect(); err != nil {
|
||||
log.WithError(err).Warnln("failed to setup local tcp fast redirect")
|
||||
}
|
||||
for _, ifname := range global.WanInterface {
|
||||
if err = core.bindWan(ifname, global.AutoConfigKernelParameter); err != nil {
|
||||
return nil, fmt.Errorf("bindWan: %v: %w", ifname, err)
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/link"
|
||||
ciliumLink "github.com/cilium/ebpf/link"
|
||||
"github.com/daeuniverse/dae/common"
|
||||
"github.com/daeuniverse/dae/common/consts"
|
||||
@ -382,6 +383,32 @@ func (c *controlPlaneCore) setupSkPidMonitor() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *controlPlaneCore) setupLocalTcpFastRedirect() (err error) {
|
||||
cgroupPath, err := detectCgroupPath()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
cg, err := link.AttachCgroup(link.CgroupOptions{
|
||||
Path: cgroupPath,
|
||||
Program: c.bpf.LocalTcpSockops, // todo@gray: rename
|
||||
Attach: ebpf.AttachCGroupSockOps,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("AttachCgroupSockOps: %w", err)
|
||||
}
|
||||
c.deferFuncs = append(c.deferFuncs, cg.Close)
|
||||
|
||||
if err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
Target: c.bpf.FastSock.FD(),
|
||||
Program: c.bpf.SkMsgFastRedirect,
|
||||
Attach: ebpf.AttachSkMsgVerdict,
|
||||
}); err != nil {
|
||||
return fmt.Errorf("AttachSkMsgVerdict: %w", err)
|
||||
}
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
// bindWan delegates to _bindWan for the interface named ifname and returns
// whatever error that call reports.
//
// autoConfigKernelParameter is accepted for the caller's benefit but is not
// consulted anywhere in this body.
func (c *controlPlaneCore) bindWan(ifname string, autoConfigKernelParameter bool) error {
|
||||
return c._bindWan(ifname)
|
||||
}
|
||||
|
@ -193,6 +193,17 @@ struct {
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
} routing_tuples_map SEC(".maps");
|
||||
|
||||
/* Sockets in fast_sock map are used for fast-redirecting via
|
||||
* sk_msg/fast_redirect. Sockets are automatically deleted from map once
|
||||
* closed, so we don't need to worry about stale entries.
|
||||
*/
|
||||
// fast_sock: written by local_tcp_sockops via bpf_sock_hash_update() and read
// by sk_msg_fast_redirect via bpf_msg_redirect_hash(). It declares no
// `pinning` attribute.
struct {
|
||||
__uint(type, BPF_MAP_TYPE_SOCKHASH);
|
||||
__type(key, struct tuples_key); // connection tuple: l4proto, sip/dip, sport/dport
|
||||
__type(value, __u64); // NOTE(review): slot for the stored socket; confirm width against kernel SOCKHASH rules
|
||||
__uint(max_entries, 65535); // capacity of the map
|
||||
} fast_sock SEC(".maps");
|
||||
|
||||
// Link to type:
|
||||
#define LinkType_None 0
|
||||
#define LinkType_Ethernet 1
|
||||
@ -1158,22 +1169,7 @@ int tproxy_wan_egress(struct __sk_buff *skb)
|
||||
|
||||
get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto);
|
||||
|
||||
// We should know if this packet is from tproxy.
|
||||
// We do not need to check the source ip because we have skipped packets not
|
||||
// from localhost.
|
||||
__be16 tproxy_port = PARAM.tproxy_port;
|
||||
|
||||
if (!tproxy_port)
|
||||
return TC_ACT_OK;
|
||||
bool tproxy_response = tproxy_port == tuples.five.sport;
|
||||
|
||||
if (tproxy_response) {
|
||||
// WAN response won't reach here, must be a LAN response.
|
||||
return TC_ACT_PIPE;
|
||||
}
|
||||
|
||||
// Normal packets.
|
||||
|
||||
if (l4proto == IPPROTO_TCP) {
|
||||
// Backup for further use.
|
||||
tcp_state_syn = tcph.syn && !tcph.ack;
|
||||
@ -1611,4 +1607,117 @@ int tproxy_wan_cg_sendmsg6(struct bpf_sock_addr *ctx)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* sockops program: records established local TCP sockets in fast_sock so that
 * sk_msg/fast_redirect can redirect messages between them.
 *
 * Only BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB and
 * BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB are acted on; every other event returns
 * immediately, before the current task is read or a tuple is built.
 *
 * The socket is always inserted under its own (local -> remote) tuple, and
 * only when routing_tuples_map holds an entry with a non-zero pid for the
 * connection. That entry is looked up with the tuple itself on the active
 * side and with the swapped (remote -> local) tuple on the passive side.
 *
 * Always returns 0.
 */
SEC("sockops")
int local_tcp_sockops(struct bpf_sock_ops *skops)
{
	__u32 op = skops->op;

	/* Cheap filter first: nothing below applies to any other event. */
	if (op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB &&
	    op != BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
		return 0;

	struct task_struct *task = (struct task_struct *)bpf_get_current_task();
	__u32 pid = BPF_CORE_READ(task, pid);

	/* Only local TCP connection has non-zero pids. */
	if (pid == 0)
		return 0;

	struct tuples_key tuple = {};

	tuple.l4proto = IPPROTO_TCP;
	/* NOTE(review): the two ports are converted differently, presumably
	 * because local_port is host order and remote_port network order in
	 * struct bpf_sock_ops; both end up as 16-bit values. Confirm against
	 * the uapi header.
	 */
	tuple.sport = bpf_htonl(skops->local_port) >> 16;
	tuple.dport = skops->remote_port >> 16;
	if (skops->family == AF_INET) {
		/* IPv4 addresses are stored in IPv4-mapped form (::ffff:a.b.c.d). */
		tuple.sip.u6_addr32[2] = bpf_htonl(0x0000ffff);
		tuple.sip.u6_addr32[3] = skops->local_ip4;
		tuple.dip.u6_addr32[2] = bpf_htonl(0x0000ffff);
		tuple.dip.u6_addr32[3] = skops->remote_ip4;
	} else if (skops->family == AF_INET6) {
		tuple.sip.u6_addr32[3] = skops->local_ip6[3];
		tuple.sip.u6_addr32[2] = skops->local_ip6[2];
		tuple.sip.u6_addr32[1] = skops->local_ip6[1];
		tuple.sip.u6_addr32[0] = skops->local_ip6[0];
		tuple.dip.u6_addr32[3] = skops->remote_ip6[3];
		tuple.dip.u6_addr32[2] = skops->remote_ip6[2];
		tuple.dip.u6_addr32[1] = skops->remote_ip6[1];
		tuple.dip.u6_addr32[0] = skops->remote_ip6[0];
	} else {
		return 0;
	}

	struct routing_result *routing_result;

	switch (op) {
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: // dae sockets
	{
		/* The accepting side looks the connection up with source and
		 * destination swapped.
		 */
		struct tuples_key rev_tuple = {};

		rev_tuple.l4proto = IPPROTO_TCP;
		rev_tuple.sport = tuple.dport;
		rev_tuple.dport = tuple.sport;
		__builtin_memcpy(&rev_tuple.sip, &tuple.dip, IPV6_BYTE_LENGTH);
		__builtin_memcpy(&rev_tuple.dip, &tuple.sip, IPV6_BYTE_LENGTH);

		routing_result = bpf_map_lookup_elem(&routing_tuples_map, &rev_tuple);
		break;
	}

	default: // BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: local client sockets
		routing_result = bpf_map_lookup_elem(&routing_tuples_map, &tuple);
		break;
	}

	/* Skip connections with no routing entry or no recorded pid. */
	if (!routing_result || !routing_result->pid)
		return 0;

	/* Both sides register themselves under their own tuple. */
	if (!bpf_sock_hash_update(skops, &fast_sock, &tuple, BPF_ANY))
		bpf_printk("fast_sock added: %pI4:%lu -> %pI4:%lu",
			   &tuple.sip.u6_addr32[3], bpf_ntohs(tuple.sport),
			   &tuple.dip.u6_addr32[3], bpf_ntohs(tuple.dport));

	return 0;
}
|
||||
|
||||
/* sk_msg program: tries to hand an outgoing message straight to the peer
 * socket registered in fast_sock.
 *
 * The lookup key is the sender's tuple with source and destination swapped
 * (remote -> local). bpf_msg_redirect_hash() is called with BPF_F_INGRESS
 * for that key. The program returns SK_PASS regardless of the helper's
 * result; the result only decides whether the debug line is printed.
 */
SEC("sk_msg/fast_redirect")
int sk_msg_fast_redirect(struct sk_msg_md *msg)
{
	struct tuples_key peer_key = {};

	peer_key.l4proto = IPPROTO_TCP;
	peer_key.sport = msg->remote_port >> 16;
	peer_key.dport = bpf_htonl(msg->local_port) >> 16;

	switch (msg->family) {
	case AF_INET:
		/* IPv4 addresses are stored in IPv4-mapped form (::ffff:a.b.c.d). */
		peer_key.sip.u6_addr32[2] = bpf_htonl(0x0000ffff);
		peer_key.sip.u6_addr32[3] = msg->remote_ip4;
		peer_key.dip.u6_addr32[2] = bpf_htonl(0x0000ffff);
		peer_key.dip.u6_addr32[3] = msg->local_ip4;
		break;

	case AF_INET6:
		peer_key.sip.u6_addr32[0] = msg->remote_ip6[0];
		peer_key.sip.u6_addr32[1] = msg->remote_ip6[1];
		peer_key.sip.u6_addr32[2] = msg->remote_ip6[2];
		peer_key.sip.u6_addr32[3] = msg->remote_ip6[3];
		peer_key.dip.u6_addr32[0] = msg->local_ip6[0];
		peer_key.dip.u6_addr32[1] = msg->local_ip6[1];
		peer_key.dip.u6_addr32[2] = msg->local_ip6[2];
		peer_key.dip.u6_addr32[3] = msg->local_ip6[3];
		break;

	default:
		/* Unknown address family: leave the message alone. */
		return SK_PASS;
	}

	long verdict = bpf_msg_redirect_hash(msg, &fast_sock, &peer_key,
					     BPF_F_INGRESS);

	if (verdict == SK_PASS)
		bpf_printk("tcp fast redirect: %pI4:%lu -> %pI4:%lu",
			   &peer_key.sip.u6_addr32[3], bpf_ntohs(peer_key.sport),
			   &peer_key.dip.u6_addr32[3], bpf_ntohs(peer_key.dport));

	return SK_PASS;
}
|
||||
|
||||
SEC("license") const char __license[] = "Dual BSD/GPL";
|
||||
|
@ -19,11 +19,11 @@ This feature requires the kernel version of machine on which dae install >= 5.8.
|
||||
|
||||
Note that if you bind dae to LAN only, dae only provides network service for traffic from LAN and does not impact local programs.
|
||||
|
||||
`Bind to WAN: >= 5.8`
|
||||
`Bind to WAN: >= 5.10`
|
||||
|
||||
You need to bind dae to the WAN interface if you want dae to provide network service for local programs.
|
||||
|
||||
This feature requires kernel version of the machine >= 5.8.
|
||||
This feature requires the kernel version of the machine to be >= 5.10.
|
||||
|
||||
Note that if you bind dae to WAN only, dae only provides network service for local programs and does not impact traffic coming in from other interfaces.
|
||||
|
||||
|
@ -17,11 +17,11 @@
|
||||
|
||||
如果你只在 `lan_interface` 中填写了接口,而未在 `wan_interface` 中填写内容,那么本地程序将无法被代理。如果你期望代理本地程序,需要在 `wan_interface` 中填写 `auto` 或是手动输入 WAN 接口。
|
||||
|
||||
`绑定到 WAN 接口: >= 5.8`
|
||||
`绑定到 WAN 接口: >= 5.10`
|
||||
|
||||
如果你想为本地程序提供代理服务,需要把 dae 绑定到 WAN 接口上。
|
||||
|
||||
该特性要求 dae 所在的设备的内核版本 >= 5.8。
|
||||
该特性要求 dae 所在的设备的内核版本 >= 5.10。
|
||||
|
||||
如果你只在 `wan_interface` 中填写了接口或 `auto`,而未在 `lan_interface` 中填写内容,那么从局域网中传来的流量将无法被代理。如果你想同时代理本机和局域网流量,请同时填写 `wan_interface` 和 `lan_interface`。
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user