From 0d29b6fccc0ac02d799b453c3ac607f32946cca8 Mon Sep 17 00:00:00 2001 From: mzz2017 <2017@duck.com> Date: Sat, 4 Feb 2023 11:38:01 +0800 Subject: [PATCH] fix: checksum --- README.md | 13 +- common/consts/ebpf.go | 11 +- component/control/bpf_utils.go | 24 +- component/control/control_plane.go | 9 +- component/control/control_plane_core.go | 98 +++- component/control/kern/tproxy.c | 511 +++++++++++-------- component/control/routing_matcher_builder.go | 22 +- go.mod | 1 + go.sum | 3 + 9 files changed, 429 insertions(+), 263 deletions(-) diff --git a/README.md b/README.md index b318da0..03090c0 100644 --- a/README.md +++ b/README.md @@ -31,19 +31,19 @@ See [example.dae](https://github.com/v2rayA/dae/blob/main/example.dae). Use `uname -r` to check the kernel version on your machine. -**Bind to LAN: >= 5.2** +**Bind to LAN: >= 5.8** You need bind dae to LAN interface, if you want to provide network service for LAN as an intermediate device. -This feature requires the kernel version of machine on which dae install >= 5.2. +This feature requires the kernel version of machine on which dae install >= 5.8. Note that if you bind dae to LAN only, dae only provide network service for traffic from LAN, and not impact local programs. -**Bind to WAN: >= 5.7** +**Bind to WAN: >= 5.8** You need bind dae to WAN interface, if you want dae to provide network service for local programs. -This feature requires kernel version of the machine >= 5.7. +This feature requires kernel version of the machine >= 5.8. Note that if you bind dae to WAN only, dae only provide network service for local programs and not impact traffic coming in from other interfaces. @@ -54,7 +54,7 @@ Usually, mainstream desktop distributions have these items turned on. But in ord Use following commands to check the kernel configuration items on your machine. ```shell -zcat /proc/config.gz || cat /boot/config || cat /boot/config-$(uname -r) +zcat /proc/config.gz || cat /boot/{config,config-$(uname -r)} ``` **Bind to LAN** @@ -72,14 +72,13 @@ CONFIG_DEBUG_INFO_BTF Check them using command like: ```shell -(zcat /proc/config.gz || cat /boot/config || cat /boot/config-$(uname -r)) | grep 'CONFIG_DEBUG_INFO_BTF=' +(zcat /proc/config.gz || cat /boot/{config,config-$(uname -r)}) | grep 'CONFIG_DEBUG_INFO_BTF=' ``` ## TODO 1. Check dns upstream and source loop (whether upstream is also a client of us) and remind the user to add sip rule. 1. Domain routing performance optimization. -1. DisableL4Checksum by link. 1. Handle the case that nodes do not support UDP. 1. Handle the case that nodes do not support IPv6. 1. L4Checksum problem. diff --git a/common/consts/ebpf.go b/common/consts/ebpf.go index 4bd6b69..92f549d 100644 --- a/common/consts/ebpf.go +++ b/common/consts/ebpf.go @@ -96,6 +96,11 @@ const ( IpVersion_X IpVersionType = 3 ) -var BasicFeatureVersion = internal.Version{5, 2, 0} -var FtraceFeatureVersion = internal.Version{5, 5, 0} -var CgSocketCookieFeatureVersion = internal.Version{5, 7, 0} +var ( + BasicFeatureVersion = internal.Version{5, 2, 0} + // Deprecated: Ftrace does not support arm64 yet (Linux 6.2). + FtraceFeatureVersion = internal.Version{5, 5, 0} + BatchUpdateFeatureVersion = internal.Version{5, 6, 0} + CgSocketCookieFeatureVersion = internal.Version{5, 7, 0} + ChecksumFeatureVersion = internal.Version{5, 8, 0} +) diff --git a/component/control/bpf_utils.go b/component/control/bpf_utils.go index ef61627..3474043 100644 --- a/component/control/bpf_utils.go +++ b/component/control/bpf_utils.go @@ -12,6 +12,7 @@ import ( "fmt" "github.com/cilium/ebpf" "github.com/v2rayA/dae/common" + "github.com/v2rayA/dae/common/consts" "github.com/v2rayA/dae/pkg/ebpf_internal" "net/netip" "os" @@ -29,15 +30,6 @@ type _bpfPortRange struct { PortEnd uint16 } -type _bpfMatchSet struct { - // TODO: Need sync with C code. - Value [16]byte - Type uint8 - Not bool - Outbound uint8 - _ [1]byte -} - func (r _bpfPortRange) Encode() (b [16]byte) { binary.LittleEndian.PutUint16(b[:2], r.PortStart) binary.LittleEndian.PutUint16(b[2:], r.PortEnd) @@ -78,7 +70,7 @@ func cidrToBpfLpmKey(prefix netip.Prefix) _bpfLpmKey { func BatchUpdate(m *ebpf.Map, keys interface{}, values interface{}, opts *ebpf.BatchOptions) (n int, err error) { var old bool version, e := internal.KernelVersion() - if e != nil || version.Less(internal.Version{5, 6, 0}) { + if e != nil || version.Less(consts.BatchUpdateFeatureVersion) { old = true } if !old { @@ -155,3 +147,15 @@ func detectCgroupPath() (string, error) { return "", errors.New("cgroup2 not mounted") } + +func (p bpfIfParams) CheckVersionRequirement(version *internal.Version) (err error) { + if !p.TxL4CksmIp4Offload || + !p.TxL4CksmIp6Offload { + // Need calc checksum on CPU. And need BPF_F_ADJ_ROOM_NO_CSUM_RESET. + if version.Less(consts.ChecksumFeatureVersion) { + return fmt.Errorf("your NIC does not support checksum offload and your kernel version %v does not support related BPF features; expect >=%v; upgrade your kernel and try again", version.String(), + consts.ChecksumFeatureVersion.String()) + } + } + return nil +} diff --git a/component/control/control_plane.go b/component/control/control_plane.go index 79994bc..38b7fca 100644 --- a/component/control/control_plane.go +++ b/component/control/control_plane.go @@ -66,13 +66,18 @@ func NewControlPlane( if e != nil { return nil, fmt.Errorf("failed to get kernel version: %w", e) } - if kernelVersion.Less(consts.BasicFeatureVersion) { - return nil, fmt.Errorf("your kernel version %v does not satisfy basic requirement; expect >=%v", c.kernelVersion.String(), consts.BasicFeatureVersion.String()) + // Must judge version from high to low to reduce the number of user upgrading kernel. + if kernelVersion.Less(consts.ChecksumFeatureVersion) { + return nil, fmt.Errorf("your kernel version %v does not support checksum related features; expect >=%v; upgrade your kernel and try again", kernelVersion.String(), + consts.ChecksumFeatureVersion.String()) } if len(wanInterface) > 0 && kernelVersion.Less(consts.CgSocketCookieFeatureVersion) { return nil, fmt.Errorf("your kernel version %v does not support bind to WAN; expect >=%v; remove wan_interface in config file and try again", kernelVersion.String(), consts.CgSocketCookieFeatureVersion.String()) } + if kernelVersion.Less(consts.BasicFeatureVersion) { + return nil, fmt.Errorf("your kernel version %v does not satisfy basic requirement; expect >=%v", c.kernelVersion.String(), consts.BasicFeatureVersion.String()) + } // Allow the current process to lock memory for eBPF resources. if err = rlimit.RemoveMemlock(); err != nil { diff --git a/component/control/control_plane_core.go b/component/control/control_plane_core.go index 2f2371c..fcb9f83 100644 --- a/component/control/control_plane_core.go +++ b/component/control/control_plane_core.go @@ -9,6 +9,7 @@ import ( "fmt" "github.com/cilium/ebpf" ciliumLink "github.com/cilium/ebpf/link" + "github.com/safchain/ethtool" "github.com/sirupsen/logrus" "github.com/v2rayA/dae/common" "github.com/v2rayA/dae/common/consts" @@ -17,6 +18,7 @@ import ( "golang.org/x/sys/unix" "net/netip" "os" + "regexp" ) type ControlPlaneCore struct { @@ -42,23 +44,16 @@ func (c *ControlPlaneCore) Close() (err error) { return err } -func (c *ControlPlaneCore) BindLan(ifname string) error { - c.log.Infof("Bind to LAN: %v", ifname) - link, err := netlink.LinkByName(ifname) - if err != nil { - return err - } - // Insert an elem into IfindexIpsMap. +func getifParamsFromLink(link netlink.Link) (ifParams bpfIfParams, err error) { // TODO: We should monitor IP change of the link. ipnets, err := netlink.AddrList(link, netlink.FAMILY_ALL) if err != nil { - return err + return bpfIfParams{}, err } - // TODO: If we monitor IP change of the link, we should remove code below. if len(ipnets) == 0 { - return fmt.Errorf("interface %v has no ip", ifname) + return bpfIfParams{}, fmt.Errorf("interface %v has no ip", link.Attrs().Name) } - var linkIp bpfIfIp + // Get first Ip4 and Ip6. for _, ipnet := range ipnets { ip, ok := netip.AddrFromSlice(ipnet.IP) if !ok { @@ -67,38 +62,84 @@ func (c *ControlPlaneCore) BindLan(ifname string) error { if ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { continue } - if (ip.Is6() && linkIp.HasIp6) || - (ip.Is4() && linkIp.HasIp4) { + if (ip.Is6() && ifParams.HasIp6) || + (ip.Is4() && ifParams.HasIp4) { continue } ip6format := ip.As16() if ip.Is4() { - linkIp.HasIp4 = true - linkIp.Ip4 = common.Ipv6ByteSliceToUint32Array(ip6format[:]) + ifParams.HasIp4 = true + ifParams.Ip4 = common.Ipv6ByteSliceToUint32Array(ip6format[:]) } else { - linkIp.HasIp6 = true - linkIp.Ip6 = common.Ipv6ByteSliceToUint32Array(ip6format[:]) + ifParams.HasIp6 = true + ifParams.Ip6 = common.Ipv6ByteSliceToUint32Array(ip6format[:]) } - if linkIp.HasIp4 && linkIp.HasIp6 { + if ifParams.HasIp4 && ifParams.HasIp6 { break } } - if err := c.bpf.IfindexTproxyIpMap.Update(uint32(link.Attrs().Index), linkIp, ebpf.UpdateAny); err != nil { + // Get link offload features. + et, err := ethtool.NewEthtool() + if err != nil { + return bpfIfParams{}, err + } + defer et.Close() + features, err := et.Features(link.Attrs().Name) + if err != nil { + return bpfIfParams{}, err + } + if features["tx-checksum-ip-generic"] { + ifParams.TxL4CksmIp4Offload = true + ifParams.TxL4CksmIp6Offload = true + } + if features["tx-checksum-ipv4"] { + ifParams.TxL4CksmIp4Offload = true + } + if features["tx-checksum-ipv6"] { + ifParams.TxL4CksmIp6Offload = true + } + if features["rx-checksum"] { + ifParams.RxCksmOffload = true + } + switch { + case regexp.MustCompile(`^docker\d+$`).MatchString(link.Attrs().Name): + ifParams.UseNonstandardOffloadAlgorithm = true + default: + } + return ifParams, nil +} + +func (c *ControlPlaneCore) BindLan(ifname string) error { + c.log.Infof("Bind to LAN: %v", ifname) + link, err := netlink.LinkByName(ifname) + if err != nil { + return err + } + + /// Insert an elem into IfindexParamsMap. + ifParams, err := getifParamsFromLink(link) + if err != nil { + return err + } + if err = ifParams.CheckVersionRequirement(c.kernelVersion); err != nil { + return err + } + if err := c.bpf.IfindexParamsMap.Update(uint32(link.Attrs().Index), ifParams, ebpf.UpdateAny); err != nil { return fmt.Errorf("update IfindexIpsMap: %w", err) } // FIXME: not only this link ip. - if linkIp.HasIp4 { + if ifParams.HasIp4 { if err := c.bpf.HostIpLpm.Update(_bpfLpmKey{ PrefixLen: 128, - Data: linkIp.Ip4, + Data: ifParams.Ip4, }, uint32(1), ebpf.UpdateAny); err != nil { return fmt.Errorf("update IfindexIpsMap: %w", err) } } - if linkIp.HasIp6 { + if ifParams.HasIp6 { if err := c.bpf.HostIpLpm.Update(_bpfLpmKey{ PrefixLen: 128, - Data: linkIp.Ip6, + Data: ifParams.Ip6, }, uint32(1), ebpf.UpdateAny); err != nil { return fmt.Errorf("update IfindexIpsMap: %w", err) } @@ -169,6 +210,17 @@ func (c *ControlPlaneCore) BindWan(ifname string) error { if err != nil { return err } + /// Insert an elem into IfindexParamsMap. + ifParams, err := getifParamsFromLink(link) + if err != nil { + return err + } + if err = ifParams.CheckVersionRequirement(c.kernelVersion); err != nil { + return err + } + if err := c.bpf.IfindexParamsMap.Update(uint32(link.Attrs().Index), ifParams, ebpf.UpdateAny); err != nil { + return fmt.Errorf("update IfindexIpsMap: %w", err) + } /// Set-up SrcPidMapper. /// Attach programs to support pname routing. diff --git a/component/control/kern/tproxy.c b/component/control/kern/tproxy.c index 6743b02..3a3d41e 100644 --- a/component/control/kern/tproxy.c +++ b/component/control/kern/tproxy.c @@ -69,8 +69,10 @@ enum { // Param keys: static const __u32 zero_key = 0; static const __u32 tproxy_port_key = 1; -static const __u32 disable_l4_tx_checksum_key = 2; -static const __u32 disable_l4_rx_checksum_key = 3; +static const __u32 disable_l4_tx_checksum_key + __attribute__((unused, deprecated)) = 2; +static const __u32 disable_l4_rx_checksum_key + __attribute__((unused, deprecated)) = 3; static const __u32 control_plane_pid_key = 4; struct ip_port { @@ -154,20 +156,25 @@ struct { } dns_upstream_map SEC(".maps"); // Interface Ips: -struct if_ip { +struct if_params { __be32 ip4[4]; __be32 ip6[4]; - bool hasIp4; - bool hasIp6; + + bool has_ip4; + bool has_ip6; + bool rx_cksm_offload; + bool tx_l4_cksm_ip4_offload; + bool tx_l4_cksm_ip6_offload; + bool use_nonstandard_offload_algorithm; }; struct { __uint(type, BPF_MAP_TYPE_HASH); - __type(key, __u32); // ifindex - __type(value, struct if_ip); // ip + __type(key, __u32); // ifindex + __type(value, struct if_params); // ip __uint(max_entries, MAX_INTERFACE_NUM); /// NOTICE: No persistence. // __uint(pinning, LIBBPF_PIN_BY_NAME); -} ifindex_tproxy_ip_map SEC(".maps"); +} ifindex_params_map SEC(".maps"); // Array of LPM tries: struct lpm_key { @@ -228,8 +235,7 @@ struct port_range { */ struct match_set { union { - /// NOTICE: MUST sync with component/control/bpf_utils.go. - __u32 __value; // Placeholder for bpf2go. + __u8 __value[16]; // Placeholder for bpf2go. __u32 index; struct port_range port_range; @@ -237,8 +243,8 @@ struct match_set { enum IpVersionType ip_version; __u32 pname[TASK_COMM_LEN / 4]; }; + bool not ; // A subrule flag (this is not a match_set flag). enum MatchType type; - bool not ; // A subrule flag (this is not a match_set flag). __u8 outbound; // User-defined value range is [0, 252]. }; struct { @@ -310,7 +316,8 @@ static __always_inline __u32 l4_checksum_off(__u8 proto, __u8 ihl) { static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion, __u8 proto, __u8 ihl, __be32 old_ip[4], - __be32 new_ip[4], bool is_dest) { + __be32 new_ip[4], bool is_dest, + bool calc_l4_cksm) { // Nothing to do. if (equal_ipv6_format(old_ip, new_ip)) { return 0; @@ -327,14 +334,24 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion, } if (ipversion == 4) { + __be32 _old_ip = old_ip[3]; __be32 _new_ip = new_ip[3]; + if (calc_l4_cksm) { - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, _old_ip, _new_ip, - l4flags | sizeof(_new_ip)))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); - return ret; + int ret; + // __sum16 test; + // bpf_skb_load_bytes(skb, l4_cksm_off, &test, sizeof(test)); + // bpf_printk("rewrite ip before: %x, %pI4->%pI4", test, &_old_ip, + // &_new_ip); + if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, _old_ip, _new_ip, + l4flags | sizeof(_new_ip)))) { + bpf_printk("bpf_l4_csum_replace: %d", ret); + return ret; + } } + // bpf_skb_load_bytes(skb, l4_cksm_off, &test, sizeof(test)); + // bpf_printk("rewrite ip after: %x", test); if ((ret = bpf_l3_csum_replace(skb, IPV4_CSUM_OFF, _old_ip, _new_ip, sizeof(_new_ip)))) { @@ -349,11 +366,14 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion, return ret; } } else { - __s64 cksm = - bpf_csum_diff(new_ip, IPV6_BYTE_LENGTH, old_ip, IPV6_BYTE_LENGTH, 0); - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, l4flags))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); - return ret; + + if (calc_l4_cksm) { + __s64 cksm = + bpf_csum_diff(old_ip, IPV6_BYTE_LENGTH, new_ip, IPV6_BYTE_LENGTH, 0); + if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, l4flags))) { + bpf_printk("bpf_l4_csum_replace: %d", ret); + return ret; + } } // bpf_printk("%pI6 -> %pI6", old_ip, new_ip); @@ -370,7 +390,8 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion, static __always_inline int rewrite_port(struct __sk_buff *skb, __u8 proto, __u8 ihl, __be16 old_port, - __be16 new_port, bool is_dest) { + __be16 new_port, bool is_dest, + bool calc_l4_cksm) { // Nothing to do. if (old_port == new_port) { return 0; @@ -397,22 +418,34 @@ static __always_inline int rewrite_port(struct __sk_buff *skb, __u8 proto, } l4flags |= BPF_F_MARK_MANGLED_0; break; + + default: + return -EINVAL; } // bpf_printk("%u -> %u", bpf_ntohs(old_port), bpf_ntohs(new_port)); int ret; - if ((ret = bpf_l4_csum_replace(skb, cksm_off, old_port, new_port, - l4flags | sizeof(new_port)))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); + // __sum16 test; + // if (!bpf_skb_load_bytes(skb, cksm_off, &test, sizeof(test))) { + // bpf_printk("rewrite port before: %x, %u->%u", test, bpf_ntohs(old_port), + // bpf_ntohs(new_port)); + // } + if (calc_l4_cksm) { + if ((ret = bpf_l4_csum_replace(skb, cksm_off, old_port, new_port, + l4flags | sizeof(new_port)))) { + bpf_printk("bpf_l4_csum_replace: %d", ret); + return ret; + } + } + // if (!bpf_skb_load_bytes(skb, cksm_off, &test, sizeof(test))) { + // bpf_printk("rewrite port aftetr: %x", test); + // } + + if ((ret = bpf_skb_store_bytes(skb, port_off, &new_port, sizeof(new_port), + 0))) { return ret; } - ret = bpf_skb_store_bytes(skb, port_off, &new_port, sizeof(new_port), 0); - - if (ret) { - return ret; - } - return 0; } @@ -560,16 +593,12 @@ parse_transport(const struct __sk_buff *skb, struct ethhdr *ethh, return 1; } -static __always_inline int get_tproxy_ip(__u8 ipversion, __u32 ifindex, - __be32 tproxy_ip[4]) { - struct if_ip *if_ip = bpf_map_lookup_elem(&ifindex_tproxy_ip_map, &ifindex); - if (unlikely(!if_ip)) { - return -1; - } - if (ipversion == 4 && (*if_ip).hasIp4) { - __builtin_memcpy(tproxy_ip, (*if_ip).ip4, IPV6_BYTE_LENGTH); - } else if (ipversion == 6 && (*if_ip).hasIp6) { - __builtin_memcpy(tproxy_ip, (*if_ip).ip6, IPV6_BYTE_LENGTH); +static __always_inline int +get_tproxy_ip(__u8 ipversion, struct if_params *ifparams, __be32 tproxy_ip[4]) { + if (ipversion == 4 && (*ifparams).has_ip4) { + __builtin_memcpy(tproxy_ip, (*ifparams).ip4, IPV6_BYTE_LENGTH); + } else if (ipversion == 6 && (*ifparams).has_ip6) { + __builtin_memcpy(tproxy_ip, (*ifparams).ip6, IPV6_BYTE_LENGTH); } else { // Should TC_ACT_OK outer. return -EFAULT; @@ -577,11 +606,12 @@ static __always_inline int get_tproxy_ip(__u8 ipversion, __u32 ifindex, return 0; } -static __always_inline int ip_is_host(__u8 ipversion, __u32 ifindex, +static __always_inline int ip_is_host(__u8 ipversion, + struct if_params *ifparams, const __be32 ip[4], __be32 tproxy_ip[4]) { if (tproxy_ip) { int ret; - if ((ret = get_tproxy_ip(ipversion, ifindex, tproxy_ip))) { + if ((ret = get_tproxy_ip(ipversion, ifparams, tproxy_ip))) { return ret; } } @@ -593,7 +623,8 @@ static __always_inline int ip_is_host(__u8 ipversion, __u32 ifindex, } static __always_inline int adjust_udp_len(struct __sk_buff *skb, __u16 oldlen, - __u32 ihl, __u16 len_diff) { + __u32 ihl, __u16 len_diff, + bool calc_l4_cksm) { if (unlikely(!len_diff)) { return 0; } @@ -614,20 +645,21 @@ static __always_inline int adjust_udp_len(struct __sk_buff *skb, __u16 oldlen, // Calculate checksum and store the new value. int ret; - - __u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl); - // replace twice because len exists both pseudo hdr and hdr. - if ((ret = bpf_l4_csum_replace( - skb, udp_csum_off, oldlen, newlen, - sizeof(oldlen) | BPF_F_PSEUDO_HDR | // udp len is in the pseudo hdr - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); - return ret; - } - if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, oldlen, newlen, - sizeof(oldlen) | BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); - return ret; + if (calc_l4_cksm) { + __u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl); + // replace twice because len exists both pseudo hdr and hdr. + if ((ret = bpf_l4_csum_replace( + skb, udp_csum_off, oldlen, newlen, + sizeof(oldlen) | BPF_F_PSEUDO_HDR | // udp len is in the pseudo hdr + BPF_F_MARK_MANGLED_0))) { + bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); + return ret; + } + if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, oldlen, newlen, + sizeof(oldlen) | BPF_F_MARK_MANGLED_0))) { + bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); + return ret; + } } if ((ret = bpf_skb_store_bytes( skb, (__u32)ETH_HLEN + ihl * 4 + offsetof(struct udphdr, len), @@ -677,7 +709,8 @@ static __always_inline int adjust_ipv4_len(struct __sk_buff *skb, __u16 oldlen, static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, __u8 ipversion, __u8 ihl, __be16 iphdr_tot_len, - void *newhdr, __u32 newhdrlen) { + void *newhdr, __u32 newhdrlen, + bool calc_l4_cksm) { if (unlikely(newhdrlen % 4 != 0)) { bpf_printk("encap_after_udp_hdr: unexpected newhdrlen value %u :must " "be a multiple of 4", @@ -699,7 +732,9 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, return ret; } // Add room for new udp payload header. - if ((ret = bpf_skb_adjust_room(skb, newhdrlen, BPF_ADJ_ROOM_NET, 0))) { + if ((ret = bpf_skb_adjust_room(skb, newhdrlen, BPF_ADJ_ROOM_NET, + calc_l4_cksm ? BPF_F_ADJ_ROOM_NO_CSUM_RESET + : 0))) { bpf_printk("UDP ADJUST ROOM(encap): %d", ret); return ret; } @@ -719,18 +754,21 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, } // Rewrite udp len. - if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, newhdrlen))) { + if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, newhdrlen, + calc_l4_cksm))) { bpf_printk("adjust_udp_len: %d", ret); return ret; } // Rewrite udp payload. - __u32 l4_cksm_off = l4_checksum_off(IPPROTO_UDP, ihl); - __s64 cksm = bpf_csum_diff(NULL, 0, newhdr, newhdrlen, 0); - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace 2: %d", ret); - return ret; + if (calc_l4_cksm) { + __u32 l4_cksm_off = l4_checksum_off(IPPROTO_UDP, ihl); + __s64 cksm = bpf_csum_diff(NULL, 0, newhdr, newhdrlen, 0); + if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, + BPF_F_MARK_MANGLED_0))) { + bpf_printk("bpf_l4_csum_replace 2: %d", ret); + return ret; + } } if ((ret = bpf_skb_store_bytes(skb, udp_payload_off, newhdr, newhdrlen, 0))) { bpf_printk("bpf_skb_store_bytes 2: %d", ret); @@ -742,7 +780,8 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb, __u8 ipversion, __u8 ihl, __be16 ipv4hdr_tot_len, void *to, - __u32 decap_hdrlen) { + __u32 decap_hdrlen, + bool calc_l4_cksm) { if (unlikely(decap_hdrlen % 4 != 0)) { bpf_printk("encap_after_udp_hdr: unexpected decap_hdrlen value %u :must " "be a multiple of 4", @@ -784,7 +823,9 @@ static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb, } // Adjust room to decap the header. - if ((ret = bpf_skb_adjust_room(skb, -decap_hdrlen, BPF_ADJ_ROOM_NET, 0))) { + if ((ret = bpf_skb_adjust_room(skb, -decap_hdrlen, BPF_ADJ_ROOM_NET, + calc_l4_cksm ? BPF_F_ADJ_ROOM_NO_CSUM_RESET + : 0))) { bpf_printk("UDP ADJUST ROOM(decap): %d", ret); return ret; } @@ -798,18 +839,21 @@ static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb, } // Rewrite udp len. - if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, -decap_hdrlen))) { + if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, -decap_hdrlen, + calc_l4_cksm))) { bpf_printk("adjust_udp_len: %d", ret); return ret; } // Rewrite udp checksum. - __u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl); - __s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0); - if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm, - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace 2: %d", ret); - return ret; + if (calc_l4_cksm) { + __u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl); + __s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0); + if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm, + BPF_F_MARK_MANGLED_0))) { + bpf_printk("bpf_l4_csum_replace 2: %d", ret); + return ret; + } } return 0; } @@ -961,7 +1005,7 @@ routing(const __u32 flag[6], const void *l4_hdr, const __be32 saddr[4], "outbound: %u", match_set->type, match_set->not, match_set->outbound); #endif - if (*p_u32 & match_set->__value) { + if (*p_u32 & *(__u32 *)&match_set->__value) { good_subrule = true; } } else if (match_set->type == MatchType_DomainSet) { @@ -1056,7 +1100,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { struct ipv6hdr ipv6h; struct tcphdr tcph; struct udphdr udph; - __sum16 bak_cksm = 0; + // __sum16 bak_cksm = 0; __u8 ihl; __u8 ipversion; __u8 l4proto; @@ -1091,9 +1135,18 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { __builtin_memcpy(saddr, &ipv6h.saddr, IPV6_BYTE_LENGTH); } + __u32 ifindex = skb->ifindex; + struct if_params *ifparams = + bpf_map_lookup_elem(&ifindex_params_map, &ifindex); + if (unlikely(!ifparams)) { + return -1; + } + // Never disable checksum in rx. + bool disable_checksum = false; + // If this packet is sent to this host and not a DNS packet, accept it. __u32 tproxy_ip[4]; - int to_host = ip_is_host(ipversion, skb->ifindex, daddr, tproxy_ip); + int to_host = ip_is_host(ipversion, ifparams, daddr, tproxy_ip); if (to_host < 0) { // error // bpf_printk("to_host: %ld", to_host); return TC_ACT_OK; @@ -1114,7 +1167,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { if (l4proto == IPPROTO_TCP) { // Backup for further use. - bak_cksm = tcph.check; + // bak_cksm = tcph.check; tcp_state_syn = tcph.syn && !tcph.ack; struct ip_port key_src; __builtin_memset(&key_src, 0, sizeof(key_src)); @@ -1178,19 +1231,19 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { __u32 *dst_ip = daddr; __u16 dst_port = tcph.dest; if ((ret = rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, dst_ip, tproxy_ip, - true))) { + true, !disable_checksum))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } if ((ret = rewrite_port(skb, IPPROTO_TCP, ihl, dst_port, *tproxy_port, - true))) { + true, !disable_checksum))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } } } else if (l4proto == IPPROTO_UDP) { // Backup for further use. - bak_cksm = udph.check; + // bak_cksm = udph.check; struct ip_port_outbound new_hdr; __builtin_memset(&new_hdr, 0, sizeof(new_hdr)); __builtin_memcpy(new_hdr.ip, daddr, IPV6_BYTE_LENGTH); @@ -1229,22 +1282,23 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { // Rewrite to control plane. // Encap a header to transmit fullcone tuple. - if ((ret = encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, - &new_hdr, sizeof(new_hdr)))) { + if ((ret = + encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &new_hdr, + sizeof(new_hdr), !disable_checksum))) { return TC_ACT_SHOT; } // Rewrite udp dst ip. // bpf_printk("rewrite dst ip from %pI4", &ori_dst.ip); if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, new_hdr.ip, - tproxy_ip, true))) { + tproxy_ip, true, !disable_checksum))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } // Rewrite udp dst port. if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, new_hdr.port, *tproxy_port, - true))) { + true, !disable_checksum))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } @@ -1258,19 +1312,14 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { // bpf_skb_load_bytes(skb, i, &t, 1); // bpf_printk("%02x", t); // } - __u8 *disable_l4_checksum = - bpf_map_lookup_elem(¶m_map, &disable_l4_rx_checksum_key); - if (!disable_l4_checksum) { - bpf_printk("Forgot to set disable_l4_checksum?"); - return TC_ACT_SHOT; - } - if (*disable_l4_checksum) { + + // Disable checksum. + if (disable_checksum) { + // Set checksum zero. __u32 l4_cksm_off = l4_checksum_off(l4proto, ihl); - // Restore the checksum or set it zero. - if (*disable_l4_checksum == DisableL4ChecksumPolicy_SetZero) { - bak_cksm = 0; - } + __sum16 bak_cksm = 0; bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0); + bpf_csum_level(skb, BPF_CSUM_LEVEL_RESET); } return TC_ACT_OK; } @@ -1353,6 +1402,7 @@ int tproxy_lan_egress(struct __sk_buff *skb) { return TC_ACT_OK; } + // bpf_printk("ipsummed: %d", bpf_get_ipsummed(skb)); // Parse saddr and daddr as ipv6 format. __be32 saddr[4]; __be32 daddr[4]; @@ -1382,18 +1432,28 @@ int tproxy_lan_egress(struct __sk_buff *skb) { return TC_ACT_OK; } + __u32 ifindex = skb->ifindex; + struct if_params *ifparams = + bpf_map_lookup_elem(&ifindex_params_map, &ifindex); + if (unlikely(!ifparams)) { + return -1; + } + + bool disable_checksum = ipversion == 4 ? ifparams->tx_l4_cksm_ip4_offload + : ifparams->tx_l4_cksm_ip6_offload; + // If not from tproxy, accept it. __be16 *tproxy_port = bpf_map_lookup_elem(¶m_map, &tproxy_port_key); if (!tproxy_port || *tproxy_port != sport) { return TC_ACT_OK; } __be32 tproxy_ip[4]; - ret = ip_is_host(ipversion, skb->ifindex, saddr, tproxy_ip); + ret = ip_is_host(ipversion, ifparams, saddr, tproxy_ip); if (!(ret == 1) || !equal_ipv6_format(saddr, tproxy_ip)) { return TC_ACT_OK; } - __sum16 bak_cksm = 0; + // __sum16 bak_cksm = 0; if (l4proto == IPPROTO_TCP) { @@ -1412,24 +1472,24 @@ int tproxy_lan_egress(struct __sk_buff *skb) { } // Backup for further use. - bak_cksm = tcph.check; + // bak_cksm = tcph.check; __u32 *src_ip = saddr; __u16 src_port = tcph.source; - if (rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, src_ip, original_dst->ip, - false) < 0) { + if ((ret = rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, src_ip, + original_dst->ip, false, !disable_checksum))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } - if (rewrite_port(skb, IPPROTO_TCP, ihl, src_port, original_dst->port, - false) < 0) { + if ((ret = rewrite_port(skb, IPPROTO_TCP, ihl, src_port, original_dst->port, + false, !disable_checksum))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } } else if (l4proto == IPPROTO_UDP) { // Backup for further use. - bak_cksm = udph.check; + // bak_cksm = udph.check; __u32 *src_ip = saddr; __u16 src_port = udph.source; /// NOTICE: Actually, we do not need symmetrical headers in client and @@ -1441,20 +1501,20 @@ int tproxy_lan_egress(struct __sk_buff *skb) { // Decap header to get fullcone tuple. if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src, - sizeof(ori_src)))) { + sizeof(ori_src), !disable_checksum))) { return TC_ACT_SHOT; } // Rewrite udp src ip if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, src_ip, ori_src.ip, - false))) { + false, !disable_checksum))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } // Rewrite udp src port if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, src_port, ori_src.port, - false))) { + false, !disable_checksum))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } @@ -1470,19 +1530,12 @@ int tproxy_lan_egress(struct __sk_buff *skb) { // } } - __u8 *disable_l4_checksum = - bpf_map_lookup_elem(¶m_map, &disable_l4_tx_checksum_key); - if (!disable_l4_checksum) { - bpf_printk("Forgot to set disable_l4_checksum?"); - return TC_ACT_SHOT; - } - if (*disable_l4_checksum) { + if (disable_checksum) { __u32 l4_cksm_off = l4_checksum_off(l4proto, ihl); - // Restore the checksum or set it zero. - if (*disable_l4_checksum == DisableL4ChecksumPolicy_SetZero) { - bak_cksm = 0; - } + // Set checksum zero to pass. + __sum16 bak_cksm = 0; bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0); + bpf_csum_level(skb, BPF_CSUM_LEVEL_RESET); } return TC_ACT_OK; } @@ -1490,6 +1543,24 @@ int tproxy_lan_egress(struct __sk_buff *skb) { __u8 special_mac_to_tproxy[6] = {2, 0, 2, 3, 0, 0}; __u8 special_mac_from_tproxy[6] = {2, 0, 2, 3, 0, 1}; +static __always_inline bool wan_disable_checksum(const __u32 ifindex, + const __u8 ipversion) { + + struct if_params *ifparams = + bpf_map_lookup_elem(&ifindex_params_map, &ifindex); + if (unlikely(!ifparams)) { + return -1; + } + bool tx_offloaded = (ipversion == 4 && ifparams->tx_l4_cksm_ip4_offload) || + (ipversion == 6 && ifparams->tx_l4_cksm_ip6_offload); + // If tx offloaded, we get bad checksum of packets because we redirect packet + // before the NIC processing. So we have no choice but disable l4 checksum. + + bool disable_l4_checksum = tx_offloaded; + + return disable_l4_checksum; +} + // Routing and redirect the packet back. // We cannot modify the dest address here. So we cooperate with wan_ingress. SEC("tc/wan_egress") @@ -1572,13 +1643,6 @@ int tproxy_wan_egress(struct __sk_buff *skb) { sizeof(ethh.h_source), 0))) { return TC_ACT_SHOT; }; - - // Redirect. - if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) { - bpf_printk("Shot bpf_redirect: %d", ret); - return TC_ACT_SHOT; - } - return TC_ACT_REDIRECT; } else { // Normal packets. @@ -1644,38 +1708,31 @@ int tproxy_wan_egress(struct __sk_buff *skb) { return TC_ACT_OK; } else if (unlikely(outbound == OUTBOUND_BLOCK)) { return TC_ACT_SHOT; - } else { - // Rewrite to control plane. - - if (unlikely(tcp_state_syn)) { - struct ip_port_outbound value_dst; - __builtin_memset(&value_dst, 0, sizeof(value_dst)); - __builtin_memcpy(value_dst.ip, daddr, IPV6_BYTE_LENGTH); - value_dst.port = tcph.dest; - value_dst.outbound = outbound; - // bpf_printk("UPDATE: %pI6:%u", key_src.ip, bpf_ntohs(key_src.port)); - bpf_map_update_elem(&tcp_dst_map, &key_src, &value_dst, BPF_ANY); - } - - // Write mac. - if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), - ethh.h_source, sizeof(ethh.h_source), - 0))) { - return TC_ACT_SHOT; - } - if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), - special_mac_to_tproxy, - sizeof(ethh.h_source), 0))) { - return TC_ACT_SHOT; - }; - - // Redirect. - if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) { - bpf_printk("Shot bpf_redirect: %d", ret); - return TC_ACT_SHOT; - } - return TC_ACT_REDIRECT; } + // Rewrite to control plane. + + if (unlikely(tcp_state_syn)) { + struct ip_port_outbound value_dst; + __builtin_memset(&value_dst, 0, sizeof(value_dst)); + __builtin_memcpy(value_dst.ip, daddr, IPV6_BYTE_LENGTH); + value_dst.port = tcph.dest; + value_dst.outbound = outbound; + // bpf_printk("UPDATE: %pI6:%u", key_src.ip, bpf_ntohs(key_src.port)); + bpf_map_update_elem(&tcp_dst_map, &key_src, &value_dst, BPF_ANY); + } + + // Write mac. + if ((ret = + bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), + ethh.h_source, sizeof(ethh.h_source), 0))) { + return TC_ACT_SHOT; + } + if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), + special_mac_to_tproxy, + sizeof(ethh.h_source), 0))) { + return TC_ACT_SHOT; + }; + } else if (l4proto == IPPROTO_UDP) { // Backup for further use. struct ip_port_outbound new_hdr; @@ -1719,38 +1776,49 @@ int tproxy_wan_egress(struct __sk_buff *skb) { return TC_ACT_OK; } else if (unlikely(new_hdr.outbound == OUTBOUND_BLOCK)) { return TC_ACT_SHOT; - } else { - // Rewrite to control plane. + } - // Write mac. - if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), - ethh.h_source, sizeof(ethh.h_source), - 0))) { - return TC_ACT_SHOT; - } - if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), - special_mac_to_tproxy, - sizeof(ethh.h_source), 0))) { - return TC_ACT_SHOT; - }; + // Rewrite to control plane. - // Encap a header to transmit fullcone tuple. - if ((ret = encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, - &new_hdr, sizeof(new_hdr)))) { - return TC_ACT_SHOT; - } + // Write mac. + if ((ret = + bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), + ethh.h_source, sizeof(ethh.h_source), 0))) { + return TC_ACT_SHOT; + } + if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), + special_mac_to_tproxy, + sizeof(ethh.h_source), 0))) { + return TC_ACT_SHOT; + }; - // Redirect from egress to ingress. - if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) { - bpf_printk("Shot bpf_redirect: %d", ret); - return TC_ACT_SHOT; - } - return TC_ACT_REDIRECT; + bool disable_l4_checksum = wan_disable_checksum(skb->ifindex, ipversion); + // Encap a header to transmit fullcone tuple. + if ((ret = encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, + &new_hdr, sizeof(new_hdr), + // It is a part of ingress link. + !disable_l4_checksum))) { + return TC_ACT_SHOT; } } } - return TC_ACT_OK; + // // Print packet in hex for debugging (checksum or something else). + // if ((l4proto == IPPROTO_TCP ? tcph.dest : udph.dest) == bpf_htons(8443)) { + // bpf_printk("PRINT OUTPUT PACKET"); + // for (__u32 i = 0; i < skb->len && i < 500; i++) { + // __u8 t = 0; + // bpf_skb_load_bytes(skb, i, &t, 1); + // bpf_printk("%02x", t); + // } + // } + + // Redirect from egress to ingress. + if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) { + bpf_printk("Shot bpf_redirect: %d", ret); + return TC_ACT_SHOT; + } + return TC_ACT_REDIRECT; } SEC("tc/wan_ingress") @@ -1810,6 +1878,17 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { return TC_ACT_OK; } + bool disable_l4_checksum = wan_disable_checksum(skb->ifindex, ipversion); + + // // Print packet in hex for debugging (checksum or something else). + // if (dport == bpf_htons(8443)) { + // bpf_printk("PRINT BEFORE PACKET"); + // for (__u32 i = 0; i < skb->len && i < 500; i++) { + // __u8 t = 0; + // bpf_skb_load_bytes(skb, i, &t, 1); + // bpf_printk("%02x", t); + // } + // } if (tproxy_response) { // Send the tproxy response packet to origin. @@ -1841,13 +1920,13 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { } // Rewrite sip and sport. - if (rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, saddr, original_dst->ip, - false) < 0) { + if ((ret = rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, saddr, + original_dst->ip, false, !disable_l4_checksum))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } - if (rewrite_port(skb, IPPROTO_TCP, ihl, sport, original_dst->port, - false) < 0) { + if ((ret = rewrite_port(skb, IPPROTO_TCP, ihl, sport, original_dst->port, + false, !disable_l4_checksum))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } @@ -1861,21 +1940,22 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { // Get source ip/port from our packet header. // Decap header to get fullcone tuple. - if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, - &ori_src, sizeof(ori_src)))) { + if ((ret = + decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src, + sizeof(ori_src), !disable_l4_checksum))) { return TC_ACT_SHOT; } // Rewrite udp src ip if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, saddr, ori_src.ip, - false))) { + false, !disable_l4_checksum))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } // Rewrite udp src port - if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, sport, ori_src.port, - false))) { + if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, sport, ori_src.port, false, + !disable_l4_checksum))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } @@ -1891,7 +1971,8 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { // } } // Rewrite dip to host ip. - if (rewrite_ip(skb, ipversion, l4proto, ihl, daddr, saddr, true) < 0) { + if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, saddr, true, + !disable_l4_checksum))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } @@ -1909,29 +1990,45 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { // bpf_printk("should send to: %pI6:%u", tproxy_ip, // bpf_ntohs(*tproxy_port)); - if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, tproxy_ip, - true))) { - bpf_printk("Shot IP: %d", ret); - return TC_ACT_SHOT; - } - // (1) Use daddr as saddr to pass NIC verification. Notice that we do not - // modify the so tproxy will send packet to it. - if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, saddr, daddr, false))) { + if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, tproxy_ip, true, + !disable_l4_checksum))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } - // Rewrite udp dst port. - if ((ret = rewrite_port(skb, l4proto, ihl, dport, *tproxy_port, true))) { + // Rewrite dst port. + if ((ret = rewrite_port(skb, l4proto, ihl, dport, *tproxy_port, true, + !disable_l4_checksum))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } + + // (1) Use daddr as saddr to pass NIC verification. Notice that we do not + // modify the so tproxy will send packet to it. + if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, saddr, daddr, false, + !disable_l4_checksum))) { + bpf_printk("Shot IP: %d", ret); + return TC_ACT_SHOT; + } + } + + // // Print packet in hex for debugging (checksum or something else). + // if (dport == bpf_htons(8443)) { + // bpf_printk("PRINT AFTER PACKET"); + // for (__u32 i = 0; i < skb->len && i < 500; i++) { + // __u8 t = 0; + // bpf_skb_load_bytes(skb, i, &t, 1); + // bpf_printk("%02x", t); + // } + // } + if (disable_l4_checksum) { + __u32 l4_cksm_off = l4_checksum_off(l4proto, ihl); + // Set checksum zero. + __sum16 bak_cksm = 0; + bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0); + bpf_csum_level(skb, BPF_CSUM_LEVEL_RESET); } - __u32 l4_cksm_off = l4_checksum_off(l4proto, ihl); - // Restore the checksum or set it zero. - __sum16 bak_cksm = 0; - bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0); return TC_ACT_OK; } diff --git a/component/control/routing_matcher_builder.go b/component/control/routing_matcher_builder.go index 4bb8d49..5196cc7 100644 --- a/component/control/routing_matcher_builder.go +++ b/component/control/routing_matcher_builder.go @@ -27,7 +27,7 @@ type RoutingMatcherBuilder struct { *routing.DefaultMatcherBuilder outboundName2Id map[string]uint8 bpf *bpfObjects - rules []_bpfMatchSet + rules []bpfMatchSet SimulatedLpmTries [][]netip.Prefix SimulatedDomainSet []DomainSet Final string @@ -74,7 +74,7 @@ func (b *RoutingMatcherBuilder) AddDomain(f *config_parser.Function, key string, RuleIndex: len(b.rules), Domains: values, }) - b.rules = append(b.rules, _bpfMatchSet{ + b.rules = append(b.rules, bpfMatchSet{ Type: uint8(consts.MatchType_DomainSet), Not: f.Not, Outbound: b.OutboundToId(outbound), @@ -94,7 +94,7 @@ func (b *RoutingMatcherBuilder) AddSourceMac(f *config_parser.Function, macAddrs } lpmTrieIndex := len(b.SimulatedLpmTries) b.SimulatedLpmTries = append(b.SimulatedLpmTries, values) - set := _bpfMatchSet{ + set := bpfMatchSet{ Value: [16]byte{}, Type: uint8(consts.MatchType_Mac), Not: f.Not, @@ -111,7 +111,7 @@ func (b *RoutingMatcherBuilder) AddIp(f *config_parser.Function, values []netip. } lpmTrieIndex := len(b.SimulatedLpmTries) b.SimulatedLpmTries = append(b.SimulatedLpmTries, values) - set := _bpfMatchSet{ + set := bpfMatchSet{ Value: [16]byte{}, Type: uint8(consts.MatchType_IpSet), Not: f.Not, @@ -127,7 +127,7 @@ func (b *RoutingMatcherBuilder) AddPort(f *config_parser.Function, values [][2]u if i == len(values)-1 { outbound = _outbound } - b.rules = append(b.rules, _bpfMatchSet{ + b.rules = append(b.rules, bpfMatchSet{ Type: uint8(consts.MatchType_Port), Value: _bpfPortRange{ PortStart: value[0], @@ -145,7 +145,7 @@ func (b *RoutingMatcherBuilder) AddSourceIp(f *config_parser.Function, values [] } lpmTrieIndex := len(b.SimulatedLpmTries) b.SimulatedLpmTries = append(b.SimulatedLpmTries, values) - set := _bpfMatchSet{ + set := bpfMatchSet{ Value: [16]byte{}, Type: uint8(consts.MatchType_SourceIpSet), Not: f.Not, @@ -161,7 +161,7 @@ func (b *RoutingMatcherBuilder) AddSourcePort(f *config_parser.Function, values if i == len(values)-1 { outbound = _outbound } - b.rules = append(b.rules, _bpfMatchSet{ + b.rules = append(b.rules, bpfMatchSet{ Type: uint8(consts.MatchType_SourcePort), Value: _bpfPortRange{ PortStart: value[0], @@ -177,7 +177,7 @@ func (b *RoutingMatcherBuilder) AddL4Proto(f *config_parser.Function, values con if b.err != nil { return } - b.rules = append(b.rules, _bpfMatchSet{ + b.rules = append(b.rules, bpfMatchSet{ Value: [16]byte{byte(values)}, Type: uint8(consts.MatchType_L4Proto), Not: f.Not, @@ -189,7 +189,7 @@ func (b *RoutingMatcherBuilder) AddIpVersion(f *config_parser.Function, values c if b.err != nil { return } - b.rules = append(b.rules, _bpfMatchSet{ + b.rules = append(b.rules, bpfMatchSet{ Value: [16]byte{byte(values)}, Type: uint8(consts.MatchType_IpVersion), Not: f.Not, @@ -203,7 +203,7 @@ func (b *RoutingMatcherBuilder) AddProcessName(f *config_parser.Function, values if i == len(values)-1 { outbound = _outbound } - matchSet := _bpfMatchSet{ + matchSet := bpfMatchSet{ Type: uint8(consts.MatchType_ProcessName), Not: f.Not, Outbound: b.OutboundToId(outbound), @@ -218,7 +218,7 @@ func (b *RoutingMatcherBuilder) AddFinal(outbound string) { return } b.Final = outbound - b.rules = append(b.rules, _bpfMatchSet{ + b.rules = append(b.rules, bpfMatchSet{ Type: uint8(consts.MatchType_Final), Outbound: b.OutboundToId(outbound), }) diff --git a/go.mod b/go.mod index a415c82..c003efe 100644 --- a/go.mod +++ b/go.mod @@ -32,6 +32,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mzz2017/disk-bloom v1.0.1 // indirect + github.com/safchain/ethtool v0.2.0 // indirect github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/testify v1.8.1 // indirect diff --git a/go.sum b/go.sum index 47f9bda..084d72c 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/safchain/ethtool v0.2.0 h1:dILxMBqDnQfX192cCAPjZr9v2IgVXeElHPy435Z/IdE= +github.com/safchain/ethtool v0.2.0/go.mod h1:WkKB1DnNtvsMlDmQ50sgwowDJV/hGbJSOvJoEXs1AJQ= github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb h1:XfLJSPIOUX+osiMraVgIrMR27uMXnRJWGm1+GL8/63U= github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb/go.mod h1:bR6DqgcAl1zTcOX8/pE2Qkj9XO00eCNqmKb7lXP8EAg= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= @@ -106,6 +108,7 @@ golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201202213521-69691e467435/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18=