diff --git a/component/control/control_plane.go b/component/control/control_plane.go index 39e4070..906a0e8 100644 --- a/component/control/control_plane.go +++ b/component/control/control_plane.go @@ -79,9 +79,9 @@ func NewControlPlane( kernelVersion.String(), consts.CgSocketCookieFeatureVersion.String()) } - if len(lanInterface) > 0 && c.kernelVersion.Less(consts.SkAssignFeatureVersion) { + if len(lanInterface) > 0 && kernelVersion.Less(consts.SkAssignFeatureVersion) { return nil, fmt.Errorf("your kernel version %v does not support bind to LAN; expect >=%v; remove lan_interface in config file and try again", - c.kernelVersion.String(), + kernelVersion.String(), consts.SkAssignFeatureVersion.String()) } if kernelVersion.Less(consts.BasicFeatureVersion) { diff --git a/component/control/control_plane_core.go b/component/control/control_plane_core.go index cda5237..dd375a1 100644 --- a/component/control/control_plane_core.go +++ b/component/control/control_plane_core.go @@ -203,21 +203,6 @@ func (c *ControlPlaneCore) BindLan(ifname string) error { if err := netlink.FilterAdd(filterIngress); err != nil { return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err) } - filterEgress := &netlink.BpfFilter{ - FilterAttrs: netlink.FilterAttrs{ - LinkIndex: link.Attrs().Index, - Parent: netlink.HANDLE_MIN_EGRESS, - Handle: netlink.MakeHandle(0, 1), - Protocol: unix.ETH_P_ALL, - Priority: 0, - }, - Fd: c.bpf.bpfPrograms.TproxyLanEgress.FD(), - Name: consts.AppName + "_egress", - DirectAction: true, - } - if err := netlink.FilterAdd(filterEgress); err != nil { - return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err) - } return nil } diff --git a/component/control/kern/tproxy.c b/component/control/kern/tproxy.c index c4890b3..798bce3 100644 --- a/component/control/kern/tproxy.c +++ b/component/control/kern/tproxy.c @@ -661,35 +661,6 @@ parse_transport(const struct __sk_buff *skb, struct ethhdr *ethh, } } -static __always_inline int 
-get_tproxy_ip(__u8 ipversion, struct if_params *ifparams, __be32 tproxy_ip[4]) { - if (ipversion == 4 && (*ifparams).has_ip4) { - __builtin_memcpy(tproxy_ip, (*ifparams).ip4, IPV6_BYTE_LENGTH); - } else if (ipversion == 6 && (*ifparams).has_ip6) { - __builtin_memcpy(tproxy_ip, (*ifparams).ip6, IPV6_BYTE_LENGTH); - } else { - // Should TC_ACT_OK outer. - return -EFAULT; - } - return 0; -} - -static __always_inline int ip_is_host(__u8 ipversion, - struct if_params *ifparams, - const __be32 ip[4], __be32 tproxy_ip[4]) { - if (tproxy_ip) { - int ret; - if ((ret = get_tproxy_ip(ipversion, ifparams, tproxy_ip))) { - return ret; - } - } - - struct lpm_key lpm_key; - lpm_key.trie_key.prefixlen = IPV6_BYTE_LENGTH * 8; - __builtin_memcpy(lpm_key.data, ip, IPV6_BYTE_LENGTH); - return bpf_map_lookup_elem(&host_ip_lpm, &lpm_key) ? 1 : 0; -} - static __always_inline int adjust_udp_len(struct __sk_buff *skb, __u16 oldlen, __u32 ihl, __u16 len_diff, bool calc_l4_cksm) { @@ -1414,172 +1385,6 @@ static __always_inline bool pid_is_control_plane(struct __sk_buff *skb, } } -/** - FIXME: We can do packet modification as early as possible (for example, at - lwt point) to avoid weird checksum offload problems by docker, etc. They do - not obey the checksum specification. At present, we specially judge docker - interfaces and disable checksum for them. - - References: - https://github.com/torvalds/linux/blob/v6.1/samples/bpf/test_lwt_bpf.sh - https://blog.csdn.net/Rong_Toa/article/details/109392163 -*/ -// Do SNAT. -SEC("tc/egress") -int tproxy_lan_egress(struct __sk_buff *skb) { - struct ethhdr ethh; - struct iphdr iph; - struct ipv6hdr ipv6h; - struct tcphdr tcph; - struct udphdr udph; - __u8 ipversion; - __u8 l4proto; - __u8 ihl; - int ret = parse_transport(skb, &ethh, &iph, &ipv6h, &tcph, &udph, &ihl, - &ipversion, &l4proto); - if (ret) { - return TC_ACT_OK; - } - - // bpf_printk("ipsummed: %d", bpf_get_ipsummed(skb)); - // Parse saddr and daddr as ipv6 format. 
- __be32 saddr[4]; - __be32 daddr[4]; - __be16 ipv4_tot_len = 0; - if (ipversion == 4) { - saddr[0] = 0; - saddr[1] = 0; - saddr[2] = bpf_htonl(0x0000ffff); - saddr[3] = iph.saddr; - - daddr[0] = 0; - daddr[1] = 0; - daddr[2] = bpf_htonl(0x0000ffff); - daddr[3] = iph.daddr; - - ipv4_tot_len = iph.tot_len; - } else { - __builtin_memcpy(daddr, &ipv6h.daddr, IPV6_BYTE_LENGTH); - __builtin_memcpy(saddr, &ipv6h.saddr, IPV6_BYTE_LENGTH); - } - __be16 sport; - if (l4proto == IPPROTO_TCP) { - sport = tcph.source; - } else if (l4proto == IPPROTO_UDP) { - sport = udph.source; - } else { - return TC_ACT_OK; - } - - __u32 ifindex = skb->ifindex; - struct if_params *ifparams = - bpf_map_lookup_elem(&ifindex_params_map, &ifindex); - if (unlikely(!ifparams)) { - bpf_printk("no ifparams found for ifindex %u", ifindex); - return TC_ACT_OK; - } - - bool disable_checksum = ipversion == 4 ? ifparams->tx_l4_cksm_ip4_offload - : ifparams->tx_l4_cksm_ip6_offload; - - // If not from tproxy, accept it. - __be16 *tproxy_port = bpf_map_lookup_elem(&param_map, &tproxy_port_key); - if (!tproxy_port || *tproxy_port != sport) { - return TC_ACT_OK; - } - __be32 tproxy_ip[4]; - ret = ip_is_host(ipversion, ifparams, saddr, tproxy_ip); - if (!(ret == 1) || !equal16(saddr, tproxy_ip)) { - return TC_ACT_OK; - } - - // __sum16 bak_cksm = 0; - - if (l4proto == IPPROTO_TCP) { - - // Lookup original dest. - struct ip_port key_dst; - __builtin_memset(&key_dst, 0, sizeof(key_dst)); - __builtin_memcpy(key_dst.ip, daddr, IPV6_BYTE_LENGTH); - key_dst.port = tcph.dest; - struct ip_port_outbound *original_dst = - bpf_map_lookup_elem(&tcp_dst_map, &key_dst); - if (!original_dst) { - bpf_printk("[%X]Bad Connection: to: %pI6:%u", bpf_ntohl(tcph.seq), - key_dst.ip, bpf_ntohs(key_dst.port)); - // Do not impact previous connections. - return TC_ACT_SHOT; - } - - // Backup for further use. 
- // bak_cksm = tcph.check; - - __u32 *src_ip = saddr; - __u16 src_port = tcph.source; - if ((ret = rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, src_ip, - original_dst->ip, false, !disable_checksum))) { - bpf_printk("Shot IP: %d", ret); - return TC_ACT_SHOT; - } - if ((ret = rewrite_port(skb, IPPROTO_TCP, ihl, src_port, original_dst->port, - false, !disable_checksum))) { - bpf_printk("Shot Port: %d", ret); - return TC_ACT_SHOT; - } - } else if (l4proto == IPPROTO_UDP) { - - // Backup for further use. - // bak_cksm = udph.check; - __u32 *src_ip = saddr; - __u16 src_port = udph.source; - /// NOTICE: Actually, we do not need symmetrical headers in client and - /// server. We use it for convinience. This behavior may change in the - /// future. Outbound here is useless and redundant. - struct ip_port_outbound ori_src; - - // Get source ip/port from our packet header. - - // Decap header to get fullcone tuple. - if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src, - sizeof(ori_src), !disable_checksum))) { - return TC_ACT_SHOT; - } - - // Rewrite udp src ip - if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, src_ip, ori_src.ip, - false, !disable_checksum))) { - bpf_printk("Shot IP: %d", ret); - return TC_ACT_SHOT; - } - - // Rewrite udp src port - if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, src_port, ori_src.port, - false, !disable_checksum))) { - bpf_printk("Shot Port: %d", ret); - return TC_ACT_SHOT; - } - - // bpf_printk("real from: %pI4:%u", &ori_src.ip, bpf_ntohs(ori_src.port)); - - // Print packet in hex for debugging (checksum or something else). - // bpf_printk("UDP EGRESS OK"); - // for (__u32 i = 0; i < skb->len && i < 1500; i++) { - // __u8 t = 0; - // bpf_skb_load_bytes(skb, i, &t, 1); - // bpf_printk("%02x", t); - // } - } - - if (disable_checksum) { - __u32 l4_cksm_off = l4_checksum_off(l4proto, ihl); - // Set checksum zero to pass. 
- __sum16 bak_cksm = 0; - bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0); - bpf_csum_level(skb, BPF_CSUM_LEVEL_RESET); - } - return TC_ACT_OK; -} - __u8 special_mac_to_tproxy[6] = {2, 0, 2, 3, 0, 0}; __u8 special_mac_from_tproxy[6] = {2, 0, 2, 3, 0, 1};