From 4145a795590b98a6d502c0a3a998210b4c14e807 Mon Sep 17 00:00:00 2001
From: mzz <30586220+mzz2017@users.noreply.github.com>
Date: Sat, 11 Feb 2023 13:34:12 +0800
Subject: [PATCH] feat: add lan_nat_direct option for non-transparent-bridge user (#14)

---
 README.md                | 1 -
 common/consts/ebpf.go    | 3 +-
 common/utils.go          | 7 +-
 config/config.go         | 3 +-
 control/control_plane.go | 14 +-
 control/kern/tproxy.c    | 336 ++++++++++++++-------------------------
 example.dae              | 19 ++-
 insert.sh                | 9 +-
 8 files changed, 151 insertions(+), 241 deletions(-)

diff --git a/README.md b/README.md
index ffb6915..9a2c756 100644
--- a/README.md
+++ b/README.md
@@ -121,5 +121,4 @@ See [example.dae](https://github.com/v2rayA/dae/blob/main/example.dae).
 But the problem is, after the Linux network stack, before entering the network card, we modify the source IP of this packet, causing the Linux network stack to only make a simple checksum, and the NIC also assumes that this packet is not sent from local, so no further checksum completing.
 1. MACv2 extension extraction.
 1. Log to userspace.
-1. Should not do connectivity check for fixed group nodes.
 1. ...
diff --git a/common/consts/ebpf.go b/common/consts/ebpf.go
index 4904eae..ed89d47 100644
--- a/common/consts/ebpf.go
+++ b/common/consts/ebpf.go
@@ -23,7 +23,8 @@ const (
 	BigEndianTproxyPortKey
 	DisableL4TxChecksumKey
 	DisableL4RxChecksumKey
-	ControlPlaneOidKey
+	ControlPlanePidKey
+	ControlPlaneNatDirectKey
 	OneKey
 	ParamKey = 1
 )
diff --git a/common/utils.go b/common/utils.go
index 74c1155..cb9c643 100644
--- a/common/utils.go
+++ b/common/utils.go
@@ -273,11 +273,12 @@ func FuzzyDecode(to interface{}, val string) bool {
 		}
 		v.SetUint(i)
 	case reflect.Bool:
-		if val == "true" || val == "1" {
+		switch strings.ToLower(val) {
+		case "true", "1", "y", "yes":
 			v.SetBool(true)
-		} else if val == "false" || val == "0" {
+		case "false", "0", "n", "no":
 			v.SetBool(false)
-		} else {
+		default:
 			return false
 		}
 	case reflect.String:
diff --git a/config/config.go b/config/config.go
index 1baff44..b6ca01c 100644
--- a/config/config.go
+++ b/config/config.go
@@ -20,8 +20,9 @@ type Global struct {
 	UdpCheckDns string `mapstructure:"udp_check_dns" default:"cloudflare-dns.com:53"`
 	CheckInterval time.Duration `mapstructure:"check_interval" default:"30s"`
 	CheckTolerance time.Duration `mapstructure:"check_tolerance" default:"0"`
-	DnsUpstream common.UrlOrEmpty `mapstructure:"dns_upstream" require:""`
+	DnsUpstream common.UrlOrEmpty `mapstructure:"dns_upstream" required:""`
 	LanInterface []string `mapstructure:"lan_interface"`
+	LanNatDirect bool `mapstructure:"lan_nat_direct" required:""`
 	WanInterface []string `mapstructure:"wan_interface"`
 }
diff --git a/control/control_plane.go b/control/control_plane.go
index a43e802..cf67164 100644
--- a/control/control_plane.go
+++ b/control/control_plane.go
@@ -135,14 +135,16 @@ func NewControlPlane(
 	}()
 	// Write params.
-	if err = bpf.ParamMap.Update(consts.DisableL4TxChecksumKey, consts.DisableL4ChecksumPolicy_SetZero, ebpf.UpdateAny); err != nil {
+	var lanNatDirect uint32
+	if global.LanNatDirect {
+		lanNatDirect = 1
+	} else {
+		lanNatDirect = 0
+	}
+	if err = bpf.ParamMap.Update(consts.ControlPlaneNatDirectKey, lanNatDirect, ebpf.UpdateAny); err != nil {
 		return nil, err
 	}
-	if err = bpf.ParamMap.Update(consts.DisableL4RxChecksumKey, consts.DisableL4ChecksumPolicy_SetZero, ebpf.UpdateAny); err != nil {
-		return nil, err
-	}
-
 	// Write tproxy (control plane) PID.
- if err = bpf.ParamMap.Update(consts.ControlPlaneOidKey, uint32(os.Getpid()), ebpf.UpdateAny); err != nil { + if err = bpf.ParamMap.Update(consts.ControlPlanePidKey, uint32(os.Getpid()), ebpf.UpdateAny); err != nil { return nil, err } // Write ip_proto to hdr_size mapping for IPv6 extension extraction (it is just for eBPF code insns optimization). diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index a057ef5..5dcc35b 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -47,7 +47,6 @@ #define MAX_INTERFACE_NUM 128 #define MAX_MATCH_SET_LEN (32 * 3) #define MAX_LPM_SIZE 20480 -//#define MAX_LPM_SIZE 20480 #define MAX_LPM_NUM (MAX_MATCH_SET_LEN + 8) #define MAX_DST_MAPPING_NUM (65536 * 2) #define MAX_SRC_PID_PNAME_MAPPING_NUM (65536) @@ -83,6 +82,7 @@ static const __u32 disable_l4_tx_checksum_key static const __u32 disable_l4_rx_checksum_key __attribute__((unused, deprecated)) = 3; static const __u32 control_plane_pid_key = 4; +static const __u32 control_plane_nat_direct_key = 5; // Outbound Connectivity Map: @@ -341,6 +341,34 @@ struct { // Functions: +static void __always_inline get_tuples(struct tuples *tuples, + const struct iphdr *iph, + const struct ipv6hdr *ipv6h, + const struct tcphdr *tcph, + const struct udphdr *udph, + __u8 ipversion, __u8 l4proto) { + __builtin_memset(tuples, 0, sizeof(*tuples)); + tuples->l4proto = l4proto; + if (ipversion == 4) { + tuples->src.ip[2] = bpf_htonl(0x0000ffff); + tuples->src.ip[3] = iph->saddr; + + tuples->dst.ip[2] = bpf_htonl(0x0000ffff); + tuples->dst.ip[3] = iph->daddr; + + } else { + __builtin_memcpy(tuples->dst.ip, &ipv6h->daddr, IPV6_BYTE_LENGTH); + __builtin_memcpy(tuples->src.ip, &ipv6h->saddr, IPV6_BYTE_LENGTH); + } + if (l4proto == IPPROTO_TCP) { + tuples->src.port = tcph->source; + tuples->dst.port = tcph->dest; + } else { + tuples->src.port = udph->source; + tuples->dst.port = udph->dest; + } +} + static __always_inline bool equal16(const __be32 x[4], const __be32 y[4]) { #if __clang_major__ >= 10 return ((__be64 *)x)[0] == ((__be64 *)y)[0] && @@ -365,49 +393,9 @@ static __always_inline __u32 l4_checksum_off(__u8 proto, __u8 ihl) { return ETH_HLEN + ihl * 4 + l4_checksum_rel_off(proto); } -static __always_inline int bpf_update_offload_l4cksm_32(struct __sk_buff *skb, - __u32 l4_cksm_off, - __be32 old, - __be32 new) { - int ret; - __sum16 cksm; - if ((ret = bpf_skb_load_bytes(skb, l4_cksm_off, &cksm, sizeof(cksm)))) { - return ret; - } - // bpf_printk("before: %x", bpf_ntohs(cksm)); - cksm = - bpf_htons(bpf_ntohs(cksm) + bpf_ntohs(*(__be16 *)&new) + - bpf_ntohs(*((__be16 *)&new + 1)) - bpf_ntohs(*(__be16 *)&old) - - bpf_ntohs(*((__be16 *)&old + 1))); - if ((ret = bpf_skb_store_bytes(skb, l4_cksm_off, &cksm, sizeof(cksm), 0))) { - return ret; - } - // bpf_printk("after: %x", bpf_ntohs(cksm)); - return 0; -} - -static __always_inline int bpf_update_offload_l4cksm_16(struct __sk_buff *skb, - __u32 l4_cksm_off, - __be16 old, - __be16 new) { - int ret; - __sum16 cksm; - if ((ret = bpf_skb_load_bytes(skb, l4_cksm_off, &cksm, sizeof(cksm)))) { - return ret; - } - // bpf_printk("before: %x", bpf_ntohs(cksm)); - cksm = bpf_htons(bpf_ntohs(cksm) + bpf_ntohs(new) - bpf_ntohs(old)); - if ((ret = bpf_skb_store_bytes(skb, l4_cksm_off, &cksm, sizeof(cksm), 0))) { - return ret; - } - // bpf_printk("after: %x", bpf_ntohs(cksm)); - return 0; -} - static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion, __u8 proto, __u8 ihl, __be32 old_ip[4], - __be32 new_ip[4], bool is_dest, - bool calc_l4_cksm) { + __be32 
new_ip[4], bool is_dest) { // Nothing to do. if (equal16(old_ip, new_ip)) { return 0; @@ -427,25 +415,16 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion, __be32 _old_ip = old_ip[3]; __be32 _new_ip = new_ip[3]; - if (calc_l4_cksm) { - int ret; - // __sum16 test; - // bpf_skb_load_bytes(skb, l4_cksm_off, &test, sizeof(test)); - // bpf_printk("rewrite ip before: %x, %pI4->%pI4", test, &_old_ip, - // &_new_ip); - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, _old_ip, _new_ip, - l4flags | sizeof(_new_ip)))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); - return ret; - } - } else { - // NIC checksum offload path. But problem remains. FIXME. - if ((ret = bpf_update_offload_l4cksm_32(skb, l4_cksm_off, _old_ip, - _new_ip))) { - bpf_printk("bpf_update_offload_cksm_32: %d", ret); - return ret; - } + int ret; + // __sum16 test; + // bpf_skb_load_bytes(skb, l4_cksm_off, &test, sizeof(test)); + // bpf_printk("rewrite ip before: %x, %pI4->%pI4", test, &_old_ip, + // &_new_ip); + if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, _old_ip, _new_ip, + l4flags | sizeof(_new_ip)))) { + bpf_printk("bpf_l4_csum_replace: %d", ret); + return ret; } // bpf_skb_load_bytes(skb, l4_cksm_off, &test, sizeof(test)); // bpf_printk("rewrite ip after: %x", test); @@ -464,13 +443,11 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion, } } else { - if (calc_l4_cksm) { - __s64 cksm = - bpf_csum_diff(old_ip, IPV6_BYTE_LENGTH, new_ip, IPV6_BYTE_LENGTH, 0); - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, l4flags))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); - return ret; - } + __s64 cksm = + bpf_csum_diff(old_ip, IPV6_BYTE_LENGTH, new_ip, IPV6_BYTE_LENGTH, 0); + if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, l4flags))) { + bpf_printk("bpf_l4_csum_replace: %d", ret); + return ret; } // bpf_printk("%pI6 -> %pI6", old_ip, new_ip); @@ -487,8 +464,7 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion, static __always_inline int rewrite_port(struct __sk_buff *skb, __u8 proto, __u8 ihl, __be16 old_port, - __be16 new_port, bool is_dest, - bool calc_l4_cksm) { + __be16 new_port, bool is_dest) { // Nothing to do. if (old_port == new_port) { return 0; @@ -528,12 +504,11 @@ static __always_inline int rewrite_port(struct __sk_buff *skb, __u8 proto, // bpf_printk("rewrite port before: %x, %u->%u", test, bpf_ntohs(old_port), // bpf_ntohs(new_port)); // } - if (calc_l4_cksm) { - if ((ret = bpf_l4_csum_replace(skb, cksm_off, old_port, new_port, - l4flags | sizeof(new_port)))) { - bpf_printk("bpf_l4_csum_replace: %d", ret); - return ret; - } + + if ((ret = bpf_l4_csum_replace(skb, cksm_off, old_port, new_port, + l4flags | sizeof(new_port)))) { + bpf_printk("bpf_l4_csum_replace: %d", ret); + return ret; } // if (!bpf_skb_load_bytes(skb, cksm_off, &test, sizeof(test))) { // bpf_printk("rewrite port aftetr: %x", test); @@ -693,8 +668,7 @@ parse_transport(const struct __sk_buff *skb, struct ethhdr *ethh, } static __always_inline int adjust_udp_len(struct __sk_buff *skb, __u16 oldlen, - __u32 ihl, __u16 len_diff, - bool calc_l4_cksm) { + __u32 ihl, __u16 len_diff) { if (unlikely(!len_diff)) { return 0; } @@ -716,28 +690,21 @@ static __always_inline int adjust_udp_len(struct __sk_buff *skb, __u16 oldlen, // Calculate checksum and store the new value. int ret; __u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl); - if (calc_l4_cksm) { - // replace twice because len exists both pseudo hdr and hdr. 
- if ((ret = bpf_l4_csum_replace( - skb, udp_csum_off, oldlen, newlen, - sizeof(oldlen) | BPF_F_PSEUDO_HDR | // udp len is in the pseudo hdr - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); - return ret; - } - if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, oldlen, newlen, - sizeof(oldlen) | BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); - return ret; - } - } else { - // NIC checksum offload path. But problem remains. FIXME. - if ((ret = - bpf_update_offload_l4cksm_16(skb, udp_csum_off, oldlen, newlen))) { - bpf_printk("bpf_update_offload_cksm: %d", ret); - return ret; - } + + // replace twice because len exists both pseudo hdr and hdr. + if ((ret = bpf_l4_csum_replace( + skb, udp_csum_off, oldlen, newlen, + sizeof(oldlen) | BPF_F_PSEUDO_HDR | // udp len is in the pseudo hdr + BPF_F_MARK_MANGLED_0))) { + bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); + return ret; } + if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, oldlen, newlen, + sizeof(oldlen) | BPF_F_MARK_MANGLED_0))) { + bpf_printk("bpf_l4_csum_replace newudplen: %d", ret); + return ret; + } + if ((ret = bpf_skb_store_bytes( skb, (__u32)ETH_HLEN + ihl * 4 + offsetof(struct udphdr, len), &newlen, sizeof(oldlen), 0))) { @@ -786,8 +753,7 @@ static __always_inline int adjust_ipv4_len(struct __sk_buff *skb, __u16 oldlen, static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, __u8 ipversion, __u8 ihl, __be16 iphdr_tot_len, - void *newhdr, __u32 newhdrlen, - bool calc_l4_cksm) { + void *newhdr, __u32 newhdrlen) { if (unlikely(newhdrlen % 4 != 0)) { bpf_printk("encap_after_udp_hdr: unexpected newhdrlen value %u :must " "be a multiple of 4", @@ -810,8 +776,7 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, } // Add room for new udp payload header. if ((ret = bpf_skb_adjust_room(skb, newhdrlen, BPF_ADJ_ROOM_NET, - calc_l4_cksm ? BPF_F_ADJ_ROOM_NO_CSUM_RESET - : 0))) { + BPF_F_ADJ_ROOM_NO_CSUM_RESET))) { bpf_printk("UDP ADJUST ROOM(encap): %d", ret); return ret; } @@ -831,21 +796,19 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, } // Rewrite udp len. - if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, newhdrlen, - calc_l4_cksm))) { + if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, newhdrlen))) { bpf_printk("adjust_udp_len: %d", ret); return ret; } // Rewrite udp payload. 
- if (calc_l4_cksm) { - __u32 l4_cksm_off = l4_checksum_off(IPPROTO_UDP, ihl); - __s64 cksm = bpf_csum_diff(NULL, 0, newhdr, newhdrlen, 0); - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace 2: %d", ret); - return ret; - } + + __u32 l4_cksm_off = l4_checksum_off(IPPROTO_UDP, ihl); + __s64 cksm = bpf_csum_diff(NULL, 0, newhdr, newhdrlen, 0); + if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, + BPF_F_MARK_MANGLED_0))) { + bpf_printk("bpf_l4_csum_replace 2: %d", ret); + return ret; } if ((ret = bpf_skb_store_bytes(skb, udp_payload_off, newhdr, newhdrlen, 0))) { bpf_printk("bpf_skb_store_bytes 2: %d", ret); @@ -857,8 +820,7 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb, __u8 ipversion, __u8 ihl, __be16 ipv4hdr_tot_len, void *to, - __u32 decap_hdrlen, - bool calc_l4_cksm) { + __u32 decap_hdrlen) { if (unlikely(decap_hdrlen % 4 != 0)) { bpf_printk("encap_after_udp_hdr: unexpected decap_hdrlen value %u :must " "be a multiple of 4", @@ -901,8 +863,7 @@ static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb, // Adjust room to decap the header. if ((ret = bpf_skb_adjust_room(skb, -decap_hdrlen, BPF_ADJ_ROOM_NET, - calc_l4_cksm ? BPF_F_ADJ_ROOM_NO_CSUM_RESET - : 0))) { + BPF_F_ADJ_ROOM_NO_CSUM_RESET))) { bpf_printk("UDP ADJUST ROOM(decap): %d", ret); return ret; } @@ -916,21 +877,19 @@ static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb, } // Rewrite udp len. - if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, -decap_hdrlen, - calc_l4_cksm))) { + if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, -decap_hdrlen))) { bpf_printk("adjust_udp_len: %d", ret); return ret; } // Rewrite udp checksum. - if (calc_l4_cksm) { - __u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl); - __s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0); - if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm, - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace 2: %d", ret); - return ret; - } + + __u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl); + __s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0); + if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm, + BPF_F_MARK_MANGLED_0))) { + bpf_printk("bpf_l4_csum_replace 2: %d", ret); + return ret; } return 0; } @@ -1198,26 +1157,8 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { } // Prepare five tuples. - struct tuples tuples = {0}; - tuples.l4proto = l4proto; - if (ipversion == 4) { - tuples.src.ip[2] = bpf_htonl(0x0000ffff); - tuples.src.ip[3] = iph.saddr; - - tuples.dst.ip[2] = bpf_htonl(0x0000ffff); - tuples.dst.ip[3] = iph.daddr; - - } else { - __builtin_memcpy(tuples.dst.ip, &ipv6h.daddr, IPV6_BYTE_LENGTH); - __builtin_memcpy(tuples.src.ip, &ipv6h.saddr, IPV6_BYTE_LENGTH); - } - if (l4proto == IPPROTO_TCP) { - tuples.src.port = tcph.source; - tuples.dst.port = tcph.dest; - } else { - tuples.src.port = udph.source; - tuples.dst.port = udph.dest; - } + struct tuples tuples; + get_tuples(&tuples, &iph, &ipv6h, &tcph, &udph, ipversion, l4proto); /** ip rule add fwmark 0x8000000/0x8000000 table 2023 @@ -1235,7 +1176,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { __u32 tuple_size; struct bpf_sock *sk; bool is_old_conn = false; - __u32 flag[6] = {0}; + __u32 flag[6]; void *l4hdr; if (ipversion == 4) { @@ -1278,6 +1219,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb) { // Routing for new connection. 
new_connection: + __builtin_memset(flag, 0, sizeof(flag)); if (l4proto == IPPROTO_TCP) { if (!(tcph.syn && !tcph.ack)) { // Not a new TCP connection. @@ -1317,6 +1259,12 @@ new_connection: } #endif if (outbound == OUTBOUND_DIRECT) { + __u32 *nat; + if ((nat = + bpf_map_lookup_elem(¶m_map, &control_plane_nat_direct_key)) && + *nat) { + goto control_plane_tproxy; + } goto direct; } else if (unlikely(outbound == OUTBOUND_BLOCK)) { goto block; @@ -1335,6 +1283,7 @@ new_connection: goto block; } +control_plane_tproxy: // Save routing result. if ((ret = bpf_map_update_elem(&routing_tuples_map, &tuples, &outbound, BPF_ANY))) { @@ -1444,24 +1393,6 @@ static __always_inline bool pid_is_control_plane(struct __sk_buff *skb, __u8 special_mac_to_tproxy[6] = {2, 0, 2, 3, 0, 0}; __u8 special_mac_from_tproxy[6] = {2, 0, 2, 3, 0, 1}; -static __always_inline bool wan_disable_checksum(const __u32 ifindex, - const __u8 ipversion) { - - struct if_params *ifparams = - bpf_map_lookup_elem(&ifindex_params_map, &ifindex); - if (unlikely(!ifparams)) { - return -1; - } - bool tx_offloaded = (ipversion == 4 && ifparams->tx_l4_cksm_ip4_offload) || - (ipversion == 6 && ifparams->tx_l4_cksm_ip6_offload); - // If tx offloaded, we get bad checksum of packets because we redirect packet - // before the NIC processing. So we have no choice but disable l4 checksum. - - bool disable_l4_checksum = tx_offloaded; - - return disable_l4_checksum; -} - // Routing and redirect the packet back. // We cannot modify the dest address here. So we cooperate with wan_ingress. SEC("tc/wan_egress") @@ -1490,28 +1421,8 @@ int tproxy_wan_egress(struct __sk_buff *skb) { } // Backup for further use. - __be16 ipv4_tot_len = 0; - struct tuples tuples = {0}; - tuples.l4proto = l4proto; - if (ipversion == 4) { - tuples.src.ip[2] = bpf_htonl(0x0000ffff); - tuples.src.ip[3] = iph.saddr; - - tuples.dst.ip[2] = bpf_htonl(0x0000ffff); - tuples.dst.ip[3] = iph.daddr; - - ipv4_tot_len = iph.tot_len; - } else { - __builtin_memcpy(tuples.dst.ip, &ipv6h.daddr, IPV6_BYTE_LENGTH); - __builtin_memcpy(tuples.src.ip, &ipv6h.saddr, IPV6_BYTE_LENGTH); - } - if (l4proto == IPPROTO_TCP) { - tuples.src.port = tcph.source; - tuples.dst.port = tcph.dest; - } else { - tuples.src.port = udph.source; - tuples.dst.port = udph.dest; - } + struct tuples tuples; + get_tuples(&tuples, &iph, &ipv6h, &tcph, &udph, ipversion, l4proto); // We should know if this packet is from tproxy. // We do not need to check the source ip because we have skipped packets not @@ -1549,7 +1460,7 @@ int tproxy_wan_egress(struct __sk_buff *skb) { struct bpf_sock *sk = bpf_skc_lookup_tcp(skb, &tuple, tuple_size, BPF_F_CURRENT_NETNS, 0); if (sk) { - // Not a tproxy response. + // Not a tproxy WAN response. It is a tproxy LAN response. tproxy_response = false; bpf_sk_release(sk); return TC_ACT_OK; @@ -1710,8 +1621,8 @@ int tproxy_wan_egress(struct __sk_buff *skb) { } new_hdr.outbound = ret; #if defined(__DEBUG_ROUTING) || defined(__PRINT_ROUTING_RESULT) - bpf_printk("udp(wan): outbound: %u, %pI6:%u", new_hdr.outbound, tuples.dst.ip, - bpf_ntohs(new_hdr.port)); + bpf_printk("udp(wan): outbound: %u, %pI6:%u", new_hdr.outbound, + tuples.dst.ip, bpf_ntohs(new_hdr.port)); #endif if (new_hdr.outbound == OUTBOUND_DIRECT) { @@ -1747,12 +1658,10 @@ int tproxy_wan_egress(struct __sk_buff *skb) { return TC_ACT_SHOT; }; - bool disable_l4_checksum = wan_disable_checksum(skb->ifindex, ipversion); // Encap a header to transmit fullcone tuple. 
- if ((ret = encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, - &new_hdr, sizeof(new_hdr), - // It is a part of ingress link. - !disable_l4_checksum))) { + if ((ret = encap_after_udp_hdr(skb, ipversion, ihl, + ipversion == 4 ? iph.tot_len : 0, &new_hdr, + sizeof(new_hdr)))) { return TC_ACT_SHOT; } } @@ -1833,8 +1742,6 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { return TC_ACT_OK; } - bool disable_l4_checksum = wan_disable_checksum(skb->ifindex, ipversion); - // // Print packet in hex for debugging (checksum or something else). // if (dport == bpf_htons(8443)) { // bpf_printk("PRINT BEFORE PACKET"); @@ -1875,12 +1782,12 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { // Rewrite sip and sport. if ((ret = rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, saddr, - original_dst->ip, false, !disable_l4_checksum))) { + original_dst->ip, false))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } if ((ret = rewrite_port(skb, IPPROTO_TCP, ihl, sport, original_dst->port, - false, !disable_l4_checksum))) { + false))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } @@ -1894,22 +1801,21 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { // Get source ip/port from our packet header. // Decap header to get fullcone tuple. - if ((ret = - decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src, - sizeof(ori_src), !disable_l4_checksum))) { + if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, + &ori_src, sizeof(ori_src)))) { return TC_ACT_SHOT; } // Rewrite udp src ip if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, saddr, ori_src.ip, - false, !disable_l4_checksum))) { + false))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } // Rewrite udp src port - if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, sport, ori_src.port, false, - !disable_l4_checksum))) { + if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, sport, ori_src.port, + false))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } @@ -1925,8 +1831,7 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { // } } // Rewrite dip to host ip. - if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, saddr, true, - !disable_l4_checksum))) { + if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, saddr, true))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } @@ -1944,23 +1849,21 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { // bpf_printk("should send to: %pI6:%u", tproxy_ip, // bpf_ntohs(*tproxy_port)); - if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, tproxy_ip, true, - !disable_l4_checksum))) { + if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, tproxy_ip, + true))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } // Rewrite dst port. - if ((ret = rewrite_port(skb, l4proto, ihl, dport, *tproxy_port, true, - !disable_l4_checksum))) { + if ((ret = rewrite_port(skb, l4proto, ihl, dport, *tproxy_port, true))) { bpf_printk("Shot Port: %d", ret); return TC_ACT_SHOT; } // (1) Use daddr as saddr to pass NIC verification. Notice that we do not // modify the so tproxy will send packet to it. - if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, saddr, daddr, false, - !disable_l4_checksum))) { + if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, saddr, daddr, false))) { bpf_printk("Shot IP: %d", ret); return TC_ACT_SHOT; } @@ -1975,13 +1878,6 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { // bpf_printk("%02x", t); // } // } - if (disable_l4_checksum) { - __u32 l4_cksm_off = l4_checksum_off(l4proto, ihl); - // Set checksum zero. 
-    __sum16 bak_cksm = 0;
-    bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0);
-    bpf_csum_level(skb, BPF_CSUM_LEVEL_RESET);
-  }
   return TC_ACT_OK;
 }
diff --git a/example.dae b/example.dae
index 9674102..36751dc 100644
--- a/example.dae
+++ b/example.dae
@@ -6,7 +6,7 @@ global {
   log_level: info
 
   # Node connectivity check.
-  # Url and dns should have both IPv4 and IPv6.
+  # The URL and DNS host should have both IPv4 and IPv6 records.
   tcp_check_url: 'http://cp.cloudflare.com'
   udp_check_dns: 'dns.google:53'
   check_interval: 30s
@@ -19,20 +19,25 @@ global {
   # IPv4 or IPv6 to use according to group policy (such as min latency policy).
   # Please make sure DNS traffic will go through and be forwarded by dae.
   # The upstream DNS answer MUST NOT be polluted.
-  # The request to dns upstream follows routing defined below.
+  # Requests to the DNS upstream follow the routing defined below.
   dns_upstream: 'tcp+udp://dns.google:53'
 
   # The LAN interface to bind. Use it if you only want to proxy LAN instead of localhost.
   # Multiple interfaces split by ",".
   # lan_interface: docker0
 
+  # SNAT incoming connections to avoid MAC learning.
+  # Set it to true if you are NOT using dae as a transparent bridge; note that this reduces
+  # forwarding performance for direct traffic.
+  lan_nat_direct: true
+
   # The WAN interface to bind. Use it if you want to proxy localhost
   # Multiple interfaces split by ",".
   wan_interface: wlp5s0
 }
 
-# Subscriptions defined here will be resolved as nodes and merged as a part of global node pool.
-# Support to give subscription a tag and filter nodes from given subscription in group section.
+# Subscriptions defined here will be resolved as nodes and merged as a part of the global node pool.
+# You can give a subscription a tag and filter nodes from a given subscription in the group section.
 subscription {
   # Add your subscription links here.
   my_sub: 'https://www.example.com/subscription/link'
@@ -40,7 +45,7 @@ subscription {
   'https://example.com/no_tag_link'
 }
 
-# Nodes defined here will be merged as a part of global node pool.
+# Nodes defined here will be merged as a part of the global node pool.
 node {
   # Add your node links here.
   # Support socks5, http, https, ss, ssr, vmess, vless, trojan, trojan-go
@@ -51,7 +56,7 @@ node {
 
 # Node group (outbound).
 group {
   my_group {
-    # Filter nodes from global node pool defined by subscription and node section above.
+    # Filter nodes from the global node pool defined by the subscription and node section above.
     # Pass node names as input of keyword/regex filter.
     filter: name(regex:'HK|SG|TW', keyword:'JP', keyword: SH) && !name(keyword:"GCP")
@@ -66,7 +71,7 @@ group {
   }
 
   group2 {
-    # Filter nodes from global node pool defined by subscription and node section above.
+    # Filter nodes from the global node pool defined by the subscription and node section above.
     # Pass node names as input of keyword/regex filter.
filter: subtag(regex: '^my_', another_sub) && !name(keyword: 'ExpireAt:') diff --git a/insert.sh b/insert.sh index ae71a5c..3b6b02d 100755 --- a/insert.sh +++ b/insert.sh @@ -17,7 +17,12 @@ sudo tc filter del dev $wan ingress sudo tc filter del dev $wan egress sudo tc filter add dev $lan ingress bpf direct-action obj foo.o sec tc/ingress -sudo tc filter add dev $wan ingress bpf direct-action obj foo.o sec tc/wan_ingress -sudo tc filter add dev $wan egress bpf direct-action obj foo.o sec tc/wan_egress +# sudo tc filter add dev $wan ingress bpf direct-action obj foo.o sec tc/wan_ingress +# sudo tc filter add dev $wan egress bpf direct-action obj foo.o sec tc/wan_egress + +sudo tc filter del dev $lan ingress +sudo tc filter del dev $lan egress +sudo tc filter del dev $wan ingress +sudo tc filter del dev $wan egress exit 0 \ No newline at end of file
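
Note on the parameter-passing pattern used in this patch: the per-interface checksum-offload switches are dropped, and instead a single control_plane_nat_direct_key slot in param_map carries the decision. User space flattens the lan_nat_direct boolean to a __u32 in NewControlPlane, and tproxy_lan_ingress looks that slot up for new direct connections to decide whether they should still be steered through the tproxy (and therefore SNATed). Below is a minimal stand-alone sketch of the user-space side, assuming the github.com/cilium/ebpf library that dae already uses; the map created here and the MaxEntries value are illustrative placeholders, not dae's generated BPF objects, and only the key value 5 comes from the patch itself.

// Illustrative sketch (not part of the patch): publishing a boolean option to
// an eBPF program through a one-value-per-key array map. Requires root/CAP_BPF.
package main

import (
	"fmt"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/rlimit"
)

// Key constants mirroring control/kern/tproxy.c in this patch.
const (
	controlPlanePidKey       uint32 = 4
	controlPlaneNatDirectKey uint32 = 5
)

func main() {
	// Allow map creation on kernels that still enforce the memlock rlimit.
	if err := rlimit.RemoveMemlock(); err != nil {
		panic(err)
	}

	// In dae the map comes from the compiled BPF object; here we build a
	// stand-alone array map just to show the Update/Lookup round trip.
	params, err := ebpf.NewMap(&ebpf.MapSpec{
		Name:       "param_map",
		Type:       ebpf.Array,
		KeySize:    4,
		ValueSize:  4,
		MaxEntries: 16, // illustrative size
	})
	if err != nil {
		panic(err)
	}
	defer params.Close()

	// Boolean config value flattened to 0/1, as in NewControlPlane.
	lanNatDirect := uint32(1)
	if err := params.Update(controlPlaneNatDirectKey, lanNatDirect, ebpf.UpdateAny); err != nil {
		panic(err)
	}

	// User space can read the slot back the same way the kernel side does
	// with bpf_map_lookup_elem(&param_map, &control_plane_nat_direct_key).
	var v uint32
	if err := params.Lookup(controlPlaneNatDirectKey, &v); err != nil {
		panic(err)
	}
	fmt.Println("control_plane_nat_direct =", v)
}

On the kernel side, the new_connection path in tproxy.c above performs the corresponding lookup and jumps to control_plane_tproxy when the value is non-zero, so OUTBOUND_DIRECT LAN traffic is still handed to the tproxy for SNAT.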