fix: checksum

This commit is contained in:
mzz2017 2023-02-04 11:38:01 +08:00
parent b117dfa7d5
commit 0d29b6fccc
9 changed files with 429 additions and 263 deletions

View File

@ -31,19 +31,19 @@ See [example.dae](https://github.com/v2rayA/dae/blob/main/example.dae).
Use `uname -r` to check the kernel version on your machine.
**Bind to LAN: >= 5.2**
**Bind to LAN: >= 5.8**
You need to bind dae to the LAN interface if you want to provide network service for the LAN as an intermediate device.
This feature requires the kernel version of machine on which dae install >= 5.2.
This feature requires the kernel version of the machine on which dae is installed to be >= 5.8.
Note that if you bind dae to LAN only, dae only provides network service for traffic from the LAN and does not affect local programs.
**Bind to WAN: >= 5.7**
**Bind to WAN: >= 5.8**
You need to bind dae to the WAN interface if you want dae to provide network service for local programs.
This feature requires kernel version of the machine >= 5.7.
This feature requires the kernel version of the machine to be >= 5.8.
Note that if you bind dae to WAN only, dae only provides network service for local programs and does not affect traffic coming in from other interfaces.
@ -54,7 +54,7 @@ Usually, mainstream desktop distributions have these items turned on. But in ord
Use following commands to check the kernel configuration items on your machine.
```shell
zcat /proc/config.gz || cat /boot/config || cat /boot/config-$(uname -r)
zcat /proc/config.gz || cat /boot/{config,config-$(uname -r)}
```
**Bind to LAN**
@ -72,14 +72,13 @@ CONFIG_DEBUG_INFO_BTF
Check them using command like:
```shell
(zcat /proc/config.gz || cat /boot/config || cat /boot/config-$(uname -r)) | grep 'CONFIG_DEBUG_INFO_BTF='
(zcat /proc/config.gz || cat /boot/{config,config-$(uname -r)}) | grep 'CONFIG_DEBUG_INFO_BTF='
```
## TODO
1. Check dns upstream and source loop (whether upstream is also a client of us) and remind the user to add sip rule.
1. Domain routing performance optimization.
1. DisableL4Checksum by link.
1. Handle the case that nodes do not support UDP.
1. Handle the case that nodes do not support IPv6.
1. L4Checksum problem.

View File

@ -96,6 +96,11 @@ const (
IpVersion_X IpVersionType = 3
)
var BasicFeatureVersion = internal.Version{5, 2, 0}
var FtraceFeatureVersion = internal.Version{5, 5, 0}
var CgSocketCookieFeatureVersion = internal.Version{5, 7, 0}
// Minimum kernel versions, one per feature, that callers compare against the
// running kernel's version.
var (
// BasicFeatureVersion is the baseline kernel version; callers report a kernel
// older than this as not satisfying the basic requirement.
BasicFeatureVersion = internal.Version{5, 2, 0}
// Deprecated: Ftrace does not support arm64 yet (Linux 6.2).
FtraceFeatureVersion = internal.Version{5, 5, 0}
// BatchUpdateFeatureVersion is the threshold used by the BatchUpdate helper to
// decide whether to fall back to its path for older kernels.
BatchUpdateFeatureVersion = internal.Version{5, 6, 0}
// CgSocketCookieFeatureVersion is the version required for binding to a WAN
// interface.
CgSocketCookieFeatureVersion = internal.Version{5, 7, 0}
// ChecksumFeatureVersion is the version required by the checksum-related BPF
// features.
ChecksumFeatureVersion = internal.Version{5, 8, 0}
)

View File

@ -12,6 +12,7 @@ import (
"fmt"
"github.com/cilium/ebpf"
"github.com/v2rayA/dae/common"
"github.com/v2rayA/dae/common/consts"
"github.com/v2rayA/dae/pkg/ebpf_internal"
"net/netip"
"os"
@ -29,15 +30,6 @@ type _bpfPortRange struct {
PortEnd uint16
}
type _bpfMatchSet struct {
// TODO: Need sync with C code.
Value [16]byte
Type uint8
Not bool
Outbound uint8
_ [1]byte
}
func (r _bpfPortRange) Encode() (b [16]byte) {
binary.LittleEndian.PutUint16(b[:2], r.PortStart)
binary.LittleEndian.PutUint16(b[2:], r.PortEnd)
@ -78,7 +70,7 @@ func cidrToBpfLpmKey(prefix netip.Prefix) _bpfLpmKey {
func BatchUpdate(m *ebpf.Map, keys interface{}, values interface{}, opts *ebpf.BatchOptions) (n int, err error) {
var old bool
version, e := internal.KernelVersion()
if e != nil || version.Less(internal.Version{5, 6, 0}) {
if e != nil || version.Less(consts.BatchUpdateFeatureVersion) {
old = true
}
if !old {
@ -155,3 +147,15 @@ func detectCgroupPath() (string, error) {
return "", errors.New("cgroup2 not mounted")
}
// CheckVersionRequirement reports whether the given kernel version is new
// enough for the offload capabilities recorded in p.
//
// When both TxL4CksmIp4Offload and TxL4CksmIp6Offload are set, no version
// constraint applies. Otherwise the checksum has to be calculated on the CPU,
// which needs BPF_F_ADJ_ROOM_NO_CSUM_RESET, so version must not be less than
// consts.ChecksumFeatureVersion; if it is, a descriptive error is returned.
func (p bpfIfParams) CheckVersionRequirement(version *internal.Version) (err error) {
	// Both address families are offloaded: nothing to verify.
	if p.TxL4CksmIp4Offload && p.TxL4CksmIp6Offload {
		return nil
	}
	// At least one family needs the CPU checksum path; the kernel is new enough.
	if !version.Less(consts.ChecksumFeatureVersion) {
		return nil
	}
	return fmt.Errorf(
		"your NIC does not support checksum offload and your kernel version %v does not support related BPF features; expect >=%v; upgrade your kernel and try again",
		version.String(),
		consts.ChecksumFeatureVersion.String(),
	)
}

View File

@ -66,13 +66,18 @@ func NewControlPlane(
if e != nil {
return nil, fmt.Errorf("failed to get kernel version: %w", e)
}
if kernelVersion.Less(consts.BasicFeatureVersion) {
return nil, fmt.Errorf("your kernel version %v does not satisfy basic requirement; expect >=%v", c.kernelVersion.String(), consts.BasicFeatureVersion.String())
// Must judge version from high to low to reduce the number of user upgrading kernel.
if kernelVersion.Less(consts.ChecksumFeatureVersion) {
return nil, fmt.Errorf("your kernel version %v does not support checksum related features; expect >=%v; upgrade your kernel and try again", kernelVersion.String(),
consts.ChecksumFeatureVersion.String())
}
if len(wanInterface) > 0 && kernelVersion.Less(consts.CgSocketCookieFeatureVersion) {
return nil, fmt.Errorf("your kernel version %v does not support bind to WAN; expect >=%v; remove wan_interface in config file and try again", kernelVersion.String(),
consts.CgSocketCookieFeatureVersion.String())
}
if kernelVersion.Less(consts.BasicFeatureVersion) {
return nil, fmt.Errorf("your kernel version %v does not satisfy basic requirement; expect >=%v", c.kernelVersion.String(), consts.BasicFeatureVersion.String())
}
// Allow the current process to lock memory for eBPF resources.
if err = rlimit.RemoveMemlock(); err != nil {

View File

@ -9,6 +9,7 @@ import (
"fmt"
"github.com/cilium/ebpf"
ciliumLink "github.com/cilium/ebpf/link"
"github.com/safchain/ethtool"
"github.com/sirupsen/logrus"
"github.com/v2rayA/dae/common"
"github.com/v2rayA/dae/common/consts"
@ -17,6 +18,7 @@ import (
"golang.org/x/sys/unix"
"net/netip"
"os"
"regexp"
)
type ControlPlaneCore struct {
@ -42,23 +44,16 @@ func (c *ControlPlaneCore) Close() (err error) {
return err
}
func (c *ControlPlaneCore) BindLan(ifname string) error {
c.log.Infof("Bind to LAN: %v", ifname)
link, err := netlink.LinkByName(ifname)
if err != nil {
return err
}
// Insert an elem into IfindexIpsMap.
func getifParamsFromLink(link netlink.Link) (ifParams bpfIfParams, err error) {
// TODO: We should monitor IP change of the link.
ipnets, err := netlink.AddrList(link, netlink.FAMILY_ALL)
if err != nil {
return err
return bpfIfParams{}, err
}
// TODO: If we monitor IP change of the link, we should remove code below.
if len(ipnets) == 0 {
return fmt.Errorf("interface %v has no ip", ifname)
return bpfIfParams{}, fmt.Errorf("interface %v has no ip", link.Attrs().Name)
}
var linkIp bpfIfIp
// Get first Ip4 and Ip6.
for _, ipnet := range ipnets {
ip, ok := netip.AddrFromSlice(ipnet.IP)
if !ok {
@ -67,38 +62,84 @@ func (c *ControlPlaneCore) BindLan(ifname string) error {
if ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() {
continue
}
if (ip.Is6() && linkIp.HasIp6) ||
(ip.Is4() && linkIp.HasIp4) {
if (ip.Is6() && ifParams.HasIp6) ||
(ip.Is4() && ifParams.HasIp4) {
continue
}
ip6format := ip.As16()
if ip.Is4() {
linkIp.HasIp4 = true
linkIp.Ip4 = common.Ipv6ByteSliceToUint32Array(ip6format[:])
ifParams.HasIp4 = true
ifParams.Ip4 = common.Ipv6ByteSliceToUint32Array(ip6format[:])
} else {
linkIp.HasIp6 = true
linkIp.Ip6 = common.Ipv6ByteSliceToUint32Array(ip6format[:])
ifParams.HasIp6 = true
ifParams.Ip6 = common.Ipv6ByteSliceToUint32Array(ip6format[:])
}
if linkIp.HasIp4 && linkIp.HasIp6 {
if ifParams.HasIp4 && ifParams.HasIp6 {
break
}
}
if err := c.bpf.IfindexTproxyIpMap.Update(uint32(link.Attrs().Index), linkIp, ebpf.UpdateAny); err != nil {
// Get link offload features.
et, err := ethtool.NewEthtool()
if err != nil {
return bpfIfParams{}, err
}
defer et.Close()
features, err := et.Features(link.Attrs().Name)
if err != nil {
return bpfIfParams{}, err
}
if features["tx-checksum-ip-generic"] {
ifParams.TxL4CksmIp4Offload = true
ifParams.TxL4CksmIp6Offload = true
}
if features["tx-checksum-ipv4"] {
ifParams.TxL4CksmIp4Offload = true
}
if features["tx-checksum-ipv6"] {
ifParams.TxL4CksmIp6Offload = true
}
if features["rx-checksum"] {
ifParams.RxCksmOffload = true
}
switch {
case regexp.MustCompile(`^docker\d+$`).MatchString(link.Attrs().Name):
ifParams.UseNonstandardOffloadAlgorithm = true
default:
}
return ifParams, nil
}
func (c *ControlPlaneCore) BindLan(ifname string) error {
c.log.Infof("Bind to LAN: %v", ifname)
link, err := netlink.LinkByName(ifname)
if err != nil {
return err
}
/// Insert an elem into IfindexParamsMap.
ifParams, err := getifParamsFromLink(link)
if err != nil {
return err
}
if err = ifParams.CheckVersionRequirement(c.kernelVersion); err != nil {
return err
}
if err := c.bpf.IfindexParamsMap.Update(uint32(link.Attrs().Index), ifParams, ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
// FIXME: not only this link ip.
if linkIp.HasIp4 {
if ifParams.HasIp4 {
if err := c.bpf.HostIpLpm.Update(_bpfLpmKey{
PrefixLen: 128,
Data: linkIp.Ip4,
Data: ifParams.Ip4,
}, uint32(1), ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
}
if linkIp.HasIp6 {
if ifParams.HasIp6 {
if err := c.bpf.HostIpLpm.Update(_bpfLpmKey{
PrefixLen: 128,
Data: linkIp.Ip6,
Data: ifParams.Ip6,
}, uint32(1), ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
@ -169,6 +210,17 @@ func (c *ControlPlaneCore) BindWan(ifname string) error {
if err != nil {
return err
}
/// Insert an elem into IfindexParamsMap.
ifParams, err := getifParamsFromLink(link)
if err != nil {
return err
}
if err = ifParams.CheckVersionRequirement(c.kernelVersion); err != nil {
return err
}
if err := c.bpf.IfindexParamsMap.Update(uint32(link.Attrs().Index), ifParams, ebpf.UpdateAny); err != nil {
return fmt.Errorf("update IfindexIpsMap: %w", err)
}
/// Set-up SrcPidMapper.
/// Attach programs to support pname routing.

View File

@ -69,8 +69,10 @@ enum {
// Param keys:
static const __u32 zero_key = 0;
static const __u32 tproxy_port_key = 1;
static const __u32 disable_l4_tx_checksum_key = 2;
static const __u32 disable_l4_rx_checksum_key = 3;
static const __u32 disable_l4_tx_checksum_key
__attribute__((unused, deprecated)) = 2;
static const __u32 disable_l4_rx_checksum_key
__attribute__((unused, deprecated)) = 3;
static const __u32 control_plane_pid_key = 4;
struct ip_port {
@ -154,20 +156,25 @@ struct {
} dns_upstream_map SEC(".maps");
// Interface Ips:
struct if_ip {
struct if_params {
__be32 ip4[4];
__be32 ip6[4];
bool hasIp4;
bool hasIp6;
bool has_ip4;
bool has_ip6;
bool rx_cksm_offload;
bool tx_l4_cksm_ip4_offload;
bool tx_l4_cksm_ip6_offload;
bool use_nonstandard_offload_algorithm;
};
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, __u32); // ifindex
__type(value, struct if_ip); // ip
__type(key, __u32); // ifindex
__type(value, struct if_params); // ip
__uint(max_entries, MAX_INTERFACE_NUM);
/// NOTICE: No persistence.
// __uint(pinning, LIBBPF_PIN_BY_NAME);
} ifindex_tproxy_ip_map SEC(".maps");
} ifindex_params_map SEC(".maps");
// Array of LPM tries:
struct lpm_key {
@ -228,8 +235,7 @@ struct port_range {
*/
struct match_set {
union {
/// NOTICE: MUST sync with component/control/bpf_utils.go.
__u32 __value; // Placeholder for bpf2go.
__u8 __value[16]; // Placeholder for bpf2go.
__u32 index;
struct port_range port_range;
@ -237,8 +243,8 @@ struct match_set {
enum IpVersionType ip_version;
__u32 pname[TASK_COMM_LEN / 4];
};
bool not ; // A subrule flag (this is not a match_set flag).
enum MatchType type;
bool not ; // A subrule flag (this is not a match_set flag).
__u8 outbound; // User-defined value range is [0, 252].
};
struct {
@ -310,7 +316,8 @@ static __always_inline __u32 l4_checksum_off(__u8 proto, __u8 ihl) {
static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion,
__u8 proto, __u8 ihl, __be32 old_ip[4],
__be32 new_ip[4], bool is_dest) {
__be32 new_ip[4], bool is_dest,
bool calc_l4_cksm) {
// Nothing to do.
if (equal_ipv6_format(old_ip, new_ip)) {
return 0;
@ -327,14 +334,24 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion,
}
if (ipversion == 4) {
__be32 _old_ip = old_ip[3];
__be32 _new_ip = new_ip[3];
if (calc_l4_cksm) {
if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, _old_ip, _new_ip,
l4flags | sizeof(_new_ip)))) {
bpf_printk("bpf_l4_csum_replace: %d", ret);
return ret;
int ret;
// __sum16 test;
// bpf_skb_load_bytes(skb, l4_cksm_off, &test, sizeof(test));
// bpf_printk("rewrite ip before: %x, %pI4->%pI4", test, &_old_ip,
// &_new_ip);
if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, _old_ip, _new_ip,
l4flags | sizeof(_new_ip)))) {
bpf_printk("bpf_l4_csum_replace: %d", ret);
return ret;
}
}
// bpf_skb_load_bytes(skb, l4_cksm_off, &test, sizeof(test));
// bpf_printk("rewrite ip after: %x", test);
if ((ret = bpf_l3_csum_replace(skb, IPV4_CSUM_OFF, _old_ip, _new_ip,
sizeof(_new_ip)))) {
@ -349,11 +366,14 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion,
return ret;
}
} else {
__s64 cksm =
bpf_csum_diff(new_ip, IPV6_BYTE_LENGTH, old_ip, IPV6_BYTE_LENGTH, 0);
if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, l4flags))) {
bpf_printk("bpf_l4_csum_replace: %d", ret);
return ret;
if (calc_l4_cksm) {
__s64 cksm =
bpf_csum_diff(old_ip, IPV6_BYTE_LENGTH, new_ip, IPV6_BYTE_LENGTH, 0);
if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, l4flags))) {
bpf_printk("bpf_l4_csum_replace: %d", ret);
return ret;
}
}
// bpf_printk("%pI6 -> %pI6", old_ip, new_ip);
@ -370,7 +390,8 @@ static __always_inline int rewrite_ip(struct __sk_buff *skb, __u8 ipversion,
static __always_inline int rewrite_port(struct __sk_buff *skb, __u8 proto,
__u8 ihl, __be16 old_port,
__be16 new_port, bool is_dest) {
__be16 new_port, bool is_dest,
bool calc_l4_cksm) {
// Nothing to do.
if (old_port == new_port) {
return 0;
@ -397,22 +418,34 @@ static __always_inline int rewrite_port(struct __sk_buff *skb, __u8 proto,
}
l4flags |= BPF_F_MARK_MANGLED_0;
break;
default:
return -EINVAL;
}
// bpf_printk("%u -> %u", bpf_ntohs(old_port), bpf_ntohs(new_port));
int ret;
if ((ret = bpf_l4_csum_replace(skb, cksm_off, old_port, new_port,
l4flags | sizeof(new_port)))) {
bpf_printk("bpf_l4_csum_replace: %d", ret);
// __sum16 test;
// if (!bpf_skb_load_bytes(skb, cksm_off, &test, sizeof(test))) {
// bpf_printk("rewrite port before: %x, %u->%u", test, bpf_ntohs(old_port),
// bpf_ntohs(new_port));
// }
if (calc_l4_cksm) {
if ((ret = bpf_l4_csum_replace(skb, cksm_off, old_port, new_port,
l4flags | sizeof(new_port)))) {
bpf_printk("bpf_l4_csum_replace: %d", ret);
return ret;
}
}
// if (!bpf_skb_load_bytes(skb, cksm_off, &test, sizeof(test))) {
// bpf_printk("rewrite port aftetr: %x", test);
// }
if ((ret = bpf_skb_store_bytes(skb, port_off, &new_port, sizeof(new_port),
0))) {
return ret;
}
ret = bpf_skb_store_bytes(skb, port_off, &new_port, sizeof(new_port), 0);
if (ret) {
return ret;
}
return 0;
}
@ -560,16 +593,12 @@ parse_transport(const struct __sk_buff *skb, struct ethhdr *ethh,
return 1;
}
static __always_inline int get_tproxy_ip(__u8 ipversion, __u32 ifindex,
__be32 tproxy_ip[4]) {
struct if_ip *if_ip = bpf_map_lookup_elem(&ifindex_tproxy_ip_map, &ifindex);
if (unlikely(!if_ip)) {
return -1;
}
if (ipversion == 4 && (*if_ip).hasIp4) {
__builtin_memcpy(tproxy_ip, (*if_ip).ip4, IPV6_BYTE_LENGTH);
} else if (ipversion == 6 && (*if_ip).hasIp6) {
__builtin_memcpy(tproxy_ip, (*if_ip).ip6, IPV6_BYTE_LENGTH);
static __always_inline int
get_tproxy_ip(__u8 ipversion, struct if_params *ifparams, __be32 tproxy_ip[4]) {
if (ipversion == 4 && (*ifparams).has_ip4) {
__builtin_memcpy(tproxy_ip, (*ifparams).ip4, IPV6_BYTE_LENGTH);
} else if (ipversion == 6 && (*ifparams).has_ip6) {
__builtin_memcpy(tproxy_ip, (*ifparams).ip6, IPV6_BYTE_LENGTH);
} else {
// Should TC_ACT_OK outer.
return -EFAULT;
@ -577,11 +606,12 @@ static __always_inline int get_tproxy_ip(__u8 ipversion, __u32 ifindex,
return 0;
}
static __always_inline int ip_is_host(__u8 ipversion, __u32 ifindex,
static __always_inline int ip_is_host(__u8 ipversion,
struct if_params *ifparams,
const __be32 ip[4], __be32 tproxy_ip[4]) {
if (tproxy_ip) {
int ret;
if ((ret = get_tproxy_ip(ipversion, ifindex, tproxy_ip))) {
if ((ret = get_tproxy_ip(ipversion, ifparams, tproxy_ip))) {
return ret;
}
}
@ -593,7 +623,8 @@ static __always_inline int ip_is_host(__u8 ipversion, __u32 ifindex,
}
static __always_inline int adjust_udp_len(struct __sk_buff *skb, __u16 oldlen,
__u32 ihl, __u16 len_diff) {
__u32 ihl, __u16 len_diff,
bool calc_l4_cksm) {
if (unlikely(!len_diff)) {
return 0;
}
@ -614,20 +645,21 @@ static __always_inline int adjust_udp_len(struct __sk_buff *skb, __u16 oldlen,
// Calculate checksum and store the new value.
int ret;
__u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl);
// replace twice because len exists both pseudo hdr and hdr.
if ((ret = bpf_l4_csum_replace(
skb, udp_csum_off, oldlen, newlen,
sizeof(oldlen) | BPF_F_PSEUDO_HDR | // udp len is in the pseudo hdr
BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace newudplen: %d", ret);
return ret;
}
if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, oldlen, newlen,
sizeof(oldlen) | BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace newudplen: %d", ret);
return ret;
if (calc_l4_cksm) {
__u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl);
// replace twice because len exists both pseudo hdr and hdr.
if ((ret = bpf_l4_csum_replace(
skb, udp_csum_off, oldlen, newlen,
sizeof(oldlen) | BPF_F_PSEUDO_HDR | // udp len is in the pseudo hdr
BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace newudplen: %d", ret);
return ret;
}
if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, oldlen, newlen,
sizeof(oldlen) | BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace newudplen: %d", ret);
return ret;
}
}
if ((ret = bpf_skb_store_bytes(
skb, (__u32)ETH_HLEN + ihl * 4 + offsetof(struct udphdr, len),
@ -677,7 +709,8 @@ static __always_inline int adjust_ipv4_len(struct __sk_buff *skb, __u16 oldlen,
static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb,
__u8 ipversion, __u8 ihl,
__be16 iphdr_tot_len,
void *newhdr, __u32 newhdrlen) {
void *newhdr, __u32 newhdrlen,
bool calc_l4_cksm) {
if (unlikely(newhdrlen % 4 != 0)) {
bpf_printk("encap_after_udp_hdr: unexpected newhdrlen value %u :must "
"be a multiple of 4",
@ -699,7 +732,9 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb,
return ret;
}
// Add room for new udp payload header.
if ((ret = bpf_skb_adjust_room(skb, newhdrlen, BPF_ADJ_ROOM_NET, 0))) {
if ((ret = bpf_skb_adjust_room(skb, newhdrlen, BPF_ADJ_ROOM_NET,
calc_l4_cksm ? BPF_F_ADJ_ROOM_NO_CSUM_RESET
: 0))) {
bpf_printk("UDP ADJUST ROOM(encap): %d", ret);
return ret;
}
@ -719,18 +754,21 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb,
}
// Rewrite udp len.
if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, newhdrlen))) {
if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, newhdrlen,
calc_l4_cksm))) {
bpf_printk("adjust_udp_len: %d", ret);
return ret;
}
// Rewrite udp payload.
__u32 l4_cksm_off = l4_checksum_off(IPPROTO_UDP, ihl);
__s64 cksm = bpf_csum_diff(NULL, 0, newhdr, newhdrlen, 0);
if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm,
BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace 2: %d", ret);
return ret;
if (calc_l4_cksm) {
__u32 l4_cksm_off = l4_checksum_off(IPPROTO_UDP, ihl);
__s64 cksm = bpf_csum_diff(NULL, 0, newhdr, newhdrlen, 0);
if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm,
BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace 2: %d", ret);
return ret;
}
}
if ((ret = bpf_skb_store_bytes(skb, udp_payload_off, newhdr, newhdrlen, 0))) {
bpf_printk("bpf_skb_store_bytes 2: %d", ret);
@ -742,7 +780,8 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb,
static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb,
__u8 ipversion, __u8 ihl,
__be16 ipv4hdr_tot_len, void *to,
__u32 decap_hdrlen) {
__u32 decap_hdrlen,
bool calc_l4_cksm) {
if (unlikely(decap_hdrlen % 4 != 0)) {
bpf_printk("encap_after_udp_hdr: unexpected decap_hdrlen value %u :must "
"be a multiple of 4",
@ -784,7 +823,9 @@ static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb,
}
// Adjust room to decap the header.
if ((ret = bpf_skb_adjust_room(skb, -decap_hdrlen, BPF_ADJ_ROOM_NET, 0))) {
if ((ret = bpf_skb_adjust_room(skb, -decap_hdrlen, BPF_ADJ_ROOM_NET,
calc_l4_cksm ? BPF_F_ADJ_ROOM_NO_CSUM_RESET
: 0))) {
bpf_printk("UDP ADJUST ROOM(decap): %d", ret);
return ret;
}
@ -798,18 +839,21 @@ static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb,
}
// Rewrite udp len.
if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, -decap_hdrlen))) {
if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, -decap_hdrlen,
calc_l4_cksm))) {
bpf_printk("adjust_udp_len: %d", ret);
return ret;
}
// Rewrite udp checksum.
__u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl);
__s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0);
if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm,
BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace 2: %d", ret);
return ret;
if (calc_l4_cksm) {
__u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl);
__s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0);
if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm,
BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace 2: %d", ret);
return ret;
}
}
return 0;
}
@ -961,7 +1005,7 @@ routing(const __u32 flag[6], const void *l4_hdr, const __be32 saddr[4],
"outbound: %u",
match_set->type, match_set->not, match_set->outbound);
#endif
if (*p_u32 & match_set->__value) {
if (*p_u32 & *(__u32 *)&match_set->__value) {
good_subrule = true;
}
} else if (match_set->type == MatchType_DomainSet) {
@ -1056,7 +1100,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
struct ipv6hdr ipv6h;
struct tcphdr tcph;
struct udphdr udph;
__sum16 bak_cksm = 0;
// __sum16 bak_cksm = 0;
__u8 ihl;
__u8 ipversion;
__u8 l4proto;
@ -1091,9 +1135,18 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
__builtin_memcpy(saddr, &ipv6h.saddr, IPV6_BYTE_LENGTH);
}
__u32 ifindex = skb->ifindex;
struct if_params *ifparams =
bpf_map_lookup_elem(&ifindex_params_map, &ifindex);
if (unlikely(!ifparams)) {
return -1;
}
// Never disable checksum in rx.
bool disable_checksum = false;
// If this packet is sent to this host and not a DNS packet, accept it.
__u32 tproxy_ip[4];
int to_host = ip_is_host(ipversion, skb->ifindex, daddr, tproxy_ip);
int to_host = ip_is_host(ipversion, ifparams, daddr, tproxy_ip);
if (to_host < 0) { // error
// bpf_printk("to_host: %ld", to_host);
return TC_ACT_OK;
@ -1114,7 +1167,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
if (l4proto == IPPROTO_TCP) {
// Backup for further use.
bak_cksm = tcph.check;
// bak_cksm = tcph.check;
tcp_state_syn = tcph.syn && !tcph.ack;
struct ip_port key_src;
__builtin_memset(&key_src, 0, sizeof(key_src));
@ -1178,19 +1231,19 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
__u32 *dst_ip = daddr;
__u16 dst_port = tcph.dest;
if ((ret = rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, dst_ip, tproxy_ip,
true))) {
true, !disable_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
if ((ret = rewrite_port(skb, IPPROTO_TCP, ihl, dst_port, *tproxy_port,
true))) {
true, !disable_checksum))) {
bpf_printk("Shot Port: %d", ret);
return TC_ACT_SHOT;
}
}
} else if (l4proto == IPPROTO_UDP) {
// Backup for further use.
bak_cksm = udph.check;
// bak_cksm = udph.check;
struct ip_port_outbound new_hdr;
__builtin_memset(&new_hdr, 0, sizeof(new_hdr));
__builtin_memcpy(new_hdr.ip, daddr, IPV6_BYTE_LENGTH);
@ -1229,22 +1282,23 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
// Rewrite to control plane.
// Encap a header to transmit fullcone tuple.
if ((ret = encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len,
&new_hdr, sizeof(new_hdr)))) {
if ((ret =
encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &new_hdr,
sizeof(new_hdr), !disable_checksum))) {
return TC_ACT_SHOT;
}
// Rewrite udp dst ip.
// bpf_printk("rewrite dst ip from %pI4", &ori_dst.ip);
if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, new_hdr.ip,
tproxy_ip, true))) {
tproxy_ip, true, !disable_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
// Rewrite udp dst port.
if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, new_hdr.port, *tproxy_port,
true))) {
true, !disable_checksum))) {
bpf_printk("Shot Port: %d", ret);
return TC_ACT_SHOT;
}
@ -1258,19 +1312,14 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
// bpf_skb_load_bytes(skb, i, &t, 1);
// bpf_printk("%02x", t);
// }
__u8 *disable_l4_checksum =
bpf_map_lookup_elem(&param_map, &disable_l4_rx_checksum_key);
if (!disable_l4_checksum) {
bpf_printk("Forgot to set disable_l4_checksum?");
return TC_ACT_SHOT;
}
if (*disable_l4_checksum) {
// Disable checksum.
if (disable_checksum) {
// Set checksum zero.
__u32 l4_cksm_off = l4_checksum_off(l4proto, ihl);
// Restore the checksum or set it zero.
if (*disable_l4_checksum == DisableL4ChecksumPolicy_SetZero) {
bak_cksm = 0;
}
__sum16 bak_cksm = 0;
bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0);
bpf_csum_level(skb, BPF_CSUM_LEVEL_RESET);
}
return TC_ACT_OK;
}
@ -1353,6 +1402,7 @@ int tproxy_lan_egress(struct __sk_buff *skb) {
return TC_ACT_OK;
}
// bpf_printk("ipsummed: %d", bpf_get_ipsummed(skb));
// Parse saddr and daddr as ipv6 format.
__be32 saddr[4];
__be32 daddr[4];
@ -1382,18 +1432,28 @@ int tproxy_lan_egress(struct __sk_buff *skb) {
return TC_ACT_OK;
}
__u32 ifindex = skb->ifindex;
struct if_params *ifparams =
bpf_map_lookup_elem(&ifindex_params_map, &ifindex);
if (unlikely(!ifparams)) {
return -1;
}
bool disable_checksum = ipversion == 4 ? ifparams->tx_l4_cksm_ip4_offload
: ifparams->tx_l4_cksm_ip6_offload;
// If not from tproxy, accept it.
__be16 *tproxy_port = bpf_map_lookup_elem(&param_map, &tproxy_port_key);
if (!tproxy_port || *tproxy_port != sport) {
return TC_ACT_OK;
}
__be32 tproxy_ip[4];
ret = ip_is_host(ipversion, skb->ifindex, saddr, tproxy_ip);
ret = ip_is_host(ipversion, ifparams, saddr, tproxy_ip);
if (!(ret == 1) || !equal_ipv6_format(saddr, tproxy_ip)) {
return TC_ACT_OK;
}
__sum16 bak_cksm = 0;
// __sum16 bak_cksm = 0;
if (l4proto == IPPROTO_TCP) {
@ -1412,24 +1472,24 @@ int tproxy_lan_egress(struct __sk_buff *skb) {
}
// Backup for further use.
bak_cksm = tcph.check;
// bak_cksm = tcph.check;
__u32 *src_ip = saddr;
__u16 src_port = tcph.source;
if (rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, src_ip, original_dst->ip,
false) < 0) {
if ((ret = rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, src_ip,
original_dst->ip, false, !disable_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
if (rewrite_port(skb, IPPROTO_TCP, ihl, src_port, original_dst->port,
false) < 0) {
if ((ret = rewrite_port(skb, IPPROTO_TCP, ihl, src_port, original_dst->port,
false, !disable_checksum))) {
bpf_printk("Shot Port: %d", ret);
return TC_ACT_SHOT;
}
} else if (l4proto == IPPROTO_UDP) {
// Backup for further use.
bak_cksm = udph.check;
// bak_cksm = udph.check;
__u32 *src_ip = saddr;
__u16 src_port = udph.source;
/// NOTICE: Actually, we do not need symmetrical headers in client and
@ -1441,20 +1501,20 @@ int tproxy_lan_egress(struct __sk_buff *skb) {
// Decap header to get fullcone tuple.
if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src,
sizeof(ori_src)))) {
sizeof(ori_src), !disable_checksum))) {
return TC_ACT_SHOT;
}
// Rewrite udp src ip
if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, src_ip, ori_src.ip,
false))) {
false, !disable_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
// Rewrite udp src port
if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, src_port, ori_src.port,
false))) {
false, !disable_checksum))) {
bpf_printk("Shot Port: %d", ret);
return TC_ACT_SHOT;
}
@ -1470,19 +1530,12 @@ int tproxy_lan_egress(struct __sk_buff *skb) {
// }
}
__u8 *disable_l4_checksum =
bpf_map_lookup_elem(&param_map, &disable_l4_tx_checksum_key);
if (!disable_l4_checksum) {
bpf_printk("Forgot to set disable_l4_checksum?");
return TC_ACT_SHOT;
}
if (*disable_l4_checksum) {
if (disable_checksum) {
__u32 l4_cksm_off = l4_checksum_off(l4proto, ihl);
// Restore the checksum or set it zero.
if (*disable_l4_checksum == DisableL4ChecksumPolicy_SetZero) {
bak_cksm = 0;
}
// Set checksum zero to pass.
__sum16 bak_cksm = 0;
bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0);
bpf_csum_level(skb, BPF_CSUM_LEVEL_RESET);
}
return TC_ACT_OK;
}
@ -1490,6 +1543,24 @@ int tproxy_lan_egress(struct __sk_buff *skb) {
__u8 special_mac_to_tproxy[6] = {2, 0, 2, 3, 0, 0};
__u8 special_mac_from_tproxy[6] = {2, 0, 2, 3, 0, 1};
// wan_disable_checksum reports whether L4 checksum calculation should be
// disabled for packets of the given IP version on interface `ifindex`.
//
// It looks up the interface in ifindex_params_map and returns true when the
// interface has TX L4 checksum offload for that IP version (4 or 6). For any
// other ipversion value it returns false.
//
// If the interface has no entry in ifindex_params_map, the function returns
// true. This keeps the previous runtime behaviour, where `return -1` in this
// bool function was implicitly converted to true.
// NOTE(review): confirm that "disable checksum" is the intended fallback for
// an unknown interface.
static __always_inline bool wan_disable_checksum(const __u32 ifindex,
                                                 const __u8 ipversion) {
  struct if_params *ifparams =
      bpf_map_lookup_elem(&ifindex_params_map, &ifindex);
  if (unlikely(!ifparams)) {
    // A bool cannot carry an error code; state the resulting value explicitly.
    return true;
  }
  // If tx offloaded, we get bad checksum of packets because we redirect packet
  // before the NIC processing. So we have no choice but disable l4 checksum.
  if (ipversion == 4) {
    return ifparams->tx_l4_cksm_ip4_offload;
  }
  if (ipversion == 6) {
    return ifparams->tx_l4_cksm_ip6_offload;
  }
  return false;
}
// Routing and redirect the packet back.
// We cannot modify the dest address here. So we cooperate with wan_ingress.
SEC("tc/wan_egress")
@ -1572,13 +1643,6 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
};
// Redirect.
if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) {
bpf_printk("Shot bpf_redirect: %d", ret);
return TC_ACT_SHOT;
}
return TC_ACT_REDIRECT;
} else {
// Normal packets.
@ -1644,38 +1708,31 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
return TC_ACT_OK;
} else if (unlikely(outbound == OUTBOUND_BLOCK)) {
return TC_ACT_SHOT;
} else {
// Rewrite to control plane.
if (unlikely(tcp_state_syn)) {
struct ip_port_outbound value_dst;
__builtin_memset(&value_dst, 0, sizeof(value_dst));
__builtin_memcpy(value_dst.ip, daddr, IPV6_BYTE_LENGTH);
value_dst.port = tcph.dest;
value_dst.outbound = outbound;
// bpf_printk("UPDATE: %pI6:%u", key_src.ip, bpf_ntohs(key_src.port));
bpf_map_update_elem(&tcp_dst_map, &key_src, &value_dst, BPF_ANY);
}
// Write mac.
if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
ethh.h_source, sizeof(ethh.h_source),
0))) {
return TC_ACT_SHOT;
}
if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source),
special_mac_to_tproxy,
sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
};
// Redirect.
if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) {
bpf_printk("Shot bpf_redirect: %d", ret);
return TC_ACT_SHOT;
}
return TC_ACT_REDIRECT;
}
// Rewrite to control plane.
if (unlikely(tcp_state_syn)) {
struct ip_port_outbound value_dst;
__builtin_memset(&value_dst, 0, sizeof(value_dst));
__builtin_memcpy(value_dst.ip, daddr, IPV6_BYTE_LENGTH);
value_dst.port = tcph.dest;
value_dst.outbound = outbound;
// bpf_printk("UPDATE: %pI6:%u", key_src.ip, bpf_ntohs(key_src.port));
bpf_map_update_elem(&tcp_dst_map, &key_src, &value_dst, BPF_ANY);
}
// Write mac.
if ((ret =
bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
ethh.h_source, sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
}
if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source),
special_mac_to_tproxy,
sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
};
} else if (l4proto == IPPROTO_UDP) {
// Backup for further use.
struct ip_port_outbound new_hdr;
@ -1719,38 +1776,49 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
return TC_ACT_OK;
} else if (unlikely(new_hdr.outbound == OUTBOUND_BLOCK)) {
return TC_ACT_SHOT;
} else {
// Rewrite to control plane.
}
// Write mac.
if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
ethh.h_source, sizeof(ethh.h_source),
0))) {
return TC_ACT_SHOT;
}
if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source),
special_mac_to_tproxy,
sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
};
// Rewrite to control plane.
// Encap a header to transmit fullcone tuple.
if ((ret = encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len,
&new_hdr, sizeof(new_hdr)))) {
return TC_ACT_SHOT;
}
// Write mac.
if ((ret =
bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest),
ethh.h_source, sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
}
if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source),
special_mac_to_tproxy,
sizeof(ethh.h_source), 0))) {
return TC_ACT_SHOT;
};
// Redirect from egress to ingress.
if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) {
bpf_printk("Shot bpf_redirect: %d", ret);
return TC_ACT_SHOT;
}
return TC_ACT_REDIRECT;
bool disable_l4_checksum = wan_disable_checksum(skb->ifindex, ipversion);
// Encap a header to transmit fullcone tuple.
if ((ret = encap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len,
&new_hdr, sizeof(new_hdr),
// It is a part of ingress link.
!disable_l4_checksum))) {
return TC_ACT_SHOT;
}
}
}
return TC_ACT_OK;
// // Print packet in hex for debugging (checksum or something else).
// if ((l4proto == IPPROTO_TCP ? tcph.dest : udph.dest) == bpf_htons(8443)) {
// bpf_printk("PRINT OUTPUT PACKET");
// for (__u32 i = 0; i < skb->len && i < 500; i++) {
// __u8 t = 0;
// bpf_skb_load_bytes(skb, i, &t, 1);
// bpf_printk("%02x", t);
// }
// }
// Redirect from egress to ingress.
if ((ret = bpf_redirect(skb->ifindex, BPF_F_INGRESS)) == TC_ACT_SHOT) {
bpf_printk("Shot bpf_redirect: %d", ret);
return TC_ACT_SHOT;
}
return TC_ACT_REDIRECT;
}
SEC("tc/wan_ingress")
@ -1810,6 +1878,17 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
return TC_ACT_OK;
}
bool disable_l4_checksum = wan_disable_checksum(skb->ifindex, ipversion);
// // Print packet in hex for debugging (checksum or something else).
// if (dport == bpf_htons(8443)) {
// bpf_printk("PRINT BEFORE PACKET");
// for (__u32 i = 0; i < skb->len && i < 500; i++) {
// __u8 t = 0;
// bpf_skb_load_bytes(skb, i, &t, 1);
// bpf_printk("%02x", t);
// }
// }
if (tproxy_response) {
// Send the tproxy response packet to origin.
@ -1841,13 +1920,13 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
}
// Rewrite sip and sport.
if (rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, saddr, original_dst->ip,
false) < 0) {
if ((ret = rewrite_ip(skb, ipversion, IPPROTO_TCP, ihl, saddr,
original_dst->ip, false, !disable_l4_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
if (rewrite_port(skb, IPPROTO_TCP, ihl, sport, original_dst->port,
false) < 0) {
if ((ret = rewrite_port(skb, IPPROTO_TCP, ihl, sport, original_dst->port,
false, !disable_l4_checksum))) {
bpf_printk("Shot Port: %d", ret);
return TC_ACT_SHOT;
}
@ -1861,21 +1940,22 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
// Get source ip/port from our packet header.
// Decap header to get fullcone tuple.
if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len,
&ori_src, sizeof(ori_src)))) {
if ((ret =
decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len, &ori_src,
sizeof(ori_src), !disable_l4_checksum))) {
return TC_ACT_SHOT;
}
// Rewrite udp src ip
if ((ret = rewrite_ip(skb, ipversion, IPPROTO_UDP, ihl, saddr, ori_src.ip,
false))) {
false, !disable_l4_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
// Rewrite udp src port
if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, sport, ori_src.port,
false))) {
if ((ret = rewrite_port(skb, IPPROTO_UDP, ihl, sport, ori_src.port, false,
!disable_l4_checksum))) {
bpf_printk("Shot Port: %d", ret);
return TC_ACT_SHOT;
}
@ -1891,7 +1971,8 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
// }
}
// Rewrite dip to host ip.
if (rewrite_ip(skb, ipversion, l4proto, ihl, daddr, saddr, true) < 0) {
if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, saddr, true,
!disable_l4_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
@ -1909,29 +1990,45 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
// bpf_printk("should send to: %pI6:%u", tproxy_ip,
// bpf_ntohs(*tproxy_port));
if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, tproxy_ip,
true))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
// (1) Use daddr as saddr to pass NIC verification. Notice that we do not
// modify the <sport> so tproxy will send packet to it.
if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, saddr, daddr, false))) {
if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, daddr, tproxy_ip, true,
!disable_l4_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
// Rewrite udp dst port.
if ((ret = rewrite_port(skb, l4proto, ihl, dport, *tproxy_port, true))) {
// Rewrite dst port.
if ((ret = rewrite_port(skb, l4proto, ihl, dport, *tproxy_port, true,
!disable_l4_checksum))) {
bpf_printk("Shot Port: %d", ret);
return TC_ACT_SHOT;
}
// (1) Use daddr as saddr to pass NIC verification. Notice that we do not
// modify the <sport> so tproxy will send packet to it.
if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, saddr, daddr, false,
!disable_l4_checksum))) {
bpf_printk("Shot IP: %d", ret);
return TC_ACT_SHOT;
}
}
// // Print packet in hex for debugging (checksum or something else).
// if (dport == bpf_htons(8443)) {
// bpf_printk("PRINT AFTER PACKET");
// for (__u32 i = 0; i < skb->len && i < 500; i++) {
// __u8 t = 0;
// bpf_skb_load_bytes(skb, i, &t, 1);
// bpf_printk("%02x", t);
// }
// }
if (disable_l4_checksum) {
__u32 l4_cksm_off = l4_checksum_off(l4proto, ihl);
// Set checksum zero.
__sum16 bak_cksm = 0;
bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0);
bpf_csum_level(skb, BPF_CSUM_LEVEL_RESET);
}
__u32 l4_cksm_off = l4_checksum_off(l4proto, ihl);
// Restore the checksum or set it zero.
__sum16 bak_cksm = 0;
bpf_skb_store_bytes(skb, l4_cksm_off, &bak_cksm, sizeof(bak_cksm), 0);
return TC_ACT_OK;
}

View File

@ -27,7 +27,7 @@ type RoutingMatcherBuilder struct {
*routing.DefaultMatcherBuilder
outboundName2Id map[string]uint8
bpf *bpfObjects
rules []_bpfMatchSet
rules []bpfMatchSet
SimulatedLpmTries [][]netip.Prefix
SimulatedDomainSet []DomainSet
Final string
@ -74,7 +74,7 @@ func (b *RoutingMatcherBuilder) AddDomain(f *config_parser.Function, key string,
RuleIndex: len(b.rules),
Domains: values,
})
b.rules = append(b.rules, _bpfMatchSet{
b.rules = append(b.rules, bpfMatchSet{
Type: uint8(consts.MatchType_DomainSet),
Not: f.Not,
Outbound: b.OutboundToId(outbound),
@ -94,7 +94,7 @@ func (b *RoutingMatcherBuilder) AddSourceMac(f *config_parser.Function, macAddrs
}
lpmTrieIndex := len(b.SimulatedLpmTries)
b.SimulatedLpmTries = append(b.SimulatedLpmTries, values)
set := _bpfMatchSet{
set := bpfMatchSet{
Value: [16]byte{},
Type: uint8(consts.MatchType_Mac),
Not: f.Not,
@ -111,7 +111,7 @@ func (b *RoutingMatcherBuilder) AddIp(f *config_parser.Function, values []netip.
}
lpmTrieIndex := len(b.SimulatedLpmTries)
b.SimulatedLpmTries = append(b.SimulatedLpmTries, values)
set := _bpfMatchSet{
set := bpfMatchSet{
Value: [16]byte{},
Type: uint8(consts.MatchType_IpSet),
Not: f.Not,
@ -127,7 +127,7 @@ func (b *RoutingMatcherBuilder) AddPort(f *config_parser.Function, values [][2]u
if i == len(values)-1 {
outbound = _outbound
}
b.rules = append(b.rules, _bpfMatchSet{
b.rules = append(b.rules, bpfMatchSet{
Type: uint8(consts.MatchType_Port),
Value: _bpfPortRange{
PortStart: value[0],
@ -145,7 +145,7 @@ func (b *RoutingMatcherBuilder) AddSourceIp(f *config_parser.Function, values []
}
lpmTrieIndex := len(b.SimulatedLpmTries)
b.SimulatedLpmTries = append(b.SimulatedLpmTries, values)
set := _bpfMatchSet{
set := bpfMatchSet{
Value: [16]byte{},
Type: uint8(consts.MatchType_SourceIpSet),
Not: f.Not,
@ -161,7 +161,7 @@ func (b *RoutingMatcherBuilder) AddSourcePort(f *config_parser.Function, values
if i == len(values)-1 {
outbound = _outbound
}
b.rules = append(b.rules, _bpfMatchSet{
b.rules = append(b.rules, bpfMatchSet{
Type: uint8(consts.MatchType_SourcePort),
Value: _bpfPortRange{
PortStart: value[0],
@ -177,7 +177,7 @@ func (b *RoutingMatcherBuilder) AddL4Proto(f *config_parser.Function, values con
if b.err != nil {
return
}
b.rules = append(b.rules, _bpfMatchSet{
b.rules = append(b.rules, bpfMatchSet{
Value: [16]byte{byte(values)},
Type: uint8(consts.MatchType_L4Proto),
Not: f.Not,
@ -189,7 +189,7 @@ func (b *RoutingMatcherBuilder) AddIpVersion(f *config_parser.Function, values c
if b.err != nil {
return
}
b.rules = append(b.rules, _bpfMatchSet{
b.rules = append(b.rules, bpfMatchSet{
Value: [16]byte{byte(values)},
Type: uint8(consts.MatchType_IpVersion),
Not: f.Not,
@ -203,7 +203,7 @@ func (b *RoutingMatcherBuilder) AddProcessName(f *config_parser.Function, values
if i == len(values)-1 {
outbound = _outbound
}
matchSet := _bpfMatchSet{
matchSet := bpfMatchSet{
Type: uint8(consts.MatchType_ProcessName),
Not: f.Not,
Outbound: b.OutboundToId(outbound),
@ -218,7 +218,7 @@ func (b *RoutingMatcherBuilder) AddFinal(outbound string) {
return
}
b.Final = outbound
b.rules = append(b.rules, _bpfMatchSet{
b.rules = append(b.rules, bpfMatchSet{
Type: uint8(consts.MatchType_Final),
Outbound: b.OutboundToId(outbound),
})

1
go.mod
View File

@ -32,6 +32,7 @@ require (
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mzz2017/disk-bloom v1.0.1 // indirect
github.com/safchain/ethtool v0.2.0 // indirect
github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.8.1 // indirect

3
go.sum
View File

@ -55,6 +55,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/safchain/ethtool v0.2.0 h1:dILxMBqDnQfX192cCAPjZr9v2IgVXeElHPy435Z/IdE=
github.com/safchain/ethtool v0.2.0/go.mod h1:WkKB1DnNtvsMlDmQ50sgwowDJV/hGbJSOvJoEXs1AJQ=
github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb h1:XfLJSPIOUX+osiMraVgIrMR27uMXnRJWGm1+GL8/63U=
github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb/go.mod h1:bR6DqgcAl1zTcOX8/pE2Qkj9XO00eCNqmKb7lXP8EAg=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
@ -106,6 +108,7 @@ golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200217220822-9197077df867/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201202213521-69691e467435/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18=