optimize(bpf): Load separate programs for L2/L3 netdev (#822)

This commit is contained in:
graymon
2025-06-22 16:08:12 +08:00
committed by GitHub
parent 062d6b39e0
commit 0eb84160a2
4 changed files with 94 additions and 116 deletions

View File

@ -167,8 +167,6 @@ const (
LoopbackIfIndex = 1
)
type LanWanFlag uint8
const (
LinkHdrLen_None uint32 = 0
LinkHdrLen_Ethernet uint32 = 14

View File

@ -139,10 +139,10 @@ func getIfParamsFromLink(link netlink.Link) (ifParams bpfIfParams, err error) {
return ifParams, nil
}
func (c *controlPlaneCore) mapLinkType(ifname string) error {
func (c *controlPlaneCore) linkHdrLen(ifname string) (uint32, error) {
link, err := netlink.LinkByName(ifname)
if err != nil {
return err
return 0, err
}
var linkHdrLen uint32
switch link.Attrs().EncapType {
@ -151,9 +151,10 @@ func (c *controlPlaneCore) mapLinkType(ifname string) error {
case "ether":
linkHdrLen = consts.LinkHdrLen_Ethernet
default:
return nil
c.log.Warnf("Maybe unsupported link type %v, using default link header length", link.Attrs().EncapType)
linkHdrLen = consts.LinkHdrLen_Ethernet
}
return c.bpf.bpfMaps.LinklenMap.Update(uint32(link.Attrs().Index), linkHdrLen, ebpf.UpdateAny)
return linkHdrLen, nil
}
func (c *controlPlaneCore) addQdisc(ifname string) error {
@ -253,7 +254,10 @@ func (c *controlPlaneCore) _bindLan(ifname string) error {
return err
}
_ = c.addQdisc(ifname)
_ = c.mapLinkType(ifname)
linkHdrLen, err := c.linkHdrLen(ifname)
if err != nil {
return err
}
/// Insert an elem into IfindexParamsMap.
ifParams, err := getIfParamsFromLink(link)
if err != nil {
@ -273,10 +277,16 @@ func (c *controlPlaneCore) _bindLan(ifname string) error {
// Priority should be behind of WAN's
Priority: 2,
},
Fd: c.bpf.bpfPrograms.TproxyLanIngress.FD(),
Name: consts.AppName + "_lan_ingress",
DirectAction: true,
}
if linkHdrLen > 0 {
filterIngress.Fd = c.bpf.bpfPrograms.TproxyLanIngressL2.FD()
filterIngress.Name = filterIngress.Name + "_l2"
} else {
filterIngress.Fd = c.bpf.bpfPrograms.TproxyLanIngressL3.FD()
filterIngress.Name = filterIngress.Name + "_l3"
}
// Remove and add.
_ = netlink.FilterDel(filterIngress)
if !c.isReload {
@ -304,10 +314,16 @@ func (c *controlPlaneCore) _bindLan(ifname string) error {
// Priority should be front of WAN's
Priority: 1,
},
Fd: c.bpf.bpfPrograms.TproxyLanEgress.FD(),
Name: consts.AppName + "_lan_egress",
DirectAction: true,
}
if linkHdrLen > 0 {
filterEgress.Fd = c.bpf.bpfPrograms.TproxyLanEgressL2.FD()
filterEgress.Name = filterEgress.Name + "_l2"
} else {
filterEgress.Fd = c.bpf.bpfPrograms.TproxyLanEgressL3.FD()
filterEgress.Name = filterEgress.Name + "_l3"
}
// Remove and add.
_ = netlink.FilterDel(filterEgress)
if !c.isReload {
@ -444,7 +460,10 @@ func (c *controlPlaneCore) _bindWan(ifname string) error {
return fmt.Errorf("cannot bind to loopback interface")
}
_ = c.addQdisc(ifname)
_ = c.mapLinkType(ifname)
linkHdrLen, err := c.linkHdrLen(ifname)
if err != nil {
return err
}
/// Insert an elem into IfindexParamsMap.
ifParams, err := getIfParamsFromLink(link)
@ -465,10 +484,16 @@ func (c *controlPlaneCore) _bindWan(ifname string) error {
Protocol: unix.ETH_P_ALL,
Priority: 2,
},
Fd: c.bpf.bpfPrograms.TproxyWanEgress.FD(),
Name: consts.AppName + "_wan_egress",
DirectAction: true,
}
if linkHdrLen > 0 {
filterEgress.Fd = c.bpf.bpfPrograms.TproxyWanEgressL2.FD()
filterEgress.Name = filterEgress.Name + "_l2"
} else {
filterEgress.Fd = c.bpf.bpfPrograms.TproxyWanEgressL3.FD()
filterEgress.Name = filterEgress.Name + "_l3"
}
_ = netlink.FilterDel(filterEgress)
// Remove and add.
if !c.isReload {
@ -495,10 +520,16 @@ func (c *controlPlaneCore) _bindWan(ifname string) error {
Protocol: unix.ETH_P_ALL,
Priority: 1,
},
Fd: c.bpf.bpfPrograms.TproxyWanIngress.FD(),
Name: consts.AppName + "_wan_ingress",
DirectAction: true,
}
if linkHdrLen > 0 {
filterIngress.Fd = c.bpf.bpfPrograms.TproxyWanIngressL2.FD()
filterIngress.Name = filterIngress.Name + "_l2"
} else {
filterIngress.Fd = c.bpf.bpfPrograms.TproxyWanIngressL3.FD()
filterIngress.Name = filterIngress.Name + "_l3"
}
_ = netlink.FilterDel(filterIngress)
// Remove and add.
if !c.isReload {

View File

@ -17,7 +17,7 @@ struct {
__array(values, int());
} entry_call_map SEC(".maps") = {
.values = {
[0] = &tproxy_wan_egress,
[0] = &tproxy_wan_egress_l2,
},
};
@ -30,9 +30,6 @@ int testpktgen_dport_match(struct __sk_buff *skb)
SEC("tc/setup/dport_match")
int testsetup_dport_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* dport(80) -> proxy */
struct match_set ms = {};
struct port_range pr = {80, 80};
@ -69,9 +66,6 @@ int testpktgen_dport_mismatch(struct __sk_buff *skb)
SEC("tc/setup/dport_mismatch")
int testsetup_dport_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* dport(80) -> proxy */
struct match_set ms = {};
struct port_range pr = {80, 80};
@ -108,9 +102,6 @@ int testpktgen_ipset_match(struct __sk_buff *skb)
SEC("tc/setup/ipset_match")
int testsetup_ipset_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* dip(224.1.0.0/16) -> direct */
struct match_set ms = {};
ms.not = false;
@ -153,9 +144,6 @@ int testpktgen_ipset_mismatch(struct __sk_buff *skb)
SEC("tc/setup/ipset_mismatch")
int testsetup_ipset_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
// dip(224.1.0.0/16) -> direct
struct match_set ms = {};
ms.not = false;
@ -198,9 +186,6 @@ int testpktgen_source_ipset_match(struct __sk_buff *skb)
SEC("tc/setup/source_ipset_match")
int testsetup_source_ipset_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* sip(192.168.50.0/24) -> direct */
struct match_set ms = {};
ms.not = false;
@ -243,9 +228,6 @@ int testpktgen_source_ipset_mismatch(struct __sk_buff *skb)
SEC("tc/setup/source_ipset_mismatch")
int testsetup_source_ipset_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* sip(192.168.50.0/24) -> direct */
struct match_set ms = {};
ms.not = false;
@ -288,9 +270,6 @@ int testpktgen_sport_match(struct __sk_buff *skb)
SEC("tc/setup/sport_match")
int testsetup_sport_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* sport(19000-20000) -> proxy */
struct match_set ms = {};
struct port_range pr = {19000, 20000};
@ -327,9 +306,6 @@ int testpktgen_sport_mismatch(struct __sk_buff *skb)
SEC("tc/setup/sport_mismatch")
int testsetup_sport_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* sport(19230-19232) -> proxy */
struct match_set ms = {};
struct port_range pr = {19230, 19232};
@ -366,9 +342,6 @@ int testpktgen_l4proto_match(struct __sk_buff *skb)
SEC("tc/setup/l4proto_match")
int testsetup_l4proto_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* l4proto(tcp) -> proxy */
struct match_set ms = {};
ms.l4proto_type = L4ProtoType_TCP;
@ -404,9 +377,6 @@ int testpktgen_l4proto_mismatch(struct __sk_buff *skb)
SEC("tc/setup/l4proto_mismatch")
int testsetup_l4proto_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* l4proto(udp) -> proxy */
struct match_set ms = {};
ms.l4proto_type = L4ProtoType_UDP;
@ -442,9 +412,6 @@ int testpktgen_ipversion_match(struct __sk_buff *skb)
SEC("tc/setup/ipversion_match")
int testsetup_ipversion_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* ipversion(4) -> proxy */
struct match_set ms = {};
ms.ip_version = IpVersionType_4;
@ -480,9 +447,6 @@ int testpktgen_ipversion_mismatch(struct __sk_buff *skb)
SEC("tc/setup/ipversion_mismatch")
int testsetup_ipversion_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* ipversion(6) -> proxy */
struct match_set ms = {};
ms.ip_version = IpVersionType_6;
@ -518,9 +482,6 @@ int testpktgen_mac_match(struct __sk_buff *skb)
SEC("tc/setup/mac_match")
int testsetup_mac_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* mac('06:07:08:09:0a:0b') -> proxy */
struct match_set ms = {};
ms.not = false;
@ -580,9 +541,6 @@ int testpktgen_mac_mismatch(struct __sk_buff *skb)
SEC("tc/setup/mac_mismatch")
int testsetup_mac_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* mac('00:01:02:03:04:05') -> proxy */
struct match_set ms = {};
ms.not = false;
@ -630,9 +588,6 @@ int testpktgen_dscp_match(struct __sk_buff *skb)
SEC("tc/setup/dscp_match")
int testsetup_dscp_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* dscp(4) -> proxy */
struct match_set ms = {};
ms.dscp = 4;
@ -668,9 +623,6 @@ int testpktgen_dscp_mismatch(struct __sk_buff *skb)
SEC("tc/setup/dscp_mismatch")
int testsetup_dscp_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* dscp(5) -> proxy */
struct match_set ms = {};
ms.dscp = 5;
@ -706,9 +658,6 @@ int testpktgen_and_match_1(struct __sk_buff *skb)
SEC("tc/setup/and_match_1")
int testsetup_and_match_1(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* dip(1.1.0.0/16) && l4proto(tcp) && dport(1-1023, 8443) -> proxy */
struct match_set ms = {};
ms.not = false;
@ -786,9 +735,6 @@ int testpktgen_and_match_2(struct __sk_buff *skb)
SEC("tc/setup/and_match_2")
int testsetup_and_match_2(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* dip(1.1.0.0/16) && l4proto(tcp) && dport(1-1023, 8443) -> proxy */
struct match_set ms = {};
ms.not = false;
@ -866,9 +812,6 @@ int testpktgen_and_mismatch(struct __sk_buff *skb)
SEC("tc/setup/and_mismatch")
int testsetup_and_mismatch(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* dip(1.1.0.0/16) && l4proto(tcp) && dport(1-1023, 8443) -> proxy */
struct match_set ms = {};
ms.not = false;
@ -946,9 +889,6 @@ int testpktgen_not_match(struct __sk_buff *skb)
SEC("tc/setup/not_match")
int testsetup_not_match(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* !dport(80) -> proxy */
struct match_set ms = {};
struct port_range pr = {80, 80};
@ -985,9 +925,6 @@ int testpktgen_not_mismtach(struct __sk_buff *skb)
SEC("tc/setup/not_mismtach")
int testsetup_not_mismtach(struct __sk_buff *skb)
{
__u32 linklen = ETH_HLEN;
bpf_map_update_elem(&linklen_map, &one_key, &linklen, BPF_ANY);
/* !dport(80) -> proxy */
struct match_set ms = {};
struct port_range pr = {80, 80};

View File

@ -188,15 +188,6 @@ struct {
__uint(max_entries, 65535);
} fast_sock SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, __u32); // ifindex
__type(value, __u32); // link length
__uint(max_entries, MAX_INTERFACE_NUM);
/// NOTICE: No persistence.
// __uint(pinning, LIBBPF_PIN_BY_NAME);
} linklen_map SEC(".maps");
// Array of LPM tries:
struct lpm_key {
struct bpf_lpm_trie_key trie_key;
@ -896,17 +887,6 @@ static __always_inline __s64 route(const struct route_params *params)
#undef _dscp
}
static __always_inline __u32 get_link_h_len(__u32 ifindex,
volatile __u32 *link_h_len)
{
__u32 *plink_h_len = bpf_map_lookup_elem(&linklen_map, &ifindex);
if (!plink_h_len)
return -EIO;
*link_h_len = *plink_h_len;
return 0;
}
static __always_inline int assign_listener(struct __sk_buff *skb, __u8 l4proto)
{
struct bpf_sock *sk;
@ -1025,8 +1005,7 @@ retn:
return value;
}
SEC("tc/egress")
int tproxy_lan_egress(struct __sk_buff *skb)
static __always_inline int do_tproxy_lan_egress(struct __sk_buff *skb, u32 link_h_len)
{
struct ethhdr ethh;
struct iphdr iph;
@ -1036,10 +1015,7 @@ int tproxy_lan_egress(struct __sk_buff *skb)
struct udphdr udph;
__u8 ihl;
__u8 l4proto;
__u32 link_h_len;
if (get_link_h_len(skb->ifindex, &link_h_len))
return TC_ACT_OK;
int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h,
&tcph, &udph, &ihl, &l4proto);
if (ret) {
@ -1068,8 +1044,19 @@ int tproxy_lan_egress(struct __sk_buff *skb)
return TC_ACT_PIPE;
}
SEC("tc/ingress")
int tproxy_lan_ingress(struct __sk_buff *skb)
/*
 * tc entry point for LAN egress on links that carry a link-layer header.
 * Forwards to do_tproxy_lan_egress() with a fixed header length of 14 bytes
 * (the Ethernet header length), so no per-packet lookup is needed.
 */
SEC("tc/lan_egress_l2")
int tproxy_lan_egress_l2(struct __sk_buff *skb)
{
	/* Ethernet link header length in bytes. */
	const unsigned int eth_hdr_len = 14;

	return do_tproxy_lan_egress(skb, eth_hdr_len);
}
/*
 * tc entry point for LAN egress on links without a link-layer header.
 * Forwards to do_tproxy_lan_egress() with a link header length of 0, i.e.
 * the packet is parsed starting directly at the network layer.
 */
SEC("tc/lan_egress_l3")
int tproxy_lan_egress_l3(struct __sk_buff *skb)
{
	/* No link header precedes the IP header. */
	const unsigned int no_link_hdr = 0;

	return do_tproxy_lan_egress(skb, no_link_hdr);
}
static __always_inline int do_tproxy_lan_ingress(struct __sk_buff *skb, u32 link_h_len)
{
struct ethhdr ethh;
struct iphdr iph;
@ -1079,10 +1066,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb)
struct udphdr udph;
__u8 ihl;
__u8 l4proto;
__u32 link_h_len;
if (get_link_h_len(skb->ifindex, &link_h_len))
return TC_ACT_OK;
int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h,
&tcph, &udph, &ihl, &l4proto);
if (ret) {
@ -1268,6 +1252,18 @@ block:
return TC_ACT_SHOT;
}
/*
 * tc entry point for LAN ingress on links that carry a link-layer header.
 * Forwards to do_tproxy_lan_ingress() with a fixed header length of 14 bytes
 * (the Ethernet header length).
 */
SEC("tc/lan_ingress_l2")
int tproxy_lan_ingress_l2(struct __sk_buff *skb)
{
	/* Ethernet link header length in bytes. */
	const unsigned int eth_hdr_len = 14;

	return do_tproxy_lan_ingress(skb, eth_hdr_len);
}
/*
 * tc entry point for LAN ingress on links without a link-layer header.
 * Forwards to do_tproxy_lan_ingress() with a link header length of 0.
 */
SEC("tc/lan_ingress_l3")
int tproxy_lan_ingress_l3(struct __sk_buff *skb)
{
	/* No link header precedes the IP header. */
	const unsigned int no_link_hdr = 0;

	return do_tproxy_lan_ingress(skb, no_link_hdr);
}
// Cookie will change after the first packet, so we just use it for
// handshake.
static __always_inline bool pid_is_control_plane(struct __sk_buff *skb,
@ -1319,8 +1315,7 @@ static __always_inline bool pid_is_control_plane(struct __sk_buff *skb,
return false;
}
SEC("tc/wan_ingress")
int tproxy_wan_ingress(struct __sk_buff *skb)
static __always_inline int do_tproxy_wan_ingress(struct __sk_buff *skb, u32 link_h_len)
{
struct ethhdr ethh;
struct iphdr iph;
@ -1330,10 +1325,7 @@ int tproxy_wan_ingress(struct __sk_buff *skb)
struct udphdr udph;
__u8 ihl;
__u8 l4proto;
__u32 link_h_len;
if (get_link_h_len(skb->ifindex, &link_h_len))
return TC_ACT_OK;
int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h,
&tcph, &udph, &ihl, &l4proto);
if (ret) {
@ -1356,10 +1348,21 @@ int tproxy_wan_ingress(struct __sk_buff *skb)
return TC_ACT_PIPE;
}
/*
 * tc entry point for WAN ingress on links that carry a link-layer header.
 * Forwards to do_tproxy_wan_ingress() with a fixed header length of 14 bytes
 * (the Ethernet header length).
 */
SEC("tc/wan_ingress_l2")
int tproxy_wan_ingress_l2(struct __sk_buff *skb)
{
	/* Ethernet link header length in bytes. */
	const unsigned int eth_hdr_len = 14;

	return do_tproxy_wan_ingress(skb, eth_hdr_len);
}
/*
 * tc entry point for WAN ingress on links without a link-layer header.
 * Forwards to do_tproxy_wan_ingress() with a link header length of 0.
 */
SEC("tc/wan_ingress_l3")
int tproxy_wan_ingress_l3(struct __sk_buff *skb)
{
	/* No link header precedes the IP header. */
	const unsigned int no_link_hdr = 0;

	return do_tproxy_wan_ingress(skb, no_link_hdr);
}
// Routing and redirect the packet back.
// We cannot modify the dest address here. So we cooperate with wan_ingress.
SEC("tc/wan_egress")
int tproxy_wan_egress(struct __sk_buff *skb)
static __always_inline int do_tproxy_wan_egress(struct __sk_buff *skb, u32 link_h_len)
{
// Skip packets not from localhost.
if (skb->ingress_ifindex != NOWHERE_IFINDEX)
@ -1376,10 +1379,7 @@ int tproxy_wan_egress(struct __sk_buff *skb)
struct udphdr udph;
__u8 ihl;
__u8 l4proto;
__u32 link_h_len;
if (get_link_h_len(skb->ifindex, &link_h_len))
return TC_ACT_PIPE;
bool tcp_state_syn;
int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h,
&tcph, &udph, &ihl, &l4proto);
@ -1638,6 +1638,18 @@ int tproxy_wan_egress(struct __sk_buff *skb)
return bpf_redirect(PARAM.dae0_ifindex, 0);
}
/*
 * tc entry point for WAN egress on links that carry a link-layer header.
 * Forwards to do_tproxy_wan_egress() with a fixed header length of 14 bytes
 * (the Ethernet header length).
 */
SEC("tc/wan_egress_l2")
int tproxy_wan_egress_l2(struct __sk_buff *skb)
{
	/* Ethernet link header length in bytes. */
	const unsigned int eth_hdr_len = 14;

	return do_tproxy_wan_egress(skb, eth_hdr_len);
}
/*
 * tc entry point for WAN egress on links without a link-layer header.
 * Forwards to do_tproxy_wan_egress() with a link header length of 0.
 */
SEC("tc/wan_egress_l3")
int tproxy_wan_egress_l3(struct __sk_buff *skb)
{
	/* No link header precedes the IP header. */
	const unsigned int no_link_hdr = 0;

	return do_tproxy_wan_egress(skb, no_link_hdr);
}
SEC("tc/dae0peer_ingress")
int tproxy_dae0peer_ingress(struct __sk_buff *skb)
{