feat: fix port collision problem

This commit is contained in:
mzz2017 2023-02-12 21:50:15 +08:00
parent 7656a6ecaf
commit 919fcc32d9
7 changed files with 189 additions and 65 deletions

View File

@ -120,3 +120,8 @@ const (
TproxyMark uint32 = 0x8000000
LoopbackIfIndex = 1
)
const (
IsWan = 0
IsLan = 1
)

View File

@ -78,7 +78,7 @@ func BindControl(c syscall.RawConn, lAddrPort netip.AddrPort) error {
sockOptErr = fmt.Errorf("error setting IP_TRANSPARENT socket option: %w", err)
}
if err := bindAddr(fd, lAddrPort); err != nil {
sockOptErr = fmt.Errorf("error bindAddr: %w", err)
sockOptErr = fmt.Errorf("error bindAddr %v: %w", lAddrPort.String(), err)
}
})
if controlErr != nil {

View File

@ -291,17 +291,11 @@ func NewControlPlane(
return nil, fmt.Errorf("RoutingMatcherBuilder.Build: %w", err)
}
/// Listen address.
listenIp := "::1"
if len(global.WanInterface) > 0 {
listenIp = "0.0.0.0"
}
c = &ControlPlane{
log: log,
core: core,
deferFuncs: nil,
listenIp: listenIp,
listenIp: "0.0.0.0",
outbounds: outbounds,
outboundName2Id: outboundName2Id,
SimulatedLpmTries: builder.SimulatedLpmTries,
@ -486,9 +480,10 @@ func (c *ControlPlane) ListenAndServe(port uint16) (err error) {
}
break
}
dst := RetrieveOriginalDest(oob[:oobn])
pktDst := RetrieveOriginalDest(oob[:oobn])
var newBuf []byte
outboundIndex, err := c.core.RetrieveOutboundIndex(src, dst, unix.IPPROTO_UDP)
var realDst netip.AddrPort
outboundIndex, err := c.core.RetrieveOutboundIndex(src, pktDst, unix.IPPROTO_UDP)
if err != nil {
// WAN. Old method.
addrHdr, dataOffset, err := ParseAddrHdr(buf[:n])
@ -499,18 +494,19 @@ func (c *ControlPlane) ListenAndServe(port uint16) (err error) {
newBuf = pool.Get(n - dataOffset)
copy(newBuf, buf[dataOffset:n])
outboundIndex = consts.OutboundIndex(addrHdr.Outbound)
src = netip.AddrPortFrom(dst.Addr(), src.Port())
dst = addrHdr.Dest
src = netip.AddrPortFrom(addrHdr.Dest.Addr(), src.Port())
realDst = addrHdr.Dest
} else {
newBuf = pool.Get(n)
copy(newBuf, buf[:n])
realDst = pktDst
}
go func(data []byte, src, dst netip.AddrPort, outboundIndex consts.OutboundIndex) {
if e := c.handlePkt(newBuf, src, dst, outboundIndex); e != nil {
go func(data []byte, src, pktDst, realDst netip.AddrPort, outboundIndex consts.OutboundIndex) {
if e := c.handlePkt(udpConn, newBuf, src, pktDst, realDst, outboundIndex); e != nil {
c.log.Warnln("handlePkt:", e)
}
pool.Put(newBuf)
}(newBuf, src, dst, outboundIndex)
}(newBuf, src, pktDst, realDst, outboundIndex)
}
}()
<-ctx.Done()

View File

@ -273,6 +273,32 @@ func (c *ControlPlaneCore) bindLan(ifname string) error {
}
return nil
})
// Insert filters.
filterEgress := &netlink.BpfFilter{
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_EGRESS,
Handle: netlink.MakeHandle(0x2023, 1),
Protocol: unix.ETH_P_ALL,
// Priority should be front of WAN's
Priority: 1,
},
Fd: c.bpf.bpfPrograms.TproxyLanEgress.FD(),
Name: consts.AppName + "_lan_egress",
DirectAction: true,
}
// Remove and add.
_ = netlink.FilterDel(filterEgress)
if err := netlink.FilterAdd(filterEgress); err != nil {
return fmt.Errorf("cannot attach ebpf object to filter egress: %w", err)
}
c.deferFuncs = append(c.deferFuncs, func() error {
if err := netlink.FilterDel(filterEgress); err != nil {
return fmt.Errorf("FilterDel(%v:%v): %w", ifname, filterEgress.Name, err)
}
return nil
})
return nil
}
@ -343,9 +369,9 @@ func (c *ControlPlaneCore) bindWan(ifname string) error {
FilterAttrs: netlink.FilterAttrs{
LinkIndex: link.Attrs().Index,
Parent: netlink.HANDLE_MIN_EGRESS,
Handle: netlink.MakeHandle(0x2023, 1),
Handle: netlink.MakeHandle(0x2023, 2),
Protocol: unix.ETH_P_ALL,
Priority: 1,
Priority: 2,
},
Fd: c.bpf.bpfPrograms.TproxyWanEgress.FD(),
Name: consts.AppName + "_wan_egress",

View File

@ -61,6 +61,9 @@
#define OUTBOUND_LOGICAL_AND 0xFF
#define OUTBOUND_LOGICAL_MASK 0xFE
#define IS_WAN 0
#define IS_LAN 1
#define TPROXY_MARK 0x8000000
#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
@ -820,7 +823,8 @@ static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb,
static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb,
__u8 ipversion, __u8 ihl,
__be16 ipv4hdr_tot_len, void *to,
__u32 decap_hdrlen) {
__u32 decap_hdrlen,
bool (*prevent_pop)(void *to)) {
if (unlikely(decap_hdrlen % 4 != 0)) {
bpf_printk("encap_after_udp_hdr: unexpected decap_hdrlen value %u :must "
"be a multiple of 4",
@ -861,35 +865,37 @@ static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb,
return ret;
}
// Adjust room to decap the header.
if ((ret = bpf_skb_adjust_room(skb, -decap_hdrlen, BPF_ADJ_ROOM_NET,
BPF_F_ADJ_ROOM_NO_CSUM_RESET))) {
bpf_printk("UDP ADJUST ROOM(decap): %d", ret);
return ret;
}
// Rewrite ip len.
if (ipversion == 4) {
if ((ret = adjust_ipv4_len(skb, ipv4hdr_tot_len, -decap_hdrlen))) {
bpf_printk("adjust_ip_len: %d", ret);
if (prevent_pop == NULL || !prevent_pop(to)) {
// Adjust room to decap the header.
if ((ret = bpf_skb_adjust_room(skb, -decap_hdrlen, BPF_ADJ_ROOM_NET,
BPF_F_ADJ_ROOM_NO_CSUM_RESET))) {
bpf_printk("UDP ADJUST ROOM(decap): %d", ret);
return ret;
}
}
// Rewrite udp len.
if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, -decap_hdrlen))) {
bpf_printk("adjust_udp_len: %d", ret);
return ret;
}
// Rewrite ip len.
if (ipversion == 4) {
if ((ret = adjust_ipv4_len(skb, ipv4hdr_tot_len, -decap_hdrlen))) {
bpf_printk("adjust_ip_len: %d", ret);
return ret;
}
}
// Rewrite udp checksum.
// Rewrite udp len.
if ((ret = adjust_udp_len(skb, reserved_udphdr.len, ihl, -decap_hdrlen))) {
bpf_printk("adjust_udp_len: %d", ret);
return ret;
}
__u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl);
__s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0);
if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm,
BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace 2: %d", ret);
return ret;
// Rewrite udp checksum.
__u32 udp_csum_off = l4_checksum_off(IPPROTO_UDP, ihl);
__s64 cksm = bpf_csum_diff(to, decap_hdrlen, 0, 0, 0);
if ((ret = bpf_l4_csum_replace(skb, udp_csum_off, 0, cksm,
BPF_F_MARK_MANGLED_0))) {
bpf_printk("bpf_l4_csum_replace 2: %d", ret);
return ret;
}
}
return 0;
}
@ -1139,6 +1145,69 @@ routing(const __u32 flag[6], const void *l4hdr, const __be32 saddr[4],
#undef _is_wan
}
static bool __always_inline is_not_to_lan(void *_ori_src) {
struct ip_port_outbound *ori_src = _ori_src;
return ori_src->outbound == IS_WAN;
}
// SNAT for UDP packet.
SEC("tc/egress")
int tproxy_lan_egress(struct __sk_buff *skb) {
if (skb->ingress_ifindex != NOWHERE_IFINDEX) {
return TC_ACT_PIPE;
}
struct ethhdr ethh;
struct iphdr iph;
struct ipv6hdr ipv6h;
struct tcphdr tcph;
struct udphdr udph;
__u8 ihl;
__u8 ipversion;
__u8 l4proto;
int ret = parse_transport(skb, &ethh, &iph, &ipv6h, &tcph, &udph, &ihl,
&ipversion, &l4proto);
if (ret) {
bpf_printk("parse_transport: %d", ret);
return TC_ACT_OK;
}
if (l4proto != IPPROTO_UDP) {
return TC_ACT_PIPE;
}
__be16 *tproxy_port = bpf_map_lookup_elem(&param_map, &tproxy_port_key);
if (!tproxy_port) {
return TC_ACT_PIPE;
}
struct tuples tuples;
get_tuples(&tuples, &iph, &ipv6h, &tcph, &udph, ipversion, l4proto);
if (*tproxy_port != tuples.src.port) {
return TC_ACT_PIPE;
}
bpf_printk("SAME");
struct ip_port_outbound ori_src;
if ((ret = decap_after_udp_hdr(skb, ipversion, ihl,
ipversion == 4 ? iph.tot_len : 0, &ori_src,
sizeof(ori_src), is_not_to_lan))) {
return TC_ACT_SHOT;
}
if (is_not_to_lan(&ori_src)) {
return TC_ACT_PIPE;
}
if ((ret = rewrite_ip(skb, ipversion, l4proto, ihl, tuples.src.ip, ori_src.ip,
false))) {
return TC_ACT_SHOT;
}
if ((ret = rewrite_port(skb, l4proto, ihl, tuples.src.port, ori_src.port,
false))) {
return TC_ACT_SHOT;
}
// bpf_printk("from %pI6 to %pI6", tuples.src.ip, ori_src.ip);
// bpf_printk("from %u to %u", bpf_ntohs(tuples.src.port),
// bpf_ntohs(ori_src.port));
return TC_ACT_OK;
}
SEC("tc/ingress")
int tproxy_lan_ingress(struct __sk_buff *skb) {
struct ethhdr ethh;
@ -1208,7 +1277,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb) {
bpf_sk_release(sk);
}
} else {
// UDP.
// UDP. Accept local listening.
sk = bpf_sk_lookup_udp(skb, &tuple, tuple_size, BPF_F_CURRENT_NETNS, 0);
if (sk) {
@ -1529,8 +1598,10 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
#if defined(__DEBUG_ROUTING) || defined(__PRINT_ROUTING_RESULT)
// Print only new connection.
bpf_printk("tcp(wan): from %pI6:%u", tuples.src.ip,
bpf_ntohs(tuples.src.port));
bpf_printk("tcp(wan): outbound: %u, %pI6:%u", outbound, tuples.dst.ip,
bpf_ntohs(key_src.port));
bpf_ntohs(tuples.dst.port));
#endif
} else {
// bpf_printk("[%X]Old Connection", bpf_ntohl(tcph.seq));
@ -1621,8 +1692,10 @@ int tproxy_wan_egress(struct __sk_buff *skb) {
}
new_hdr.outbound = ret;
#if defined(__DEBUG_ROUTING) || defined(__PRINT_ROUTING_RESULT)
bpf_printk("udp(wan): from %pI6:%u", tuples.src.ip,
bpf_ntohs(tuples.src.port));
bpf_printk("udp(wan): outbound: %u, %pI6:%u", new_hdr.outbound,
tuples.dst.ip, bpf_ntohs(new_hdr.port));
tuples.dst.ip, bpf_ntohs(tuples.dst.port));
#endif
if (new_hdr.outbound == OUTBOUND_DIRECT) {
@ -1802,7 +1875,7 @@ int tproxy_wan_ingress(struct __sk_buff *skb) {
// Decap header to get fullcone tuple.
if ((ret = decap_after_udp_hdr(skb, ipversion, ihl, ipv4_tot_len,
&ori_src, sizeof(ori_src)))) {
&ori_src, sizeof(ori_src), NULL))) {
return TC_ACT_SHOT;
}

View File

@ -27,6 +27,11 @@ const (
DnsNatTimeout = 17 * time.Second // RFC 5452
)
var (
UnspecifiedAddr4 = netip.AddrFrom4([4]byte{})
UnspecifiedAddr6 = netip.AddrFrom16([16]byte{})
)
func ChooseNatTimeout(data []byte) (dmsg *dnsmessage.Message, timeout time.Duration) {
var dnsmsg dnsmessage.Message
if err := dnsmsg.Unpack(data); err == nil {
@ -66,10 +71,10 @@ func (hdr *AddrHdr) ToBytesFromPool() []byte {
return buf
}
func sendPktWithHdr(data []byte, from netip.AddrPort, lConn *net.UDPConn, to netip.AddrPort) error {
func sendPktWithHdrWithFlag(data []byte, from netip.AddrPort, lConn *net.UDPConn, to netip.AddrPort, lanWanFlag uint8) error {
hdr := AddrHdr{
Dest: from,
Outbound: 0, // Do not care.
Outbound: lanWanFlag, // Pass some message to the kernel program.
}
bHdr := hdr.ToBytesFromPool()
defer pool.Put(bHdr)
@ -82,20 +87,27 @@ func sendPktWithHdr(data []byte, from netip.AddrPort, lConn *net.UDPConn, to net
return err
}
func sendPktBind(data []byte, from netip.AddrPort, to netip.AddrPort) error {
// sendPkt uses bind first, and fallback to send hdr if addr is in use.
func sendPkt(data []byte, from netip.AddrPort, realTo, to netip.AddrPort, lConn *net.UDPConn, lanWanFlag uint8) (err error) {
d := net.Dialer{Control: func(network, address string, c syscall.RawConn) error {
return dialer.BindControl(c, from)
}}
conn, err := d.Dial("udp", to.String())
var conn net.Conn
conn, err = d.Dial("udp", realTo.String())
if err != nil {
if errors.Is(err, syscall.EADDRINUSE) {
// Port collision, use traditional method.
return sendPktWithHdrWithFlag(data, from, lConn, to, lanWanFlag)
}
return err
}
defer conn.Close()
uConn := conn.(*net.UDPConn)
_, err = uConn.Write(data)
return err
}
func (c *ControlPlane) WriteToUDP(to netip.AddrPort, isDNS bool, dummyFrom *netip.AddrPort, validateRushAnsFunc func(from netip.AddrPort) bool) UdpHandler {
func (c *ControlPlane) WriteToUDP(lanWanFlag uint8, lConn *net.UDPConn, realTo, to netip.AddrPort, isDNS bool, dummyFrom *netip.AddrPort, validateRushAnsFunc func(from netip.AddrPort) bool) UdpHandler {
return func(data []byte, from netip.AddrPort) (err error) {
// Do not return conn-unrelated err in this func.
@ -119,12 +131,23 @@ func (c *ControlPlane) WriteToUDP(to netip.AddrPort, isDNS bool, dummyFrom *neti
if dummyFrom != nil {
from = *dummyFrom
}
return sendPktBind(data, from, to)
return sendPkt(data, from, realTo, to, lConn, lanWanFlag)
}
}
func (c *ControlPlane) handlePkt(data []byte, src, dst netip.AddrPort, outboundIndex consts.OutboundIndex) (err error) {
func (c *ControlPlane) handlePkt(lConn *net.UDPConn, data []byte, src, pktDst, realDst netip.AddrPort, outboundIndex consts.OutboundIndex) (err error) {
var lanWanFlag uint8
var realSrc netip.AddrPort
useAssign := pktDst == realDst // Use sk_assign instead of modify target ip/port.
if useAssign {
lanWanFlag = consts.IsLan
realSrc = src
} else {
lanWanFlag = consts.IsWan
// From localhost, so dst IP is src IP.
realSrc = netip.AddrPortFrom(pktDst.Addr(), src.Port())
}
switch outboundIndex {
case consts.OutboundDirect:
case consts.OutboundControlPlaneDirect:
@ -144,17 +167,17 @@ func (c *ControlPlane) handlePkt(data []byte, src, dst netip.AddrPort, outboundI
// We should cache DNS records and set record TTL to 0, in order to monitor the dns req and resp in real time.
isDns := dnsMessage != nil
var dummyFrom *netip.AddrPort
destToSend := dst
destToSend := realDst
if isDns {
if resp := c.LookupDnsRespCache(dnsMessage); resp != nil {
// Send cache to client directly.
if err = sendPktBind(resp, destToSend, src); err != nil {
if err = sendPkt(resp, destToSend, realSrc, src, lConn, lanWanFlag); err != nil {
return fmt.Errorf("failed to write cached DNS resp: %w", err)
}
if c.log.IsLevelEnabled(logrus.DebugLevel) && len(dnsMessage.Questions) > 0 {
q := dnsMessage.Questions[0]
c.log.Tracef("UDP(DNS) %v <-[%v]-> Cache: %v %v",
RefineSourceToShow(src, dst.Addr()), outbound.Name, strings.ToLower(q.Name.String()), q.Type,
RefineSourceToShow(src, realDst.Addr()), outbound.Name, strings.ToLower(q.Name.String()), q.Type,
)
}
return nil
@ -174,10 +197,10 @@ func (c *ControlPlane) handlePkt(data []byte, src, dst netip.AddrPort, outboundI
}
l4proto := consts.L4ProtoStr_UDP
ipversion := consts.IpVersionFromAddr(dst.Addr())
ipversion := consts.IpVersionFromAddr(realDst.Addr())
var dialerForNew *dialer.Dialer
// For DNS request, modify dst to dns upstream.
// For DNS request, modify realDst to dns upstream.
// NOTICE: We might modify l4proto and ipversion.
dnsUpstream, err := c.dnsUpstream.GetUpstream()
if err != nil {
@ -233,10 +256,10 @@ func (c *ControlPlane) handlePkt(data []byte, src, dst netip.AddrPort, outboundI
bestTarget = netip.AddrPortFrom(dnsUpstream.Ip6, dnsUpstream.Port)
}
dialerForNew = bestDialer
dummyFrom = &dst
dummyFrom = &realDst
destToSend = bestTarget
c.log.WithFields(logrus.Fields{
"Original": RefineAddrPortToShow(dst),
"Original": RefineAddrPortToShow(realDst),
"New": destToSend,
"Network": string(l4proto) + string(ipversion),
}).Traceln("Modify DNS target")
@ -256,7 +279,7 @@ func (c *ControlPlane) handlePkt(data []byte, src, dst netip.AddrPort, outboundI
var isNew bool
var realDialer *dialer.Dialer
udpHandler := c.WriteToUDP(src, isDns, dummyFrom, func(from netip.AddrPort) bool {
udpHandler := c.WriteToUDP(lanWanFlag, lConn, realSrc, src, isDns, dummyFrom, func(from netip.AddrPort) bool {
// We only validate rush-ans when outbound is direct and pkt does not send to a home device.
// Because additional record OPT may not be supported by home router.
// So se should trust home devices even if they make rush-answer (or looks like).
@ -283,7 +306,7 @@ func (c *ControlPlane) handlePkt(data []byte, src, dst netip.AddrPort, outboundI
// If the udp endpoint has been not alive, remove it from pool and get a new one.
if !isNew && !ue.Dialer.MustGetAlive(networkType) {
c.log.WithFields(logrus.Fields{
"src": RefineSourceToShow(src, dst.Addr()),
"src": RefineSourceToShow(src, realDst.Addr()),
"network": string(l4proto) + string(ipversion),
"dialer": ue.Dialer.Name(),
}).Debugln("Old udp endpoint is not alive and removed")
@ -358,7 +381,7 @@ func (c *ControlPlane) handlePkt(data []byte, src, dst netip.AddrPort, outboundI
"qname": strings.ToLower(q.Name.String()),
"qtype": q.Type,
}).Infof("%v <-> %v",
RefineSourceToShow(src, dst.Addr()), RefineAddrPortToShow(destToSend),
RefineSourceToShow(src, realDst.Addr()), RefineAddrPortToShow(destToSend),
)
} else {
// TODO: Set-up ip to domain mapping and show domain if possible.
@ -367,7 +390,7 @@ func (c *ControlPlane) handlePkt(data []byte, src, dst netip.AddrPort, outboundI
"outbound": outbound.Name,
"dialer": realDialer.Name(),
}).Infof("%v <-> %v",
RefineSourceToShow(src, dst.Addr()), RefineAddrPortToShow(destToSend),
RefineSourceToShow(src, realDst.Addr()), RefineAddrPortToShow(destToSend),
)
}
}

View File

@ -17,6 +17,7 @@ sudo tc filter del dev $wan ingress
sudo tc filter del dev $wan egress
sudo tc filter add dev $lan ingress bpf direct-action obj foo.o sec tc/ingress
sudo tc filter add dev $lan egress bpf direct-action obj foo.o sec tc/egress
# sudo tc filter add dev $wan ingress bpf direct-action obj foo.o sec tc/wan_ingress
# sudo tc filter add dev $wan egress bpf direct-action obj foo.o sec tc/wan_egress