From 6e3637d01821a1973654dd9b3928e44b37e39ceb Mon Sep 17 00:00:00 2001 From: mzz2017 <2017@duck.com> Date: Sat, 28 Jan 2023 13:27:54 +0800 Subject: [PATCH] optimize: lower kernel version requirement from 5.6 to 5.2 --- Makefile | 2 +- cmd/{infra => internal}/su.go | 4 +- cmd/{infra => internal}/subscription.go | 2 +- cmd/run.go | 6 +- component/control/bpf_utils.go | 49 +++- component/control/dns.go | 2 +- component/control/kern/tproxy.c | 11 +- component/control/routing_matcher_builder.go | 2 +- pkg/ebpf_internal/align.go | 8 + pkg/ebpf_internal/elf.go | 104 +++++++ pkg/ebpf_internal/endian_be.go | 15 + pkg/ebpf_internal/endian_le.go | 15 + pkg/ebpf_internal/internal/unix/doc.go | 13 + .../internal/unix/types_linux.go | 192 +++++++++++++ .../internal/unix/types_other.go | 272 ++++++++++++++++++ pkg/ebpf_internal/vdso.go | 155 ++++++++++ pkg/ebpf_internal/version.go | 124 ++++++++ 17 files changed, 962 insertions(+), 14 deletions(-) rename cmd/{infra => internal}/su.go (97%) rename cmd/{infra => internal}/subscription.go (99%) create mode 100644 pkg/ebpf_internal/align.go create mode 100644 pkg/ebpf_internal/elf.go create mode 100644 pkg/ebpf_internal/endian_be.go create mode 100644 pkg/ebpf_internal/endian_le.go create mode 100644 pkg/ebpf_internal/internal/unix/doc.go create mode 100644 pkg/ebpf_internal/internal/unix/types_linux.go create mode 100644 pkg/ebpf_internal/internal/unix/types_other.go create mode 100644 pkg/ebpf_internal/vdso.go create mode 100644 pkg/ebpf_internal/version.go diff --git a/Makefile b/Makefile index ca1311b..c507e0d 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ # The development version of clang is distributed as the 'clang' binary, # while stable/released versions have a version number attached. # Pin the default clang to a stable version. -CLANG ?= clang-14 +CLANG ?= clang STRIP ?= llvm-strip #CFLAGS := -O2 -g -Wall -Werror $(CFLAGS) CFLAGS := -O2 -Wall -Werror $(CFLAGS) diff --git a/cmd/infra/su.go b/cmd/internal/su.go similarity index 97% rename from cmd/infra/su.go rename to cmd/internal/su.go index a59ed57..d1a694d 100644 --- a/cmd/infra/su.go +++ b/cmd/internal/su.go @@ -1,4 +1,4 @@ -package infra +package internal import ( "fmt" @@ -40,4 +40,4 @@ func AutoSu() { os.Exit(1) } os.Exit(stat.ExitCode()) -} \ No newline at end of file +} diff --git a/cmd/infra/subscription.go b/cmd/internal/subscription.go similarity index 99% rename from cmd/infra/subscription.go rename to cmd/internal/subscription.go index 44a2870..a3226ae 100644 --- a/cmd/infra/subscription.go +++ b/cmd/internal/subscription.go @@ -1,4 +1,4 @@ -package infra +package internal import ( "encoding/json" diff --git a/cmd/run.go b/cmd/run.go index d400870..3e40771 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -4,7 +4,7 @@ import ( "fmt" "github.com/sirupsen/logrus" "github.com/spf13/cobra" - "github.com/v2rayA/dae/cmd/infra" + "github.com/v2rayA/dae/cmd/internal" "github.com/v2rayA/dae/component/control" "github.com/v2rayA/dae/config" "github.com/v2rayA/dae/pkg/config_parser" @@ -43,13 +43,13 @@ func Run() (err error) { } // Require "sudo" if necessary. - infra.AutoSu() + internal.AutoSu() // Resolve subscriptions to nodes. nodeList := make([]string, len(param.Node)) copy(nodeList, param.Node) for _, sub := range param.Subscription { - nodes, err := infra.ResolveSubscription(log, sub) + nodes, err := internal.ResolveSubscription(log, sub) if err != nil { log.Warnf(`failed to resolve subscription "%v": %v`, sub, err) } diff --git a/component/control/bpf_utils.go b/component/control/bpf_utils.go index cf5afb4..3f266a2 100644 --- a/component/control/bpf_utils.go +++ b/component/control/bpf_utils.go @@ -6,9 +6,12 @@ package control import ( + "fmt" "github.com/cilium/ebpf" "github.com/v2rayA/dae/common" + "github.com/v2rayA/dae/pkg/ebpf_internal" "net/netip" + "reflect" ) type _bpfLpmKey struct { @@ -27,7 +30,7 @@ func (o *bpfObjects) newLpmMap(keys []_bpfLpmKey, values []uint32) (m *ebpf.Map, if err != nil { return nil, err } - if _, err = m.BatchUpdate(keys, values, &ebpf.BatchOptions{ + if _, err = BatchUpdate(m, keys, values, &ebpf.BatchOptions{ ElemFlags: uint64(ebpf.UpdateAny), }); err != nil { return nil, err @@ -50,3 +53,47 @@ func cidrToBpfLpmKey(prefix netip.Prefix) _bpfLpmKey { Data: common.Ipv6ByteSliceToUint32Array(ip[:]), } } + +// A utility to convert the values to proper strings. +func int8ToStr(arr []int8) string { + b := make([]byte, 0, len(arr)) + for _, v := range arr { + if v == 0x00 { + break + } + b = append(b, byte(v)) + } + return string(b) +} +func BatchUpdate(m *ebpf.Map, keys interface{}, values interface{}, opts *ebpf.BatchOptions) (n int, err error) { + var old bool + version, e := internal.KernelVersion() + if e != nil || version.Less(internal.Version{5, 6, 0}) { + old = true + } + if !old { + return m.BatchUpdate(keys, values, opts) + } else { + vKeys := reflect.ValueOf(keys) + if vKeys.Kind() != reflect.Slice { + return 0, fmt.Errorf("keys must be slice") + } + vVals := reflect.ValueOf(values) + if vVals.Kind() != reflect.Slice { + return 0, fmt.Errorf("values must be slice") + } + length := vKeys.Len() + if vVals.Len() != length { + return 0, fmt.Errorf("keys and values must have same length") + } + + for i := 0; i < length; i++ { + vKey := vKeys.Index(i) + vVal := vVals.Index(i) + if err = m.Update(vKey.Interface(), vVal.Interface(), ebpf.MapUpdateFlags(opts.ElemFlags)); err != nil { + return i, err + } + } + return vKeys.Len(), nil + } +} diff --git a/component/control/dns.go b/component/control/dns.go index 8cb5ecd..6231482 100644 --- a/component/control/dns.go +++ b/component/control/dns.go @@ -55,7 +55,7 @@ func (c *ControlPlane) BatchUpdateDomainRouting(cache *dnsCache) error { Bitmap: cache.DomainBitmap, }) } - if _, err := c.bpf.DomainRoutingMap.BatchUpdate(keys, vals, &ebpf.BatchOptions{ + if _, err := BatchUpdate(c.bpf.DomainRoutingMap, keys, vals, &ebpf.BatchOptions{ ElemFlags: uint64(ebpf.UpdateAny), }); err != nil { return err diff --git a/component/control/kern/tproxy.c b/component/control/kern/tproxy.c index 3ce25b1..1926cda 100644 --- a/component/control/kern/tproxy.c +++ b/component/control/kern/tproxy.c @@ -379,7 +379,7 @@ handle_ipv6_extensions(void *data, void *data_end, __u32 hdr, struct tcphdr **tcph, struct udphdr **udph, __u8 *ihl) { __u8 hdr_length = 0; __s32 *p_s32; - __u8 nexthdr; + __u8 nexthdr = 0; *ihl = sizeof(struct ipv6hdr) / 4; // We only process TCP and UDP traffic. @@ -395,6 +395,7 @@ handle_ipv6_extensions(void *data, void *data_end, __u32 hdr, if (!(p_s32 = bpf_map_lookup_elem(&ipproto_hdrsize_map, &hdr))) { return 1; } + switch (*p_s32) { case -1: if ((void *)((__u8 *)data + 2) > data_end) { @@ -410,7 +411,7 @@ handle_ipv6_extensions(void *data, void *data_end, __u32 hdr, nexthdr = *(__u8 *)data; break; case 4: - hdr_length = *p_s32; + hdr_length = 4; goto special_n1; case 0: if (hdr == IPPROTO_TCP) { @@ -433,6 +434,9 @@ handle_ipv6_extensions(void *data, void *data_end, __u32 hdr, } } return 0; + default: + // Unknown hdr. + return 1; } } bpf_printk("exceeds IPV6_MAX_EXTENSIONS limit"); @@ -836,8 +840,7 @@ static long routing(__u32 flag[3], void *l4_hdr, __be32 saddr[4], *p_u32 > routing->port_range.port_end) { bad_rule = true; } - } else if ((p_u32 = bpf_map_lookup_elem(&l4proto_ipversion_map, - &key))) { + } else if ((p_u32 = bpf_map_lookup_elem(&l4proto_ipversion_map, &key))) { if (!(*p_u32 & routing->__value)) { bad_rule = true; } diff --git a/component/control/routing_matcher_builder.go b/component/control/routing_matcher_builder.go index 8381471..cdbadb6 100644 --- a/component/control/routing_matcher_builder.go +++ b/component/control/routing_matcher_builder.go @@ -186,7 +186,7 @@ func (b *RoutingMatcherBuilder) Build() (err error) { } routingsLen := uint32(len(b.rules)) routingsKeys := common.ARangeU32(routingsLen) - if _, err = b.bpf.RoutingMap.BatchUpdate(routingsKeys, b.rules, &ebpf.BatchOptions{ + if _, err = BatchUpdate(b.bpf.RoutingMap, routingsKeys, b.rules, &ebpf.BatchOptions{ ElemFlags: uint64(ebpf.UpdateAny), }); err != nil { return fmt.Errorf("BatchUpdate: %w", err) diff --git a/pkg/ebpf_internal/align.go b/pkg/ebpf_internal/align.go new file mode 100644 index 0000000..398495c --- /dev/null +++ b/pkg/ebpf_internal/align.go @@ -0,0 +1,8 @@ +// Copied from https://github.com/cilium/ebpf/blob/v0.10.0/internal/align.go + +package internal + +// Align returns 'n' updated to 'alignment' boundary. +func Align(n, alignment int) int { + return (int(n) + alignment - 1) / alignment * alignment +} diff --git a/pkg/ebpf_internal/elf.go b/pkg/ebpf_internal/elf.go new file mode 100644 index 0000000..f341c9b --- /dev/null +++ b/pkg/ebpf_internal/elf.go @@ -0,0 +1,104 @@ +// Copied from https://github.com/cilium/ebpf/blob/v0.10.0/internal/elf.go + +package internal + +import ( + "debug/elf" + "fmt" + "io" +) + +type SafeELFFile struct { + *elf.File +} + +// NewSafeELFFile reads an ELF safely. +// +// Any panic during parsing is turned into an error. This is necessary since +// there are a bunch of unfixed bugs in debug/elf. +// +// https://github.com/golang/go/issues?q=is%3Aissue+is%3Aopen+debug%2Felf+in%3Atitle +func NewSafeELFFile(r io.ReaderAt) (safe *SafeELFFile, err error) { + defer func() { + r := recover() + if r == nil { + return + } + + safe = nil + err = fmt.Errorf("reading ELF file panicked: %s", r) + }() + + file, err := elf.NewFile(r) + if err != nil { + return nil, err + } + + return &SafeELFFile{file}, nil +} + +// OpenSafeELFFile reads an ELF from a file. +// +// It works like NewSafeELFFile, with the exception that safe.Close will +// close the underlying file. +func OpenSafeELFFile(path string) (safe *SafeELFFile, err error) { + defer func() { + r := recover() + if r == nil { + return + } + + safe = nil + err = fmt.Errorf("reading ELF file panicked: %s", r) + }() + + file, err := elf.Open(path) + if err != nil { + return nil, err + } + + return &SafeELFFile{file}, nil +} + +// Symbols is the safe version of elf.File.Symbols. +func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) { + defer func() { + r := recover() + if r == nil { + return + } + + syms = nil + err = fmt.Errorf("reading ELF symbols panicked: %s", r) + }() + + syms, err = se.File.Symbols() + return +} + +// DynamicSymbols is the safe version of elf.File.DynamicSymbols. +func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) { + defer func() { + r := recover() + if r == nil { + return + } + + syms = nil + err = fmt.Errorf("reading ELF dynamic symbols panicked: %s", r) + }() + + syms, err = se.File.DynamicSymbols() + return +} + +// SectionsByType returns all sections in the file with the specified section type. +func (se *SafeELFFile) SectionsByType(typ elf.SectionType) []*elf.Section { + sections := make([]*elf.Section, 0, 1) + for _, section := range se.Sections { + if section.Type == typ { + sections = append(sections, section) + } + } + return sections +} diff --git a/pkg/ebpf_internal/endian_be.go b/pkg/ebpf_internal/endian_be.go new file mode 100644 index 0000000..5f8ab65 --- /dev/null +++ b/pkg/ebpf_internal/endian_be.go @@ -0,0 +1,15 @@ +//go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64 +// +build armbe arm64be mips mips64 mips64p32 ppc64 s390 s390x sparc sparc64 + +// Copied from https://github.com/cilium/ebpf/blob/v0.10.0/internal/endian_be.go + +package internal + +import "encoding/binary" + +// NativeEndian is set to either binary.BigEndian or binary.LittleEndian, +// depending on the host's endianness. +var NativeEndian binary.ByteOrder = binary.BigEndian + +// ClangEndian is set to either "el" or "eb" depending on the host's endianness. +const ClangEndian = "eb" diff --git a/pkg/ebpf_internal/endian_le.go b/pkg/ebpf_internal/endian_le.go new file mode 100644 index 0000000..1c16f23 --- /dev/null +++ b/pkg/ebpf_internal/endian_le.go @@ -0,0 +1,15 @@ +//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64 +// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64 + +// Copied from https://github.com/cilium/ebpf/blob/v0.10.0/internal/endian_le.go + +package internal + +import "encoding/binary" + +// NativeEndian is set to either binary.BigEndian or binary.LittleEndian, +// depending on the host's endianness. +var NativeEndian binary.ByteOrder = binary.LittleEndian + +// ClangEndian is set to either "el" or "eb" depending on the host's endianness. +const ClangEndian = "el" diff --git a/pkg/ebpf_internal/internal/unix/doc.go b/pkg/ebpf_internal/internal/unix/doc.go new file mode 100644 index 0000000..83c9461 --- /dev/null +++ b/pkg/ebpf_internal/internal/unix/doc.go @@ -0,0 +1,13 @@ +// Copied from https://github.com/cilium/ebpf/tree/v0.10.0/internal/unix + +// Package unix re-exports Linux specific parts of golang.org/x/sys/unix. +// +// It avoids breaking compilation on other OS by providing stubs as follows: +// - Invoking a function always returns an error. +// - Errnos have distinct, non-zero values. +// - Constants have distinct but meaningless values. +// - Types use the same names for members, but may or may not follow the +// Linux layout. +package unix + +// Note: please don't add any custom API to this package. Use internal/sys instead. diff --git a/pkg/ebpf_internal/internal/unix/types_linux.go b/pkg/ebpf_internal/internal/unix/types_linux.go new file mode 100644 index 0000000..bd86119 --- /dev/null +++ b/pkg/ebpf_internal/internal/unix/types_linux.go @@ -0,0 +1,192 @@ +//go:build linux + +// Copied from https://github.com/cilium/ebpf/tree/v0.10.0/internal/unix + +package unix + +import ( + "syscall" + + linux "golang.org/x/sys/unix" +) + +const ( + ENOENT = linux.ENOENT + EEXIST = linux.EEXIST + EAGAIN = linux.EAGAIN + ENOSPC = linux.ENOSPC + EINVAL = linux.EINVAL + EPOLLIN = linux.EPOLLIN + EINTR = linux.EINTR + EPERM = linux.EPERM + ESRCH = linux.ESRCH + ENODEV = linux.ENODEV + EBADF = linux.EBADF + E2BIG = linux.E2BIG + EFAULT = linux.EFAULT + EACCES = linux.EACCES + EILSEQ = linux.EILSEQ + EOPNOTSUPP = linux.EOPNOTSUPP +) + +const ( + BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC + BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE + BPF_F_RDONLY = linux.BPF_F_RDONLY + BPF_F_WRONLY = linux.BPF_F_WRONLY + BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG + BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG + BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE + BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE + BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP + BPF_F_KPROBE_MULTI_RETURN = linux.BPF_F_KPROBE_MULTI_RETURN + BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN + BPF_TAG_SIZE = linux.BPF_TAG_SIZE + BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT + BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT + BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ + SYS_BPF = linux.SYS_BPF + F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC + EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD + EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC + O_CLOEXEC = linux.O_CLOEXEC + O_NONBLOCK = linux.O_NONBLOCK + PROT_READ = linux.PROT_READ + PROT_WRITE = linux.PROT_WRITE + MAP_SHARED = linux.MAP_SHARED + PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1 + PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE + PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT + PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT + PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE + PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE + PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF + PerfBitWatermark = linux.PerfBitWatermark + PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW + PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC + RLIM_INFINITY = linux.RLIM_INFINITY + RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK + BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME + PERF_RECORD_LOST = linux.PERF_RECORD_LOST + PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE + AT_FDCWD = linux.AT_FDCWD + RENAME_NOREPLACE = linux.RENAME_NOREPLACE + SO_ATTACH_BPF = linux.SO_ATTACH_BPF + SO_DETACH_BPF = linux.SO_DETACH_BPF + SOL_SOCKET = linux.SOL_SOCKET + SIGPROF = linux.SIGPROF + SIG_BLOCK = linux.SIG_BLOCK + SIG_UNBLOCK = linux.SIG_UNBLOCK + EM_NONE = linux.EM_NONE + EM_BPF = linux.EM_BPF +) + +type Statfs_t = linux.Statfs_t +type Stat_t = linux.Stat_t +type Rlimit = linux.Rlimit +type Signal = linux.Signal +type Sigset_t = linux.Sigset_t +type PerfEventMmapPage = linux.PerfEventMmapPage +type EpollEvent = linux.EpollEvent +type PerfEventAttr = linux.PerfEventAttr +type Utsname = linux.Utsname + +func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { + return linux.Syscall(trap, a1, a2, a3) +} + +func PthreadSigmask(how int, set, oldset *Sigset_t) error { + return linux.PthreadSigmask(how, set, oldset) +} + +func FcntlInt(fd uintptr, cmd, arg int) (int, error) { + return linux.FcntlInt(fd, cmd, arg) +} + +func IoctlSetInt(fd int, req uint, value int) error { + return linux.IoctlSetInt(fd, req, value) +} + +func Statfs(path string, buf *Statfs_t) (err error) { + return linux.Statfs(path, buf) +} + +func Close(fd int) (err error) { + return linux.Close(fd) +} + +func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { + return linux.EpollWait(epfd, events, msec) +} + +func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { + return linux.EpollCtl(epfd, op, fd, event) +} + +func Eventfd(initval uint, flags int) (fd int, err error) { + return linux.Eventfd(initval, flags) +} + +func Write(fd int, p []byte) (n int, err error) { + return linux.Write(fd, p) +} + +func EpollCreate1(flag int) (fd int, err error) { + return linux.EpollCreate1(flag) +} + +func SetNonblock(fd int, nonblocking bool) (err error) { + return linux.SetNonblock(fd, nonblocking) +} + +func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { + return linux.Mmap(fd, offset, length, prot, flags) +} + +func Munmap(b []byte) (err error) { + return linux.Munmap(b) +} + +func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags) +} + +func Uname(buf *Utsname) (err error) { + return linux.Uname(buf) +} + +func Getpid() int { + return linux.Getpid() +} + +func Gettid() int { + return linux.Gettid() +} + +func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { + return linux.Tgkill(tgid, tid, sig) +} + +func BytePtrFromString(s string) (*byte, error) { + return linux.BytePtrFromString(s) +} + +func ByteSliceToString(s []byte) string { + return linux.ByteSliceToString(s) +} + +func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error { + return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags) +} + +func Prlimit(pid, resource int, new, old *Rlimit) error { + return linux.Prlimit(pid, resource, new, old) +} + +func Open(path string, mode int, perm uint32) (int, error) { + return linux.Open(path, mode, perm) +} + +func Fstat(fd int, stat *Stat_t) error { + return linux.Fstat(fd, stat) +} diff --git a/pkg/ebpf_internal/internal/unix/types_other.go b/pkg/ebpf_internal/internal/unix/types_other.go new file mode 100644 index 0000000..9a11b77 --- /dev/null +++ b/pkg/ebpf_internal/internal/unix/types_other.go @@ -0,0 +1,272 @@ +//go:build !linux + +// Copied from https://github.com/cilium/ebpf/tree/v0.10.0/internal/unix + +package unix + +import ( + "fmt" + "runtime" + "syscall" +) + +var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) + +// Errnos are distinct and non-zero. +const ( + ENOENT syscall.Errno = iota + 1 + EEXIST + EAGAIN + ENOSPC + EINVAL + EINTR + EPERM + ESRCH + ENODEV + EBADF + E2BIG + EFAULT + EACCES + EILSEQ + EOPNOTSUPP +) + +// Constants are distinct to avoid breaking switch statements. +const ( + BPF_F_NO_PREALLOC = iota + BPF_F_NUMA_NODE + BPF_F_RDONLY + BPF_F_WRONLY + BPF_F_RDONLY_PROG + BPF_F_WRONLY_PROG + BPF_F_SLEEPABLE + BPF_F_MMAPABLE + BPF_F_INNER_MAP + BPF_F_KPROBE_MULTI_RETURN + BPF_OBJ_NAME_LEN + BPF_TAG_SIZE + BPF_RINGBUF_BUSY_BIT + BPF_RINGBUF_DISCARD_BIT + BPF_RINGBUF_HDR_SZ + SYS_BPF + F_DUPFD_CLOEXEC + EPOLLIN + EPOLL_CTL_ADD + EPOLL_CLOEXEC + O_CLOEXEC + O_NONBLOCK + PROT_READ + PROT_WRITE + MAP_SHARED + PERF_ATTR_SIZE_VER1 + PERF_TYPE_SOFTWARE + PERF_TYPE_TRACEPOINT + PERF_COUNT_SW_BPF_OUTPUT + PERF_EVENT_IOC_DISABLE + PERF_EVENT_IOC_ENABLE + PERF_EVENT_IOC_SET_BPF + PerfBitWatermark + PERF_SAMPLE_RAW + PERF_FLAG_FD_CLOEXEC + RLIM_INFINITY + RLIMIT_MEMLOCK + BPF_STATS_RUN_TIME + PERF_RECORD_LOST + PERF_RECORD_SAMPLE + AT_FDCWD + RENAME_NOREPLACE + SO_ATTACH_BPF + SO_DETACH_BPF + SOL_SOCKET + SIGPROF + SIG_BLOCK + SIG_UNBLOCK + EM_NONE + EM_BPF +) + +type Statfs_t struct { + Type int64 + Bsize int64 + Blocks uint64 + Bfree uint64 + Bavail uint64 + Files uint64 + Ffree uint64 + Fsid [2]int32 + Namelen int64 + Frsize int64 + Flags int64 + Spare [4]int64 +} + +type Stat_t struct{} + +type Rlimit struct { + Cur uint64 + Max uint64 +} + +type Signal int + +type Sigset_t struct { + Val [4]uint64 +} + +func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { + return 0, 0, syscall.ENOTSUP +} + +func PthreadSigmask(how int, set, oldset *Sigset_t) error { + return errNonLinux +} + +func FcntlInt(fd uintptr, cmd, arg int) (int, error) { + return -1, errNonLinux +} + +func IoctlSetInt(fd int, req uint, value int) error { + return errNonLinux +} + +func Statfs(path string, buf *Statfs_t) error { + return errNonLinux +} + +func Close(fd int) (err error) { + return errNonLinux +} + +type EpollEvent struct { + Events uint32 + Fd int32 + Pad int32 +} + +func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { + return 0, errNonLinux +} + +func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { + return errNonLinux +} + +func Eventfd(initval uint, flags int) (fd int, err error) { + return 0, errNonLinux +} + +func Write(fd int, p []byte) (n int, err error) { + return 0, errNonLinux +} + +func EpollCreate1(flag int) (fd int, err error) { + return 0, errNonLinux +} + +type PerfEventMmapPage struct { + Version uint32 + Compat_version uint32 + Lock uint32 + Index uint32 + Offset int64 + Time_enabled uint64 + Time_running uint64 + Capabilities uint64 + Pmc_width uint16 + Time_shift uint16 + Time_mult uint32 + Time_offset uint64 + Time_zero uint64 + Size uint32 + + Data_head uint64 + Data_tail uint64 + Data_offset uint64 + Data_size uint64 + Aux_head uint64 + Aux_tail uint64 + Aux_offset uint64 + Aux_size uint64 +} + +func SetNonblock(fd int, nonblocking bool) (err error) { + return errNonLinux +} + +func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { + return []byte{}, errNonLinux +} + +func Munmap(b []byte) (err error) { + return errNonLinux +} + +type PerfEventAttr struct { + Type uint32 + Size uint32 + Config uint64 + Sample uint64 + Sample_type uint64 + Read_format uint64 + Bits uint64 + Wakeup uint32 + Bp_type uint32 + Ext1 uint64 + Ext2 uint64 + Branch_sample_type uint64 + Sample_regs_user uint64 + Sample_stack_user uint32 + Clockid int32 + Sample_regs_intr uint64 + Aux_watermark uint32 + Sample_max_stack uint16 +} + +func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return 0, errNonLinux +} + +type Utsname struct { + Release [65]byte + Version [65]byte +} + +func Uname(buf *Utsname) (err error) { + return errNonLinux +} + +func Getpid() int { + return -1 +} + +func Gettid() int { + return -1 +} + +func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { + return errNonLinux +} + +func BytePtrFromString(s string) (*byte, error) { + return nil, errNonLinux +} + +func ByteSliceToString(s []byte) string { + return "" +} + +func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error { + return errNonLinux +} + +func Prlimit(pid, resource int, new, old *Rlimit) error { + return errNonLinux +} + +func Open(path string, mode int, perm uint32) (int, error) { + return -1, errNonLinux +} + +func Fstat(fd int, stat *Stat_t) error { + return errNonLinux +} diff --git a/pkg/ebpf_internal/vdso.go b/pkg/ebpf_internal/vdso.go new file mode 100644 index 0000000..62ad537 --- /dev/null +++ b/pkg/ebpf_internal/vdso.go @@ -0,0 +1,155 @@ +// Copied from https://github.com/cilium/ebpf/blob/v0.10.0/internal/vdso.go + +package internal + +import ( + "debug/elf" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "os" + + "github.com/v2rayA/dae/pkg/ebpf_internal/internal/unix" +) + +var ( + errAuxvNoVDSO = errors.New("no vdso address found in auxv") +) + +// vdsoVersion returns the LINUX_VERSION_CODE embedded in the vDSO library +// linked into the current process image. +func vdsoVersion() (uint32, error) { + // Read data from the auxiliary vector, which is normally passed directly + // to the process. Go does not expose that data, so we must read it from procfs. + // https://man7.org/linux/man-pages/man3/getauxval.3.html + av, err := os.Open("/proc/self/auxv") + if errors.Is(err, unix.EACCES) { + return 0, fmt.Errorf("opening auxv: %w (process may not be dumpable due to file capabilities)", err) + } + if err != nil { + return 0, fmt.Errorf("opening auxv: %w", err) + } + defer av.Close() + + vdsoAddr, err := vdsoMemoryAddress(av) + if err != nil { + return 0, fmt.Errorf("finding vDSO memory address: %w", err) + } + + // Use /proc/self/mem rather than unsafe.Pointer tricks. + mem, err := os.Open("/proc/self/mem") + if err != nil { + return 0, fmt.Errorf("opening mem: %w", err) + } + defer mem.Close() + + // Open ELF at provided memory address, as offset into /proc/self/mem. + c, err := vdsoLinuxVersionCode(io.NewSectionReader(mem, int64(vdsoAddr), math.MaxInt64)) + if err != nil { + return 0, fmt.Errorf("reading linux version code: %w", err) + } + + return c, nil +} + +// vdsoMemoryAddress returns the memory address of the vDSO library +// linked into the current process image. r is an io.Reader into an auxv blob. +func vdsoMemoryAddress(r io.Reader) (uint64, error) { + const ( + _AT_NULL = 0 // End of vector + _AT_SYSINFO_EHDR = 33 // Offset to vDSO blob in process image + ) + + // Loop through all tag/value pairs in auxv until we find `AT_SYSINFO_EHDR`, + // the address of a page containing the virtual Dynamic Shared Object (vDSO). + aux := struct{ Tag, Val uint64 }{} + for { + if err := binary.Read(r, NativeEndian, &aux); err != nil { + return 0, fmt.Errorf("reading auxv entry: %w", err) + } + + switch aux.Tag { + case _AT_SYSINFO_EHDR: + if aux.Val != 0 { + return aux.Val, nil + } + return 0, fmt.Errorf("invalid vDSO address in auxv") + // _AT_NULL is always the last tag/val pair in the aux vector + // and can be treated like EOF. + case _AT_NULL: + return 0, errAuxvNoVDSO + } + } +} + +// format described at https://www.man7.org/linux/man-pages/man5/elf.5.html in section 'Notes (Nhdr)' +type elfNoteHeader struct { + NameSize int32 + DescSize int32 + Type int32 +} + +// vdsoLinuxVersionCode returns the LINUX_VERSION_CODE embedded in +// the ELF notes section of the binary provided by the reader. +func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) { + hdr, err := NewSafeELFFile(r) + if err != nil { + return 0, fmt.Errorf("reading vDSO ELF: %w", err) + } + + sections := hdr.SectionsByType(elf.SHT_NOTE) + if len(sections) == 0 { + return 0, fmt.Errorf("no note section found in vDSO ELF") + } + + for _, sec := range sections { + sr := sec.Open() + var n elfNoteHeader + + // Read notes until we find one named 'Linux'. + for { + if err := binary.Read(sr, hdr.ByteOrder, &n); err != nil { + if errors.Is(err, io.EOF) { + // We looked at all the notes in this section + break + } + return 0, fmt.Errorf("reading note header: %w", err) + } + + // If a note name is defined, it follows the note header. + var name string + if n.NameSize > 0 { + // Read the note name, aligned to 4 bytes. + buf := make([]byte, Align(int(n.NameSize), 4)) + if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil { + return 0, fmt.Errorf("reading note name: %w", err) + } + + // Read nul-terminated string. + name = unix.ByteSliceToString(buf[:n.NameSize]) + } + + // If a note descriptor is defined, it follows the name. + // It is possible for a note to have a descriptor but not a name. + if n.DescSize > 0 { + // LINUX_VERSION_CODE is a uint32 value. + if name == "Linux" && n.DescSize == 4 && n.Type == 0 { + var version uint32 + if err := binary.Read(sr, hdr.ByteOrder, &version); err != nil { + return 0, fmt.Errorf("reading note descriptor: %w", err) + } + return version, nil + } + + // Discard the note descriptor if it exists but we're not interested in it. + if _, err := io.CopyN(io.Discard, sr, int64(Align(int(n.DescSize), 4))); err != nil { + return 0, err + } + } + } + } + + return 0, fmt.Errorf("no Linux note in ELF") +} diff --git a/pkg/ebpf_internal/version.go b/pkg/ebpf_internal/version.go new file mode 100644 index 0000000..4de2444 --- /dev/null +++ b/pkg/ebpf_internal/version.go @@ -0,0 +1,124 @@ +// Copied from https://github.com/cilium/ebpf/blob/v0.10.0/internal/version.go + +package internal + +import ( + "fmt" + "sync" + + "github.com/v2rayA/dae/pkg/ebpf_internal/internal/unix" +) + +const ( + // Version constant used in ELF binaries indicating that the loader needs to + // substitute the eBPF program's version with the value of the kernel's + // KERNEL_VERSION compile-time macro. Used for compatibility with BCC, gobpf + // and RedSift. + MagicKernelVersion = 0xFFFFFFFE +) + +var ( + kernelVersion = struct { + once sync.Once + version Version + err error + }{} +) + +// A Version in the form Major.Minor.Patch. +type Version [3]uint16 + +// NewVersion creates a version from a string like "Major.Minor.Patch". +// +// Patch is optional. +func NewVersion(ver string) (Version, error) { + var major, minor, patch uint16 + n, _ := fmt.Sscanf(ver, "%d.%d.%d", &major, &minor, &patch) + if n < 2 { + return Version{}, fmt.Errorf("invalid version: %s", ver) + } + return Version{major, minor, patch}, nil +} + +// NewVersionFromCode creates a version from a LINUX_VERSION_CODE. +func NewVersionFromCode(code uint32) Version { + return Version{ + uint16(uint8(code >> 16)), + uint16(uint8(code >> 8)), + uint16(uint8(code)), + } +} + +func (v Version) String() string { + if v[2] == 0 { + return fmt.Sprintf("v%d.%d", v[0], v[1]) + } + return fmt.Sprintf("v%d.%d.%d", v[0], v[1], v[2]) +} + +// Less returns true if the version is less than another version. +func (v Version) Less(other Version) bool { + for i, a := range v { + if a == other[i] { + continue + } + return a < other[i] + } + return false +} + +// Unspecified returns true if the version is all zero. +func (v Version) Unspecified() bool { + return v[0] == 0 && v[1] == 0 && v[2] == 0 +} + +// Kernel implements the kernel's KERNEL_VERSION macro from linux/version.h. +// It represents the kernel version and patch level as a single value. +func (v Version) Kernel() uint32 { + + // Kernels 4.4 and 4.9 have their SUBLEVEL clamped to 255 to avoid + // overflowing into PATCHLEVEL. + // See kernel commit 9b82f13e7ef3 ("kbuild: clamp SUBLEVEL to 255"). + s := v[2] + if s > 255 { + s = 255 + } + + // Truncate members to uint8 to prevent them from spilling over into + // each other when overflowing 8 bits. + return uint32(uint8(v[0]))<<16 | uint32(uint8(v[1]))<<8 | uint32(uint8(s)) +} + +// KernelVersion returns the version of the currently running kernel. +func KernelVersion() (Version, error) { + kernelVersion.once.Do(func() { + kernelVersion.version, kernelVersion.err = detectKernelVersion() + }) + + if kernelVersion.err != nil { + return Version{}, kernelVersion.err + } + return kernelVersion.version, nil +} + +// detectKernelVersion returns the version of the running kernel. +func detectKernelVersion() (Version, error) { + vc, err := vdsoVersion() + if err != nil { + return Version{}, err + } + return NewVersionFromCode(vc), nil +} + +// KernelRelease returns the release string of the running kernel. +// Its format depends on the Linux distribution and corresponds to directory +// names in /lib/modules by convention. Some examples are 5.15.17-1-lts and +// 4.19.0-16-amd64. +func KernelRelease() (string, error) { + var uname unix.Utsname + if err := unix.Uname(&uname); err != nil { + return "", fmt.Errorf("uname failed: %w", err) + } + + return unix.ByteSliceToString(uname.Release[:]), nil +}