feat/optimize: add userspace routing and optimize domain routing (#18)

This commit is contained in:
mzz
2023-02-18 18:27:28 +08:00
committed by GitHub
parent 87efa3d38d
commit 8f6b0a6e2a
79 changed files with 862 additions and 176 deletions

View File

@ -0,0 +1,119 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
*/
package domain_matcher
import (
"fmt"
"github.com/cloudflare/ahocorasick"
"github.com/v2rayA/dae/common/consts"
"regexp"
"strings"
)
// Ahocorasick is a domain matcher that keeps, for every bit index, one
// Aho-Corasick automaton for literal patterns and one list of compiled
// regular expressions.
type Ahocorasick struct {
	// matchers holds the automaton of each bit index; an entry stays nil
	// until Build creates it.
	matchers []*ahocorasick.Matcher
	// regexp holds the compiled regex rules of each bit index.
	regexp [][]*regexp.Regexp
	// toBuild holds the literal patterns collected by AddSet for each bit
	// index; Build releases it.
	toBuild [][][]byte

	// validIndexes lists the bit indexes that own a built automaton.
	validIndexes []int
	// validRegexpIndexes lists the bit indexes walked during regex matching.
	validRegexpIndexes []int

	// err is the first error recorded by AddSet; Build returns it.
	err error
}
// NewAhocorasick returns an empty matcher whose per-index tables (automata,
// pending literal patterns and regex lists) each have bitLength slots.
func NewAhocorasick(bitLength int) *Ahocorasick {
	ac := new(Ahocorasick)
	ac.matchers = make([]*ahocorasick.Matcher, bitLength)
	ac.toBuild = make([][][]byte, bitLength)
	ac.regexp = make([][]*regexp.Regexp, bitLength)
	return ac
}
// AddSet registers patterns of kind typ under bitIndex.
//
// Full, suffix and keyword patterns are turned into literal byte strings for
// the Aho-Corasick automaton that Build creates later. "^" and "$" are plain
// sentinel bytes here (not regex anchors): MatchDomainBitmap wraps the domain
// in the same bytes, so "^name$" only hits an exact name and "name$" only
// hits at the end of the domain. Regex patterns are compiled immediately and
// stored separately.
//
// The first failure (invalid regex or unknown typ) is stored in n.err and
// reported by Build; once an error is stored, further calls do nothing.
func (n *Ahocorasick) AddSet(bitIndex int, patterns []string, typ consts.RoutingDomainKey) {
	if n.err != nil {
		return
	}
	switch typ {
	case consts.RoutingDomainKey_Full:
		for _, d := range patterns {
			// MatchDomainBitmap lowercases the domain before matching, so a
			// literal pattern must be lowercased too or it can never hit.
			d = strings.ToLower(d)
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte("^"+d+"$"))
		}
	case consts.RoutingDomainKey_Suffix:
		for _, d := range patterns {
			d = strings.ToLower(d)
			if strings.HasPrefix(d, ".") {
				// abc.example.com
				n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte(d+"$"))
				continue
			}
			// xxx.example.com
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte("."+d+"$"))
			// example.com
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte("^"+d+"$"))
		}
	case consts.RoutingDomainKey_Keyword:
		for _, d := range patterns {
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte(strings.ToLower(d)))
		}
	case consts.RoutingDomainKey_Regex:
		for _, d := range patterns {
			r, err := regexp.Compile(d)
			if err != nil {
				// Keep the compiler's diagnostic so the user can see what is
				// wrong with the pattern.
				n.err = fmt.Errorf("failed to compile regex %q: %w", d, err)
				return
			}
			n.regexp[bitIndex] = append(n.regexp[bitIndex], r)
		}
	default:
		n.err = fmt.Errorf("unknown RoutingDomainKey: %v", typ)
	}
}
// MatchDomainBitmap returns a bitmap with one bit per bit index (32 per
// uint32 word); bit i is set when domain hits a literal pattern or a regex
// registered under index i.
//
// The domain is lowercased and a trailing "." is dropped before matching.
// A domain containing "^" or "$" yields an all-zero bitmap, because those
// bytes are reserved as head/tail sentinels for the literal patterns.
func (n *Ahocorasick) MatchDomainBitmap(domain string) (bitmap []uint32) {
	bitmap = make([]uint32, (len(n.matchers)+31)/32)
	// Domain should not contain ^ or $.
	if strings.ContainsAny(domain, "^$") {
		return bitmap
	}
	domain = strings.ToLower(strings.TrimSuffix(domain, "."))

	// Add magic chars as head and tail. Only the literal automata see this
	// wrapped form; it is converted once and shared by every automaton.
	wrapped := []byte("^" + domain + "$")
	for _, i := range n.validIndexes {
		if hits := n.matchers[i].MatchThreadSafe(wrapped); len(hits) > 0 {
			bitmap[i/32] |= 1 << (i % 32)
		}
	}

	// Regex matching is independent and must see the plain domain: feeding it
	// the sentinel-wrapped string would make anchored expressions such as
	// `^example\.com$` unmatchable.
	for _, i := range n.validRegexpIndexes {
		if bitmap[i/32]&(1<<(i%32)) != 0 {
			// Already matched.
			continue
		}
		for _, r := range n.regexp[i] {
			if r.MatchString(domain) {
				bitmap[i/32] |= 1 << (i % 32)
				break
			}
		}
	}
	return bitmap
}
// Build finalizes the matcher. It returns the error recorded by AddSet, if
// any; otherwise it creates one automaton per bit index that has literal
// patterns, records which bit indexes carry regex rules, and releases the
// pending pattern buffers.
func (n *Ahocorasick) Build() error {
	if n.err != nil {
		return n.err
	}
	n.validIndexes = make([]int, 0, len(n.toBuild)/8)
	for i, toBuild := range n.toBuild {
		if len(toBuild) == 0 {
			continue
		}
		n.matchers[i] = ahocorasick.NewMatcher(toBuild)
		n.validIndexes = append(n.validIndexes, i)
	}
	// MatchDomainBitmap only evaluates regexes for the indexes listed in
	// validRegexpIndexes, so the list has to be filled here; otherwise every
	// regex rule is silently ignored.
	n.validRegexpIndexes = nil
	for i, rs := range n.regexp {
		if len(rs) > 0 {
			n.validRegexpIndexes = append(n.validRegexpIndexes, i)
		}
	}
	// Release it.
	n.toBuild = nil
	return nil
}

View File

@ -0,0 +1,236 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
*/
package domain_matcher
import (
"fmt"
"github.com/sirupsen/logrus"
"github.com/v2rayA/dae/common/consts"
"github.com/v2rayA/dae/component/routing"
"github.com/v2rayA/dae/config"
"github.com/v2rayA/dae/pkg/config_parser"
"hash/fnv"
"math/rand"
"reflect"
"testing"
)
// TestSample is the pool of domain names that runBenchmark draws from (and
// mutates copies of) to produce lookup inputs.
var TestSample = []string{
	"9game.cn",
	"aliapp.org",
	"alibaba-inc.com",
	"alibaba.com",
	"alibabacapital.com",
	"alibabacorp.com",
	"alibabadoctor.com",
	"alibabafuturehotel.com",
	"alibabagroup.com",
	"alibabaplanet.com",
	"alibabaued.com",
	"alibabausercontent.com",
	"alifanyi.com",
	"alihealth.com.cn",
	"alihealth.hk",
	"aliimg.com",

	"51y5.net",
	"a.adtng.com",
	"aaxads.com",
	"addthisedge.com",
	"adtrue.com",
	"ad-srv.net",
	"ad.api.moji.com",
	"ad.wang502.com",
	"adbutter.net",
	"ads.trafficjunky.net",
	"adtechus.com",
	"adxprtz.com",
	"cdn.advertserve.com",
	"cdn.banclip.com",
	"cfts1tifqr.com",
	"contentabc.com",
	"cretgate.com",
	"ero-advertising.com",
	"eroadvertising.com",
	"exoclick.com",
	"exosrv.com",
	"go2.global",
	"img-bss.csdn.net",
	"imglnkc.com",
	"imglnkd.com",
	"innovid.com",
	"ja2.gamersky.com",
	"jl3.yjaxa.top",
	"juicyads.com",
	"kepler-37b.com",
	"lqc006.com",
	"moat.com",
	"moatads.com",
	"realsrv.com",
	"s4yxaqyq95.com",
	"shhs-ydd8x2.yjrmss.cn",
	"static.javhd.com",
	"tm-banners.gamingadult.com",
	"trafficfactory.biz",
	"tsyndicate.com",

	"abchina.com",
	"bankcomm.com",
	"bankofbeijing.com.cn",
	"bosc.cn",
	"bsb.com.cn",
	"ccb.com",
	"cgbchina.com.cn",
	"cib.com.cn",
	"citibank.com.cn",
	"cmbc.com.cn",
	"hsbc.com.cn",
	"hxb.com.cn",
	"njcb.com.cn",
	"psbc.com",
	"spdb.com.cn",
	"whccb.com",
}
// RoutingMatcherBuilder is the matcher builder used by the benchmarks. It
// embeds routing.DefaultMatcherBuilder and, through AddDomain, records the
// domain rule sets it is given instead of building a real matcher.
type RoutingMatcherBuilder struct {
	*routing.DefaultMatcherBuilder

	// simulatedDomainSet collects the sets passed to AddDomain, in call order.
	simulatedDomainSet []routing.DomainSet
	// err is the first error recorded by AddDomain.
	err error

	outboundName2Id map[string]uint8
	Fallback        string
}
// OutboundToId derives an id for outbound by taking the low 8 bits of the
// 64-bit FNV-1 hash of its name.
func (b *RoutingMatcherBuilder) OutboundToId(outbound string) uint8 {
	hasher := fnv.New64()
	hasher.Write([]byte(outbound))
	sum := hasher.Sum64()
	return uint8(sum & 0xFF)
}
// AddDomain appends one domain rule set to simulatedDomainSet. The set's
// RuleIndex is its position in that slice. key must be one of the regex,
// full, keyword or suffix routing domain keys; any other key is stored as an
// error in b.err, after which later calls are ignored. f and outbound are
// not used.
func (b *RoutingMatcherBuilder) AddDomain(f *config_parser.Function, key string, values []string, outbound string) {
	if b.err != nil {
		return
	}

	domainKey := consts.RoutingDomainKey(key)
	supported := domainKey == consts.RoutingDomainKey_Regex ||
		domainKey == consts.RoutingDomainKey_Full ||
		domainKey == consts.RoutingDomainKey_Keyword ||
		domainKey == consts.RoutingDomainKey_Suffix
	if !supported {
		b.err = fmt.Errorf("AddDomain: unsupported key: %v", key)
		return
	}

	set := routing.DomainSet{
		Key:       domainKey,
		RuleIndex: len(b.simulatedDomainSet),
		Domains:   values,
	}
	b.simulatedDomainSet = append(b.simulatedDomainSet, set)
}
// getDomain produces the domain rule sets shared by all benchmarks. It parses
// the embedded routing section, runs the rule optimizers over the parsed
// rules, and then feeds them through a RoutingMatcherBuilder, returning the
// sets that builder collected.
func getDomain() (simulatedDomainSet []routing.DomainSet, err error) {
	sections, err := config_parser.Parse(`
routing {
pname(NetworkManager, dnsmasq, systemd-resolved) -> must_direct # Traffic of DNS in local must be direct to avoid loop when binding to WAN.
pname(sogou-qimpanel, sogou-qimpanel-watchdog) -> block
ip(geoip:private, 224.0.0.0/3, 'ff00::/8') -> direct # Put it in front unless you know what you're doing.
domain(geosite:bing)->us
domain(full:dns.google) && port(53) -> direct
domain(geosite:category-ads-all) -> block
ip(geoip:private) -> direct
ip(geoip:cn) -> direct
domain(geosite:cn) -> direct
final: my_group
}`)
	if err != nil {
		return nil, err
	}

	// Decode the first (and only) parsed section into a routing config.
	var r config.Routing
	err = config.RoutingRuleAndParamParser(reflect.ValueOf(&r), sections[0])
	if err != nil {
		return nil, err
	}

	var rules []*config_parser.RoutingRule
	rules, err = routing.ApplyRulesOptimizers(
		r.Rules,
		&routing.RefineFunctionParamKeyOptimizer{},
		&routing.DatReaderOptimizer{Logger: logrus.StandardLogger()},
		&routing.MergeAndSortRulesOptimizer{},
		&routing.DeduplicateParamsOptimizer{},
	)
	if err != nil {
		return nil, fmt.Errorf("ApplyRulesOptimizers error:\n%w", err)
	}

	builder := RoutingMatcherBuilder{}
	err = routing.ApplyMatcherBuilder(logrus.StandardLogger(), &builder, rules, r.Fallback)
	if err != nil {
		return nil, fmt.Errorf("ApplyMatcherBuilder: %w", err)
	}
	return builder.simulatedDomainSet, nil
}
// BenchmarkBruteforce measures the Bruteforce matcher. Rule loading and
// matcher construction happen while the timer is stopped.
func BenchmarkBruteforce(b *testing.B) {
	b.StopTimer()
	logrus.SetLevel(logrus.WarnLevel)

	sets, err := getDomain()
	if err != nil {
		b.Fatal(err)
	}
	matcher := NewBruteforce(sets)

	b.StartTimer()
	runBenchmark(b, matcher)
}
// BenchmarkGoRegexpNfa measures the GoRegexpNfa matcher. Rule loading,
// AddSet calls and Build happen while the timer is stopped.
func BenchmarkGoRegexpNfa(b *testing.B) {
	b.StopTimer()
	logrus.SetLevel(logrus.WarnLevel)

	sets, err := getDomain()
	if err != nil {
		b.Fatal(err)
	}

	matcher := NewGoRegexpNfa(consts.MaxMatchSetLen)
	for i := range sets {
		set := sets[i]
		matcher.AddSet(set.RuleIndex, set.Domains, set.Key)
	}
	err = matcher.Build()
	if err != nil {
		b.Fatal(err)
	}

	b.StartTimer()
	runBenchmark(b, matcher)
}
// BenchmarkAhocorasick measures the Ahocorasick matcher. Rule loading,
// AddSet calls and Build happen while the timer is stopped.
func BenchmarkAhocorasick(b *testing.B) {
	b.StopTimer()
	logrus.SetLevel(logrus.WarnLevel)

	sets, err := getDomain()
	if err != nil {
		b.Fatal(err)
	}

	ac := NewAhocorasick(consts.MaxMatchSetLen)
	for i := range sets {
		set := sets[i]
		ac.AddSet(set.RuleIndex, set.Domains, set.Key)
	}
	err = ac.Build()
	if err != nil {
		b.Fatal(err)
	}

	b.StartTimer()
	runBenchmark(b, ac)
}
// runBenchmark calls matcher.MatchDomainBitmap b.N times on inputs derived
// from TestSample. Each iteration picks a sample and then, by a random draw:
//   - 40%: prepends a random lowercase label of 0-4 letters and a dot;
//   - 20%: cuts off a random-length prefix of the sample;
//   - 40%: uses the sample unchanged.
//
// A private generator with a fixed seed is used, so every matcher sees the
// same input sequence without reseeding the process-wide math/rand source.
func runBenchmark(b *testing.B, matcher routing.DomainMatcher) {
	rng := rand.New(rand.NewSource(100))
	for i := 0; i < b.N; i++ {
		sample := TestSample[rng.Intn(len(TestSample))]
		switch choice := rng.Intn(10); {
		case choice < 4:
			label := make([]byte, rng.Intn(5))
			for j := range label {
				// +1 so that 'z' itself can be drawn (Intn's bound is exclusive).
				label[j] = 'a' + byte(rng.Intn('z'-'a'+1))
			}
			sample = string(label) + "." + sample
		case choice < 6:
			sample = sample[rng.Intn(len(sample)):]
		}
		matcher.MatchDomainBitmap(sample)
	}
}

View File

@ -0,0 +1,67 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
*/
package domain_matcher
import (
"github.com/v2rayA/dae/common/consts"
"github.com/v2rayA/dae/component/routing"
"regexp"
"strings"
)
// Bruteforce is a domain matcher that checks a domain against every pattern
// of every stored domain set, one by one.
type Bruteforce struct {
	// simulatedDomainSet is the list of rule sets scanned on each lookup.
	simulatedDomainSet []routing.DomainSet
	// err is returned by Build when non-nil.
	err error
}
// NewBruteforce returns a matcher that scans simulatedDomainSet. The slice is
// stored as given, not copied.
func NewBruteforce(simulatedDomainSet []routing.DomainSet) *Bruteforce {
	bf := new(Bruteforce)
	bf.simulatedDomainSet = simulatedDomainSet
	return bf
}
// AddSet is a no-op: the body is empty and all arguments are ignored. The
// domain sets this matcher scans are the ones handed to NewBruteforce.
func (n *Bruteforce) AddSet(bitIndex int, patterns []string, typ consts.RoutingDomainKey) {
}
// MatchDomainBitmap returns a bitmap with one bit per stored domain set (32
// per uint32 word); the bit at a set's RuleIndex is set when domain hits at
// least one pattern of that set.
//
// The domain is lowercased and a trailing "." is dropped once up front, so
// every rule kind compares case-insensitively against the same normalized
// name:
//   - suffix:  the domain equals the pattern or ends with "." + pattern;
//   - full:    the domain equals the pattern;
//   - keyword: the domain contains the pattern;
//   - regex:   the pattern, compiled as a regular expression, matches.
func (n *Bruteforce) MatchDomainBitmap(domain string) (bitmap []uint32) {
	bitmap = make([]uint32, (len(n.simulatedDomainSet)+31)/32)
	domain = strings.ToLower(strings.TrimSuffix(domain, "."))
	for _, s := range n.simulatedDomainSet {
		for _, d := range s.Domains {
			var hit bool
			switch s.Key {
			case consts.RoutingDomainKey_Suffix:
				d = strings.ToLower(d)
				hit = domain == d || strings.HasSuffix(domain, "."+strings.TrimPrefix(d, "."))
			case consts.RoutingDomainKey_Full:
				hit = strings.EqualFold(domain, d)
			case consts.RoutingDomainKey_Keyword:
				hit = strings.Contains(domain, strings.ToLower(d))
			case consts.RoutingDomainKey_Regex:
				// NOTE(review): the pattern is recompiled on every lookup and
				// MustCompile panics on an invalid expression; kept as-is
				// because this type has no place to cache or report it.
				hit = regexp.MustCompile(d).MatchString(domain)
			}
			if hit {
				bitmap[s.RuleIndex/32] |= 1 << (s.RuleIndex % 32)
				// One hit is enough for this set; move on to the next set.
				break
			}
		}
	}
	return bitmap
}
// Build does no work for this matcher; it only reports n.err, which is nil
// unless something stored an error in it.
func (n *Bruteforce) Build() error {
	return n.err
}

View File

@ -0,0 +1,93 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
*/
package domain_matcher
import (
"fmt"
"github.com/v2rayA/dae/common/consts"
"regexp"
"strings"
)
// GoRegexpNfa is a domain matcher that compiles all patterns of a bit index
// into a single regular expression.
type GoRegexpNfa struct {
	// nfa holds the compiled expression of each bit index; an entry stays
	// nil until Build creates it.
	nfa []*regexp.Regexp
	// toBuild holds the pattern sources collected by AddSet for each bit
	// index; Build releases it.
	toBuild [][]string

	// validIndexes lists the bit indexes that own a compiled expression.
	validIndexes []int

	// err is the first error recorded by AddSet; Build returns it.
	err error
}
// NewGoRegexpNfa returns an empty matcher whose compiled-expression table and
// pending-pattern table each have bitLength slots.
func NewGoRegexpNfa(bitLength int) *GoRegexpNfa {
	m := new(GoRegexpNfa)
	m.nfa = make([]*regexp.Regexp, bitLength)
	m.toBuild = make([][]string, bitLength)
	return m
}
// AddSet registers patterns of kind typ under bitIndex as regular-expression
// sources; Build later joins all sources of a bit index with "|" and
// compiles them.
//
// Full, suffix and keyword patterns are literal domain text, so they are
// lowercased (MatchDomainBitmap lowercases the domain) and escaped with
// regexp.QuoteMeta before being anchored. Without escaping, the "." in a
// domain would match any character. Regex patterns are validated here and
// wrapped in a non-capturing group so that flags inside one pattern cannot
// affect its neighbours after joining.
//
// The first failure (invalid regex or unknown typ) is stored in n.err and
// reported by Build; once an error is stored, further calls do nothing.
func (n *GoRegexpNfa) AddSet(bitIndex int, patterns []string, typ consts.RoutingDomainKey) {
	if n.err != nil {
		return
	}
	switch typ {
	case consts.RoutingDomainKey_Full:
		for _, d := range patterns {
			lit := regexp.QuoteMeta(strings.ToLower(d))
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], "^"+lit+"$")
		}
	case consts.RoutingDomainKey_Suffix:
		for _, d := range patterns {
			d = strings.ToLower(d)
			// Any sub-domain: a literal dot followed by the suffix.
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], `\.`+regexp.QuoteMeta(strings.TrimPrefix(d, "."))+"$")
			// The name itself.
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], "^"+regexp.QuoteMeta(d)+"$")
		}
	case consts.RoutingDomainKey_Keyword:
		for _, d := range patterns {
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], regexp.QuoteMeta(strings.ToLower(d)))
		}
	case consts.RoutingDomainKey_Regex:
		for _, d := range patterns {
			// Check if it is a valid regexp.
			if _, err := regexp.Compile(d); err != nil {
				n.err = fmt.Errorf("failed to compile regex %q: %w", d, err)
				return
			}
			n.toBuild[bitIndex] = append(n.toBuild[bitIndex], "(?:"+d+")")
		}
	default:
		n.err = fmt.Errorf("unknown RoutingDomainKey: %v", typ)
	}
}
// MatchDomainBitmap returns a bitmap with one bit per bit index (32 per
// uint32 word); bit i is set when the compiled expression of index i matches
// domain. The domain is lowercased and a trailing "." is dropped first.
func (n *GoRegexpNfa) MatchDomainBitmap(domain string) (bitmap []uint32) {
	words := (len(n.nfa) + 31) / 32
	bitmap = make([]uint32, words)

	normalized := strings.TrimSuffix(domain, ".")
	normalized = strings.ToLower(normalized)

	for _, idx := range n.validIndexes {
		if !n.nfa[idx].MatchString(normalized) {
			continue
		}
		bitmap[idx/32] |= 1 << (idx % 32)
	}
	return bitmap
}
// Build finalizes the matcher. It returns the error recorded by AddSet, if
// any; otherwise, for every bit index with pending patterns, it joins them
// with "|", compiles the result, and marks the index as valid. The pending
// pattern buffers are released on success.
func (n *GoRegexpNfa) Build() error {
	if n.err != nil {
		return n.err
	}

	n.validIndexes = make([]int, 0, len(n.toBuild)/8)
	for idx, patterns := range n.toBuild {
		if len(patterns) > 0 {
			compiled, err := regexp.Compile(strings.Join(patterns, "|"))
			if err != nil {
				return fmt.Errorf("failed to build NFA: %w", err)
			}
			n.nfa[idx] = compiled
			n.validIndexes = append(n.validIndexes, idx)
		}
	}

	// The sources are no longer needed once compiled.
	n.toBuild = nil
	return nil
}