2023-02-18 17:27:28 +07:00
|
|
|
/*
 * SPDX-License-Identifier: AGPL-3.0-only
 * Copyright (c) 2022-2024, daeuniverse Organization <dae@v2raya.org>
 */
|
|
|
|
|
|
|
|
package domain_matcher
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2023-04-23 12:27:29 +07:00
|
|
|
"hash/fnv"
|
|
|
|
"math/rand"
|
|
|
|
"reflect"
|
|
|
|
"testing"
|
|
|
|
|
2024-01-01 15:06:53 +07:00
|
|
|
"github.com/daeuniverse/dae/common/assets"
|
|
|
|
|
2023-03-14 14:01:55 +07:00
|
|
|
"github.com/daeuniverse/dae/common/consts"
|
|
|
|
"github.com/daeuniverse/dae/component/routing"
|
|
|
|
"github.com/daeuniverse/dae/config"
|
|
|
|
"github.com/daeuniverse/dae/pkg/config_parser"
|
2023-04-02 10:07:53 +07:00
|
|
|
"github.com/sirupsen/logrus"
|
2023-02-18 17:27:28 +07:00
|
|
|
)
|
|
|
|
|
|
|
|
// TestSample is the pool of domain names that runBenchmark draws its lookup
// inputs from. Entries are selected by index from a seeded random source, so
// changing the order or the number of entries changes the benchmark workload.
var TestSample = []string{
	"9game.cn",
	"aliapp.org",
	"alibaba-inc.com",
	"alibaba.com",
	"alibabacapital.com",
	"alibabacorp.com",
	"alibabadoctor.com",
	"alibabafuturehotel.com",
	"alibabagroup.com",
	"alibabaplanet.com",
	"alibabaued.com",
	"alibabausercontent.com",
	"alifanyi.com",
	"alihealth.com.cn",
	"alihealth.hk",
	"aliimg.com",

	"51y5.net",
	"a.adtng.com",
	"aaxads.com",
	"addthisedge.com",
	"adtrue.com",
	"ad-srv.net",
	"ad.api.moji.com",
	"ad.wang502.com",
	"adbutter.net",
	"ads.trafficjunky.net",
	"adtechus.com",
	"adxprtz.com",
	"cdn.advertserve.com",
	"cdn.banclip.com",
	"cfts1tifqr.com",
	"contentabc.com",
	"cretgate.com",
	"ero-advertising.com",
	"eroadvertising.com",
	"exoclick.com",
	"exosrv.com",
	"go2.global",
	"img-bss.csdn.net",
	"imglnkc.com",
	"imglnkd.com",
	"innovid.com",
	"ja2.gamersky.com",
	"jl3.yjaxa.top",
	"juicyads.com",
	"kepler-37b.com",
	"lqc006.com",
	"moat.com",
	"moatads.com",
	"realsrv.com",
	"s4yxaqyq95.com",
	"shhs-ydd8x2.yjrmss.cn",
	"static.javhd.com",
	"tm-banners.gamingadult.com",
	"trafficfactory.biz",
	"tsyndicate.com",

	"abchina.com",
	"bankcomm.com",
	"bankofbeijing.com.cn",
	"bosc.cn",
	"bsb.com.cn",
	"ccb.com",
	"cgbchina.com.cn",
	"cib.com.cn",
	"citibank.com.cn",
	"cmbc.com.cn",
	"hsbc.com.cn",
	"hxb.com.cn",
	"njcb.com.cn",
	"psbc.com",
	"spdb.com.cn",
	"whccb.com",

	"_https._tcp.mirrors.ustc.edu.cn",
	"ipv4.master.test-ipv6.com",
}
|
|
|
|
|
|
|
|
type RoutingMatcherBuilder struct {
|
|
|
|
simulatedDomainSet []routing.DomainSet
|
|
|
|
Fallback string
|
|
|
|
|
|
|
|
err error
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *RoutingMatcherBuilder) OutboundToId(outbound string) uint8 {
|
|
|
|
h := fnv.New64()
|
|
|
|
h.Write([]byte(outbound))
|
|
|
|
return uint8(h.Sum64() & 0xFF)
|
|
|
|
}
|
|
|
|
|
2023-02-20 17:24:29 +07:00
|
|
|
func (b *RoutingMatcherBuilder) AddDomain(f *config_parser.Function, key string, values []string, outbound *routing.Outbound) {
|
2023-02-18 17:27:28 +07:00
|
|
|
if b.err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
switch consts.RoutingDomainKey(key) {
|
|
|
|
case consts.RoutingDomainKey_Regex,
|
|
|
|
consts.RoutingDomainKey_Full,
|
|
|
|
consts.RoutingDomainKey_Keyword,
|
|
|
|
consts.RoutingDomainKey_Suffix:
|
|
|
|
default:
|
2023-02-25 01:38:21 +07:00
|
|
|
b.err = fmt.Errorf("addDomain: unsupported key: %v", key)
|
2023-02-18 17:27:28 +07:00
|
|
|
return
|
|
|
|
}
|
|
|
|
b.simulatedDomainSet = append(b.simulatedDomainSet, routing.DomainSet{
|
|
|
|
Key: consts.RoutingDomainKey(key),
|
|
|
|
RuleIndex: len(b.simulatedDomainSet),
|
|
|
|
Domains: values,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func getDomain() (simulatedDomainSet []routing.DomainSet, err error) {
|
|
|
|
var rules []*config_parser.RoutingRule
|
|
|
|
sections, err := config_parser.Parse(`
|
|
|
|
routing {
|
2024-01-01 15:06:53 +07:00
|
|
|
domain(suffix: test-ipv6.com)->direct
|
2023-02-18 17:27:28 +07:00
|
|
|
domain(geosite:bing)->us
|
2024-01-01 15:06:53 +07:00
|
|
|
domain(_https._tcp.mirrors.ustc.edu.cn)->us
|
2023-04-02 10:07:53 +07:00
|
|
|
domain(full:dns.google.com) -> direct
|
2023-02-18 17:27:28 +07:00
|
|
|
domain(geosite:category-ads-all) -> block
|
|
|
|
domain(geosite:cn) -> direct
|
|
|
|
}`)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
var r config.Routing
|
2023-02-25 01:38:21 +07:00
|
|
|
if err = config.SectionParser(reflect.ValueOf(&r), sections[0]); err != nil {
|
2023-02-18 17:27:28 +07:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if rules, err = routing.ApplyRulesOptimizers(r.Rules,
|
2023-02-25 02:12:35 +07:00
|
|
|
&routing.AliasOptimizer{},
|
2023-05-30 21:48:36 +07:00
|
|
|
&routing.DatReaderOptimizer{Logger: logrus.StandardLogger(), LocationFinder: assets.NewLocationFinder(nil)},
|
2023-02-18 17:27:28 +07:00
|
|
|
&routing.MergeAndSortRulesOptimizer{},
|
|
|
|
&routing.DeduplicateParamsOptimizer{},
|
|
|
|
); err != nil {
|
|
|
|
return nil, fmt.Errorf("ApplyRulesOptimizers error:\n%w", err)
|
|
|
|
}
|
|
|
|
builder := RoutingMatcherBuilder{}
|
2023-02-25 01:38:21 +07:00
|
|
|
rb := routing.NewRulesBuilder(logrus.StandardLogger())
|
|
|
|
rb.RegisterFunctionParser("domain", func(log *logrus.Logger, f *config_parser.Function, key string, paramValueGroup []string, overrideOutbound *routing.Outbound) (err error) {
|
|
|
|
builder.AddDomain(f, key, paramValueGroup, overrideOutbound)
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err = rb.Apply(rules); err != nil {
|
|
|
|
return nil, fmt.Errorf("Apply: %w", err)
|
2023-02-18 17:27:28 +07:00
|
|
|
}
|
|
|
|
return builder.simulatedDomainSet, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkBruteforce(b *testing.B) {
|
|
|
|
b.StopTimer()
|
|
|
|
logrus.SetLevel(logrus.WarnLevel)
|
|
|
|
simulatedDomainSet, err := getDomain()
|
|
|
|
if err != nil {
|
|
|
|
b.Fatal(err)
|
|
|
|
}
|
2023-02-19 13:08:13 +07:00
|
|
|
bf := NewBruteforce(consts.MaxMatchSetLen)
|
|
|
|
for _, domains := range simulatedDomainSet {
|
|
|
|
bf.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
|
|
|
|
}
|
|
|
|
if err = bf.Build(); err != nil {
|
|
|
|
b.Fatal(err)
|
|
|
|
}
|
2023-02-18 17:27:28 +07:00
|
|
|
b.StartTimer()
|
|
|
|
runBenchmark(b, bf)
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkGoRegexpNfa(b *testing.B) {
|
|
|
|
b.StopTimer()
|
|
|
|
logrus.SetLevel(logrus.WarnLevel)
|
|
|
|
simulatedDomainSet, err := getDomain()
|
|
|
|
if err != nil {
|
|
|
|
b.Fatal(err)
|
|
|
|
}
|
|
|
|
nfa := NewGoRegexpNfa(consts.MaxMatchSetLen)
|
|
|
|
for _, domains := range simulatedDomainSet {
|
|
|
|
nfa.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
|
|
|
|
}
|
|
|
|
if err = nfa.Build(); err != nil {
|
|
|
|
b.Fatal(err)
|
|
|
|
}
|
|
|
|
b.StartTimer()
|
|
|
|
runBenchmark(b, nfa)
|
|
|
|
}
|
|
|
|
|
2023-02-18 21:04:28 +07:00
|
|
|
func BenchmarkAhocorasickSlimtrie(b *testing.B) {
|
2023-02-18 17:27:28 +07:00
|
|
|
b.StopTimer()
|
|
|
|
logrus.SetLevel(logrus.WarnLevel)
|
|
|
|
simulatedDomainSet, err := getDomain()
|
|
|
|
if err != nil {
|
|
|
|
b.Fatal(err)
|
|
|
|
}
|
2023-04-23 12:27:29 +07:00
|
|
|
ahocorasick := NewAhocorasickSlimtrie(logrus.StandardLogger(), consts.MaxMatchSetLen)
|
2023-02-18 17:27:28 +07:00
|
|
|
for _, domains := range simulatedDomainSet {
|
|
|
|
ahocorasick.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
|
|
|
|
}
|
|
|
|
if err = ahocorasick.Build(); err != nil {
|
|
|
|
b.Fatal(err)
|
|
|
|
}
|
|
|
|
b.StartTimer()
|
|
|
|
runBenchmark(b, ahocorasick)
|
|
|
|
}
|
|
|
|
|
|
|
|
func runBenchmark(b *testing.B, matcher routing.DomainMatcher) {
|
|
|
|
rand.Seed(100)
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
sample := TestSample[rand.Intn(len(TestSample))]
|
|
|
|
choice := rand.Intn(10)
|
|
|
|
switch {
|
|
|
|
case choice < 4:
|
|
|
|
addN := rand.Intn(5)
|
|
|
|
buf := make([]byte, addN)
|
|
|
|
for i := range buf {
|
|
|
|
buf[i] = 'a' + byte(rand.Intn('z'-'a'))
|
|
|
|
}
|
|
|
|
sample = string(buf) + "." + sample
|
|
|
|
case choice >= 4 && choice < 6:
|
|
|
|
k := rand.Intn(len(sample))
|
|
|
|
sample = sample[k:]
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
matcher.MatchDomainBitmap(sample)
|
|
|
|
}
|
|
|
|
}
|