diff --git a/component/routing/domain_matcher/ahocorasick_slimtrie.go b/component/routing/domain_matcher/ahocorasick_slimtrie.go index e746b97..2a39cdb 100644 --- a/component/routing/domain_matcher/ahocorasick_slimtrie.go +++ b/component/routing/domain_matcher/ahocorasick_slimtrie.go @@ -7,10 +7,9 @@ package domain_matcher import ( "fmt" - "github.com/openacid/slim/encode" - "github.com/openacid/slim/trie" "github.com/v2rayA/ahocorasick-domain" "github.com/v2rayA/dae/common/consts" + "github.com/v2rayA/dae/pkg/trie" "regexp" "sort" "strings" @@ -21,7 +20,7 @@ type AhocorasickSlimtrie struct { validTrieIndexes []int validRegexpIndexes []int ac []*ahocorasick.Matcher - trie []*trie.SlimTrie + trie []*trie.Trie regexp [][]*regexp.Regexp toBuildAc [][][]byte @@ -32,7 +31,7 @@ type AhocorasickSlimtrie struct { func NewAhocorasickSlimtrie(bitLength int) *AhocorasickSlimtrie { return &AhocorasickSlimtrie{ ac: make([]*ahocorasick.Matcher, bitLength), - trie: make([]*trie.SlimTrie, bitLength), + trie: make([]*trie.Trie, bitLength), regexp: make([][]*regexp.Regexp, bitLength), toBuildAc: make([][][]byte, bitLength), toBuildTrie: make([][]string, bitLength), @@ -86,8 +85,7 @@ func (n *AhocorasickSlimtrie) MatchDomainBitmap(domain string) (bitmap []uint32) N++ } bitmap = make([]uint32, N) - // Add magic chars as head and tail. - domain = "^" + strings.ToLower(strings.TrimSuffix(domain, ".")) + "$" + domain = strings.ToLower(strings.TrimSuffix(domain, ".")) // Domain should consist of 'a'-'z' and '.' and '-' for _, b := range []byte(domain) { if !ahocorasick.IsValidChar(b) { @@ -95,23 +93,25 @@ func (n *AhocorasickSlimtrie) MatchDomainBitmap(domain string) (bitmap []uint32) } } // Suffix matching. - suffixTrieDomain := ToSuffixTrieString(domain) + suffixTrieDomain := ToSuffixTrieString("^" + domain) for _, i := range n.validTrieIndexes { if bitmap[i/32]&(1<<(i%32)) > 0 { // Already matched. 
continue } - if _, ok := n.trie[i].Get(suffixTrieDomain); ok { + if n.trie[i].HasPrefix(suffixTrieDomain) { bitmap[i/32] |= 1 << (i % 32) } } // Keyword matching. + // Add magic chars as head and tail. + acDomain := "^" + domain + "$" for _, i := range n.validAcIndexes { if bitmap[i/32]&(1<<(i%32)) > 0 { // Already matched. continue } - if n.ac[i].Contains([]byte(domain)) { + if n.ac[i].Contains([]byte(acDomain)) { bitmap[i/32] |= 1 << (i % 32) } } @@ -167,18 +167,13 @@ func (n *AhocorasickSlimtrie) Build() (err error) { } // Build succinct trie. - trueValue := true for i, toBuild := range n.toBuildTrie { if len(toBuild) == 0 { continue } toBuild = ToSuffixTrieStrings(toBuild) sort.Strings(toBuild) - n.trie[i], err = trie.NewSlimTrie(encode.Dummy{}, toBuild, nil, trie.Opt{ - DedupValue: &trueValue, - // Set opt to complete to avoid false positive. - Complete: &trueValue, - }) + n.trie[i] = trie.NewTrie(toBuild) if err != nil { return err } diff --git a/component/routing/domain_matcher/ahocorasick_slimtrie_test.go b/component/routing/domain_matcher/ahocorasick_slimtrie_test.go new file mode 100644 index 0000000..825d1ab --- /dev/null +++ b/component/routing/domain_matcher/ahocorasick_slimtrie_test.go @@ -0,0 +1,59 @@ +/* + * SPDX-License-Identifier: AGPL-3.0-only + * Copyright (c) 2023, v2rayA Organization + */ + +package domain_matcher + +import ( + "github.com/sirupsen/logrus" + "github.com/v2rayA/dae/common/consts" + "golang.org/x/exp/slices" + "math/rand" + "testing" +) + +func TestAhocorasickSlimtrie(t *testing.T) { + + logrus.SetLevel(logrus.TraceLevel) + simulatedDomainSet, err := getDomain() + if err != nil { + t.Fatal(err) + } + bf := NewBruteforce(consts.MaxMatchSetLen) + actrie := NewAhocorasickSlimtrie(consts.MaxMatchSetLen) + for _, domains := range simulatedDomainSet { + bf.AddSet(domains.RuleIndex, domains.Domains, domains.Key) + actrie.AddSet(domains.RuleIndex, domains.Domains, domains.Key) + } + if err = bf.Build(); err != nil { + t.Fatal(err) + } + 
if err = actrie.Build(); err != nil { + t.Fatal(err) + } + + rand.Seed(200) + for i := 0; i < 10000; i++ { + sample := TestSample[rand.Intn(len(TestSample))] + choice := rand.Intn(10) + switch { + case choice < 4: + addN := rand.Intn(5) + buf := make([]byte, addN) + for i := range buf { + buf[i] = 'a' + byte(rand.Intn('z'-'a')) + } + sample = string(buf) + "." + sample + case choice >= 4 && choice < 6: + k := rand.Intn(len(sample)) + sample = sample[k:] + default: + } + bitmap := bf.MatchDomainBitmap(sample) + bitmap2 := actrie.MatchDomainBitmap(sample) + if !slices.Equal(bitmap, bitmap2) { + t.Fatal(i, sample, bitmap, bitmap2) + } + } +} diff --git a/component/routing/domain_matcher/benchmark_test.go b/component/routing/domain_matcher/benchmark_test.go index 2b0ac70..799c1eb 100644 --- a/component/routing/domain_matcher/benchmark_test.go +++ b/component/routing/domain_matcher/benchmark_test.go @@ -172,7 +172,13 @@ func BenchmarkBruteforce(b *testing.B) { if err != nil { b.Fatal(err) } - bf := NewBruteforce(simulatedDomainSet) + bf := NewBruteforce(consts.MaxMatchSetLen) + for _, domains := range simulatedDomainSet { + bf.AddSet(domains.RuleIndex, domains.Domains, domains.Key) + } + if err = bf.Build(); err != nil { + b.Fatal(err) + } b.StartTimer() runBenchmark(b, bf) } diff --git a/component/routing/domain_matcher/bruteforce.go b/component/routing/domain_matcher/bruteforce.go index ac86aea..fc3749b 100644 --- a/component/routing/domain_matcher/bruteforce.go +++ b/component/routing/domain_matcher/bruteforce.go @@ -6,6 +6,7 @@ package domain_matcher import ( + "fmt" "github.com/v2rayA/dae/common/consts" "github.com/v2rayA/dae/component/routing" "regexp" @@ -17,18 +18,31 @@ type Bruteforce struct { err error } -func NewBruteforce(simulatedDomainSet []routing.DomainSet) *Bruteforce { +func NewBruteforce(bitLength int) *Bruteforce { return &Bruteforce{ - simulatedDomainSet: simulatedDomainSet, + simulatedDomainSet: make([]routing.DomainSet, bitLength), } } func (n 
*Bruteforce) AddSet(bitIndex int, patterns []string, typ consts.RoutingDomainKey) { + if n.err != nil { + return + } + if len(n.simulatedDomainSet[bitIndex].Domains) != 0 { + n.err = fmt.Errorf("duplicated RuleIndex: %v", bitIndex) + return + } + n.simulatedDomainSet[bitIndex] = routing.DomainSet{ + Key: typ, + RuleIndex: bitIndex, + Domains: patterns, + } } func (n *Bruteforce) MatchDomainBitmap(domain string) (bitmap []uint32) { N := len(n.simulatedDomainSet) / 32 if len(n.simulatedDomainSet)%32 != 0 { N++ } + domain = strings.ToLower(strings.TrimSuffix(domain, ".")) bitmap = make([]uint32, N) for _, s := range n.simulatedDomainSet { for _, d := range s.Domains { @@ -52,6 +66,7 @@ func (n *Bruteforce) MatchDomainBitmap(domain string) (bitmap []uint32) { } } if hit { + //logrus.Traceln(d, s.Key, "matched given", domain) bitmap[s.RuleIndex/32] |= 1 << (s.RuleIndex % 32) break } diff --git a/control/control_plane.go b/control/control_plane.go index a3cd3e5..b5ecc01 100644 --- a/control/control_plane.go +++ b/control/control_plane.go @@ -372,7 +372,7 @@ func (c *ControlPlane) finishInitDnsUpstreamResolve(raw common.UrlOrEmpty, dnsUp A: dnsUpstream.Ip4.As4(), }, }} - if err = c.UpdateDnsCache(fqdn, typ, answers, deadline); err != nil { + if err = c.UpdateDnsCache(dnsUpstream.Hostname, typ, answers, deadline); err != nil { c = nil return } @@ -391,7 +391,7 @@ func (c *ControlPlane) finishInitDnsUpstreamResolve(raw common.UrlOrEmpty, dnsUp AAAA: dnsUpstream.Ip6.As16(), }, }} - if err = c.UpdateDnsCache(fqdn, typ, answers, deadline); err != nil { + if err = c.UpdateDnsCache(dnsUpstream.Hostname, typ, answers, deadline); err != nil { c = nil return } diff --git a/control/dns.go b/control/dns.go index 0b279f8..f1523f3 100644 --- a/control/dns.go +++ b/control/dns.go @@ -331,12 +331,19 @@ loop: } func (c *ControlPlane) UpdateDnsCache(host string, typ dnsmessage.Type, answers []dnsmessage.Resource, deadline time.Time) (err error) { - c.dnsCacheMu.Lock() - fqdn := 
strings.ToLower(host) - if !strings.HasSuffix(fqdn, ".") { - fqdn += "." + var fqdn string + if strings.HasSuffix(host, ".") { + fqdn = host + host = host[:len(host)-1] + } else { + fqdn = host + "." + } + // Bypass pure IP. + if _, err = netip.ParseAddr(host); err == nil { + return nil } cacheKey := fqdn + typ.String() + c.dnsCacheMu.Lock() cache, ok := c.dnsCache[cacheKey] if ok { c.dnsCacheMu.Unlock() diff --git a/go.mod b/go.mod index 6486538..ebd8c41 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,6 @@ require ( github.com/json-iterator/go v1.1.12 github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 github.com/mzz2017/softwind v0.0.0-20230217170818-542cba31602f - github.com/openacid/slim v0.5.11 github.com/safchain/ethtool v0.0.0-20230116090318-67cc41908669 github.com/sirupsen/logrus v1.9.0 github.com/spf13/cobra v1.6.1 @@ -20,14 +19,13 @@ require ( github.com/vishvananda/netlink v1.1.0 github.com/x-cray/logrus-prefixed-formatter v0.5.2 golang.org/x/crypto v0.5.0 + golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e golang.org/x/net v0.5.0 golang.org/x/sys v0.4.0 google.golang.org/protobuf v1.28.1 ) require ( - github.com/blang/semver v3.5.1+incompatible // indirect - github.com/davecgh/go-spew v1.1.1 // indirect github.com/dgryski/go-camellia v0.0.0-20191119043421-69a8a13fb23d // indirect github.com/dgryski/go-idea v0.0.0-20170306091226-d2fb45a411fb // indirect github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 // indirect @@ -44,22 +42,15 @@ require ( github.com/mzz2017/disk-bloom v1.0.1 // indirect github.com/onsi/ginkgo v1.16.5 // indirect github.com/onsi/gomega v1.26.0 // indirect - github.com/openacid/errors v0.8.1 // indirect - github.com/openacid/low v0.1.22-0.20210130164417-01334eb50af8 // indirect - github.com/openacid/must v0.1.3 // indirect - github.com/openacid/testkeys v0.1.7 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb // indirect 
github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/testify v1.8.1 // indirect github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect gitlab.com/yawning/chacha20.git v0.0.0-20190903091407-6d1cb28dc72c // indirect - golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect golang.org/x/term v0.4.0 // indirect golang.org/x/text v0.6.0 // indirect google.golang.org/genproto v0.0.0-20221227171554-f9683d7f8bef // indirect google.golang.org/grpc v1.52.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect ) //replace github.com/mzz2017/softwind => /home/mzz/goProjects/softwind diff --git a/go.sum b/go.sum index b50e20f..55ea0a5 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,6 @@ github.com/adrg/xdg v0.4.0 h1:RzRqFcjH4nE5C6oTAxhBtoE2IRyjBSa62SCbyPidvls= github.com/adrg/xdg v0.4.0/go.mod h1:N6ag73EX4wyxeaoeHctc1mas01KZgsj5tYiAIwqJE/E= github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20221202181307-76fa05c21b12 h1:npHgfD4Tl2WJS3AJaMUi5ynGDPUBfkg3U3fCzDyXZ+4= github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20221202181307-76fa05c21b12/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= -github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= -github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/cilium/ebpf v0.10.0 h1:nk5HPMeoBXtOzbkZBWym+ZWq1GIiHUsBFXxwewXAHLQ= github.com/cilium/ebpf v0.10.0/go.mod h1:DPiVdY/kT534dgc9ERmvP8mWA+9gvwgKfRvk4nNWnoE= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= @@ -29,7 +27,6 @@ github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWo github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 
-github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= @@ -39,7 +36,6 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -55,17 +51,12 @@ github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7P github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable 
v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= -github.com/mdempsky/unconvert v0.0.0-20200228143138-95ecdbfc0b5f/go.mod h1:AmCV4WB3cDMZqgPk+OUQKumliiQS4ZYsBt3AXekyuAU= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -90,22 +81,6 @@ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7J github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.26.0 h1:03cDLK28U6hWvCAns6NeydX3zIm4SF3ci69ulidS32Q= github.com/onsi/gomega v1.26.0/go.mod h1:r+zV744Re+DiYCIPRlYOTxn0YkOLcAnW8k1xXdMPGhM= -github.com/openacid/errors v0.8.1 h1:Hrj9WENDoj5jP27ZfF60SY5LShbxei+sxKZa0EP+oDw= -github.com/openacid/errors v0.8.1/go.mod h1:GUQEJJOJE3W9skHm8E8Y4phdl2LLEN8iD7c5gcGgdx0= -github.com/openacid/genr v0.1.1/go.mod h1:2B9wMFQKBKZnmo8AR/3JCRGnHs85r4OzeNy0RStLTiU= -github.com/openacid/low v0.1.14/go.mod h1:flqvccAtSrKeD+b5AejKgxCQVhVrsNYEWU7NlkpNCI8= -github.com/openacid/low v0.1.22-0.20210130164417-01334eb50af8 h1:3+zXRzfjDtpyHusR04m0dILUGFMwjfYS8Ejs8O0Maf8= -github.com/openacid/low v0.1.22-0.20210130164417-01334eb50af8/go.mod h1:q+MsKI6Pz2xsCkzV4BLj7NR5M4EX0sGz5AqotpZDVh0= -github.com/openacid/must v0.1.3 h1:deanGZVyVwV+ozfwNFbRU5YF7czXeQ67s8GVyZxzKW4= -github.com/openacid/must v0.1.3/go.mod h1:luPiXCuJlEo3UUFQngVQokV0MPGryeYvtCbQPs3U1+I= -github.com/openacid/slim v0.5.11 h1:LIN8ktjSV5/0h9Wai9o30jpzQPPIYZmaRExmt9nGkPU= -github.com/openacid/slim 
v0.5.11/go.mod h1:ddlyrp5csrPL30DlLp/SjgP4bdgCnmaCmmv4my407VI= -github.com/openacid/tablewriter v0.0.0-20190429071406-b14f71081b86/go.mod h1:iJAvCLjVGFyZOV2Oh123q4PMcoBv2qQLEvjlVIM9E2E= -github.com/openacid/testkeys v0.1.6/go.mod h1:MfA7cACzBpbiwekivj8StqX0WIRmqlMsci1c37CA3Do= -github.com/openacid/testkeys v0.1.7 h1:8mai/cJLsVvBob8K9RrXilkatK4oahfCZdxDJ8CUK7I= -github.com/openacid/testkeys v0.1.7/go.mod h1:MfA7cACzBpbiwekivj8StqX0WIRmqlMsci1c37CA3Do= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= @@ -153,7 +128,6 @@ golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE= golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU= golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -185,13 +159,11 @@ golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.4.0 h1:O7UWfv5+A2qiuulQk30kVinPoMtoIPeVaKLEgLpVkvg= golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200225230052-807dcd883420/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -211,7 +183,6 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= diff --git a/pkg/trie/trie.go b/pkg/trie/trie.go new file mode 100644 index 0000000..c472fec --- /dev/null +++ b/pkg/trie/trie.go @@ -0,0 +1,185 @@ +// Package succinct is modified from 
https://github.com/openacid/succinct/blob/loc100/sskv.go. +package trie + +import "math/bits" + +// Trie is a succinct, sorted and static string set impl with compacted trie as +// storage. The space cost is about half that of the original data. +// +// # Implementation +// +// It stores sorted strings in a compacted trie(AKA prefix tree). +// A trie node has at most 256 outgoing labels. +// A label is just a single byte. +// E.g., [ab, abc, abcd, axy, buv] is represented with a trie like the following: +// (Numbers are node id) +// +// ^ -a-> 1 -b-> 3 $ +// | | `c-> 6 $ +// | | `d-> 9 $ +// | `x-> 4 -y-> 7 $ +// `b-> 2 -u-> 5 -v-> 8 $ +// +// Internally it uses a packed []byte and a bitmap with `len([]byte)` bits to +// describe the outgoing labels of a node: +// +// ^: ab 00 +// 1: bx 00 +// 2: u 0 +// 3: c 0 +// 4: y 0 +// 5: v 0 +// 6: d 0 +// 7: ø +// 8: ø +// 9: ø +// +// In storage it packs labels together and bitmaps joined with separator `1`: +// +// labels(ignore space): "ab bx u c y v d" +// label bitmap: 0010010101010101111 +// +// Finally leaf nodes are indicated by another bitmap `leaves`, in which a `1` +// at i-th bit indicates the i-th node is a leaf: +// +// leaves: 0001001111 +type Trie struct { + leaves, labelBitmap []uint64 + labels []byte + ranks, selects []int32 +} + +// NewTrie creates a new *Trie struct, from a slice of sorted strings. 
+func NewTrie(keys []string) *Trie { + + ss := &Trie{} + lIdx := 0 + + type qElt struct{ s, e, col int } + + queue := []qElt{{0, len(keys), 0}} + + for i := 0; i < len(queue); i++ { + elt := queue[i] + + if elt.col == len(keys[elt.s]) { + // a leaf node + elt.s++ + setBit(&ss.leaves, i, 1) + } + + for j := elt.s; j < elt.e; { + + frm := j + + for ; j < elt.e && keys[j][elt.col] == keys[frm][elt.col]; j++ { + } + + queue = append(queue, qElt{frm, j, elt.col + 1}) + ss.labels = append(ss.labels, keys[frm][elt.col]) + setBit(&ss.labelBitmap, lIdx, 0) + lIdx++ + } + + setBit(&ss.labelBitmap, lIdx, 1) + lIdx++ + } + + ss.init() + return ss +} + +// HasPrefix query for a word and return whether a prefix of the word is in the Trie. +func (ss *Trie) HasPrefix(word string) bool { + + nodeId, bmIdx := 0, 0 + + for i := 0; i < len(word); i++ { + c := word[i] + for ; ; bmIdx++ { + if getBit(ss.labelBitmap, bmIdx) != 0 { + // no more labels in this node + return false + } + + if ss.labels[bmIdx-nodeId] == c { + break + } + } + + // go to next level + + nodeId = countZeros(ss.labelBitmap, ss.ranks, bmIdx+1) + if getBit(ss.leaves, nodeId) != 0 { + return true + } + bmIdx = selectIthOne(ss.labelBitmap, ss.ranks, ss.selects, nodeId-1) + 1 + } + + return false +} + +func setBit(bm *[]uint64, i int, v int) { + for i>>6 >= len(*bm) { + *bm = append(*bm, 0) + } + (*bm)[i>>6] |= uint64(v) << uint(i&63) +} + +func getBit(bm []uint64, i int) uint64 { + return bm[i>>6] & (1 << uint(i&63)) +} + +// init builds pre-calculated cache to speed up rank() and select() +func (ss *Trie) init() { + ss.ranks = []int32{0} + for i := 0; i < len(ss.labelBitmap); i++ { + n := bits.OnesCount64(ss.labelBitmap[i]) + ss.ranks = append(ss.ranks, ss.ranks[len(ss.ranks)-1]+int32(n)) + } + + ss.selects = []int32{} + n := 0 + for i := 0; i < len(ss.labelBitmap)<<6; i++ { + z := int(ss.labelBitmap[i>>6]>>uint(i&63)) & 1 + if z == 1 && n&63 == 0 { + ss.selects = append(ss.selects, int32(i)) + } + n += z + } +} + 
+// countZeros counts the number of "0" in a bitmap before the i-th bit(excluding +// the i-th bit) on behalf of rank index. +// E.g.: +// +// countZeros("010010", 4) == 3 +// // 012345 +func countZeros(bm []uint64, ranks []int32, i int) int { + return i - int(ranks[i>>6]) - bits.OnesCount64(bm[i>>6]&(1<<uint(i&63)-1)) +} + +// selectIthOne returns the index of the i-th "1" in a bitmap, on behalf of rank +// and select indexes. +// E.g.: +// +// selectIthOne("010010", 1) == 4 +// // 012345 +func selectIthOne(bm []uint64, ranks, selects []int32, i int) int { + base := int(selects[i>>6] & ^63) + findIthOne := i - int(ranks[base>>6]) + + for i := base >> 6; i < len(bm); i++ { + bitIdx := 0 + for w := bm[i]; w > 0; { + findIthOne -= int(w & 1) + if findIthOne < 0 { + return i<<6 + bitIdx + } + t0 := bits.TrailingZeros64(w &^ 1) + w >>= uint(t0) + bitIdx += t0 + } + } + panic("no more ones") +} diff --git a/pkg/trie/trie_test.go b/pkg/trie/trie_test.go new file mode 100644 index 0000000..4dac7ab --- /dev/null +++ b/pkg/trie/trie_test.go @@ -0,0 +1,119 @@ +/* + * SPDX-License-Identifier: AGPL-3.0-only + * Copyright (c) 2023, v2rayA Organization + */ + +package trie + +import "testing" + +func TestTrie(t *testing.T) { + trie := NewTrie([]string{ + "moc.cbatnetnoc.", + "moc.cbatnetnoc^", + "nc.", + "ua.moc.cbci.", + "ua.moc.cbci^", + "ua.moc.duolcababila.", + "ua.moc.duolcababila^", + "udiab.", + "udiab^", + "ue.cbci.", + "ue.cbci^", + "uhos.", + "uhos^", + "ul.cbci.", + "ul.cbci^", + "ur.dj.", + "ur.dj^", + "ur.llamt.", + "ur.llamt^", + "ur.sserpxeila.", + "ur.sserpxeila^", + "ur.wocsomcbci.", + "ur.wocsomcbci^", + "vt.32b.", + "vt.32b^", + "vt.akoaix.", + "vt.akoaix^", + "vt.eesia.", + "vt.eesia^", + "vt.eiq.", + "vt.eiq^", + "vt.gca.", + "vt.gca^", + "vt.ilibilib.", + "vt.ilibilib^", + "vt.iqnahz.", + "vt.iqnahz^", + "vt.ixiy.", + "vt.ixiy^", + "vt.low.", + "vt.low^", + "vt.nc361.", + "vt.nc361^", + "vt.obihzgnahs.", + "vt.obihzgnahs^", + "vt.ogmi.", + "vt.ogmi^", + "vt.spp.", + "vt.spp^", + "vt.uohsuhc.", + "vt.uohsuhc^", + "vt.uyuod.", + "vt.uyuod^", + "vt.vtig.", + "vt.vtig^", + "vt.vtnh.", + "vt.vtnh^", + "vt.zcbj.", + "vt.zcbj^", + "wk.moc.cbci.", + "wk.moc.cbci^", + "wt.moc.duolcababila.", + "wt.moc.duolcababila^", + "wt.moc.levarthh.", + 
"wt.moc.levarthh^", + "xc.f.", + "xc.f^", + "xm.moc.cbci.", + "xm.moc.cbci^", + "yapila.", + "yapila^", + "yl.lacisum.", + "yl.lacisum^", + "ym.moc.duolcababila.", + "ym.moc.duolcababila^", + "ym.pirtc.", + "ym.pirtc^", + "zib.anihcbmc.", + "zib.anihcbmc^", + "zib.duolcsndz.", + "zib.duolcsndz^", + "zib.fmc.", + "zib.fmc^", + "zk.ytamlacbci.", + "zk.ytamlacbci^", + }) + if !(trie.HasPrefix("nc.tset^") == true) { + t.Fatal("^test.cn") + } + if !(trie.HasPrefix("nc^") == false) { + t.Fatal("^cn") + } + if !(trie.HasPrefix("nc.") == true) { + t.Fatal(".cn") + } + if !(trie.HasPrefix("nc.^") == true) { + t.Fatal("^.cn") + } + if !(trie.HasPrefix("n") == false) { + t.Fatal("n") + } + if !(trie.HasPrefix("n^") == false) { + t.Fatal("^n") + } + if !(trie.HasPrefix("moc.cbatnetnoc^") == true) { + t.Fatal("contentabc.com") + } +}