fix: domain match

This commit is contained in:
mzz2017
2023-02-19 14:08:13 +08:00
parent 0d9892fff2
commit a011c2a74c
10 changed files with 411 additions and 63 deletions

View File

@ -7,10 +7,9 @@ package domain_matcher
import (
"fmt"
"github.com/openacid/slim/encode"
"github.com/openacid/slim/trie"
"github.com/v2rayA/ahocorasick-domain"
"github.com/v2rayA/dae/common/consts"
"github.com/v2rayA/dae/pkg/trie"
"regexp"
"sort"
"strings"
@ -21,7 +20,7 @@ type AhocorasickSlimtrie struct {
validTrieIndexes []int
validRegexpIndexes []int
ac []*ahocorasick.Matcher
trie []*trie.SlimTrie
trie []*trie.Trie
regexp [][]*regexp.Regexp
toBuildAc [][][]byte
@ -32,7 +31,7 @@ type AhocorasickSlimtrie struct {
func NewAhocorasickSlimtrie(bitLength int) *AhocorasickSlimtrie {
return &AhocorasickSlimtrie{
ac: make([]*ahocorasick.Matcher, bitLength),
trie: make([]*trie.SlimTrie, bitLength),
trie: make([]*trie.Trie, bitLength),
regexp: make([][]*regexp.Regexp, bitLength),
toBuildAc: make([][][]byte, bitLength),
toBuildTrie: make([][]string, bitLength),
@ -86,8 +85,7 @@ func (n *AhocorasickSlimtrie) MatchDomainBitmap(domain string) (bitmap []uint32)
N++
}
bitmap = make([]uint32, N)
// Add magic chars as head and tail.
domain = "^" + strings.ToLower(strings.TrimSuffix(domain, ".")) + "$"
domain = strings.ToLower(strings.TrimSuffix(domain, "."))
// Domain should consist of 'a'-'z' and '.' and '-'
for _, b := range []byte(domain) {
if !ahocorasick.IsValidChar(b) {
@ -95,23 +93,25 @@ func (n *AhocorasickSlimtrie) MatchDomainBitmap(domain string) (bitmap []uint32)
}
}
// Suffix matching.
suffixTrieDomain := ToSuffixTrieString(domain)
suffixTrieDomain := ToSuffixTrieString("^" + domain)
for _, i := range n.validTrieIndexes {
if bitmap[i/32]&(1<<(i%32)) > 0 {
// Already matched.
continue
}
if _, ok := n.trie[i].Get(suffixTrieDomain); ok {
if n.trie[i].HasPrefix(suffixTrieDomain) {
bitmap[i/32] |= 1 << (i % 32)
}
}
// Keyword matching.
// Add magic chars as head and tail.
acDomain := "^" + domain + "$"
for _, i := range n.validAcIndexes {
if bitmap[i/32]&(1<<(i%32)) > 0 {
// Already matched.
continue
}
if n.ac[i].Contains([]byte(domain)) {
if n.ac[i].Contains([]byte(acDomain)) {
bitmap[i/32] |= 1 << (i % 32)
}
}
@ -167,18 +167,13 @@ func (n *AhocorasickSlimtrie) Build() (err error) {
}
// Build succinct trie.
trueValue := true
for i, toBuild := range n.toBuildTrie {
if len(toBuild) == 0 {
continue
}
toBuild = ToSuffixTrieStrings(toBuild)
sort.Strings(toBuild)
n.trie[i], err = trie.NewSlimTrie(encode.Dummy{}, toBuild, nil, trie.Opt{
DedupValue: &trueValue,
// Set opt to complete to avoid false positive.
Complete: &trueValue,
})
n.trie[i] = trie.NewTrie(toBuild)
if err != nil {
return err
}

View File

@ -0,0 +1,59 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
*/
package domain_matcher
import (
"github.com/sirupsen/logrus"
"github.com/v2rayA/dae/common/consts"
"golang.org/x/exp/slices"
"math/rand"
"testing"
)
// TestAhocorasickSlimtrie cross-checks AhocorasickSlimtrie against the
// Bruteforce matcher: both are fed the same domain sets and must return
// identical bitmaps for 10000 pseudo-random domains derived from TestSample.
// The seed is fixed so that a failing sample can be reproduced.
func TestAhocorasickSlimtrie(t *testing.T) {
	logrus.SetLevel(logrus.TraceLevel)
	simulatedDomainSet, err := getDomain()
	if err != nil {
		t.Fatal(err)
	}
	bf := NewBruteforce(consts.MaxMatchSetLen)
	actrie := NewAhocorasickSlimtrie(consts.MaxMatchSetLen)
	for _, domains := range simulatedDomainSet {
		bf.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
		actrie.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
	}
	if err = bf.Build(); err != nil {
		t.Fatal(err)
	}
	if err = actrie.Build(); err != nil {
		t.Fatal(err)
	}

	rand.Seed(200)
	for i := 0; i < 10000; i++ {
		sample := TestSample[rand.Intn(len(TestSample))]
		choice := rand.Intn(10)
		switch {
		case choice < 4:
			// Prepend a random label of 0-4 lowercase letters.
			addN := rand.Intn(5)
			buf := make([]byte, addN)
			for j := range buf {
				// Intn(n) yields [0, n), so the +1 is required for 'z' to be
				// reachable.
				buf[j] = 'a' + byte(rand.Intn('z'-'a'+1))
			}
			sample = string(buf) + "." + sample
		case choice < 6:
			// Cut the sample at a random offset to produce a partial domain.
			k := rand.Intn(len(sample))
			sample = sample[k:]
		default:
			// Use the sample unchanged.
		}
		bitmap := bf.MatchDomainBitmap(sample)
		bitmap2 := actrie.MatchDomainBitmap(sample)
		if !slices.Equal(bitmap, bitmap2) {
			t.Fatal(i, sample, bitmap, bitmap2)
		}
	}
}

View File

@ -172,7 +172,13 @@ func BenchmarkBruteforce(b *testing.B) {
if err != nil {
b.Fatal(err)
}
bf := NewBruteforce(simulatedDomainSet)
bf := NewBruteforce(consts.MaxMatchSetLen)
for _, domains := range simulatedDomainSet {
bf.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
}
if err = bf.Build(); err != nil {
b.Fatal(err)
}
b.StartTimer()
runBenchmark(b, bf)
}

View File

@ -6,6 +6,7 @@
package domain_matcher
import (
"fmt"
"github.com/v2rayA/dae/common/consts"
"github.com/v2rayA/dae/component/routing"
"regexp"
@ -17,18 +18,31 @@ type Bruteforce struct {
err error
}
func NewBruteforce(simulatedDomainSet []routing.DomainSet) *Bruteforce {
func NewBruteforce(bitLength int) *Bruteforce {
return &Bruteforce{
simulatedDomainSet: simulatedDomainSet,
simulatedDomainSet: make([]routing.DomainSet, bitLength),
}
}
// AddSet stores patterns of kind typ in the slot selected by bitIndex.
//
// Errors are sticky: once n.err is set every later call is a no-op. An error
// is recorded when bitIndex lies outside the slots allocated for this matcher
// or when the slot already holds a non-empty pattern list.
// NOTE(review): a slot that was filled with an empty pattern list is not
// detected as a duplicate, because occupancy is judged by len(Domains).
func (n *Bruteforce) AddSet(bitIndex int, patterns []string, typ consts.RoutingDomainKey) {
	if n.err != nil {
		return
	}
	// Report an invalid index through n.err instead of panicking on the
	// slice access below.
	if bitIndex < 0 || bitIndex >= len(n.simulatedDomainSet) {
		n.err = fmt.Errorf("out-of-range RuleIndex: %v (bit length %v)", bitIndex, len(n.simulatedDomainSet))
		return
	}
	if len(n.simulatedDomainSet[bitIndex].Domains) != 0 {
		n.err = fmt.Errorf("duplicated RuleIndex: %v", bitIndex)
		return
	}
	n.simulatedDomainSet[bitIndex] = routing.DomainSet{
		Key:       typ,
		RuleIndex: bitIndex,
		Domains:   patterns,
	}
}
func (n *Bruteforce) MatchDomainBitmap(domain string) (bitmap []uint32) {
N := len(n.simulatedDomainSet) / 32
if len(n.simulatedDomainSet)%32 != 0 {
N++
}
domain = strings.ToLower(strings.TrimSuffix(domain, "."))
bitmap = make([]uint32, N)
for _, s := range n.simulatedDomainSet {
for _, d := range s.Domains {
@ -52,6 +66,7 @@ func (n *Bruteforce) MatchDomainBitmap(domain string) (bitmap []uint32) {
}
}
if hit {
//logrus.Traceln(d, s.Key, "matched given", domain)
bitmap[s.RuleIndex/32] |= 1 << (s.RuleIndex % 32)
break
}

View File

@ -372,7 +372,7 @@ func (c *ControlPlane) finishInitDnsUpstreamResolve(raw common.UrlOrEmpty, dnsUp
A: dnsUpstream.Ip4.As4(),
},
}}
if err = c.UpdateDnsCache(fqdn, typ, answers, deadline); err != nil {
if err = c.UpdateDnsCache(dnsUpstream.Hostname, typ, answers, deadline); err != nil {
c = nil
return
}
@ -391,7 +391,7 @@ func (c *ControlPlane) finishInitDnsUpstreamResolve(raw common.UrlOrEmpty, dnsUp
AAAA: dnsUpstream.Ip6.As16(),
},
}}
if err = c.UpdateDnsCache(fqdn, typ, answers, deadline); err != nil {
if err = c.UpdateDnsCache(dnsUpstream.Hostname, typ, answers, deadline); err != nil {
c = nil
return
}

View File

@ -331,12 +331,19 @@ loop:
}
func (c *ControlPlane) UpdateDnsCache(host string, typ dnsmessage.Type, answers []dnsmessage.Resource, deadline time.Time) (err error) {
c.dnsCacheMu.Lock()
fqdn := strings.ToLower(host)
if !strings.HasSuffix(fqdn, ".") {
fqdn += "."
var fqdn string
if strings.HasSuffix(host, ".") {
fqdn = host
host = host[:len(host)-1]
} else {
fqdn = host + "."
}
// Bypass pure IP.
if _, err = netip.ParseAddr(host); err == nil {
return nil
}
cacheKey := fqdn + typ.String()
c.dnsCacheMu.Lock()
cache, ok := c.dnsCache[cacheKey]
if ok {
c.dnsCacheMu.Unlock()

11
go.mod
View File

@ -11,7 +11,6 @@ require (
github.com/json-iterator/go v1.1.12
github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826
github.com/mzz2017/softwind v0.0.0-20230217170818-542cba31602f
github.com/openacid/slim v0.5.11
github.com/safchain/ethtool v0.0.0-20230116090318-67cc41908669
github.com/sirupsen/logrus v1.9.0
github.com/spf13/cobra v1.6.1
@ -20,14 +19,13 @@ require (
github.com/vishvananda/netlink v1.1.0
github.com/x-cray/logrus-prefixed-formatter v0.5.2
golang.org/x/crypto v0.5.0
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e
golang.org/x/net v0.5.0
golang.org/x/sys v0.4.0
google.golang.org/protobuf v1.28.1
)
require (
github.com/blang/semver v3.5.1+incompatible // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dgryski/go-camellia v0.0.0-20191119043421-69a8a13fb23d // indirect
github.com/dgryski/go-idea v0.0.0-20170306091226-d2fb45a411fb // indirect
github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 // indirect
@ -44,22 +42,15 @@ require (
github.com/mzz2017/disk-bloom v1.0.1 // indirect
github.com/onsi/ginkgo v1.16.5 // indirect
github.com/onsi/gomega v1.26.0 // indirect
github.com/openacid/errors v0.8.1 // indirect
github.com/openacid/low v0.1.22-0.20210130164417-01334eb50af8 // indirect
github.com/openacid/must v0.1.3 // indirect
github.com/openacid/testkeys v0.1.7 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.8.1 // indirect
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect
gitlab.com/yawning/chacha20.git v0.0.0-20190903091407-6d1cb28dc72c // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/term v0.4.0 // indirect
golang.org/x/text v0.6.0 // indirect
google.golang.org/genproto v0.0.0-20221227171554-f9683d7f8bef // indirect
google.golang.org/grpc v1.52.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
//replace github.com/mzz2017/softwind => /home/mzz/goProjects/softwind

29
go.sum
View File

@ -4,8 +4,6 @@ github.com/adrg/xdg v0.4.0 h1:RzRqFcjH4nE5C6oTAxhBtoE2IRyjBSa62SCbyPidvls=
github.com/adrg/xdg v0.4.0/go.mod h1:N6ag73EX4wyxeaoeHctc1mas01KZgsj5tYiAIwqJE/E=
github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20221202181307-76fa05c21b12 h1:npHgfD4Tl2WJS3AJaMUi5ynGDPUBfkg3U3fCzDyXZ+4=
github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20221202181307-76fa05c21b12/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM=
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/cilium/ebpf v0.10.0 h1:nk5HPMeoBXtOzbkZBWym+ZWq1GIiHUsBFXxwewXAHLQ=
github.com/cilium/ebpf v0.10.0/go.mod h1:DPiVdY/kT534dgc9ERmvP8mWA+9gvwgKfRvk4nNWnoE=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@ -29,7 +27,6 @@ github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWo
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
@ -39,7 +36,6 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
@ -55,17 +51,12 @@ github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7P
github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.16 h1:bq3VjFmv/sOjHtdEhmkEV4x1AJtvUvOJ2PFAZ5+peKQ=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mdempsky/unconvert v0.0.0-20200228143138-95ecdbfc0b5f/go.mod h1:AmCV4WB3cDMZqgPk+OUQKumliiQS4ZYsBt3AXekyuAU=
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI=
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@ -90,22 +81,6 @@ github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7J
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
github.com/onsi/gomega v1.26.0 h1:03cDLK28U6hWvCAns6NeydX3zIm4SF3ci69ulidS32Q=
github.com/onsi/gomega v1.26.0/go.mod h1:r+zV744Re+DiYCIPRlYOTxn0YkOLcAnW8k1xXdMPGhM=
github.com/openacid/errors v0.8.1 h1:Hrj9WENDoj5jP27ZfF60SY5LShbxei+sxKZa0EP+oDw=
github.com/openacid/errors v0.8.1/go.mod h1:GUQEJJOJE3W9skHm8E8Y4phdl2LLEN8iD7c5gcGgdx0=
github.com/openacid/genr v0.1.1/go.mod h1:2B9wMFQKBKZnmo8AR/3JCRGnHs85r4OzeNy0RStLTiU=
github.com/openacid/low v0.1.14/go.mod h1:flqvccAtSrKeD+b5AejKgxCQVhVrsNYEWU7NlkpNCI8=
github.com/openacid/low v0.1.22-0.20210130164417-01334eb50af8 h1:3+zXRzfjDtpyHusR04m0dILUGFMwjfYS8Ejs8O0Maf8=
github.com/openacid/low v0.1.22-0.20210130164417-01334eb50af8/go.mod h1:q+MsKI6Pz2xsCkzV4BLj7NR5M4EX0sGz5AqotpZDVh0=
github.com/openacid/must v0.1.3 h1:deanGZVyVwV+ozfwNFbRU5YF7czXeQ67s8GVyZxzKW4=
github.com/openacid/must v0.1.3/go.mod h1:luPiXCuJlEo3UUFQngVQokV0MPGryeYvtCbQPs3U1+I=
github.com/openacid/slim v0.5.11 h1:LIN8ktjSV5/0h9Wai9o30jpzQPPIYZmaRExmt9nGkPU=
github.com/openacid/slim v0.5.11/go.mod h1:ddlyrp5csrPL30DlLp/SjgP4bdgCnmaCmmv4my407VI=
github.com/openacid/tablewriter v0.0.0-20190429071406-b14f71081b86/go.mod h1:iJAvCLjVGFyZOV2Oh123q4PMcoBv2qQLEvjlVIM9E2E=
github.com/openacid/testkeys v0.1.6/go.mod h1:MfA7cACzBpbiwekivj8StqX0WIRmqlMsci1c37CA3Do=
github.com/openacid/testkeys v0.1.7 h1:8mai/cJLsVvBob8K9RrXilkatK4oahfCZdxDJ8CUK7I=
github.com/openacid/testkeys v0.1.7/go.mod h1:MfA7cACzBpbiwekivj8StqX0WIRmqlMsci1c37CA3Do=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
@ -153,7 +128,6 @@ golang.org/x/crypto v0.5.0 h1:U/0M97KRkSFvyD/3FSmdP5W5swImpNgle/EHFhOsQPE=
golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU=
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA=
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
@ -185,13 +159,11 @@ golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.4.0 h1:O7UWfv5+A2qiuulQk30kVinPoMtoIPeVaKLEgLpVkvg=
golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k=
golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200225230052-807dcd883420/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@ -211,7 +183,6 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w=
google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=

185
pkg/trie/trie.go Normal file
View File

@ -0,0 +1,185 @@
// Package trie is adapted from https://github.com/openacid/succinct/blob/loc100/sskv.go.
package trie
import "math/bits"
// Trie is a succinct, sorted and static string set backed by a compacted
// trie. The space cost is about half lower than the original data.
//
// # Implementation
//
// Sorted strings are stored in a compacted trie (AKA prefix tree).
// A trie node has at most 256 outgoing labels, and a label is a single byte.
// E.g., [ab, abc, abcd, axy, buv] is represented with a trie like the
// following (numbers are node ids):
//
//	^ -a-> 1 -b-> 3 $
//	  |      |      `c-> 6 $
//	  |      |             `d-> 9 $
//	  |      `x-> 4 -y-> 7 $
//	  `b-> 2 -u-> 5 -v-> 8 $
//
// Internally a packed []byte plus a bitmap with one `0` bit per label
// describe the outgoing labels of each node:
//
//	^: ab  00
//	1: bx  00
//	2: u   0
//	3: c   0
//	4: y   0
//	5: v   0
//	6: d   0
//	7: ø
//	8: ø
//	9: ø
//
// In storage the labels are packed together and the per-node bitmaps are
// joined with a `1` separator after every node:
//
//	labels(ignore space): "ab bx u c y v d"
//	label bitmap:         0010010101010101111
//
// Finally, leaf nodes are marked in a second bitmap `leaves`, in which a `1`
// at the i-th bit indicates that the i-th node is a leaf:
//
//	leaves: 0001001111
type Trie struct {
	// leaves has bit i set when node i ends a stored key.
	leaves []uint64
	// labelBitmap holds, node after node, one 0 bit per outgoing label
	// followed by a terminating 1 bit.
	labelBitmap []uint64
	// labels holds the outgoing label bytes of all nodes, packed in node order.
	labels []byte
	// ranks[k] is the number of 1 bits in the first k words of labelBitmap.
	ranks []int32
	// selects records the bit position of every 64th 1 bit of labelBitmap
	// (the 0th, 64th, 128th, ...).
	selects []int32
}
// NewTrie creates a new *Trie from keys, which must be sorted in ascending
// byte order (e.g. with sort.Strings).
//
// Nodes are numbered breadth-first starting from the root (node 0). For each
// node the distinct next bytes of its keys become its outgoing labels, and a
// node whose path spells a complete key is marked as a leaf.
//
// A nil or empty keys slice yields a Trie containing only a childless root.
// Adjacent duplicate keys are collapsed into a single leaf instead of causing
// an out-of-range access.
func NewTrie(keys []string) *Trie {
	ss := &Trie{}
	lIdx := 0

	// qElt is a pending node: keys[s:e] all share the same first col bytes.
	type qElt struct{ s, e, col int }

	queue := []qElt{{0, len(keys), 0}}

	for i := 0; i < len(queue); i++ {
		elt := queue[i]

		// The range may be empty only for the root of an empty key set.
		if elt.s < elt.e && elt.col == len(keys[elt.s]) {
			// a leaf node
			setBit(&ss.leaves, i, 1)
			// Because keys are sorted, the key that ends at this node and all
			// of its duplicates come first in the range; skip every one of
			// them so that the label scan below never indexes past a key.
			for elt.s < elt.e && len(keys[elt.s]) == elt.col {
				elt.s++
			}
		}

		for j := elt.s; j < elt.e; {
			// Group the consecutive keys that share the byte at position col;
			// each group becomes one child node.
			frm := j

			for ; j < elt.e && keys[j][elt.col] == keys[frm][elt.col]; j++ {
			}

			queue = append(queue, qElt{frm, j, elt.col + 1})
			ss.labels = append(ss.labels, keys[frm][elt.col])
			setBit(&ss.labelBitmap, lIdx, 0)
			lIdx++
		}

		// Terminate the label list of this node.
		setBit(&ss.labelBitmap, lIdx, 1)
		lIdx++
	}

	ss.init()
	return ss
}
// HasPrefix reports whether some key stored in the Trie is a prefix of word.
// A key equal to word counts as a prefix.
//
// The walk starts at the root and follows one label per byte of word. It
// returns true as soon as a node marked as a leaf is reached, and false when
// the current node has no label for the next byte or word runs out first.
// A stored empty key is a prefix of every word, so a root marked as a leaf
// matches immediately. A Trie with no label bitmap (the zero value) matches
// nothing.
func (ss *Trie) HasPrefix(word string) bool {
	if len(ss.labelBitmap) == 0 {
		// Nothing has been built; there is no root to walk from.
		return false
	}
	if len(ss.leaves) > 0 && getBit(ss.leaves, 0) != 0 {
		// The empty key is stored and it prefixes every word.
		return true
	}

	nodeId, bmIdx := 0, 0

	for i := 0; i < len(word); i++ {
		c := word[i]
		// Scan the labels of the current node for c.
		for ; ; bmIdx++ {
			if getBit(ss.labelBitmap, bmIdx) != 0 {
				// no more labels in this node
				return false
			}

			// Each earlier node contributed exactly one 1 bit, so
			// bmIdx-nodeId is the number of 0 bits before bmIdx, which is the
			// index of this label in ss.labels.
			if ss.labels[bmIdx-nodeId] == c {
				break
			}
		}

		// go to next level: the child id equals the number of labels (0 bits)
		// up to and including the one just matched.
		nodeId = countZeros(ss.labelBitmap, ss.ranks, bmIdx+1)
		if getBit(ss.leaves, nodeId) != 0 {
			return true
		}
		// The labels of node k begin right after the 1 bit that terminates
		// node k-1.
		bmIdx = selectIthOne(ss.labelBitmap, ss.ranks, ss.selects, nodeId-1) + 1
	}

	return false
}
// setBit ORs v into bit i of the bitmap *bm, first growing the bitmap with
// zero words until it contains the word that holds bit i. Passing v == 0
// therefore only guarantees that the bit exists; it never clears a bit.
func setBit(bm *[]uint64, i int, v int) {
	wordIdx := i >> 6
	if missing := wordIdx + 1 - len(*bm); missing > 0 {
		*bm = append(*bm, make([]uint64, missing)...)
	}
	words := *bm
	words[wordIdx] |= uint64(v) << uint(i&63)
}
// getBit isolates bit i of the bitmap bm. The result is zero when the bit is
// clear and the bit's mask within its word (not necessarily 1) when it is set.
func getBit(bm []uint64, i int) uint64 {
	mask := uint64(1) << uint(i&63)
	return bm[i>>6] & mask
}
// init precomputes the lookup tables used by countZeros and selectIthOne:
//   - ranks[k] is the number of 1 bits in the first k words of labelBitmap;
//   - selects holds the bit position of the 0th, 64th, 128th, ... 1 bit.
func (ss *Trie) init() {
	ranks := []int32{0}
	selects := []int32{}

	ones := 0
	for wordIdx, word := range ss.labelBitmap {
		// Visit the set bits of this word from lowest to highest.
		for rest := word; rest != 0; rest &= rest - 1 {
			if ones&63 == 0 {
				selects = append(selects, int32(wordIdx<<6+bits.TrailingZeros64(rest)))
			}
			ones++
		}
		ranks = append(ranks, int32(ones))
	}

	ss.ranks = ranks
	ss.selects = selects
}
// countZeros returns how many "0" bits the bitmap bm has before position i
// (position i itself is not counted). ranks must hold, for every word index,
// the number of "1" bits in all preceding words.
// E.g.:
//
//	countZeros("010010", 4) == 3
//	//          012345
func countZeros(bm []uint64, ranks []int32, i int) int {
	wordIdx := i >> 6
	onesInEarlierWords := int(ranks[wordIdx])
	// Mask keeping only the bits of the current word that lie below i.
	lowMask := uint64(1)<<uint(i&63) - 1
	onesInThisWord := bits.OnesCount64(bm[wordIdx] & lowMask)
	return i - onesInEarlierWords - onesInThisWord
}
// selectIthOne returns the bit position of the i-th "1" (counting from 0) in
// the bitmap bm, using the ranks and selects tables to skip ahead to the word
// where the search starts. It panics when bm holds fewer than i+1 ones.
// E.g.:
//
//	selectIthOne("010010", 1) == 4
//	//            012345
func selectIthOne(bm []uint64, ranks, selects []int32, i int) int {
	// Word containing the closest sampled "1" at or before the i-th one.
	startWord := int(selects[i>>6]&^63) >> 6
	// Ones still to be passed, counted from the beginning of startWord.
	remaining := i - int(ranks[startWord])

	for wordIdx := startWord; wordIdx < len(bm); wordIdx++ {
		offset := 0
		word := bm[wordIdx]
		for word != 0 {
			remaining -= int(word & 1)
			if remaining < 0 {
				return wordIdx<<6 + offset
			}
			// Drop the lowest bit and jump straight to the next set bit.
			skip := bits.TrailingZeros64(word &^ 1)
			word >>= uint(skip)
			offset += skip
		}
	}
	panic("no more ones")
}

119
pkg/trie/trie_test.go Normal file
View File

@ -0,0 +1,119 @@
/*
* SPDX-License-Identifier: AGPL-3.0-only
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
*/
package trie
import "testing"
// TestTrie builds a Trie from a sorted list of reversed domain patterns and
// verifies HasPrefix on a handful of reversed lookups. Each case carries the
// message that is reported when its expectation is not met.
func TestTrie(t *testing.T) {
	keys := []string{
		"moc.cbatnetnoc.",
		"moc.cbatnetnoc^",
		"nc.",
		"ua.moc.cbci.",
		"ua.moc.cbci^",
		"ua.moc.duolcababila.",
		"ua.moc.duolcababila^",
		"udiab.",
		"udiab^",
		"ue.cbci.",
		"ue.cbci^",
		"uhos.",
		"uhos^",
		"ul.cbci.",
		"ul.cbci^",
		"ur.dj.",
		"ur.dj^",
		"ur.llamt.",
		"ur.llamt^",
		"ur.sserpxeila.",
		"ur.sserpxeila^",
		"ur.wocsomcbci.",
		"ur.wocsomcbci^",
		"vt.32b.",
		"vt.32b^",
		"vt.akoaix.",
		"vt.akoaix^",
		"vt.eesia.",
		"vt.eesia^",
		"vt.eiq.",
		"vt.eiq^",
		"vt.gca.",
		"vt.gca^",
		"vt.ilibilib.",
		"vt.ilibilib^",
		"vt.iqnahz.",
		"vt.iqnahz^",
		"vt.ixiy.",
		"vt.ixiy^",
		"vt.low.",
		"vt.low^",
		"vt.nc361.",
		"vt.nc361^",
		"vt.obihzgnahs.",
		"vt.obihzgnahs^",
		"vt.ogmi.",
		"vt.ogmi^",
		"vt.spp.",
		"vt.spp^",
		"vt.uohsuhc.",
		"vt.uohsuhc^",
		"vt.uyuod.",
		"vt.uyuod^",
		"vt.vtig.",
		"vt.vtig^",
		"vt.vtnh.",
		"vt.vtnh^",
		"vt.zcbj.",
		"vt.zcbj^",
		"wk.moc.cbci.",
		"wk.moc.cbci^",
		"wt.moc.duolcababila.",
		"wt.moc.duolcababila^",
		"wt.moc.levarthh.",
		"wt.moc.levarthh^",
		"xc.f.",
		"xc.f^",
		"xm.moc.cbci.",
		"xm.moc.cbci^",
		"yapila.",
		"yapila^",
		"yl.lacisum.",
		"yl.lacisum^",
		"ym.moc.duolcababila.",
		"ym.moc.duolcababila^",
		"ym.pirtc.",
		"ym.pirtc^",
		"zib.anihcbmc.",
		"zib.anihcbmc^",
		"zib.duolcsndz.",
		"zib.duolcsndz^",
		"zib.fmc.",
		"zib.fmc^",
		"zk.ytamlacbci.",
		"zk.ytamlacbci^",
	}
	set := NewTrie(keys)

	cases := []struct {
		word string // reversed lookup string passed to HasPrefix
		want bool   // expected HasPrefix result
		msg  string // failure message
	}{
		{"nc.tset^", true, "^test.cn"},
		{"nc^", false, "^cn"},
		{"nc.", true, ".cn"},
		{"nc.^", true, "^.cn"},
		{"n", false, "n"},
		{"n^", false, "^n"},
		{"moc.cbatnetnoc^", true, "contentabc.com"},
	}
	for _, tc := range cases {
		if set.HasPrefix(tc.word) != tc.want {
			t.Fatal(tc.msg)
		}
	}
}