mirror of
https://github.com/daeuniverse/dae.git
synced 2025-07-13 09:18:55 +07:00
fix: domain match
This commit is contained in:
@ -7,10 +7,9 @@ package domain_matcher
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/openacid/slim/encode"
|
||||
"github.com/openacid/slim/trie"
|
||||
"github.com/v2rayA/ahocorasick-domain"
|
||||
"github.com/v2rayA/dae/common/consts"
|
||||
"github.com/v2rayA/dae/pkg/trie"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
@ -21,7 +20,7 @@ type AhocorasickSlimtrie struct {
|
||||
validTrieIndexes []int
|
||||
validRegexpIndexes []int
|
||||
ac []*ahocorasick.Matcher
|
||||
trie []*trie.SlimTrie
|
||||
trie []*trie.Trie
|
||||
regexp [][]*regexp.Regexp
|
||||
|
||||
toBuildAc [][][]byte
|
||||
@ -32,7 +31,7 @@ type AhocorasickSlimtrie struct {
|
||||
func NewAhocorasickSlimtrie(bitLength int) *AhocorasickSlimtrie {
|
||||
return &AhocorasickSlimtrie{
|
||||
ac: make([]*ahocorasick.Matcher, bitLength),
|
||||
trie: make([]*trie.SlimTrie, bitLength),
|
||||
trie: make([]*trie.Trie, bitLength),
|
||||
regexp: make([][]*regexp.Regexp, bitLength),
|
||||
toBuildAc: make([][][]byte, bitLength),
|
||||
toBuildTrie: make([][]string, bitLength),
|
||||
@ -86,8 +85,7 @@ func (n *AhocorasickSlimtrie) MatchDomainBitmap(domain string) (bitmap []uint32)
|
||||
N++
|
||||
}
|
||||
bitmap = make([]uint32, N)
|
||||
// Add magic chars as head and tail.
|
||||
domain = "^" + strings.ToLower(strings.TrimSuffix(domain, ".")) + "$"
|
||||
domain = strings.ToLower(strings.TrimSuffix(domain, "."))
|
||||
// Domain should consist of 'a'-'z' and '.' and '-'
|
||||
for _, b := range []byte(domain) {
|
||||
if !ahocorasick.IsValidChar(b) {
|
||||
@ -95,23 +93,25 @@ func (n *AhocorasickSlimtrie) MatchDomainBitmap(domain string) (bitmap []uint32)
|
||||
}
|
||||
}
|
||||
// Suffix matching.
|
||||
suffixTrieDomain := ToSuffixTrieString(domain)
|
||||
suffixTrieDomain := ToSuffixTrieString("^" + domain)
|
||||
for _, i := range n.validTrieIndexes {
|
||||
if bitmap[i/32]&(1<<(i%32)) > 0 {
|
||||
// Already matched.
|
||||
continue
|
||||
}
|
||||
if _, ok := n.trie[i].Get(suffixTrieDomain); ok {
|
||||
if n.trie[i].HasPrefix(suffixTrieDomain) {
|
||||
bitmap[i/32] |= 1 << (i % 32)
|
||||
}
|
||||
}
|
||||
// Keyword matching.
|
||||
// Add magic chars as head and tail.
|
||||
acDomain := "^" + domain + "$"
|
||||
for _, i := range n.validAcIndexes {
|
||||
if bitmap[i/32]&(1<<(i%32)) > 0 {
|
||||
// Already matched.
|
||||
continue
|
||||
}
|
||||
if n.ac[i].Contains([]byte(domain)) {
|
||||
if n.ac[i].Contains([]byte(acDomain)) {
|
||||
bitmap[i/32] |= 1 << (i % 32)
|
||||
}
|
||||
}
|
||||
@ -167,18 +167,13 @@ func (n *AhocorasickSlimtrie) Build() (err error) {
|
||||
}
|
||||
|
||||
// Build succinct trie.
|
||||
trueValue := true
|
||||
for i, toBuild := range n.toBuildTrie {
|
||||
if len(toBuild) == 0 {
|
||||
continue
|
||||
}
|
||||
toBuild = ToSuffixTrieStrings(toBuild)
|
||||
sort.Strings(toBuild)
|
||||
n.trie[i], err = trie.NewSlimTrie(encode.Dummy{}, toBuild, nil, trie.Opt{
|
||||
DedupValue: &trueValue,
|
||||
// Set opt to complete to avoid false positive.
|
||||
Complete: &trueValue,
|
||||
})
|
||||
n.trie[i] = trie.NewTrie(toBuild)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
|
||||
*/
|
||||
|
||||
package domain_matcher
|
||||
|
||||
import (
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/v2rayA/dae/common/consts"
|
||||
"golang.org/x/exp/slices"
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAhocorasickSlimtrie(t *testing.T) {
|
||||
|
||||
logrus.SetLevel(logrus.TraceLevel)
|
||||
simulatedDomainSet, err := getDomain()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
bf := NewBruteforce(consts.MaxMatchSetLen)
|
||||
actrie := NewAhocorasickSlimtrie(consts.MaxMatchSetLen)
|
||||
for _, domains := range simulatedDomainSet {
|
||||
bf.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
|
||||
actrie.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
|
||||
}
|
||||
if err = bf.Build(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = actrie.Build(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rand.Seed(200)
|
||||
for i := 0; i < 10000; i++ {
|
||||
sample := TestSample[rand.Intn(len(TestSample))]
|
||||
choice := rand.Intn(10)
|
||||
switch {
|
||||
case choice < 4:
|
||||
addN := rand.Intn(5)
|
||||
buf := make([]byte, addN)
|
||||
for i := range buf {
|
||||
buf[i] = 'a' + byte(rand.Intn('z'-'a'))
|
||||
}
|
||||
sample = string(buf) + "." + sample
|
||||
case choice >= 4 && choice < 6:
|
||||
k := rand.Intn(len(sample))
|
||||
sample = sample[k:]
|
||||
default:
|
||||
}
|
||||
bitmap := bf.MatchDomainBitmap(sample)
|
||||
bitmap2 := actrie.MatchDomainBitmap(sample)
|
||||
if !slices.Equal(bitmap, bitmap2) {
|
||||
t.Fatal(i, sample, bitmap, bitmap2)
|
||||
}
|
||||
}
|
||||
}
|
@ -172,7 +172,13 @@ func BenchmarkBruteforce(b *testing.B) {
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
bf := NewBruteforce(simulatedDomainSet)
|
||||
bf := NewBruteforce(consts.MaxMatchSetLen)
|
||||
for _, domains := range simulatedDomainSet {
|
||||
bf.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
|
||||
}
|
||||
if err = bf.Build(); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.StartTimer()
|
||||
runBenchmark(b, bf)
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
package domain_matcher
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/v2rayA/dae/common/consts"
|
||||
"github.com/v2rayA/dae/component/routing"
|
||||
"regexp"
|
||||
@ -17,18 +18,31 @@ type Bruteforce struct {
|
||||
err error
|
||||
}
|
||||
|
||||
func NewBruteforce(simulatedDomainSet []routing.DomainSet) *Bruteforce {
|
||||
func NewBruteforce(bitLength int) *Bruteforce {
|
||||
return &Bruteforce{
|
||||
simulatedDomainSet: simulatedDomainSet,
|
||||
simulatedDomainSet: make([]routing.DomainSet, bitLength),
|
||||
}
|
||||
}
|
||||
func (n *Bruteforce) AddSet(bitIndex int, patterns []string, typ consts.RoutingDomainKey) {
|
||||
if n.err != nil {
|
||||
return
|
||||
}
|
||||
if len(n.simulatedDomainSet[bitIndex].Domains) != 0 {
|
||||
n.err = fmt.Errorf("duplicated RuleIndex: %v", bitIndex)
|
||||
return
|
||||
}
|
||||
n.simulatedDomainSet[bitIndex] = routing.DomainSet{
|
||||
Key: typ,
|
||||
RuleIndex: bitIndex,
|
||||
Domains: patterns,
|
||||
}
|
||||
}
|
||||
func (n *Bruteforce) MatchDomainBitmap(domain string) (bitmap []uint32) {
|
||||
N := len(n.simulatedDomainSet) / 32
|
||||
if len(n.simulatedDomainSet)%32 != 0 {
|
||||
N++
|
||||
}
|
||||
domain = strings.ToLower(strings.TrimSuffix(domain, "."))
|
||||
bitmap = make([]uint32, N)
|
||||
for _, s := range n.simulatedDomainSet {
|
||||
for _, d := range s.Domains {
|
||||
@ -52,6 +66,7 @@ func (n *Bruteforce) MatchDomainBitmap(domain string) (bitmap []uint32) {
|
||||
}
|
||||
}
|
||||
if hit {
|
||||
//logrus.Traceln(d, s.Key, "matched given", domain)
|
||||
bitmap[s.RuleIndex/32] |= 1 << (s.RuleIndex % 32)
|
||||
break
|
||||
}
|
||||
|
Reference in New Issue
Block a user