mirror of
https://github.com/daeuniverse/dae.git
synced 2025-07-04 15:27:55 +07:00
optimize: memory occupation
This commit is contained in:
@ -1,125 +0,0 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
|
||||
*/
|
||||
|
||||
package domain_matcher
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/cloudflare/ahocorasick"
|
||||
"github.com/v2rayA/dae/common/consts"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Ahocorasick struct {
|
||||
validIndexes []int
|
||||
validRegexpIndexes []int
|
||||
matchers []*ahocorasick.Matcher
|
||||
regexp [][]*regexp.Regexp
|
||||
|
||||
toBuild [][][]byte
|
||||
err error
|
||||
}
|
||||
|
||||
func NewAhocorasick(bitLength int) *Ahocorasick {
|
||||
return &Ahocorasick{
|
||||
matchers: make([]*ahocorasick.Matcher, bitLength),
|
||||
toBuild: make([][][]byte, bitLength),
|
||||
regexp: make([][]*regexp.Regexp, bitLength),
|
||||
}
|
||||
}
|
||||
func (n *Ahocorasick) AddSet(bitIndex int, patterns []string, typ consts.RoutingDomainKey) {
|
||||
if n.err != nil {
|
||||
return
|
||||
}
|
||||
switch typ {
|
||||
case consts.RoutingDomainKey_Full:
|
||||
for _, d := range patterns {
|
||||
n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte("^"+d+"$"))
|
||||
}
|
||||
case consts.RoutingDomainKey_Suffix:
|
||||
for _, d := range patterns {
|
||||
if strings.HasPrefix(d, ".") {
|
||||
// abc.example.com
|
||||
n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte(d+"$"))
|
||||
} else {
|
||||
// xxx.example.com
|
||||
n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte("."+d+"$"))
|
||||
// example.com
|
||||
n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte("^"+d+"$"))
|
||||
}
|
||||
}
|
||||
case consts.RoutingDomainKey_Keyword:
|
||||
for _, d := range patterns {
|
||||
n.toBuild[bitIndex] = append(n.toBuild[bitIndex], []byte(d))
|
||||
}
|
||||
case consts.RoutingDomainKey_Regex:
|
||||
for _, d := range patterns {
|
||||
r, err := regexp.Compile(d)
|
||||
if err != nil {
|
||||
n.err = fmt.Errorf("failed to compile regex: %v", d)
|
||||
return
|
||||
}
|
||||
n.regexp[bitIndex] = append(n.regexp[bitIndex], r)
|
||||
}
|
||||
default:
|
||||
n.err = fmt.Errorf("unknown RoutingDomainKey: %v", typ)
|
||||
return
|
||||
}
|
||||
}
|
||||
func (n *Ahocorasick) MatchDomainBitmap(domain string) (bitmap []uint32) {
|
||||
N := len(n.matchers) / 32
|
||||
if len(n.matchers)%32 != 0 {
|
||||
N++
|
||||
}
|
||||
bitmap = make([]uint32, N)
|
||||
// Domain should not contain ^ or $.
|
||||
if strings.ContainsAny(domain, "^$") {
|
||||
return bitmap
|
||||
}
|
||||
// Add magic chars as head and tail.
|
||||
domain = "^" + strings.ToLower(strings.TrimSuffix(domain, ".")) + "$"
|
||||
for _, i := range n.validIndexes {
|
||||
if hits := n.matchers[i].MatchThreadSafe([]byte(domain)); len(hits) > 0 {
|
||||
bitmap[i/32] |= 1 << (i % 32)
|
||||
}
|
||||
}
|
||||
// Regex matching is independent.
|
||||
for _, i := range n.validRegexpIndexes {
|
||||
if bitmap[i/32]&(1<<(i%32)) > 0 {
|
||||
// Already matched.
|
||||
continue
|
||||
}
|
||||
for _, r := range n.regexp[i] {
|
||||
if r.MatchString(domain) {
|
||||
bitmap[i/32] |= 1 << (i % 32)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return bitmap
|
||||
}
|
||||
func (n *Ahocorasick) Build() error {
|
||||
if n.err != nil {
|
||||
return n.err
|
||||
}
|
||||
n.validIndexes = make([]int, 0, len(n.toBuild)/8)
|
||||
for i, toBuild := range n.toBuild {
|
||||
if len(toBuild) == 0 {
|
||||
continue
|
||||
}
|
||||
n.matchers[i] = ahocorasick.NewMatcher(toBuild)
|
||||
n.validIndexes = append(n.validIndexes, i)
|
||||
}
|
||||
for i := range n.regexp {
|
||||
if len(n.regexp[i]) == 0 {
|
||||
continue
|
||||
}
|
||||
n.validRegexpIndexes = append(n.validRegexpIndexes, i)
|
||||
}
|
||||
// Release it.
|
||||
n.toBuild = nil
|
||||
return nil
|
||||
}
|
195
component/routing/domain_matcher/ahocorasick_succinctset.go
Normal file
195
component/routing/domain_matcher/ahocorasick_succinctset.go
Normal file
@ -0,0 +1,195 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
|
||||
*/
|
||||
|
||||
package domain_matcher
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/openacid/slim/encode"
|
||||
"github.com/openacid/slim/trie"
|
||||
"github.com/v2rayA/ahocorasick-domain"
|
||||
"github.com/v2rayA/dae/common/consts"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type AhocorasickSuccinctset struct {
|
||||
validAcIndexes []int
|
||||
validTrieIndexes []int
|
||||
validRegexpIndexes []int
|
||||
ac []*ahocorasick.Matcher
|
||||
trie []*trie.SlimTrie
|
||||
regexp [][]*regexp.Regexp
|
||||
|
||||
toBuildAc [][][]byte
|
||||
toBuildTrie [][]string
|
||||
err error
|
||||
}
|
||||
|
||||
func NewAhocorasickSuccinctset(bitLength int) *AhocorasickSuccinctset {
|
||||
return &AhocorasickSuccinctset{
|
||||
ac: make([]*ahocorasick.Matcher, bitLength),
|
||||
trie: make([]*trie.SlimTrie, bitLength),
|
||||
regexp: make([][]*regexp.Regexp, bitLength),
|
||||
toBuildAc: make([][][]byte, bitLength),
|
||||
toBuildTrie: make([][]string, bitLength),
|
||||
}
|
||||
}
|
||||
func (n *AhocorasickSuccinctset) AddSet(bitIndex int, patterns []string, typ consts.RoutingDomainKey) {
|
||||
if n.err != nil {
|
||||
return
|
||||
}
|
||||
switch typ {
|
||||
case consts.RoutingDomainKey_Full:
|
||||
for _, d := range patterns {
|
||||
n.toBuildTrie[bitIndex] = append(n.toBuildTrie[bitIndex], "^"+d+"$")
|
||||
}
|
||||
case consts.RoutingDomainKey_Suffix:
|
||||
for _, d := range patterns {
|
||||
if strings.HasPrefix(d, ".") {
|
||||
// abc.example.com
|
||||
n.toBuildTrie[bitIndex] = append(n.toBuildTrie[bitIndex], d+"$")
|
||||
// cannot match example.com
|
||||
} else {
|
||||
// xxx.example.com
|
||||
n.toBuildTrie[bitIndex] = append(n.toBuildTrie[bitIndex], "."+d+"$")
|
||||
// example.com
|
||||
n.toBuildTrie[bitIndex] = append(n.toBuildTrie[bitIndex], "^"+d+"$")
|
||||
// cannot match abcexample.com
|
||||
}
|
||||
}
|
||||
case consts.RoutingDomainKey_Keyword:
|
||||
// Only use ac automaton for "keyword" matching to save memory.
|
||||
for _, d := range patterns {
|
||||
n.toBuildAc[bitIndex] = append(n.toBuildAc[bitIndex], []byte(d))
|
||||
}
|
||||
case consts.RoutingDomainKey_Regex:
|
||||
for _, d := range patterns {
|
||||
r, err := regexp.Compile(d)
|
||||
if err != nil {
|
||||
n.err = fmt.Errorf("failed to compile regex: %v", d)
|
||||
return
|
||||
}
|
||||
n.regexp[bitIndex] = append(n.regexp[bitIndex], r)
|
||||
}
|
||||
default:
|
||||
n.err = fmt.Errorf("unknown RoutingDomainKey: %v", typ)
|
||||
return
|
||||
}
|
||||
}
|
||||
func (n *AhocorasickSuccinctset) MatchDomainBitmap(domain string) (bitmap []uint32) {
|
||||
N := len(n.ac) / 32
|
||||
if len(n.ac)%32 != 0 {
|
||||
N++
|
||||
}
|
||||
bitmap = make([]uint32, N)
|
||||
// Add magic chars as head and tail.
|
||||
domain = "^" + strings.ToLower(strings.TrimSuffix(domain, ".")) + "$"
|
||||
// Domain should consist of 'a'-'z' and '.' and '-'
|
||||
for _, b := range []byte(domain) {
|
||||
if !ahocorasick.IsValidChar(b) {
|
||||
return bitmap
|
||||
}
|
||||
}
|
||||
// Suffix matching.
|
||||
suffixTrieDomain := ToSuffixTrieString(domain)
|
||||
for _, i := range n.validTrieIndexes {
|
||||
if bitmap[i/32]&(1<<(i%32)) > 0 {
|
||||
// Already matched.
|
||||
continue
|
||||
}
|
||||
if _, ok := n.trie[i].Get(suffixTrieDomain); ok {
|
||||
bitmap[i/32] |= 1 << (i % 32)
|
||||
}
|
||||
}
|
||||
// Keyword matching.
|
||||
for _, i := range n.validAcIndexes {
|
||||
if bitmap[i/32]&(1<<(i%32)) > 0 {
|
||||
// Already matched.
|
||||
continue
|
||||
}
|
||||
if hits := n.ac[i].MatchThreadSafe([]byte(domain)); len(hits) > 0 {
|
||||
bitmap[i/32] |= 1 << (i % 32)
|
||||
}
|
||||
}
|
||||
// Regex matching.
|
||||
for _, i := range n.validRegexpIndexes {
|
||||
if bitmap[i/32]&(1<<(i%32)) > 0 {
|
||||
// Already matched.
|
||||
continue
|
||||
}
|
||||
for _, r := range n.regexp[i] {
|
||||
if r.MatchString(domain) {
|
||||
bitmap[i/32] |= 1 << (i % 32)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return bitmap
|
||||
}
|
||||
// ToSuffixTrieString converts s into the key form used by the suffix trie:
// a single trailing "$" (if present) is removed and the remaining bytes are
// returned in reverse order. The reversal is byte-wise, not rune-wise.
func ToSuffixTrieString(s string) string {
	// The end marker "$" carries no information once the string is reversed.
	reversed := []byte(strings.TrimSuffix(s, "$"))
	// Swap from both ends towards the middle.
	for lo, hi := 0, len(reversed)-1; lo < hi; lo, hi = lo+1, hi-1 {
		reversed[lo], reversed[hi] = reversed[hi], reversed[lo]
	}
	return string(reversed)
}
|
||||
func ToSuffixTrieStrings(s []string) []string {
|
||||
to := make([]string, len(s))
|
||||
for i := range s {
|
||||
to[i] = ToSuffixTrieString(s[i])
|
||||
}
|
||||
return to
|
||||
}
|
||||
func (n *AhocorasickSuccinctset) Build() (err error) {
|
||||
if n.err != nil {
|
||||
return n.err
|
||||
}
|
||||
n.validAcIndexes = make([]int, 0, len(n.toBuildAc)/8)
|
||||
n.validTrieIndexes = make([]int, 0, len(n.toBuildAc)/8)
|
||||
n.validRegexpIndexes = make([]int, 0, len(n.toBuildAc)/8)
|
||||
// Build AC automaton.
|
||||
for i, toBuild := range n.toBuildAc {
|
||||
if len(toBuild) == 0 {
|
||||
continue
|
||||
}
|
||||
n.ac[i], err = ahocorasick.NewMatcher(toBuild)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.validAcIndexes = append(n.validAcIndexes, i)
|
||||
}
|
||||
|
||||
// Build succinct trie.
|
||||
for i, toBuild := range n.toBuildTrie {
|
||||
if len(toBuild) == 0 {
|
||||
continue
|
||||
}
|
||||
toBuild = ToSuffixTrieStrings(toBuild)
|
||||
sort.Strings(toBuild)
|
||||
n.trie[i], err = trie.NewSlimTrie(encode.I8{}, toBuild, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.validTrieIndexes = append(n.validTrieIndexes, i)
|
||||
}
|
||||
|
||||
// Regexp.
|
||||
for i := range n.regexp {
|
||||
if len(n.regexp[i]) == 0 {
|
||||
continue
|
||||
}
|
||||
n.validRegexpIndexes = append(n.validRegexpIndexes, i)
|
||||
}
|
||||
|
||||
// Release unused data.
|
||||
n.toBuildAc = nil
|
||||
n.toBuildTrie = nil
|
||||
return nil
|
||||
}
|
@ -195,14 +195,14 @@ func BenchmarkGoRegexpNfa(b *testing.B) {
|
||||
runBenchmark(b, nfa)
|
||||
}
|
||||
|
||||
func BenchmarkAhocorasick(b *testing.B) {
|
||||
func BenchmarkAhocorasickSuccinctset(b *testing.B) {
|
||||
b.StopTimer()
|
||||
logrus.SetLevel(logrus.WarnLevel)
|
||||
simulatedDomainSet, err := getDomain()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
ahocorasick := NewAhocorasick(consts.MaxMatchSetLen)
|
||||
ahocorasick := NewAhocorasickSuccinctset(consts.MaxMatchSetLen)
|
||||
for _, domains := range simulatedDomainSet {
|
||||
ahocorasick.AddSet(domains.RuleIndex, domains.Domains, domains.Key)
|
||||
}
|
||||
|
Reference in New Issue
Block a user