mirror of
https://github.com/daeuniverse/dae.git
synced 2025-07-31 15:19:45 +07:00
fix: domain match
This commit is contained in:
185
pkg/trie/trie.go
Normal file
185
pkg/trie/trie.go
Normal file
@ -0,0 +1,185 @@
|
||||
// Package trie is modified from https://github.com/openacid/succinct/blob/loc100/sskv.go.
|
||||
package trie
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// Trie is a succinct, sorted and static string set impl with compacted trie as
|
||||
// storage. The space cost is about half that of the original data.
|
||||
//
|
||||
// # Implementation
|
||||
//
|
||||
// It stores sorted strings in a compacted trie(AKA prefix tree).
|
||||
// A trie node has at most 256 outgoing labels.
|
||||
// A label is just a single byte.
|
||||
// E.g., [ab, abc, abcd, axy, buv] is represented with a trie like the following:
|
||||
// (Numbers are node id)
|
||||
//
|
||||
// ^ -a-> 1 -b-> 3 $
|
||||
// | | `c-> 6 $
|
||||
// | | `d-> 9 $
|
||||
// | `x-> 4 -y-> 7 $
|
||||
// `b-> 2 -u-> 5 -v-> 8 $
|
||||
//
|
||||
// Internally it uses a packed []byte and a bitmap with `len([]byte)` bits to
|
||||
// describe the outgoing labels of a node:
|
||||
//
|
||||
// ^: ab 00
|
||||
// 1: bx 00
|
||||
// 2: u 0
|
||||
// 3: c 0
|
||||
// 4: y 0
|
||||
// 5: v 0
|
||||
// 6: d 0
|
||||
// 7: ø
|
||||
// 8: ø
|
||||
// 9: ø
|
||||
//
|
||||
// In storage it packs labels together and bitmaps joined with separator `1`:
|
||||
//
|
||||
// labels(ignore space): "ab bx u c y v d"
|
||||
// label bitmap: 0010010101010101111
|
||||
//
|
||||
// Finally leaf nodes are indicated by another bitmap `leaves`, in which a `1`
|
||||
// at i-th bit indicates the i-th node is a leaf:
|
||||
//
|
||||
// leaves: 0001001111
|
||||
type Trie struct {
	// leaves has bit i set when node i terminates a key.
	leaves []uint64
	// labelBitmap holds, for every node in breadth-first order, one 0 bit
	// per outgoing label followed by a single 1 bit that closes the node.
	labelBitmap []uint64
	// labels stores the label bytes of all nodes, packed in the same order
	// as the 0 bits of labelBitmap.
	labels []byte
	// ranks[w] is the number of 1 bits in labelBitmap before word w.
	ranks []int32
	// selects[k] is the bit index of the (64*k)-th 1 bit (counting from 0)
	// in labelBitmap.
	selects []int32
}
|
||||
|
||||
// NewTrie creates a new *Trie struct, from a slice of sorted strings.
|
||||
func NewTrie(keys []string) *Trie {
|
||||
|
||||
ss := &Trie{}
|
||||
lIdx := 0
|
||||
|
||||
type qElt struct{ s, e, col int }
|
||||
|
||||
queue := []qElt{{0, len(keys), 0}}
|
||||
|
||||
for i := 0; i < len(queue); i++ {
|
||||
elt := queue[i]
|
||||
|
||||
if elt.col == len(keys[elt.s]) {
|
||||
// a leaf node
|
||||
elt.s++
|
||||
setBit(&ss.leaves, i, 1)
|
||||
}
|
||||
|
||||
for j := elt.s; j < elt.e; {
|
||||
|
||||
frm := j
|
||||
|
||||
for ; j < elt.e && keys[j][elt.col] == keys[frm][elt.col]; j++ {
|
||||
}
|
||||
|
||||
queue = append(queue, qElt{frm, j, elt.col + 1})
|
||||
ss.labels = append(ss.labels, keys[frm][elt.col])
|
||||
setBit(&ss.labelBitmap, lIdx, 0)
|
||||
lIdx++
|
||||
}
|
||||
|
||||
setBit(&ss.labelBitmap, lIdx, 1)
|
||||
lIdx++
|
||||
}
|
||||
|
||||
ss.init()
|
||||
return ss
|
||||
}
|
||||
|
||||
// HasPrefix query for a word and return whether a prefix of the word is in the Trie.
|
||||
func (ss *Trie) HasPrefix(word string) bool {
|
||||
|
||||
nodeId, bmIdx := 0, 0
|
||||
|
||||
for i := 0; i < len(word); i++ {
|
||||
c := word[i]
|
||||
for ; ; bmIdx++ {
|
||||
if getBit(ss.labelBitmap, bmIdx) != 0 {
|
||||
// no more labels in this node
|
||||
return false
|
||||
}
|
||||
|
||||
if ss.labels[bmIdx-nodeId] == c {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// go to next level
|
||||
|
||||
nodeId = countZeros(ss.labelBitmap, ss.ranks, bmIdx+1)
|
||||
if getBit(ss.leaves, nodeId) != 0 {
|
||||
return true
|
||||
}
|
||||
bmIdx = selectIthOne(ss.labelBitmap, ss.ranks, ss.selects, nodeId-1) + 1
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// setBit ORs v into bit i of the bitmap, first appending zero words until the
// word that holds bit i exists. Callers in this file pass v as 0 or 1; with
// v == 0 no bit changes and the call only makes sure the word is allocated.
func setBit(bm *[]uint64, i int, v int) {
	word, mask := i>>6, uint64(v)<<uint(i&63)

	grown := *bm
	for len(grown) <= word {
		grown = append(grown, 0)
	}
	grown[word] |= mask
	*bm = grown
}
|
||||
|
||||
// getBit isolates bit i of the bitmap. The result is zero when the bit is
// clear and the bit's own mask (1 << (i % 64)) when it is set, so callers
// compare it against zero rather than one.
func getBit(bm []uint64, i int) uint64 {
	word := bm[i>>6]
	mask := uint64(1) << uint(i&63)
	return word & mask
}
|
||||
|
||||
// init builds pre-calculated cache to speed up rank() and select()
|
||||
func (ss *Trie) init() {
|
||||
ss.ranks = []int32{0}
|
||||
for i := 0; i < len(ss.labelBitmap); i++ {
|
||||
n := bits.OnesCount64(ss.labelBitmap[i])
|
||||
ss.ranks = append(ss.ranks, ss.ranks[len(ss.ranks)-1]+int32(n))
|
||||
}
|
||||
|
||||
ss.selects = []int32{}
|
||||
n := 0
|
||||
for i := 0; i < len(ss.labelBitmap)<<6; i++ {
|
||||
z := int(ss.labelBitmap[i>>6]>>uint(i&63)) & 1
|
||||
if z == 1 && n&63 == 0 {
|
||||
ss.selects = append(ss.selects, int32(i))
|
||||
}
|
||||
n += z
|
||||
}
|
||||
}
|
||||
|
||||
// countZeros returns how many "0" bits the bitmap has in positions [0, i),
// i.e. strictly before the i-th bit. ranks must hold, per word, the number of
// "1" bits in all preceding words.
// E.g.:
//
//	countZeros("010010", 4) == 3
//	//          012345
func countZeros(bm []uint64, ranks []int32, i int) int {
	word := i >> 6
	// Mask selecting the bits of the current word that lie below position i.
	below := uint64(1)<<uint(i&63) - 1
	ones := int(ranks[word]) + bits.OnesCount64(bm[word]&below)
	return i - ones
}
|
||||
|
||||
// selectIthOne returns the bit index of the i-th "1" (counting from 0) in the
// bitmap. The selects table picks the word to start scanning from and the
// ranks table tells how many "1" bits precede that word. It panics with
// "no more ones" if the scan runs off the end of the bitmap.
// E.g.:
//
//	selectIthOne("010010", 1) == 4
//	//            012345
func selectIthOne(bm []uint64, ranks, selects []int32, i int) int {
	// Word that contains the sampled "1" at or before the one we want.
	startWord := int(selects[i>>6]&^63) >> 6
	// How many "1" bits still have to be passed, counted from startWord.
	remaining := i - int(ranks[startWord])

	for w := startWord; w < len(bm); w++ {
		word, offset := bm[w], 0
		for word != 0 {
			if word&1 == 1 {
				remaining--
			}
			if remaining < 0 {
				return w<<6 + offset
			}
			// Jump to the next set bit above bit 0; when there is none the
			// shift distance is 64 and the word becomes zero.
			skip := bits.TrailingZeros64(word &^ 1)
			word >>= uint(skip)
			offset += skip
		}
	}
	panic("no more ones")
}
|
119
pkg/trie/trie_test.go
Normal file
119
pkg/trie/trie_test.go
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
* Copyright (c) 2023, v2rayA Organization <team@v2raya.org>
|
||||
*/
|
||||
|
||||
package trie
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTrie(t *testing.T) {
|
||||
trie := NewTrie([]string{
|
||||
"moc.cbatnetnoc.",
|
||||
"moc.cbatnetnoc^",
|
||||
"nc.",
|
||||
"ua.moc.cbci.",
|
||||
"ua.moc.cbci^",
|
||||
"ua.moc.duolcababila.",
|
||||
"ua.moc.duolcababila^",
|
||||
"udiab.",
|
||||
"udiab^",
|
||||
"ue.cbci.",
|
||||
"ue.cbci^",
|
||||
"uhos.",
|
||||
"uhos^",
|
||||
"ul.cbci.",
|
||||
"ul.cbci^",
|
||||
"ur.dj.",
|
||||
"ur.dj^",
|
||||
"ur.llamt.",
|
||||
"ur.llamt^",
|
||||
"ur.sserpxeila.",
|
||||
"ur.sserpxeila^",
|
||||
"ur.wocsomcbci.",
|
||||
"ur.wocsomcbci^",
|
||||
"vt.32b.",
|
||||
"vt.32b^",
|
||||
"vt.akoaix.",
|
||||
"vt.akoaix^",
|
||||
"vt.eesia.",
|
||||
"vt.eesia^",
|
||||
"vt.eiq.",
|
||||
"vt.eiq^",
|
||||
"vt.gca.",
|
||||
"vt.gca^",
|
||||
"vt.ilibilib.",
|
||||
"vt.ilibilib^",
|
||||
"vt.iqnahz.",
|
||||
"vt.iqnahz^",
|
||||
"vt.ixiy.",
|
||||
"vt.ixiy^",
|
||||
"vt.low.",
|
||||
"vt.low^",
|
||||
"vt.nc361.",
|
||||
"vt.nc361^",
|
||||
"vt.obihzgnahs.",
|
||||
"vt.obihzgnahs^",
|
||||
"vt.ogmi.",
|
||||
"vt.ogmi^",
|
||||
"vt.spp.",
|
||||
"vt.spp^",
|
||||
"vt.uohsuhc.",
|
||||
"vt.uohsuhc^",
|
||||
"vt.uyuod.",
|
||||
"vt.uyuod^",
|
||||
"vt.vtig.",
|
||||
"vt.vtig^",
|
||||
"vt.vtnh.",
|
||||
"vt.vtnh^",
|
||||
"vt.zcbj.",
|
||||
"vt.zcbj^",
|
||||
"wk.moc.cbci.",
|
||||
"wk.moc.cbci^",
|
||||
"wt.moc.duolcababila.",
|
||||
"wt.moc.duolcababila^",
|
||||
"wt.moc.levarthh.",
|
||||
"wt.moc.levarthh^",
|
||||
"xc.f.",
|
||||
"xc.f^",
|
||||
"xm.moc.cbci.",
|
||||
"xm.moc.cbci^",
|
||||
"yapila.",
|
||||
"yapila^",
|
||||
"yl.lacisum.",
|
||||
"yl.lacisum^",
|
||||
"ym.moc.duolcababila.",
|
||||
"ym.moc.duolcababila^",
|
||||
"ym.pirtc.",
|
||||
"ym.pirtc^",
|
||||
"zib.anihcbmc.",
|
||||
"zib.anihcbmc^",
|
||||
"zib.duolcsndz.",
|
||||
"zib.duolcsndz^",
|
||||
"zib.fmc.",
|
||||
"zib.fmc^",
|
||||
"zk.ytamlacbci.",
|
||||
"zk.ytamlacbci^",
|
||||
})
|
||||
if !(trie.HasPrefix("nc.tset^") == true) {
|
||||
t.Fatal("^test.cn")
|
||||
}
|
||||
if !(trie.HasPrefix("nc^") == false) {
|
||||
t.Fatal("^cn")
|
||||
}
|
||||
if !(trie.HasPrefix("nc.") == true) {
|
||||
t.Fatal(".cn")
|
||||
}
|
||||
if !(trie.HasPrefix("nc.^") == true) {
|
||||
t.Fatal("^.cn")
|
||||
}
|
||||
if !(trie.HasPrefix("n") == false) {
|
||||
t.Fatal("n")
|
||||
}
|
||||
if !(trie.HasPrefix("n^") == false) {
|
||||
t.Fatal("^n")
|
||||
}
|
||||
if !(trie.HasPrefix("moc.cbatnetnoc^") == true) {
|
||||
t.Fatal("contentabc.com")
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user