Skip to content

Commit edda792

Browse files
authored
feat: add option to disable stop dict (#891)
1 parent a9ca99b commit edda792

File tree

5 files changed

+49
-33
lines changed

5 files changed

+49
-33
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ require (
1313
github.com/gin-contrib/cors v1.4.0
1414
github.com/gin-contrib/pprof v1.4.0
1515
github.com/gin-gonic/gin v1.9.0
16-
github.com/go-ego/gse v0.70.2
16+
github.com/go-ego/gse v0.80.2
1717
github.com/goccy/go-json v0.10.0
1818
github.com/joho/godotenv v1.4.0
1919
github.com/prometheus/client_golang v1.15.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm
104104
github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk=
105105
github.com/gin-gonic/gin v1.9.0 h1:OjyFBKICoexlu99ctXNR2gg+c5pKrKMuyjgARg9qeY8=
106106
github.com/gin-gonic/gin v1.9.0/go.mod h1:W1Me9+hsUSyj3CePGrd1/QrKJMSJ1Tu/0hFEH89961k=
107-
github.com/go-ego/gse v0.70.2 h1:y2UMOHJMtI+0b2GjxTtQfKON5DMmlyX1hOQHTo8UVVs=
108-
github.com/go-ego/gse v0.70.2/go.mod h1:kesekpZfcFQ/kwd9b27VZHUOH5dQUjaaQUZ4OGt4Hj4=
107+
github.com/go-ego/gse v0.80.2 h1:3LRfkaBuwlsHsmkOZvnhTcsYPXUAhiP06Sqcid7mO1M=
108+
github.com/go-ego/gse v0.80.2/go.mod h1:kesekpZfcFQ/kwd9b27VZHUOH5dQUjaaQUZ4OGt4Hj4=
109109
github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA=
110110
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
111111
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=

pkg/bluge/analysis/lang/chs/gse.go

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020

2121
"github.com/blugelabs/bluge/analysis"
2222
"github.com/go-ego/gse"
23+
"github.com/rs/zerolog/log"
2324

2425
"github.com/zincsearch/zincsearch/pkg/bluge/analysis/lang/chs/analyzer"
2526
"github.com/zincsearch/zincsearch/pkg/bluge/analysis/lang/chs/token"
@@ -51,36 +52,46 @@ var seg *gse.Segmenter
5152

5253
func init() {
5354
seg = new(gse.Segmenter)
54-
enable := config.Global.Plugin.GSE.Enable // true / false
55-
embed := config.Global.Plugin.GSE.DictEmbed // small / big
55+
enable := config.Global.Plugin.GSE.Enable // true / false
56+
enableStop := config.Global.Plugin.GSE.EnableStop // true / false
57+
embed := config.Global.Plugin.GSE.DictEmbed // small / big
5658
embed = strings.ToUpper(embed)
57-
loadDict(enable, embed)
59+
loadDict(enable, enableStop, embed)
5860
}
5961

60-
func loadDict(enable bool, embed string) {
62+
func loadDict(enable, enableStop bool, embed string) {
6163
if enable {
64+
// load default dict
6265
if embed == "BIG" {
6366
_ = seg.LoadDictEmbed("zh_s")
64-
_ = seg.LoadStopEmbed()
67+
if enableStop {
68+
_ = seg.LoadStopEmbed()
69+
}
6570
} else {
6671
_ = seg.LoadDictStr(_dictCHS)
67-
_ = seg.LoadStopStr(_dictStop)
72+
if enableStop {
73+
_ = seg.LoadStopStr(_dictStop)
74+
}
75+
}
76+
// load user dict
77+
dataPath := config.Global.Plugin.GSE.DictPath
78+
userDict := dataPath + "/user.txt"
79+
log.Info().Msgf("Loading Gse user dict... %s", userDict)
80+
if ok, _ := zutils.IsExist(userDict); ok {
81+
_ = seg.LoadDict(userDict)
82+
}
83+
stopDict := dataPath + "/stop.txt"
84+
log.Info().Msgf("Loading Gse user stop... %s", stopDict)
85+
if ok, _ := zutils.IsExist(stopDict); ok {
86+
_ = seg.LoadStop(stopDict)
6887
}
6988
} else {
89+
// load empty dict
7090
_ = seg.LoadDictStr(`zinc`)
71-
_ = seg.LoadStopStr(_dictStop)
91+
if enableStop {
92+
_ = seg.LoadStopStr(_dictStop)
93+
}
7294
}
7395
seg.Load = true
7496
seg.SkipLog = true
75-
76-
// load user dict
77-
dataPath := config.Global.Plugin.GSE.DictPath
78-
userDict := dataPath + "/user.txt"
79-
if ok, _ := zutils.IsExist(userDict); ok {
80-
_ = seg.LoadDict(userDict)
81-
}
82-
stopDict := dataPath + "/stop.txt"
83-
if ok, _ := zutils.IsExist(stopDict); ok {
84-
_ = seg.LoadStop(stopDict)
85-
}
8697
}

pkg/bluge/analysis/lang/chs/gse_test.go

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ import (
2828

2929
func TestLoadDict(t *testing.T) {
3030
type args struct {
31-
enable bool
32-
embed string
31+
enable bool
32+
enableStop bool
33+
embed string
3334
}
3435
tests := []struct {
3536
name string
@@ -38,22 +39,25 @@ func TestLoadDict(t *testing.T) {
3839
{
3940
name: "enable=false,embed=small",
4041
args: args{
41-
enable: false,
42-
embed: "SMALL",
42+
enable: false,
43+
enableStop: false,
44+
embed: "SMALL",
4345
},
4446
},
4547
{
4648
name: "enable=true,embed=small",
4749
args: args{
48-
enable: true,
49-
embed: "SMALL",
50+
enable: true,
51+
enableStop: true,
52+
embed: "SMALL",
5053
},
5154
},
5255
{
5356
name: "enable=true,embed=big",
5457
args: args{
55-
enable: true,
56-
embed: "BIG",
58+
enable: true,
59+
enableStop: true,
60+
embed: "BIG",
5761
},
5862
},
5963
}
@@ -69,7 +73,7 @@ func TestLoadDict(t *testing.T) {
6973

7074
for _, tt := range tests {
7175
t.Run(tt.name, func(t *testing.T) {
72-
loadDict(tt.args.enable, tt.args.embed)
76+
loadDict(tt.args.enable, tt.args.enableStop, tt.args.embed)
7377
})
7478
}
7579

pkg/config/config.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,10 @@ type elasticsearch struct {
9393
}
9494

9595
type gse struct {
96-
Enable bool `env:"ZINC_PLUGIN_GSE_ENABLE,default=false"`
97-
DictEmbed string `env:"ZINC_PLUGIN_GSE_DICT_EMBED,default=small"`
98-
DictPath string `env:"ZINC_PLUGIN_GSE_DICT_PATH,default=./plugins/gse/dict"`
96+
Enable bool `env:"ZINC_PLUGIN_GSE_ENABLE,default=false"`
97+
EnableStop bool `env:"ZINC_PLUGIN_GSE_ENABLE_STOP,default=true"`
98+
DictEmbed string `env:"ZINC_PLUGIN_GSE_DICT_EMBED,default=small"`
99+
DictPath string `env:"ZINC_PLUGIN_GSE_DICT_PATH,default=./plugins/gse/dict"`
99100
}
100101

101102
var Global = new(config)

0 commit comments

Comments
 (0)