Skip to content

Commit c47de57

Browse files
committed
wildcard analyzer
1 parent 3d30468 commit c47de57

File tree

6 files changed

+196
-2
lines changed

6 files changed

+196
-2
lines changed

core/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,6 @@ public enum AnalyzerType {
4040
collation,
4141
classification,
4242
nearest_neighbors,
43-
minhash
43+
minhash,
44+
wildcard
4445
}

core/src/main/java/com/arangodb/entity/arangosearch/analyzer/SearchAnalyzer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@
5353
@JsonSubTypes.Type(name = "collation", value = CollationAnalyzer.class),
5454
@JsonSubTypes.Type(name = "classification", value = ClassificationAnalyzer.class),
5555
@JsonSubTypes.Type(name = "nearest_neighbors", value = NearestNeighborsAnalyzer.class),
56-
@JsonSubTypes.Type(name = "minhash", value = MinHashAnalyzer.class)
56+
@JsonSubTypes.Type(name = "minhash", value = MinHashAnalyzer.class),
57+
@JsonSubTypes.Type(name = "wildcard", value = WildcardAnalyzer.class)
5758
})
5859
public abstract class SearchAnalyzer {
5960
private String name;
core/src/main/java/com/arangodb/entity/arangosearch/analyzer/WildcardAnalyzer.java

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import com.arangodb.entity.arangosearch.AnalyzerType;
25+
26+
import java.util.Objects;
27+
28+
/**
29+
* An Analyzer that creates n-grams to enable fast partial matching for wildcard queries if you have large string
30+
* values, especially if you want to search for suffixes or substrings in the middle of strings (infixes) as opposed to
31+
* prefixes.
32+
* It can apply an Analyzer of your choice before creating the n-grams, for example, to normalize text for
33+
* case-insensitive and accent-insensitive search.
34+
*
35+
* @author Michele Rastelli
36+
* @see <a href= "https://docs.arangodb.com/3.12/index-and-search/analyzers/#wildcard">API Documentation</a>
37+
*/
38+
public final class WildcardAnalyzer extends SearchAnalyzer {
39+
private WildcardAnalyzerProperties properties;
40+
41+
public WildcardAnalyzer() {
42+
setType(AnalyzerType.wildcard);
43+
}
44+
45+
public WildcardAnalyzerProperties getProperties() {
46+
return properties;
47+
}
48+
49+
public void setProperties(WildcardAnalyzerProperties properties) {
50+
this.properties = properties;
51+
}
52+
53+
@Override
54+
public boolean equals(Object o) {
55+
if (this == o) return true;
56+
if (o == null || getClass() != o.getClass()) return false;
57+
if (!super.equals(o)) return false;
58+
WildcardAnalyzer that = (WildcardAnalyzer) o;
59+
return Objects.equals(properties, that.properties);
60+
}
61+
62+
@Override
63+
public int hashCode() {
64+
return Objects.hash(super.hashCode(), properties);
65+
}
66+
}
core/src/main/java/com/arangodb/entity/arangosearch/analyzer/WildcardAnalyzerProperties.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* DISCLAIMER
3+
*
4+
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
* Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
*/
20+
21+
package com.arangodb.entity.arangosearch.analyzer;
22+
23+
24+
import java.util.Objects;
25+
26+
/**
27+
* @author Michele Rastelli
28+
*/
29+
public final class WildcardAnalyzerProperties {
30+
31+
private Integer ngramSize;
32+
private SearchAnalyzer analyzer;
33+
34+
/**
35+
* @return unsigned integer for the n-gram length, needs to be at least 2
36+
*/
37+
public Integer getNgramSize() {
38+
return ngramSize;
39+
}
40+
41+
/**
42+
* @param ngramSize unsigned integer for the n-gram length, needs to be at least 2
43+
*/
44+
public void setNgramSize(Integer ngramSize) {
45+
this.ngramSize = ngramSize;
46+
}
47+
48+
public SearchAnalyzer getAnalyzer() {
49+
return analyzer;
50+
}
51+
52+
public void setAnalyzer(SearchAnalyzer analyzer) {
53+
this.analyzer = analyzer;
54+
}
55+
56+
@Override
57+
public boolean equals(Object o) {
58+
if (this == o) return true;
59+
if (o == null || getClass() != o.getClass()) return false;
60+
WildcardAnalyzerProperties that = (WildcardAnalyzerProperties) o;
61+
return Objects.equals(ngramSize, that.ngramSize) && Objects.equals(analyzer, that.analyzer);
62+
}
63+
64+
@Override
65+
public int hashCode() {
66+
return Objects.hash(ngramSize, analyzer);
67+
}
68+
}

driver/src/test/java/com/arangodb/ArangoSearchAsyncTest.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,35 @@ void MinHashAnalyzer(ArangoDatabaseAsync db) throws ExecutionException, Interrup
10341034
createGetAndDeleteTypedAnalyzer(db, analyzer);
10351035
}
10361036

1037+
@ParameterizedTest
1038+
@MethodSource("asyncDbs")
1039+
void WildcardAnalyzer(ArangoDatabaseAsync db) throws ExecutionException, InterruptedException {
1040+
assumeTrue(isAtLeastVersion(3, 12));
1041+
1042+
NormAnalyzerProperties properties = new NormAnalyzerProperties();
1043+
properties.setLocale("ru");
1044+
properties.setAnalyzerCase(SearchAnalyzerCase.lower);
1045+
properties.setAccent(true);
1046+
1047+
NormAnalyzer normAnalyzer = new NormAnalyzer();
1048+
normAnalyzer.setProperties(properties);
1049+
1050+
WildcardAnalyzerProperties wildcardProperties = new WildcardAnalyzerProperties();
1051+
wildcardProperties.setNgramSize(3);
1052+
wildcardProperties.setAnalyzer(normAnalyzer);
1053+
1054+
Set<AnalyzerFeature> features = new HashSet<>();
1055+
features.add(AnalyzerFeature.frequency);
1056+
features.add(AnalyzerFeature.position);
1057+
1058+
WildcardAnalyzer wildcardAnalyzer = new WildcardAnalyzer();
1059+
wildcardAnalyzer.setName("test-" + UUID.randomUUID());
1060+
wildcardAnalyzer.setProperties(wildcardProperties);
1061+
wildcardAnalyzer.setFeatures(features);
1062+
1063+
createGetAndDeleteTypedAnalyzer(db, wildcardAnalyzer);
1064+
}
1065+
10371066
@ParameterizedTest
10381067
@MethodSource("asyncDbs")
10391068
void offsetFeature(ArangoDatabaseAsync db) throws ExecutionException, InterruptedException {

driver/src/test/java/com/arangodb/ArangoSearchTest.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,6 +1033,35 @@ void MinHashAnalyzer(ArangoDatabase db) {
10331033
createGetAndDeleteTypedAnalyzer(db, analyzer);
10341034
}
10351035

1036+
@ParameterizedTest
1037+
@MethodSource("dbs")
1038+
void WildcardAnalyzer(ArangoDatabase db) {
1039+
assumeTrue(isAtLeastVersion(3, 12));
1040+
1041+
NormAnalyzerProperties properties = new NormAnalyzerProperties();
1042+
properties.setLocale("ru");
1043+
properties.setAnalyzerCase(SearchAnalyzerCase.lower);
1044+
properties.setAccent(true);
1045+
1046+
NormAnalyzer normAnalyzer = new NormAnalyzer();
1047+
normAnalyzer.setProperties(properties);
1048+
1049+
WildcardAnalyzerProperties wildcardProperties = new WildcardAnalyzerProperties();
1050+
wildcardProperties.setNgramSize(3);
1051+
wildcardProperties.setAnalyzer(normAnalyzer);
1052+
1053+
Set<AnalyzerFeature> features = new HashSet<>();
1054+
features.add(AnalyzerFeature.frequency);
1055+
features.add(AnalyzerFeature.position);
1056+
1057+
WildcardAnalyzer wildcardAnalyzer = new WildcardAnalyzer();
1058+
wildcardAnalyzer.setName("test-" + UUID.randomUUID());
1059+
wildcardAnalyzer.setProperties(wildcardProperties);
1060+
wildcardAnalyzer.setFeatures(features);
1061+
1062+
createGetAndDeleteTypedAnalyzer(db, wildcardAnalyzer);
1063+
}
1064+
10361065
@ParameterizedTest
10371066
@MethodSource("dbs")
10381067
void offsetFeature(ArangoDatabase db) {

0 commit comments

Comments
 (0)