Skip to content

Commit 7953fdc

Browse files
committed
Add a CaseSensitive parameter to synonym dictionaries.
Simon Riggs
1 parent 2fc2795 commit 7953fdc

File tree

2 files changed

+34
-8
lines changed

2 files changed

+34
-8
lines changed

doc/src/sgml/textsearch.sgml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ -->
22

33
<chapter id="textsearch">
44
<title id="textsearch-title">Full Text Search</title>
@@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The');
22092209
dictionary can be used to overcome linguistic problems, for example, to
22102210
prevent an English stemmer dictionary from reducing the word 'Paris' to
22112211
'pari'. It is enough to have a <literal>Paris paris</literal> line in the
2212-
synonym dictionary and put it before the <literal>english_stem</> dictionary:
2212+
synonym dictionary and put it before the <literal>english_stem</>
2213+
dictionary. For example:
22132214

22142215
<programlisting>
22152216
SELECT * FROM ts_debug('english', 'Paris');
@@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris');
22422243
<productname>PostgreSQL</> installation's shared-data directory).
22432244
The file format is just one line
22442245
per word to be substituted, with the word followed by its synonym,
2245-
separated by white space. Blank lines and trailing spaces are ignored,
2246-
and upper case is folded to lower case.
2246+
separated by white space. Blank lines and trailing spaces are ignored.
22472247
</para>
22482248

2249+
<para>
2250+
The <literal>synonym</> template also has an optional parameter
2251+
<literal>CaseSensitive</>, which defaults to <literal>false</>. When
2252+
<literal>CaseSensitive</> is <literal>false</>, words in the synonym file
2253+
are folded to lower case, as are input tokens. When it is
2254+
<literal>true</>, words and tokens are not folded to lower case,
2255+
but are compared as-is.
2256+
</para>
22492257
</sect2>
22502258

22512259
<sect2 id="textsearch-thesaurus">

src/backend/tsearch/dict_synonym.c

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -30,6 +30,7 @@ typedef struct
3030
{
3131
int len; /* length of syn array */
3232
Syn *syn;
33+
bool case_sensitive;
3334
} DictSyn;
3435

3536
/*
@@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
7778
DictSyn *d;
7879
ListCell *l;
7980
char *filename = NULL;
81+
bool case_sensitive = false;
8082
FILE *fin;
8183
char *starti,
8284
*starto,
@@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
9092

9193
if (pg_strcasecmp("Synonyms", defel->defname) == 0)
9294
filename = defGetString(defel);
95+
else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
96+
case_sensitive = defGetBoolean(defel);
9397
else
9498
ereport(ERROR,
9599
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS)
154158
}
155159
}
156160

157-
d->syn[cur].in = lowerstr(starti);
158-
d->syn[cur].out = lowerstr(starto);
161+
if (case_sensitive)
162+
{
163+
d->syn[cur].in = pstrdup(starti);
164+
d->syn[cur].out = pstrdup(starto);
165+
}
166+
else
167+
{
168+
d->syn[cur].in = lowerstr(starti);
169+
d->syn[cur].out = lowerstr(starto);
170+
}
159171

160172
cur++;
161173

@@ -168,6 +180,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
168180
d->len = cur;
169181
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
170182

183+
d->case_sensitive = case_sensitive;
184+
171185
PG_RETURN_POINTER(d);
172186
}
173187

@@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
185199
if (len <= 0 || d->len <= 0)
186200
PG_RETURN_POINTER(NULL);
187201

188-
key.in = lowerstr_with_len(in, len);
202+
if (d->case_sensitive)
203+
key.in = pnstrdup(in, len);
204+
else
205+
key.in = lowerstr_with_len(in, len);
206+
189207
key.out = NULL;
190208

191209
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);

0 commit comments

Comments
 (0)