Skip to content

Commit 3fa4715

Browse files
committed
Fix not-terribly-safe coding in NIImportOOAffixes() and NIImportAffixes().
There were two places in spell.c that supposed that they could search for a location in a string produced by lowerstr() and then transpose the offset into the original string. But this fails completely if lowerstr() transforms any characters into characters of different byte length, as can happen in Turkish UTF8 for instance. We'd added some comments about this coding in commit 51e78ab, but failed to realize that it was not merely confusing but wrong. Coverity complained about this code years ago, but in such an opaque fashion that nobody understood what it was on about. I'm not entirely sure that this issue *is* what it's on about, actually, but perhaps this patch will shut it up -- and in any case the problem is clear. Back-patch to all supported branches.
1 parent 6192083 commit 3fa4715

File tree

1 file changed

+22
-13
lines changed

1 file changed

+22
-13
lines changed

src/backend/tsearch/spell.c

+22-13
Original file line number | Diff line number | Diff line change
@@ -169,6 +169,19 @@ findchar(char *str, int c)
169169
return NULL;
170170
}
171171

172+
/*
 * Walk through str, advancing by pg_mblen(str) bytes at each step, and
 * return a pointer to the first position at which t_iseq() matches either
 * c1 or c2.  Returns NULL if the terminating NUL is reached with no match.
 *
 * The returned pointer points into str itself, not into a copy.
 */
static char *
173+
findchar2(char *str, int c1, int c2)
174+
{
175+
while (*str)
176+
{
177+
if (t_iseq(str, c1) || t_iseq(str, c2))
178+
return str;
179+
str += pg_mblen(str);
180+
}
181+
182+
return NULL;
183+
}
184+
172185

173186
/* backward string compare for suffix tree operations */
174187
static int
@@ -856,22 +869,20 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
856869

857870
if (strlen(sflag) != 1 || flag != *sflag || flag == 0)
858871
goto nextline;
859-
prepl = lowerstr_ctx(Conf, repl);
860-
/* Find position of '/' in lowercased string "prepl" */
861-
if ((ptr = strchr(prepl, '/')) != NULL)
872+
/* Get flags after '/' (flags are case sensitive) */
873+
if ((ptr = strchr(repl, '/')) != NULL)
862874
{
863-
/*
864-
* Here we use non-lowercased string "repl". We need position
865-
* of '/' in "repl".
866-
*/
867-
*ptr = '\0';
868-
ptr = repl + (ptr - prepl) + 1;
875+
ptr++;
869876
while (*ptr)
870877
{
871878
aflg |= Conf->flagval[*(unsigned char *) ptr];
872879
ptr++;
873880
}
874881
}
882+
/* Get lowercased version of string before '/' */
883+
prepl = lowerstr_ctx(Conf, repl);
884+
if ((ptr = strchr(prepl, '/')) != NULL)
885+
*ptr = '\0';
875886
pfind = lowerstr_ctx(Conf, find);
876887
pmask = lowerstr_ctx(Conf, mask);
877888
if (t_iseq(find, '0'))
@@ -939,12 +950,10 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
939950

940951
if (STRNCMP(pstr, "compoundwords") == 0)
941952
{
942-
/* Find position in lowercased string "pstr" */
943-
s = findchar(pstr, 'l');
953+
/* Find case-insensitive L flag in non-lowercased string */
954+
s = findchar2(recoded, 'l', 'L');
944955
if (s)
945956
{
946-
/* Here we use non-lowercased string "recoded" */
947-
s = recoded + (s - pstr);
948957
while (*s && !t_isspace(s))
949958
s += pg_mblen(s);
950959
while (*s && t_isspace(s))

0 commit comments

Comments
 (0)