Skip to content

Commit 09e9619

Browse files
committed
Don't leak compiled regex(es) when an ispell cache entry is dropped.
The text search cache mechanisms assume that we can clean up an invalidated dictionary cache entry simply by resetting the associated long-lived memory context. However, that does not work for ispell affixes that make use of regular expressions, because the regex library deals in plain old malloc. Hence, we leaked compiled regex(es) any time we dropped such a cache entry. That could quickly add up, since even a fairly trivial regex can use up tens of kB, and a large one can eat megabytes.

Add a memory context callback to ensure that a regex gets freed when its owning cache entry is cleared.

Found via valgrind testing. This problem is ancient, so back-patch to all supported branches.

Discussion: https://postgr.es/m/3816764.1616104288@sss.pgh.pa.us
1 parent 19b32bd commit 09e9619

File tree

2 files changed

+42
-5
lines changed

2 files changed

+42
-5
lines changed

src/backend/tsearch/spell.c

+30-4
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,17 @@ FindWord(IspellDict *Conf, const char *word, char *affixflag, int flag)
654654
return 0;
655655
}
656656

657+
/*
658+
* Context reset/delete callback for a regular expression used in an affix
659+
*/
660+
static void
661+
regex_affix_deletion_callback(void *arg)
662+
{
663+
aff_regex_struct *pregex = (aff_regex_struct *) arg;
664+
665+
pg_regfree(&(pregex->regex));
666+
}
667+
657668
/*
658669
* Adds a new affix rule to the Affix field.
659670
*
@@ -716,6 +727,7 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
716727
int err;
717728
pg_wchar *wmask;
718729
char *tmask;
730+
aff_regex_struct *pregex;
719731

720732
Affix->issimple = 0;
721733
Affix->isregis = 0;
@@ -729,18 +741,32 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
729741
wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
730742
wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
731743

732-
err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
744+
/*
745+
* The regex engine stores its stuff using malloc not palloc, so we
746+
* must arrange to explicitly clean up the regex when the dictionary's
747+
* context is cleared. That means the regex_t has to stay in a fixed
748+
* location within the context; we can't keep it directly in the AFFIX
749+
* struct, since we may sort and resize the array of AFFIXes.
750+
*/
751+
Affix->reg.pregex = pregex = palloc(sizeof(aff_regex_struct));
752+
753+
err = pg_regcomp(&(pregex->regex), wmask, wmasklen,
733754
REG_ADVANCED | REG_NOSUB,
734755
DEFAULT_COLLATION_OID);
735756
if (err)
736757
{
737758
char errstr[100];
738759

739-
pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
760+
pg_regerror(err, &(pregex->regex), errstr, sizeof(errstr));
740761
ereport(ERROR,
741762
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
742763
errmsg("invalid regular expression: %s", errstr)));
743764
}
765+
766+
pregex->mcallback.func = regex_affix_deletion_callback;
767+
pregex->mcallback.arg = (void *) pregex;
768+
MemoryContextRegisterResetCallback(CurrentMemoryContext,
769+
&pregex->mcallback);
744770
}
745771

746772
Affix->flagflags = flagflags;
@@ -2119,7 +2145,6 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww
21192145
}
21202146
else
21212147
{
2122-
int err;
21232148
pg_wchar *data;
21242149
size_t data_len;
21252150
int newword_len;
@@ -2129,7 +2154,8 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww
21292154
data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
21302155
data_len = pg_mb2wchar_with_len(newword, data, newword_len);
21312156

2132-
if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
2157+
if (pg_regexec(&(Affix->reg.pregex->regex), data, data_len,
2158+
0, NULL, 0, NULL, 0) == REG_OKAY)
21332159
{
21342160
pfree(data);
21352161
return newword;

src/include/tsearch/dicts/spell.h

+12-1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,17 @@ typedef struct spell_struct
8181

8282
#define SPELLHDRSZ (offsetof(SPELL, word))
8383

84+
/*
85+
* If an affix uses a regex, we have to store that separately in a struct
86+
* that won't move around when arrays of affixes are enlarged or sorted.
87+
* This is so that it can be found to be cleaned up at context destruction.
88+
*/
89+
typedef struct aff_regex_struct
90+
{
91+
regex_t regex;
92+
MemoryContextCallback mcallback;
93+
} aff_regex_struct;
94+
8495
/*
8596
* Represents an entry in an affix list.
8697
*/
@@ -97,7 +108,7 @@ typedef struct aff_struct
97108
char *repl;
98109
union
99110
{
100-
regex_t regex;
111+
aff_regex_struct *pregex;
101112
Regis regis;
102113
} reg;
103114
} AFFIX;

0 commit comments

Comments
 (0)