Skip to content

Commit bea3d7e

Browse files
committed
Use MemoryContext API for regex memory management.
Previously, regex_t objects' memory was managed with malloc() and free() directly. Switch to palloc()-based memory management instead.

Advantages:

* memory used by cached regexes is now visible with MemoryContext observability tools
* cleanup can be done automatically in certain failure modes (something that later commits will take advantage of)
* cleanup can be done in bulk

On the downside, there may be more fragmentation (wasted memory) due to per-regex MemoryContext objects. This is a problem shared with other cached objects in PostgreSQL and can probably be improved with later tuning.

Thanks to Noah Misch for suggesting this general approach, which unblocks later work on interrupts.

Suggested-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com
1 parent fcd77d5 commit bea3d7e

File tree

3 files changed

+45
-20
lines changed

3 files changed

+45
-20
lines changed

src/backend/regex/regprefix.c

+1-1
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ static int findprefix(struct cnfa *cnfa, struct colormap *cm,
3232
* REG_EXACT: all strings satisfying the regex must match the same string
3333
* or a REG_XXX error code
3434
*
35-
* In the non-failure cases, *string is set to a malloc'd string containing
35+
* In the non-failure cases, *string is set to a palloc'd string containing
3636
* the common prefix or exact value, of length *slength (measured in chrs
3737
* not bytes!).
3838
*

src/backend/utils/adt/regexp.c

+41-16
Original file line number | Diff line number | Diff line change
@@ -96,9 +96,13 @@ typedef struct regexp_matches_ctx
9696
#define MAX_CACHED_RES 32
9797
#endif
9898

99+
/* A parent memory context for regular expressions. */
100+
static MemoryContext RegexpCacheMemoryContext;
101+
99102
/* this structure describes one cached regular expression */
100103
typedef struct cached_re_str
101104
{
105+
MemoryContext cre_context; /* memory context for this regexp */
102106
char *cre_pat; /* original RE (not null terminated!) */
103107
int cre_pat_len; /* length of original RE, in bytes */
104108
int cre_flags; /* compile flags: extended,icase etc */
@@ -145,6 +149,7 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
145149
int regcomp_result;
146150
cached_re_str re_temp;
147151
char errMsg[100];
152+
MemoryContext oldcontext;
148153

149154
/*
150155
* Look for a match among previously compiled REs. Since the data
@@ -172,6 +177,13 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
172177
}
173178
}
174179

180+
/* Set up the cache memory on first go through. */
181+
if (unlikely(RegexpCacheMemoryContext == NULL))
182+
RegexpCacheMemoryContext =
183+
AllocSetContextCreate(TopMemoryContext,
184+
"RegexpCacheMemoryContext",
185+
ALLOCSET_SMALL_SIZES);
186+
175187
/*
176188
* Couldn't find it, so try to compile the new RE. To avoid leaking
177189
* resources on failure, we build into the re_temp local.
@@ -183,6 +195,18 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
183195
pattern,
184196
text_re_len);
185197

198+
/*
199+
* Make a memory context for this compiled regexp. This is initially a
200+
* child of the current memory context, so it will be cleaned up
201+
* automatically if compilation is interrupted and throws an ERROR. We'll
202+
* re-parent it under the longer lived cache context if we make it to the
203+
* bottom of this function.
204+
*/
205+
re_temp.cre_context = AllocSetContextCreate(CurrentMemoryContext,
206+
"RegexpMemoryContext",
207+
ALLOCSET_SMALL_SIZES);
208+
oldcontext = MemoryContextSwitchTo(re_temp.cre_context);
209+
186210
regcomp_result = pg_regcomp(&re_temp.cre_re,
187211
pattern,
188212
pattern_len,
@@ -209,21 +233,17 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
209233
errmsg("invalid regular expression: %s", errMsg)));
210234
}
211235

236+
/* Copy the pattern into the per-regexp memory context. */
237+
re_temp.cre_pat = palloc(text_re_len + 1);
238+
memcpy(re_temp.cre_pat, text_re_val, text_re_len);
239+
212240
/*
213-
* We use malloc/free for the cre_pat field because the storage has to
214-
* persist across transactions, and because we want to get control back on
215-
* out-of-memory. The Max() is because some malloc implementations return
216-
* NULL for malloc(0).
241+
* NUL-terminate it only for the benefit of the identifier used for the
242+
* memory context, visible in the pg_backend_memory_contexts view.
217243
*/
218-
re_temp.cre_pat = malloc(Max(text_re_len, 1));
219-
if (re_temp.cre_pat == NULL)
220-
{
221-
pg_regfree(&re_temp.cre_re);
222-
ereport(ERROR,
223-
(errcode(ERRCODE_OUT_OF_MEMORY),
224-
errmsg("out of memory")));
225-
}
226-
memcpy(re_temp.cre_pat, text_re_val, text_re_len);
244+
re_temp.cre_pat[text_re_len] = 0;
245+
MemoryContextSetIdentifier(re_temp.cre_context, re_temp.cre_pat);
246+
227247
re_temp.cre_pat_len = text_re_len;
228248
re_temp.cre_flags = cflags;
229249
re_temp.cre_collation = collation;
@@ -236,16 +256,21 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
236256
{
237257
--num_res;
238258
Assert(num_res < MAX_CACHED_RES);
239-
pg_regfree(&re_array[num_res].cre_re);
240-
free(re_array[num_res].cre_pat);
259+
/* Delete the memory context holding the regexp and pattern. */
260+
MemoryContextDelete(re_array[num_res].cre_context);
241261
}
242262

263+
/* Re-parent the memory context to our long-lived cache context. */
264+
MemoryContextSetParent(re_temp.cre_context, RegexpCacheMemoryContext);
265+
243266
if (num_res > 0)
244267
memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str));
245268

246269
re_array[0] = re_temp;
247270
num_res++;
248271

272+
MemoryContextSwitchTo(oldcontext);
273+
249274
return &re_array[0].cre_re;
250275
}
251276

@@ -1990,7 +2015,7 @@ regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
19902015
slen = pg_wchar2mb_with_len(str, result, slen);
19912016
Assert(slen < maxlen);
19922017

1993-
free(str);
2018+
pfree(str);
19942019

19952020
return result;
19962021
}

src/include/regex/regcustom.h

+3-3
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,9 @@
4949

5050
/* overrides for regguts.h definitions, if any */
5151
#define FUNCPTR(name, args) (*name) args
52-
#define MALLOC(n) malloc(n)
53-
#define FREE(p) free(VS(p))
54-
#define REALLOC(p,n) realloc(VS(p),n)
52+
#define MALLOC(n) palloc_extended((n), MCXT_ALLOC_NO_OOM)
53+
#define FREE(p) pfree(VS(p))
54+
#define REALLOC(p,n) repalloc_extended(VS(p),(n), MCXT_ALLOC_NO_OOM)
5555
#define assert(x) Assert(x)
5656

5757
/* internal character type and related */

0 commit comments

Comments
 (0)