Skip to content

Commit bad5fe9

Browse files
author
Thomas G. Lockhart
committed
Search the existing regular expression cache as a ring buffer.
This optimizes the case of repeated calls with the same expression, which seems to be the most common case. Formerly, the search always started from the first entry. We may want to look at the least-recently-used algorithm to make sure it is identifying the right slots to reclaim. It seems silly to do math when we could simply use an incrementing counter...
1 parent e626600 commit bad5fe9

File tree

1 file changed

+26
-30
lines changed

1 file changed

+26
-30
lines changed

src/backend/utils/adt/regexp.c

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.39 2002/06/11 15:41:37 thomas Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.40 2002/06/15 02:49:47 thomas Exp $
1212
*
1313
* Alistair Crooks added the code for the regex caching
1414
* agc - cached the regular expressions used - there's a good chance
@@ -53,7 +53,7 @@ struct cached_re_str
5353
static int rec = 0; /* # of cached re's */
5454
static struct cached_re_str rev[MAX_CACHED_RES]; /* cached re's */
5555
static unsigned long lru; /* system lru tag */
56-
static int pg_lastre = 0;
56+
static int pg_lastrec = 0;
5757

5858
/* attempt to compile `re' as an re, then match it against text */
5959
/* cflags - flag to regcomp indicates case sensitivity */
@@ -70,43 +70,46 @@ RE_compile_and_execute(text *text_re, char *text, int cflags,
7070
re = DatumGetCString(DirectFunctionCall1(textout,
7171
PointerGetDatum(text_re)));
7272

73-
if ((i = pg_lastre) < rec)
73+
/* Find a previously compiled regular expression.
74+
* Run the cache as a ring buffer, starting the search
75+
* from the previous match if any.
76+
*/
77+
i = pg_lastrec;
78+
while (i < rec)
7479
{
75-
if (rev[i].cre_s)
80+
if (rev[i].cre_s != NULL)
7681
{
7782
if (strcmp(rev[i].cre_s, re) == 0 &&
7883
rev[i].cre_type == cflags)
7984
{
85+
pg_lastrec = i;
8086
rev[i].cre_lru = ++lru;
8187
pfree(re);
8288
return (pg_regexec(&rev[i].cre_re,
8389
text, nmatch,
8490
pmatch, 0) == 0);
8591
}
8692
}
87-
}
88-
89-
/* find a previously compiled regular expression */
90-
for (i = 0; i < rec; i++)
91-
{
92-
if (i == pg_lastre) continue;
93+
i++;
9394

94-
if (rev[i].cre_s)
95+
/* If we were not at the first slot to start,
96+
* then think about wrapping if necessary.
97+
*/
98+
if (pg_lastrec != 0)
9599
{
96-
if (strcmp(rev[i].cre_s, re) == 0 &&
97-
rev[i].cre_type == cflags)
100+
if (i >= rec)
98101
{
99-
rev[i].cre_lru = ++lru;
100-
pfree(re);
101-
return (pg_regexec(&rev[i].cre_re,
102-
text, nmatch,
103-
pmatch, 0) == 0);
102+
i = 0;
103+
}
104+
else if (i == pg_lastrec)
105+
{
106+
break;
104107
}
105108
}
106109
}
107110

108111
/* we didn't find it - make room in the cache for it */
109-
if (rec == MAX_CACHED_RES)
112+
if (rec >= MAX_CACHED_RES)
110113
{
111114
/* cache is full - find the oldest entry */
112115
for (oldest = 0, i = 1; i < rec; i++)
@@ -116,13 +119,16 @@ RE_compile_and_execute(text *text_re, char *text, int cflags,
116119
}
117120
}
118121
else
122+
{
119123
oldest = rec++;
124+
}
120125

121126
/* if there was an old re, then de-allocate the space it used */
122127
if (rev[oldest].cre_s != (char *) NULL)
123128
{
124129
for (lru = i = 0; i < rec; i++)
125130
{
131+
/* downweight all of the other cached entries */
126132
rev[i].cre_lru = (rev[i].cre_lru - rev[oldest].cre_lru) / 2;
127133
if (rev[i].cre_lru > lru)
128134
lru = rev[i].cre_lru;
@@ -141,6 +147,7 @@ RE_compile_and_execute(text *text_re, char *text, int cflags,
141147
regcomp_result = pg_regcomp(&rev[oldest].cre_re, re, cflags);
142148
if (regcomp_result == 0)
143149
{
150+
pg_lastrec = oldest;
144151
/*
145152
* use malloc/free for the cre_s field because the storage has to
146153
* persist across transactions
@@ -311,7 +318,6 @@ textregexsubstr(PG_FUNCTION_ARGS)
311318
{
312319
text *s = PG_GETARG_TEXT_P(0);
313320
text *p = PG_GETARG_TEXT_P(1);
314-
text *result;
315321
char *sterm;
316322
int len;
317323
bool match;
@@ -339,16 +345,6 @@ textregexsubstr(PG_FUNCTION_ARGS)
339345
Int32GetDatum(pmatch.rm_so+1),
340346
Int32GetDatum(pmatch.rm_eo-pmatch.rm_so)));
341347
}
342-
#if 0
343-
/* otherwise, return a zero-length string */
344-
else
345-
{
346-
result = palloc(VARHDRSZ);
347-
VARATT_SIZEP(result) = VARHDRSZ;
348-
PG_RETURN_TEXT_P(result);
349-
}
350-
#endif
351348

352-
/* not reached */
353349
PG_RETURN_NULL();
354350
}

0 commit comments

Comments
 (0)