Skip to content

Commit 75a64ee

Browse files
committed
I made the patch that implements regexp_replace again.
The specification of this function is as follows. regexp_replace(source text, pattern text, replacement text, [flags text]) returns text. Replaces the substring of the source text that matches the regular expression with the replacement text. - pattern is a regular expression pattern. - replacement is the replacement string, which can use '\1'-'\9' and '\&'. '\1'-'\9': back reference to the n'th subexpression. '\&': the entire matched string. - flags can use the following values: g: global (replace all) i: ignore case. When flags is not specified, matching is case sensitive and only the first instance is replaced. Atsushi Ogawa
1 parent 73e2431 commit 75a64ee

File tree

10 files changed

+418
-55
lines changed

10 files changed

+418
-55
lines changed

doc/src/sgml/func.sgml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.263 2005/07/06 19:02:52 momjian Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.264 2005/07/10 04:54:30 momjian Exp $
33
PostgreSQL documentation
44
-->
55

@@ -1256,6 +1256,26 @@ PostgreSQL documentation
12561256
<entry><literal>'O''Reilly'</literal></entry>
12571257
</row>
12581258

1259+
<row>
1260+
<entry><literal><function>regexp_replace</function>(<parameter>source</parameter> <type>text</type>,
1261+
<parameter>pattern</parameter> <type>text</type>,
1262+
<parameter>replacement</parameter> <type>text</type>
1263+
<optional>, <parameter>flags</parameter> <type>text</type></optional>)</literal></entry>
1264+
<entry><type>text</type></entry>
1265+
<entry>Replace the substring that matches the regular expression
1266+
<parameter>pattern</parameter> in <parameter>source</parameter> with
1267+
<parameter>replacement</parameter>.
1268+
<parameter>replacement</parameter> can use <literal>\1</>-<literal>\9</> and <literal>\&amp;</>.
1269+
<literal>\1</>-<literal>\9</> is a back reference to the n'th subexpression, and
1270+
<literal>\&amp;</> is the entire matched string.
1271+
<parameter>flags</parameter> can use <literal>g</>(global) and <literal>i</>(ignore case).
1272+
When flags is not specified, case sensitive matching is used, and it replaces
1273+
only the first instance.
1274+
</entry>
1275+
<entry><literal>regexp_replace('1112223333', '(\\d{3})(\\d{3})(\\d{4})', '(\\1) \\2-\\3')</literal></entry>
1276+
<entry><literal>(111) 222-3333</literal></entry>
1277+
</row>
1278+
12591279
<row>
12601280
<entry><literal><function>repeat</function>(<parameter>string</parameter> <type>text</type>, <parameter>number</parameter> <type>integer</type>)</literal></entry>
12611281
<entry><type>text</type></entry>

src/backend/regex/regexec.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
2828
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929
*
30-
* $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.24 2003/11/29 19:51:55 pgsql Exp $
30+
* $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.25 2005/07/10 04:54:30 momjian Exp $
3131
*
3232
*/
3333

@@ -110,6 +110,7 @@ struct vars
110110
regmatch_t *pmatch;
111111
rm_detail_t *details;
112112
chr *start; /* start of string */
113+
chr *search_start; /* search start of string */
113114
chr *stop; /* just past end of string */
114115
int err; /* error code if any (0 none) */
115116
regoff_t *mem; /* memory vector for backtracking */
@@ -168,6 +169,7 @@ int
168169
pg_regexec(regex_t *re,
169170
const chr *string,
170171
size_t len,
172+
size_t search_start,
171173
rm_detail_t *details,
172174
size_t nmatch,
173175
regmatch_t pmatch[],
@@ -219,6 +221,7 @@ pg_regexec(regex_t *re,
219221
v->pmatch = pmatch;
220222
v->details = details;
221223
v->start = (chr *) string;
224+
v->search_start = (chr *) string + search_start;
222225
v->stop = (chr *) string + len;
223226
v->err = 0;
224227
if (backref)
@@ -288,7 +291,8 @@ find(struct vars * v,
288291
NOERR();
289292
MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
290293
cold = NULL;
291-
close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL);
294+
close = shortest(v, s, v->search_start, v->search_start, v->stop,
295+
&cold, (int *) NULL);
292296
freedfa(s);
293297
NOERR();
294298
if (v->g->cflags & REG_EXPECT)
@@ -415,7 +419,7 @@ cfindloop(struct vars * v,
415419

416420
assert(d != NULL && s != NULL);
417421
cold = NULL;
418-
close = v->start;
422+
close = v->search_start;
419423
do
420424
{
421425
MDEBUG(("\ncsearch at %ld\n", LOFF(close)));

src/backend/utils/adt/regexp.c

Lines changed: 121 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.56 2004/12/31 22:01:22 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.57 2005/07/10 04:54:30 momjian Exp $
1212
*
1313
* Alistair Crooks added the code for the regex caching
1414
* agc - cached the regular expressions used - there's a good chance
@@ -81,38 +81,27 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
8181

8282

8383
/*
84-
* RE_compile_and_execute - compile and execute a RE, caching if possible
84+
* RE_compile_and_cache - compile a RE, caching if possible
8585
*
86-
* Returns TRUE on match, FALSE on no match
86+
* Returns the compiled regex_t (copied by value from the cache entry)
8787
*
88-
* text_re --- the pattern, expressed as an *untoasted* TEXT object
89-
* dat --- the data to match against (need not be null-terminated)
90-
* dat_len --- the length of the data string
91-
* cflags --- compile options for the pattern
92-
* nmatch, pmatch --- optional return area for match details
88+
* text_re --- the pattern, expressed as an *untoasted* TEXT object
89+
* cflags --- compile options for the pattern
9390
*
94-
* Both pattern and data are given in the database encoding. We internally
95-
* convert to array of pg_wchar which is what Spencer's regex package wants.
91+
* Pattern is given in the database encoding. We internally convert to
92+
* array of pg_wchar which is what Spencer's regex package wants.
9693
*/
97-
static bool
98-
RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
99-
int cflags, int nmatch, regmatch_t *pmatch)
94+
static regex_t
95+
RE_compile_and_cache(text *text_re, int cflags)
10096
{
10197
int text_re_len = VARSIZE(text_re);
102-
pg_wchar *data;
103-
size_t data_len;
10498
pg_wchar *pattern;
10599
size_t pattern_len;
106100
int i;
107101
int regcomp_result;
108-
int regexec_result;
109102
cached_re_str re_temp;
110103
char errMsg[100];
111104

112-
/* Convert data string to wide characters */
113-
data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
114-
data_len = pg_mb2wchar_with_len(dat, data, dat_len);
115-
116105
/*
117106
* Look for a match among previously compiled REs. Since the data
118107
* structure is self-organizing with most-used entries at the front,
@@ -134,28 +123,7 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
134123
re_array[0] = re_temp;
135124
}
136125

137-
/* Perform RE match and return result */
138-
regexec_result = pg_regexec(&re_array[0].cre_re,
139-
data,
140-
data_len,
141-
NULL, /* no details */
142-
nmatch,
143-
pmatch,
144-
0);
145-
146-
pfree(data);
147-
148-
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
149-
{
150-
/* re failed??? */
151-
pg_regerror(regexec_result, &re_array[0].cre_re,
152-
errMsg, sizeof(errMsg));
153-
ereport(ERROR,
154-
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
155-
errmsg("regular expression failed: %s", errMsg)));
156-
}
157-
158-
return (regexec_result == REG_OKAY);
126+
return re_array[0].cre_re;
159127
}
160128
}
161129

@@ -220,10 +188,45 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
220188
re_array[0] = re_temp;
221189
num_res++;
222190

191+
return re_array[0].cre_re;
192+
}
193+
194+
/*
195+
* RE_compile_and_execute - compile and execute a RE
196+
*
197+
* Returns TRUE on match, FALSE on no match
198+
*
199+
* text_re --- the pattern, expressed as an *untoasted* TEXT object
200+
* dat --- the data to match against (need not be null-terminated)
201+
* dat_len --- the length of the data string
202+
* cflags --- compile options for the pattern
203+
* nmatch, pmatch --- optional return area for match details
204+
*
205+
* Both pattern and data are given in the database encoding. We internally
206+
* convert to array of pg_wchar which is what Spencer's regex package wants.
207+
*/
208+
static bool
209+
RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
210+
int cflags, int nmatch, regmatch_t *pmatch)
211+
{
212+
pg_wchar *data;
213+
size_t data_len;
214+
int regexec_result;
215+
regex_t re;
216+
char errMsg[100];
217+
218+
/* Convert data string to wide characters */
219+
data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
220+
data_len = pg_mb2wchar_with_len(dat, data, dat_len);
221+
222+
/* Compile RE */
223+
re = RE_compile_and_cache(text_re, cflags);
224+
223225
/* Perform RE match and return result */
224226
regexec_result = pg_regexec(&re_array[0].cre_re,
225227
data,
226228
data_len,
229+
0,
227230
NULL, /* no details */
228231
nmatch,
229232
pmatch,
@@ -428,15 +431,89 @@ textregexsubstr(PG_FUNCTION_ARGS)
428431
eo = pmatch[0].rm_eo;
429432
}
430433

431-
return (DirectFunctionCall3(text_substr,
434+
return DirectFunctionCall3(text_substr,
432435
PointerGetDatum(s),
433436
Int32GetDatum(so + 1),
434-
Int32GetDatum(eo - so)));
437+
Int32GetDatum(eo - so));
435438
}
436439

437440
PG_RETURN_NULL();
438441
}
439442

443+
/*
444+
* textregexreplace_noopt()
445+
* Return the source text with the part matched by a regular expression replaced.
446+
* This is the variant of textregexreplace that takes no options
447+
* argument. Matching is case sensitive (no REG_ICASE is added to
448+
* regex_flavor) and the "global" flag passed on is false.
449+
*/
450+
Datum
451+
textregexreplace_noopt(PG_FUNCTION_ARGS)
452+
{
453+
text *s = PG_GETARG_TEXT_P(0); /* argument 0: source text */
454+
text *p = PG_GETARG_TEXT_P(1); /* argument 1: regular expression pattern */
455+
text *r = PG_GETARG_TEXT_P(2); /* argument 2: replacement text */
456+
regex_t re; /* by-value copy of the compiled pattern */
457+
458+
re = RE_compile_and_cache(p, regex_flavor); /* compile with the default flavor only */
459+
460+
return DirectFunctionCall4(replace_text_regexp, /* defined elsewhere; presumably performs the substitution */
461+
PointerGetDatum(s),
462+
PointerGetDatum(&re), /* address of the local copy */
463+
PointerGetDatum(r),
464+
BoolGetDatum(false)); /* same slot that textregexreplace fills with its "global" flag */
465+
}
466+
467+
/*
468+
* textregexreplace()
469+
* Return the source text with the part matched by a regular expression replaced; argument 3 is an option string where 'g' sets the global flag and 'i' adds REG_ICASE, and any other character raises an error.
470+
*/
471+
Datum
472+
textregexreplace(PG_FUNCTION_ARGS)
473+
{
474+
text *s = PG_GETARG_TEXT_P(0); /* argument 0: source text */
475+
text *p = PG_GETARG_TEXT_P(1); /* argument 1: regular expression pattern */
476+
text *r = PG_GETARG_TEXT_P(2); /* argument 2: replacement text */
477+
text *opt = PG_GETARG_TEXT_P(3); /* argument 3: option flags */
478+
char *opt_p = VARDATA(opt); /* start of the option bytes */
479+
int opt_len = (VARSIZE(opt) - VARHDRSZ); /* option length without the varlena header */
480+
int i;
481+
bool global = false; /* set by 'g' */
482+
bool ignorecase = false; /* set by 'i' */
483+
regex_t re; /* by-value copy of the compiled pattern */
484+
485+
/* parse options: examine each option character in turn */
486+
for (i = 0; i < opt_len; i++)
487+
{
488+
switch (opt_p[i])
489+
{
490+
case 'i':
491+
ignorecase = true;
492+
break;
493+
case 'g':
494+
global = true;
495+
break;
496+
default:
497+
ereport(ERROR, /* any character other than 'i' or 'g' is rejected */
498+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
499+
errmsg("invalid option of regexp_replace: %c",
500+
opt_p[i])));
501+
break; /* NOTE(review): presumably not reached, assuming ereport(ERROR) does not return -- confirm */
502+
}
503+
}
504+
505+
if (ignorecase)
506+
re = RE_compile_and_cache(p, regex_flavor | REG_ICASE); /* add the ignore-case compile flag */
507+
else
508+
re = RE_compile_and_cache(p, regex_flavor);
509+
510+
return DirectFunctionCall4(replace_text_regexp, /* defined elsewhere; presumably performs the substitution */
511+
PointerGetDatum(s),
512+
PointerGetDatum(&re), /* address of the local copy */
513+
PointerGetDatum(r),
514+
BoolGetDatum(global)); /* true only when 'g' was given */
515+
}
516+
440517
/* similar_escape()
441518
* Convert a SQL99 regexp pattern to POSIX style, so it can be used by
442519
* our regexp engine.

0 commit comments

Comments
 (0)