8
8
*
9
9
*
10
10
* IDENTIFICATION
11
- * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.56 2004/12/31 22:01:22 pgsql Exp $
11
+ * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.57 2005/07/10 04:54:30 momjian Exp $
12
12
*
13
13
* Alistair Crooks added the code for the regex caching
14
14
* agc - cached the regular expressions used - there's a good chance
@@ -81,38 +81,27 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
81
81
82
82
83
83
/*
84
- * RE_compile_and_execute - compile and execute a RE, caching if possible
84
+ * RE_compile_and_cache - compile a RE, caching if possible
85
85
*
86
- * Returns TRUE on match, FALSE on no match
86
+ * Returns the compiled regex_t by value (a copy of re_array[0].cre_re)
87
87
*
88
- * text_re --- the pattern, expressed as an *untoasted* TEXT object
89
- * dat --- the data to match against (need not be null-terminated)
90
- * dat_len --- the length of the data string
91
- * cflags --- compile options for the pattern
92
- * nmatch, pmatch --- optional return area for match details
88
+ * text_re --- the pattern, expressed as an *untoasted* TEXT object
89
+ * cflags --- compile options for the pattern
93
90
*
94
- * Both pattern and data are given in the database encoding. We internally
95
- * convert to array of pg_wchar which is what Spencer's regex package wants.
91
+ * Pattern is given in the database encoding. We internally convert to
92
+ * array of pg_wchar which is what Spencer's regex package wants.
96
93
*/
97
- static bool
98
- RE_compile_and_execute (text * text_re , unsigned char * dat , int dat_len ,
99
- int cflags , int nmatch , regmatch_t * pmatch )
94
+ static regex_t
95
+ RE_compile_and_cache (text * text_re , int cflags )
100
96
{
101
97
int text_re_len = VARSIZE (text_re );
102
- pg_wchar * data ;
103
- size_t data_len ;
104
98
pg_wchar * pattern ;
105
99
size_t pattern_len ;
106
100
int i ;
107
101
int regcomp_result ;
108
- int regexec_result ;
109
102
cached_re_str re_temp ;
110
103
char errMsg [100 ];
111
104
112
- /* Convert data string to wide characters */
113
- data = (pg_wchar * ) palloc ((dat_len + 1 ) * sizeof (pg_wchar ));
114
- data_len = pg_mb2wchar_with_len (dat , data , dat_len );
115
-
116
105
/*
117
106
* Look for a match among previously compiled REs. Since the data
118
107
* structure is self-organizing with most-used entries at the front,
@@ -134,28 +123,7 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
134
123
re_array [0 ] = re_temp ;
135
124
}
136
125
137
- /* Perform RE match and return result */
138
- regexec_result = pg_regexec (& re_array [0 ].cre_re ,
139
- data ,
140
- data_len ,
141
- NULL , /* no details */
142
- nmatch ,
143
- pmatch ,
144
- 0 );
145
-
146
- pfree (data );
147
-
148
- if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH )
149
- {
150
- /* re failed??? */
151
- pg_regerror (regexec_result , & re_array [0 ].cre_re ,
152
- errMsg , sizeof (errMsg ));
153
- ereport (ERROR ,
154
- (errcode (ERRCODE_INVALID_REGULAR_EXPRESSION ),
155
- errmsg ("regular expression failed: %s" , errMsg )));
156
- }
157
-
158
- return (regexec_result == REG_OKAY );
126
+ return re_array [0 ].cre_re ;
159
127
}
160
128
}
161
129
@@ -220,10 +188,45 @@ RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
220
188
re_array [0 ] = re_temp ;
221
189
num_res ++ ;
222
190
191
+ return re_array [0 ].cre_re ;
192
+ }
193
+
194
+ /*
195
+ * RE_compile_and_execute - compile and execute a RE
196
+ *
197
+ * Returns TRUE on match, FALSE on no match
198
+ *
199
+ * text_re --- the pattern, expressed as an *untoasted* TEXT object
200
+ * dat --- the data to match against (need not be null-terminated)
201
+ * dat_len --- the length of the data string
202
+ * cflags --- compile options for the pattern
203
+ * nmatch, pmatch --- optional return area for match details
204
+ *
205
+ * Both pattern and data are given in the database encoding. We internally
206
+ * convert to array of pg_wchar which is what Spencer's regex package wants.
207
+ */
208
+ static bool
209
+ RE_compile_and_execute (text * text_re , unsigned char * dat , int dat_len ,
210
+ int cflags , int nmatch , regmatch_t * pmatch )
211
+ {
212
+ pg_wchar * data ;
213
+ size_t data_len ;
214
+ int regexec_result ;
215
+ regex_t re ;
216
+ char errMsg [100 ];
217
+
218
+ /* Convert data string to wide characters */
219
+ data = (pg_wchar * ) palloc ((dat_len + 1 ) * sizeof (pg_wchar ));
220
+ data_len = pg_mb2wchar_with_len (dat , data , dat_len );
221
+
222
+ /* Compile RE */
223
+ re = RE_compile_and_cache (text_re , cflags );
224
+
223
225
/* Perform RE match and return result */
224
226
regexec_result = pg_regexec (& re_array [0 ].cre_re ,
225
227
data ,
226
228
data_len ,
229
+ 0 ,
227
230
NULL , /* no details */
228
231
nmatch ,
229
232
pmatch ,
@@ -428,15 +431,89 @@ textregexsubstr(PG_FUNCTION_ARGS)
428
431
eo = pmatch [0 ].rm_eo ;
429
432
}
430
433
431
- return ( DirectFunctionCall3 (text_substr ,
434
+ return DirectFunctionCall3 (text_substr ,
432
435
PointerGetDatum (s ),
433
436
Int32GetDatum (so + 1 ),
434
- Int32GetDatum (eo - so ))) ;
437
+ Int32GetDatum (eo - so ));
435
438
}
436
439
437
440
PG_RETURN_NULL ();
438
441
}
439
442
443
+ /*
444
+ * textregexreplace_noopt()
445
+ * Three-argument variant of textregexreplace (no option-string argument).
446
+ * The pattern (arg 1) is compiled with regex_flavor alone, so REG_ICASE is
447
+ * never set (case sensitive), and false is passed as the last argument of
448
+ * replace_text_regexp, the slot textregexreplace fills with its global flag
449
+ * (replace the first instance only). Args 0 and 2 are forwarded unchanged.
+ */
450
+ Datum
451
+ textregexreplace_noopt (PG_FUNCTION_ARGS )
452
+ {
453
+ text * s = PG_GETARG_TEXT_P (0 ); /* presumably the input string; forwarded as-is */
454
+ text * p = PG_GETARG_TEXT_P (1 ); /* the regular expression pattern */
455
+ text * r = PG_GETARG_TEXT_P (2 ); /* presumably the replacement text; forwarded as-is */
456
+ regex_t re ; /* receives a by-value copy of the compiled regex */
457
+
458
+ re = RE_compile_and_cache (p , regex_flavor ); /* no REG_ICASE here */
459
+
460
+ return DirectFunctionCall4 (replace_text_regexp ,
461
+ PointerGetDatum (s ),
462
+ PointerGetDatum (& re ), /* address of the local copy above */
463
+ PointerGetDatum (r ),
464
+ BoolGetDatum (false)); /* global flag fixed to false */
465
+ }
466
+
467
+ /*
468
+ * textregexreplace()
469
+ * Four-argument regexp replace: arg 3 is an option string in which 'i' adds REG_ICASE to the compile flags and 'g' sets the global flag handed to replace_text_regexp; any other option character raises ERRCODE_INVALID_PARAMETER_VALUE.
470
+ */
471
+ Datum
472
+ textregexreplace (PG_FUNCTION_ARGS )
473
+ {
474
+ text * s = PG_GETARG_TEXT_P (0 ); /* presumably the input string; forwarded as-is */
475
+ text * p = PG_GETARG_TEXT_P (1 ); /* the regular expression pattern */
476
+ text * r = PG_GETARG_TEXT_P (2 ); /* presumably the replacement text; forwarded as-is */
477
+ text * opt = PG_GETARG_TEXT_P (3 ); /* option characters, parsed below */
478
+ char * opt_p = VARDATA (opt ); /* start of the option characters */
479
+ int opt_len = (VARSIZE (opt ) - VARHDRSZ ); /* byte count with the varlena header excluded */
480
+ int i ;
481
+ bool global = false; /* set by option 'g' */
482
+ bool ignorecase = false; /* set by option 'i' */
483
+ regex_t re ; /* receives a by-value copy of the compiled regex */
484
+
485
+ /* parse options: every byte of opt is treated as one flag character */
486
+ for (i = 0 ; i < opt_len ; i ++ )
487
+ {
488
+ switch (opt_p [i ])
489
+ {
490
+ case 'i' :
491
+ ignorecase = true; /* selects the REG_ICASE compile below */
492
+ break ;
493
+ case 'g' :
494
+ global = true; /* forwarded as the last argument of replace_text_regexp */
495
+ break ;
496
+ default :
497
+ ereport (ERROR ,
498
+ (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
499
+ errmsg ("invalid option of regexp_replace: %c" ,
500
+ opt_p [i ])));
501
+ break ; /* NOTE(review): presumably unreachable if ereport(ERROR) does not return; confirm */
502
+ }
503
+ }
504
+
505
+ if (ignorecase ) /* the two branches differ only in REG_ICASE */
506
+ re = RE_compile_and_cache (p , regex_flavor | REG_ICASE );
507
+ else
508
+ re = RE_compile_and_cache (p , regex_flavor );
509
+
510
+ return DirectFunctionCall4 (replace_text_regexp ,
511
+ PointerGetDatum (s ),
512
+ PointerGetDatum (& re ), /* address of the local copy above */
513
+ PointerGetDatum (r ),
514
+ BoolGetDatum (global ));
515
+ }
516
+
440
517
/* similar_escape()
441
518
* Convert a SQL99 regexp pattern to POSIX style, so it can be used by
442
519
* our regexp engine.
0 commit comments