Skip to content

Commit a148f8b

Browse files
committed
Add a planner support function for starts_with().
This fills in some gaps in planner support for starts_with() and the equivalent ^@ operator: * A condition such as "textcol ^@ constant" can now use a regular btree index, not only an SP-GiST index, so long as the index's collation is C. (This works just like "textcol LIKE 'foo%'".) * "starts_with(textcol, constant)" can be optimized the same as "textcol ^@ constant". * Fixed-prefix LIKE and regex patterns are now more like starts_with() in another way: if you apply one to an SPGiST-indexed column, you'll get an index condition using ^@ rather than two index conditions with >= and <. Per a complaint from Shay Rojansky. Patch by me; thanks to Nathan Bossart for review. Discussion: https://postgr.es/m/232599.1633800229@sss.pgh.pa.us
1 parent 248c3a9 commit a148f8b

File tree

6 files changed

+95
-23
lines changed

6 files changed

+95
-23
lines changed

src/backend/utils/adt/like_support.c

+46-19
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,14 @@ texticregexeq_support(PG_FUNCTION_ARGS)
143143
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
144144
}
145145

146+
Datum
147+
text_starts_with_support(PG_FUNCTION_ARGS)
148+
{
149+
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
150+
151+
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
152+
}
153+
146154
/* Common code for the above */
147155
static Node *
148156
like_regex_support(Node *rawreq, Pattern_Type ptype)
@@ -246,6 +254,7 @@ match_pattern_prefix(Node *leftop,
246254
Oid eqopr;
247255
Oid ltopr;
248256
Oid geopr;
257+
Oid preopr = InvalidOid;
249258
bool collation_aware;
250259
Expr *expr;
251260
FmgrInfo ltproc;
@@ -302,12 +311,20 @@ match_pattern_prefix(Node *leftop,
302311
switch (ldatatype)
303312
{
304313
case TEXTOID:
305-
if (opfamily == TEXT_PATTERN_BTREE_FAM_OID ||
306-
opfamily == TEXT_SPGIST_FAM_OID)
314+
if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
315+
{
316+
eqopr = TextEqualOperator;
317+
ltopr = TextPatternLessOperator;
318+
geopr = TextPatternGreaterEqualOperator;
319+
collation_aware = false;
320+
}
321+
else if (opfamily == TEXT_SPGIST_FAM_OID)
307322
{
308323
eqopr = TextEqualOperator;
309324
ltopr = TextPatternLessOperator;
310325
geopr = TextPatternGreaterEqualOperator;
326+
/* This opfamily has direct support for prefixing */
327+
preopr = TextPrefixOperator;
311328
collation_aware = false;
312329
}
313330
else
@@ -360,20 +377,6 @@ match_pattern_prefix(Node *leftop,
360377
return NIL;
361378
}
362379

363-
/*
364-
* If necessary, verify that the index's collation behavior is compatible.
365-
* For an exact-match case, we don't have to be picky. Otherwise, insist
366-
* that the index collation be "C". Note that here we are looking at the
367-
* index's collation, not the expression's collation -- this test is *not*
368-
* dependent on the LIKE/regex operator's collation.
369-
*/
370-
if (collation_aware)
371-
{
372-
if (!(pstatus == Pattern_Prefix_Exact ||
373-
lc_collate_is_c(indexcollation)))
374-
return NIL;
375-
}
376-
377380
/*
378381
* If necessary, coerce the prefix constant to the right type. The given
379382
* prefix constant is either text or bytea type, therefore the only case
@@ -409,8 +412,31 @@ match_pattern_prefix(Node *leftop,
409412
}
410413

411414
/*
412-
* Otherwise, we have a nonempty required prefix of the values.
413-
*
415+
* Otherwise, we have a nonempty required prefix of the values. Some
416+
* opclasses support prefix checks directly, otherwise we'll try to
417+
* generate a range constraint.
418+
*/
419+
if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
420+
{
421+
expr = make_opclause(preopr, BOOLOID, false,
422+
(Expr *) leftop, (Expr *) prefix,
423+
InvalidOid, indexcollation);
424+
result = list_make1(expr);
425+
return result;
426+
}
427+
428+
/*
429+
* Since we need a range constraint, it's only going to work reliably if
430+
* the index is collation-insensitive or has "C" collation. Note that
431+
* here we are looking at the index's collation, not the expression's
432+
* collation -- this test is *not* dependent on the LIKE/regex operator's
433+
* collation.
434+
*/
435+
if (collation_aware &&
436+
!lc_collate_is_c(indexcollation))
437+
return NIL;
438+
439+
/*
414440
* We can always say "x >= prefix".
415441
*/
416442
if (!op_in_opfamily(geopr, opfamily))
@@ -1165,7 +1191,6 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
11651191
case Pattern_Type_Prefix:
11661192
/* Prefix type work is trivial. */
11671193
result = Pattern_Prefix_Partial;
1168-
*rest_selec = 1.0; /* all */
11691194
*prefix = makeConst(patt->consttype,
11701195
patt->consttypmod,
11711196
patt->constcollid,
@@ -1175,6 +1200,8 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
11751200
patt->constlen),
11761201
patt->constisnull,
11771202
patt->constbyval);
1203+
if (rest_selec != NULL)
1204+
*rest_selec = 1.0; /* all */
11781205
break;
11791206
default:
11801207
elog(ERROR, "unrecognized ptype: %d", (int) ptype);

src/include/catalog/catversion.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 202111091
56+
#define CATALOG_VERSION_NO 202111171
5757

5858
#endif

src/include/catalog/pg_operator.dat

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102
oprright => 'text', oprresult => 'bool', oprcom => '=(text,text)',
103103
oprnegate => '<>(text,text)', oprcode => 'texteq', oprrest => 'eqsel',
104104
oprjoin => 'eqjoinsel' },
105-
{ oid => '3877', descr => 'starts with',
105+
{ oid => '3877', oid_symbol => 'TextPrefixOperator', descr => 'starts with',
106106
oprname => '^@', oprleft => 'text', oprright => 'text', oprresult => 'bool',
107107
oprcode => 'starts_with', oprrest => 'prefixsel',
108108
oprjoin => 'prefixjoinsel' },

src/include/catalog/pg_proc.dat

+6-2
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,12 @@
167167
proname => 'texteq', proleakproof => 't', prorettype => 'bool',
168168
proargtypes => 'text text', prosrc => 'texteq' },
169169
{ oid => '3696',
170-
proname => 'starts_with', proleakproof => 't', prorettype => 'bool',
171-
proargtypes => 'text text', prosrc => 'text_starts_with' },
170+
proname => 'starts_with', prosupport => 'text_starts_with_support',
171+
proleakproof => 't', prorettype => 'bool', proargtypes => 'text text',
172+
prosrc => 'text_starts_with' },
173+
{ oid => '8923', descr => 'planner support for text_starts_with',
174+
proname => 'text_starts_with_support', prorettype => 'internal',
175+
proargtypes => 'internal', prosrc => 'text_starts_with_support' },
172176
{ oid => '68',
173177
proname => 'xideq', proleakproof => 't', prorettype => 'bool',
174178
proargtypes => 'xid xid', prosrc => 'xideq' },

src/test/regress/expected/create_index_spgist.out

+33
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,22 @@ SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
804804
2
805805
(1 row)
806806

807+
EXPLAIN (COSTS OFF)
808+
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
809+
QUERY PLAN
810+
------------------------------------------------------------
811+
Aggregate
812+
-> Index Only Scan using sp_radix_ind on radix_text_tbl
813+
Index Cond: (t ^@ 'Worth'::text)
814+
Filter: starts_with(t, 'Worth'::text)
815+
(4 rows)
816+
817+
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
818+
count
819+
-------
820+
2
821+
(1 row)
822+
807823
-- Now check the results from bitmap indexscan
808824
SET enable_seqscan = OFF;
809825
SET enable_indexscan = OFF;
@@ -1333,6 +1349,23 @@ SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
13331349
2
13341350
(1 row)
13351351

1352+
EXPLAIN (COSTS OFF)
1353+
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
1354+
QUERY PLAN
1355+
------------------------------------------------
1356+
Aggregate
1357+
-> Bitmap Heap Scan on radix_text_tbl
1358+
Filter: starts_with(t, 'Worth'::text)
1359+
-> Bitmap Index Scan on sp_radix_ind
1360+
Index Cond: (t ^@ 'Worth'::text)
1361+
(5 rows)
1362+
1363+
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
1364+
count
1365+
-------
1366+
2
1367+
(1 row)
1368+
13361369
RESET enable_seqscan;
13371370
RESET enable_indexscan;
13381371
RESET enable_bitmapscan;

src/test/regress/sql/create_index_spgist.sql

+8
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,10 @@ EXPLAIN (COSTS OFF)
295295
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
296296
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
297297

298+
EXPLAIN (COSTS OFF)
299+
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
300+
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
301+
298302
-- Now check the results from bitmap indexscan
299303
SET enable_seqscan = OFF;
300304
SET enable_indexscan = OFF;
@@ -424,6 +428,10 @@ EXPLAIN (COSTS OFF)
424428
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
425429
SELECT count(*) FROM radix_text_tbl WHERE t ^@ 'Worth';
426430

431+
EXPLAIN (COSTS OFF)
432+
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
433+
SELECT count(*) FROM radix_text_tbl WHERE starts_with(t, 'Worth');
434+
427435
RESET enable_seqscan;
428436
RESET enable_indexscan;
429437
RESET enable_bitmapscan;

0 commit comments

Comments
 (0)