Skip to content

Commit c7597a1

Browse files
committed
Fix GIN's shimTriConsistentFn to not corrupt its input.
Commit 0f21db3 made an assumption that GIN triConsistentFns would not modify their input entryRes[] arrays. But in fact, the "shim" triConsistentFn that we use for opclasses that don't supply their own did exactly that, potentially leading to wrong answers from a GIN index search. Through bad luck, none of the test cases that we have for such opclasses exposed the bug. One response to this could be that the assumption of consistency check functions not modifying entryRes[] arrays is a bad one, but it still seems reasonable to me. Notably, shimTriConsistentFn is itself assuming that with respect to the underlying boolean consistentFn, so it's sure being self-centered in supposing that it gets to do so. Fortunately, it's quite simple to fix shimTriConsistentFn to restore the entry-time state of entryRes[], so let's do that instead. This issue doesn't affect any core GIN opclasses, since they all supply their own triConsistentFns. It does affect contrib modules btree_gin, hstore, and intarray. Along the way, I (tgl) noticed that shimTriConsistentFn failed to pick up on a "recheck" flag returned by its first call to the boolean consistentFn. This may be only a latent problem, since it would be unlikely for a consistentFn to set recheck for the all-false case and not any other cases. (Indeed, none of our contrib modules do that.) Nonetheless, it's formally wrong. Reported-by: Vinod Sridharan <vsridh90@gmail.com> Author: Vinod Sridharan <vsridh90@gmail.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/CAFMdLD7XzsXfi1+DpTqTgrD8XU0i2C99KuF=5VHLWjx4C1pkcg@mail.gmail.com Backpatch-through: 13
1 parent e2f42f8 commit c7597a1

File tree

3 files changed

+64
-5
lines changed

3 files changed

+64
-5
lines changed

contrib/intarray/expected/_int.out

+42
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,12 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21';
473473
6344
474474
(1 row)
475475

476+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
477+
count
478+
-------
479+
12
480+
(1 row)
481+
476482
SET enable_seqscan = off; -- not all of these would use index by default
477483
CREATE INDEX text_idx on test__int using gist ( a gist__int_ops );
478484
SELECT count(*) from test__int WHERE a && '{23,50}';
@@ -547,6 +553,12 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21';
547553
6344
548554
(1 row)
549555

556+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
557+
count
558+
-------
559+
12
560+
(1 row)
561+
550562
INSERT INTO test__int SELECT array(SELECT x FROM generate_series(1, 1001) x); -- should fail
551563
ERROR: input array is too big (199 maximum allowed, 1001 current), use gist__intbig_ops opclass instead
552564
DROP INDEX text_idx;
@@ -629,6 +641,12 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21';
629641
6344
630642
(1 row)
631643

644+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
645+
count
646+
-------
647+
12
648+
(1 row)
649+
632650
DROP INDEX text_idx;
633651
CREATE INDEX text_idx on test__int using gist (a gist__intbig_ops(siglen = 0));
634652
ERROR: value 0 out of bounds for option "siglen"
@@ -709,6 +727,12 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21';
709727
6344
710728
(1 row)
711729

730+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
731+
count
732+
-------
733+
12
734+
(1 row)
735+
712736
DROP INDEX text_idx;
713737
CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops );
714738
SELECT count(*) from test__int WHERE a && '{23,50}';
@@ -783,6 +807,12 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21';
783807
6344
784808
(1 row)
785809

810+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
811+
count
812+
-------
813+
12
814+
(1 row)
815+
786816
DROP INDEX text_idx;
787817
CREATE INDEX text_idx on test__int using gin ( a gin__int_ops );
788818
SELECT count(*) from test__int WHERE a && '{23,50}';
@@ -857,6 +887,12 @@ SELECT count(*) from test__int WHERE a @@ '!20 & !21';
857887
6344
858888
(1 row)
859889

890+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
891+
count
892+
-------
893+
12
894+
(1 row)
895+
860896
DROP INDEX text_idx;
861897
-- Repeat the same queries with an extended data set. The data set is the
862898
-- same that we used before, except that each element in the array is
@@ -949,4 +985,10 @@ SELECT count(*) from more__int WHERE a @@ '!20 & !21';
949985
6344
950986
(1 row)
951987

988+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
989+
count
990+
-------
991+
12
992+
(1 row)
993+
952994
RESET enable_seqscan;

contrib/intarray/sql/_int.sql

+7
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
9292
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
9393
SELECT count(*) from test__int WHERE a @@ '20 | !21';
9494
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
95+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
9596

9697
SET enable_seqscan = off; -- not all of these would use index by default
9798

@@ -109,6 +110,7 @@ SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
109110
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
110111
SELECT count(*) from test__int WHERE a @@ '20 | !21';
111112
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
113+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
112114

113115
INSERT INTO test__int SELECT array(SELECT x FROM generate_series(1, 1001) x); -- should fail
114116

@@ -129,6 +131,7 @@ SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
129131
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
130132
SELECT count(*) from test__int WHERE a @@ '20 | !21';
131133
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
134+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
132135

133136
DROP INDEX text_idx;
134137
CREATE INDEX text_idx on test__int using gist (a gist__intbig_ops(siglen = 0));
@@ -147,6 +150,7 @@ SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
147150
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
148151
SELECT count(*) from test__int WHERE a @@ '20 | !21';
149152
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
153+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
150154

151155
DROP INDEX text_idx;
152156
CREATE INDEX text_idx on test__int using gist ( a gist__intbig_ops );
@@ -163,6 +167,7 @@ SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
163167
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
164168
SELECT count(*) from test__int WHERE a @@ '20 | !21';
165169
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
170+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
166171

167172
DROP INDEX text_idx;
168173
CREATE INDEX text_idx on test__int using gin ( a gin__int_ops );
@@ -179,6 +184,7 @@ SELECT count(*) from test__int WHERE a @> '{20,23}' or a @> '{50,68}';
179184
SELECT count(*) from test__int WHERE a @@ '(20&23)|(50&68)';
180185
SELECT count(*) from test__int WHERE a @@ '20 | !21';
181186
SELECT count(*) from test__int WHERE a @@ '!20 & !21';
187+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
182188

183189
DROP INDEX text_idx;
184190

@@ -214,6 +220,7 @@ SELECT count(*) from more__int WHERE a @> '{20,23}' or a @> '{50,68}';
214220
SELECT count(*) from more__int WHERE a @@ '(20&23)|(50&68)';
215221
SELECT count(*) from more__int WHERE a @@ '20 | !21';
216222
SELECT count(*) from more__int WHERE a @@ '!20 & !21';
223+
SELECT count(*) from test__int WHERE a @@ '!2733 & (2738 | 254)';
217224

218225

219226
RESET enable_seqscan;

src/backend/access/gin/ginlogic.c

+15-5
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,9 @@ shimBoolConsistentFn(GinScanKey key)
146146
* every combination is O(n^2), so this is only feasible for a small number of
147147
* MAYBE inputs.
148148
*
149-
* NB: This function modifies the key->entryRes array!
149+
* NB: This function modifies the key->entryRes array. For now that's okay
150+
* so long as we restore the entry-time contents before returning. This may
151+
* need revisiting if we ever invent multithreaded GIN scans, though.
150152
*/
151153
static GinTernaryValue
152154
shimTriConsistentFn(GinScanKey key)
@@ -155,7 +157,7 @@ shimTriConsistentFn(GinScanKey key)
155157
int maybeEntries[MAX_MAYBE_ENTRIES];
156158
int i;
157159
bool boolResult;
158-
bool recheck = false;
160+
bool recheck;
159161
GinTernaryValue curResult;
160162

161163
/*
@@ -175,8 +177,8 @@ shimTriConsistentFn(GinScanKey key)
175177
}
176178

177179
/*
178-
* If none of the inputs were MAYBE, so we can just call consistent
179-
* function as is.
180+
* If none of the inputs were MAYBE, we can just call the consistent
181+
* function as-is.
180182
*/
181183
if (nmaybe == 0)
182184
return directBoolConsistentFn(key);
@@ -185,6 +187,7 @@ shimTriConsistentFn(GinScanKey key)
185187
for (i = 0; i < nmaybe; i++)
186188
key->entryRes[maybeEntries[i]] = GIN_FALSE;
187189
curResult = directBoolConsistentFn(key);
190+
recheck = key->recheckCurItem;
188191

189192
for (;;)
190193
{
@@ -206,13 +209,20 @@ shimTriConsistentFn(GinScanKey key)
206209
recheck |= key->recheckCurItem;
207210

208211
if (curResult != boolResult)
209-
return GIN_MAYBE;
212+
{
213+
curResult = GIN_MAYBE;
214+
break;
215+
}
210216
}
211217

212218
/* TRUE with recheck is taken to mean MAYBE */
213219
if (curResult == GIN_TRUE && recheck)
214220
curResult = GIN_MAYBE;
215221

222+
/* We must restore the original state of the entryRes array */
223+
for (i = 0; i < nmaybe; i++)
224+
key->entryRes[maybeEntries[i]] = GIN_MAYBE;
225+
216226
return curResult;
217227
}
218228

0 commit comments

Comments
 (0)