Skip to content

Commit 38bb3ae

Browse files
committed
Convert tsginidx.c's GIN indexing logic to fully ternary operation.
Commit 2f2007f did this partially, but there were two remaining warts. checkcondition_gin handled some uncertain cases by setting the out-of-band recheck flag, some by returning TS_MAYBE, and some by doing both. Meanwhile, TS_execute arbitrarily converted a TS_MAYBE result to TS_YES. Thus, if checkcondition_gin chose to only return TS_MAYBE, the outcome would be TS_YES with no recheck flag, potentially resulting in wrong query outputs. The case where this'd happen is if there were GIN_MAYBE entries in the indexscan results passed to gin_tsquery_[tri]consistent, which so far as I can see would only happen if the tidbitmap used to accumulate indexscan results grew large enough to become lossy. I initially thought of fixing this by ensuring we always set the recheck flag as well as returning TS_MAYBE in uncertain cases. But that errs in the other direction, potentially forcing rechecks of rows that provably match the query (since the recheck flag remains set even if TS_execute later finds that the answer must be TS_YES). Instead, let's get rid of the out-of-band recheck flag altogether and rely on returning TS_MAYBE. This requires exporting a version of TS_execute that will actually return the full ternary result of the evaluation ... but we likely should have done that to start with. Unfortunately it doesn't seem practical to add a regression test case that covers this: the amount of data needed to cause the GIN bitmap to become lossy results in a longer runtime than I think we want to have in the tests. (I'm wondering about allowing smaller work_mem settings to ameliorate that, but it'd be a matter for a separate patch.) Per bug #16865 from Dimitri Nüscheler. Back-patch to v13 where the faulty commit came in. Discussion: https://postgr.es/m/16865-4ffdc3e682e6d75b@postgresql.org
1 parent f672df5 commit 38bb3ae

File tree

3 files changed

+44
-28
lines changed

3 files changed

+44
-28
lines changed

src/backend/utils/adt/tsginidx.c

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@ typedef struct
175175
QueryItem *first_item;
176176
GinTernaryValue *check;
177177
int *map_item_operand;
178-
bool *need_recheck;
179178
} GinChkVal;
180179

181180
/*
@@ -186,33 +185,30 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
186185
{
187186
GinChkVal *gcv = (GinChkVal *) checkval;
188187
int j;
189-
190-
/*
191-
* if any val requiring a weight is used or caller needs position
192-
* information then set recheck flag
193-
*/
194-
if (val->weight != 0 || data != NULL)
195-
*(gcv->need_recheck) = true;
188+
GinTernaryValue result;
196189

197190
/* convert item's number to corresponding entry's (operand's) number */
198191
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
199192

193+
/* determine presence of current entry in indexed value */
194+
result = gcv->check[j];
195+
200196
/*
201-
* return presence of current entry in indexed value; but TRUE becomes
202-
* MAYBE in the presence of a query requiring recheck
197+
* If any val requiring a weight is used or caller needs position
198+
* information then we must recheck, so replace TRUE with MAYBE.
203199
*/
204-
if (gcv->check[j] == GIN_TRUE)
200+
if (result == GIN_TRUE)
205201
{
206202
if (val->weight != 0 || data != NULL)
207-
return TS_MAYBE;
203+
result = GIN_MAYBE;
208204
}
209205

210206
/*
211207
* We rely on GinTernaryValue and TSTernaryValue using equivalent value
212208
* assignments. We could use a switch statement to map the values if that
213209
* ever stops being true, but it seems unlikely to happen.
214210
*/
215-
return (TSTernaryValue) gcv->check[j];
211+
return (TSTernaryValue) result;
216212
}
217213

218214
Datum
@@ -244,12 +240,23 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
244240
"sizes of GinTernaryValue and bool are not equal");
245241
gcv.check = (GinTernaryValue *) check;
246242
gcv.map_item_operand = (int *) (extra_data[0]);
247-
gcv.need_recheck = recheck;
248243

249-
res = TS_execute(GETQUERY(query),
250-
&gcv,
251-
TS_EXEC_PHRASE_NO_POS,
252-
checkcondition_gin);
244+
switch (TS_execute_ternary(GETQUERY(query),
245+
&gcv,
246+
TS_EXEC_PHRASE_NO_POS,
247+
checkcondition_gin))
248+
{
249+
case TS_NO:
250+
res = false;
251+
break;
252+
case TS_YES:
253+
res = true;
254+
break;
255+
case TS_MAYBE:
256+
res = true;
257+
*recheck = true;
258+
break;
259+
}
253260
}
254261

255262
PG_RETURN_BOOL(res);
@@ -266,10 +273,6 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
266273
/* int32 nkeys = PG_GETARG_INT32(3); */
267274
Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
268275
GinTernaryValue res = GIN_FALSE;
269-
bool recheck;
270-
271-
/* Initially assume query doesn't require recheck */
272-
recheck = false;
273276

274277
if (query->size > 0)
275278
{
@@ -282,13 +285,11 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
282285
gcv.first_item = GETQUERY(query);
283286
gcv.check = check;
284287
gcv.map_item_operand = (int *) (extra_data[0]);
285-
gcv.need_recheck = &recheck;
286288

287-
if (TS_execute(GETQUERY(query),
288-
&gcv,
289-
TS_EXEC_PHRASE_NO_POS,
290-
checkcondition_gin))
291-
res = recheck ? GIN_MAYBE : GIN_TRUE;
289+
res = TS_execute_ternary(GETQUERY(query),
290+
&gcv,
291+
TS_EXEC_PHRASE_NO_POS,
292+
checkcondition_gin);
292293
}
293294

294295
PG_RETURN_GIN_TERNARY_VALUE(res);

src/backend/utils/adt/tsvector_op.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1854,6 +1854,18 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags,
18541854
return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
18551855
}
18561856

1857+
/*
1858+
* Evaluate tsquery boolean expression.
1859+
*
1860+
* This is the same as TS_execute except that TS_MAYBE is returned as-is.
1861+
*/
1862+
TSTernaryValue
1863+
TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1864+
TSExecuteCallback chkcond)
1865+
{
1866+
return TS_execute_recurse(curitem, arg, flags, chkcond);
1867+
}
1868+
18571869
/*
18581870
* TS_execute recursion for operators above any phrase operator. Here we do
18591871
* not need to worry about lexeme positions. As soon as we hit an OP_PHRASE

src/include/tsearch/ts_utils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
199199

200200
extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
201201
TSExecuteCallback chkcond);
202+
extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
203+
uint32 flags,
204+
TSExecuteCallback chkcond);
202205
extern bool tsquery_requires_match(QueryItem *curitem);
203206

204207
/*

0 commit comments

Comments
 (0)