Skip to content

Commit 1df4123

Browse files
committed
Fix handling of NULL distances in KNN-GiST
In order to implement NULL LAST semantic GiST previously assumed distance to the NULL value to be Inf. However, our distance functions can return Inf and NaN for non-null values. In such cases, NULL LAST semantic appears to be broken. This commit fixes that by introducing separate array of null flags for distances. Backpatch to all supported versions. Discussion: https://postgr.es/m/CAPpHfdsNvNdA0DBS%2BwMpFrgwT6C3-q50sFVGLSiuWnV3FqOJuQ%40mail.gmail.com Author: Alexander Korotkov Backpatch-through: 9.4
1 parent 111fb7e commit 1df4123

File tree

3 files changed

+93
-33
lines changed

3 files changed

+93
-33
lines changed

src/backend/access/gist/gistget.c

Lines changed: 47 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@
3434
* request it. recheck is not interesting when examining a non-leaf entry,
3535
* since we must visit the lower index page if there's any doubt.
3636
*
37-
* If we are doing an ordered scan, so->distances[] is filled with distance
38-
* data from the distance() functions before returning success.
37+
* If we are doing an ordered scan, so->distancesValues[] and
38+
* so->distancesNulls[] is filled with distance data from the distance()
39+
* functions before returning success.
3940
*
4041
* We must decompress the key in the IndexTuple before passing it to the
4142
* sk_funcs (which actually are the opclass Consistent or Distance methods).
@@ -55,7 +56,8 @@ gistindex_keytest(IndexScanDesc scan,
5556
GISTSTATE *giststate = so->giststate;
5657
ScanKey key = scan->keyData;
5758
int keySize = scan->numberOfKeys;
58-
double *distance_p;
59+
double *distance_value_p;
60+
bool *distance_null_p;
5961
Relation r = scan->indexRelation;
6062

6163
*recheck_p = false;
@@ -72,7 +74,10 @@ gistindex_keytest(IndexScanDesc scan,
7274
if (GistPageIsLeaf(page)) /* shouldn't happen */
7375
elog(ERROR, "invalid GiST tuple found on leaf page");
7476
for (i = 0; i < scan->numberOfOrderBys; i++)
75-
so->distances[i] = -get_float8_infinity();
77+
{
78+
so->distanceValues[i] = -get_float8_infinity();
79+
so->distanceNulls[i] = false;
80+
}
7681
return true;
7782
}
7883

@@ -155,7 +160,8 @@ gistindex_keytest(IndexScanDesc scan,
155160

156161
/* OK, it passes --- now let's compute the distances */
157162
key = scan->orderByData;
158-
distance_p = so->distances;
163+
distance_value_p = so->distanceValues;
164+
distance_null_p = so->distanceNulls;
159165
keySize = scan->numberOfOrderBys;
160166
while (keySize > 0)
161167
{
@@ -169,8 +175,9 @@ gistindex_keytest(IndexScanDesc scan,
169175

170176
if ((key->sk_flags & SK_ISNULL) || isNull)
171177
{
172-
/* Assume distance computes as null and sorts to the end */
173-
*distance_p = get_float8_infinity();
178+
/* Assume distance computes as null */
179+
*distance_value_p = 0.0;
180+
*distance_null_p = true;
174181
}
175182
else
176183
{
@@ -199,14 +206,15 @@ gistindex_keytest(IndexScanDesc scan,
199206
key->sk_collation,
200207
PointerGetDatum(&de),
201208
key->sk_argument,
202-
Int32GetDatum(key->sk_strategy),
209+
Int16GetDatum(key->sk_strategy),
203210
ObjectIdGetDatum(key->sk_subtype));
204-
205-
*distance_p = DatumGetFloat8(dist);
211+
*distance_value_p = DatumGetFloat8(dist);
212+
*distance_null_p = false;
206213
}
207214

208215
key++;
209-
distance_p++;
216+
distance_value_p++;
217+
distance_null_p++;
210218
keySize--;
211219
}
212220

@@ -219,7 +227,8 @@ gistindex_keytest(IndexScanDesc scan,
219227
*
220228
* scan: index scan we are executing
221229
* pageItem: search queue item identifying an index page to scan
222-
* myDistances: distances array associated with pageItem, or NULL at the root
230+
* myDistanceValues: distances array associated with pageItem, or NULL at the root
231+
* myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
223232
* tbm: if not NULL, gistgetbitmap's output bitmap
224233
* ntids: if not NULL, gistgetbitmap's output tuple counter
225234
*
@@ -234,7 +243,8 @@ gistindex_keytest(IndexScanDesc scan,
234243
* sibling will be processed next.
235244
*/
236245
static void
237-
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
246+
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
247+
double *myDistanceValues, bool *myDistanceNulls,
238248
TIDBitmap *tbm, int64 *ntids)
239249
{
240250
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
@@ -270,7 +280,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
270280
GISTSearchItem *item;
271281

272282
/* This can't happen when starting at the root */
273-
Assert(myDistances != NULL);
283+
Assert(myDistanceValues != NULL && myDistanceNulls != NULL);
274284

275285
oldcxt = MemoryContextSwitchTo(so->queueCxt);
276286

@@ -283,8 +293,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
283293
/* Insert it into the queue using same distances as for this page */
284294
tmpItem->head = item;
285295
tmpItem->lastHeap = NULL;
286-
memcpy(tmpItem->distances, myDistances,
287-
sizeof(double) * scan->numberOfOrderBys);
296+
memcpy(GISTSearchTreeItemDistanceValues(tmpItem, scan->numberOfOrderBys),
297+
myDistanceValues, sizeof(double) * scan->numberOfOrderBys);
298+
memcpy(GISTSearchTreeItemDistanceNulls(tmpItem, scan->numberOfOrderBys),
299+
myDistanceNulls, sizeof(bool) * scan->numberOfOrderBys);
288300

289301
(void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);
290302

@@ -344,6 +356,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
344356
* search.
345357
*/
346358
GISTSearchItem *item;
359+
int nOrderBys = scan->numberOfOrderBys;
347360

348361
oldcxt = MemoryContextSwitchTo(so->queueCxt);
349362

@@ -374,8 +387,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
374387
/* Insert it into the queue using new distance data */
375388
tmpItem->head = item;
376389
tmpItem->lastHeap = GISTSearchItemIsHeap(*item) ? item : NULL;
377-
memcpy(tmpItem->distances, so->distances,
378-
sizeof(double) * scan->numberOfOrderBys);
390+
memcpy(GISTSearchTreeItemDistanceValues(tmpItem, nOrderBys),
391+
so->distanceValues, sizeof(double) * nOrderBys);
392+
memcpy(GISTSearchTreeItemDistanceNulls(tmpItem, nOrderBys),
393+
so->distanceNulls, sizeof(bool) * nOrderBys);
379394

380395
(void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);
381396

@@ -458,7 +473,10 @@ getNextNearest(IndexScanDesc scan)
458473
/* visit an index page, extract its items into queue */
459474
CHECK_FOR_INTERRUPTS();
460475

461-
gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL);
476+
gistScanPage(scan, item,
477+
GISTSearchTreeItemDistanceValues(so->curTreeItem, scan->numberOfOrderBys),
478+
GISTSearchTreeItemDistanceNulls(so->curTreeItem, scan->numberOfOrderBys),
479+
NULL, NULL);
462480
}
463481

464482
pfree(item);
@@ -496,7 +514,7 @@ gistgettuple(PG_FUNCTION_ARGS)
496514

497515
fakeItem.blkno = GIST_ROOT_BLKNO;
498516
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
499-
gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
517+
gistScanPage(scan, &fakeItem, NULL, NULL, NULL, NULL);
500518
}
501519

502520
if (scan->numberOfOrderBys > 0)
@@ -534,7 +552,10 @@ gistgettuple(PG_FUNCTION_ARGS)
534552
* this page, we fall out of the inner "do" and loop around to
535553
* return them.
536554
*/
537-
gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL);
555+
gistScanPage(scan, item,
556+
GISTSearchTreeItemDistanceValues(so->curTreeItem, scan->numberOfOrderBys),
557+
GISTSearchTreeItemDistanceNulls(so->curTreeItem, scan->numberOfOrderBys),
558+
NULL, NULL);
538559

539560
pfree(item);
540561
} while (so->nPageData == 0);
@@ -565,7 +586,7 @@ gistgetbitmap(PG_FUNCTION_ARGS)
565586

566587
fakeItem.blkno = GIST_ROOT_BLKNO;
567588
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
568-
gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
589+
gistScanPage(scan, &fakeItem, NULL, NULL, tbm, &ntids);
569590

570591
/*
571592
* While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -580,7 +601,10 @@ gistgetbitmap(PG_FUNCTION_ARGS)
580601

581602
CHECK_FOR_INTERRUPTS();
582603

583-
gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids);
604+
gistScanPage(scan, item,
605+
GISTSearchTreeItemDistanceValues(so->curTreeItem, scan->numberOfOrderBys),
606+
GISTSearchTreeItemDistanceNulls(so->curTreeItem, scan->numberOfOrderBys),
607+
tbm, &ntids);
584608

585609
pfree(item);
586610
}

src/backend/access/gist/gistscan.c

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,30 @@ GISTSearchTreeItemComparator(const RBNode *a, const RBNode *b, void *arg)
3333
const GISTSearchTreeItem *sb = (const GISTSearchTreeItem *) b;
3434
IndexScanDesc scan = (IndexScanDesc) arg;
3535
int i;
36+
double *da = GISTSearchTreeItemDistanceValues(sa, scan->numberOfOrderBys),
37+
*db = GISTSearchTreeItemDistanceValues(sb, scan->numberOfOrderBys);
38+
bool *na = GISTSearchTreeItemDistanceNulls(sa, scan->numberOfOrderBys),
39+
*nb = GISTSearchTreeItemDistanceNulls(sb, scan->numberOfOrderBys);
3640

3741
/* Order according to distance comparison */
3842
for (i = 0; i < scan->numberOfOrderBys; i++)
3943
{
40-
int cmp = float8_cmp_internal(sa->distances[i], sb->distances[i]);
44+
if (na[i])
45+
{
46+
if (!nb[i])
47+
return 1;
48+
}
49+
else if (nb[i])
50+
{
51+
return -1;
52+
}
53+
else
54+
{
55+
int cmp = float8_cmp_internal(da[i], db[i]);
4156

42-
if (cmp != 0)
43-
return cmp;
57+
if (cmp != 0)
58+
return cmp;
59+
}
4460
}
4561

4662
return 0;
@@ -86,7 +102,7 @@ GISTSearchTreeItemAllocator(void *arg)
86102
{
87103
IndexScanDesc scan = (IndexScanDesc) arg;
88104

89-
return palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys);
105+
return palloc(SizeOfGISTSearchTreeItem(scan->numberOfOrderBys));
90106
}
91107

92108
static void
@@ -130,8 +146,9 @@ gistbeginscan(PG_FUNCTION_ARGS)
130146
so->queueCxt = giststate->scanCxt; /* see gistrescan */
131147

132148
/* workspaces with size dependent on numberOfOrderBys: */
133-
so->tmpTreeItem = palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys);
134-
so->distances = palloc(sizeof(double) * scan->numberOfOrderBys);
149+
so->tmpTreeItem = palloc(SizeOfGISTSearchTreeItem(scan->numberOfOrderBys));
150+
so->distanceValues = palloc(sizeof(double) * scan->numberOfOrderBys);
151+
so->distanceNulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
135152
so->qual_ok = true; /* in case there are zero keys */
136153

137154
scan->opaque = so;
@@ -191,7 +208,7 @@ gistrescan(PG_FUNCTION_ARGS)
191208

192209
/* create new, empty RBTree for search queue */
193210
oldCxt = MemoryContextSwitchTo(so->queueCxt);
194-
so->queue = rb_create(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys,
211+
so->queue = rb_create(SizeOfGISTSearchTreeItem(scan->numberOfOrderBys),
195212
GISTSearchTreeItemComparator,
196213
GISTSearchTreeItemCombiner,
197214
GISTSearchTreeItemAllocator,

src/include/access/gist_private.h

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,28 @@ typedef struct GISTSearchTreeItem
144144
RBNode rbnode; /* this is an RBTree item */
145145
GISTSearchItem *head; /* first chain member */
146146
GISTSearchItem *lastHeap; /* last heap-tuple member, if any */
147-
double distances[1]; /* array with numberOfOrderBys entries */
147+
148+
/*
149+
* This data structure is followed by arrays of distance values and
150+
* distance null flags. Size of both arrays is
151+
* IndexScanDesc->numberOfOrderBys. See macros below for accessing those
152+
* arrays.
153+
*/
148154
} GISTSearchTreeItem;
149155

150-
#define GSTIHDRSZ offsetof(GISTSearchTreeItem, distances)
156+
#define SizeOfGISTSearchTreeItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchTreeItem)) + \
157+
(sizeof(double) + sizeof(bool)) * (n_distances))
158+
159+
/*
160+
* We actually don't need n_distances compute pointer to distance values.
161+
* Nevertheless take n_distances as argument to have same arguments list for
162+
* GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls().
163+
*/
164+
#define GISTSearchTreeItemDistanceValues(item, n_distances) \
165+
((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchTreeItem))))
166+
167+
#define GISTSearchTreeItemDistanceNulls(item, n_distances) \
168+
((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchTreeItem)) + sizeof(double) * (n_distances)))
151169

152170
/*
153171
* GISTScanOpaqueData: private state for a scan of a GiST index
@@ -164,7 +182,8 @@ typedef struct GISTScanOpaqueData
164182

165183
/* pre-allocated workspace arrays */
166184
GISTSearchTreeItem *tmpTreeItem; /* workspace to pass to rb_insert */
167-
double *distances; /* output area for gistindex_keytest */
185+
double *distanceValues; /* output area for gistindex_keytest */
186+
bool *distanceNulls;
168187

169188
/* In a non-ordered search, returnable heap items are stored here: */
170189
GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];

0 commit comments

Comments
 (0)