Skip to content

Commit 2da8e56

Browse files
committed
Improve handling of NULLs in KNN-GiST and KNN-SP-GiST
This commit improves subject in two ways: * It removes ugliness of 02f9087, which stores distance values and null flags in two separate arrays after GISTSearchItem struct. Instead we pack both distance value and null flag in IndexOrderByDistance struct. Alignment overhead should be negligible, because we typically deal with at most few "col op const" expressions in ORDER BY clause. * It fixes handling of "col op NULL" expression in KNN-SP-GiST. Now, these expression are not passed to support functions, which can't deal with them. Instead, NULL result is implicitly assumed. It future we may decide to teach support functions to deal with NULL arguments, but current solution is bugfix suitable for backpatch. Reported-by: Nikita Glukhov Discussion: https://postgr.es/m/826f57ee-afc7-8977-c44c-6111d18b02ec%40postgrespro.ru Author: Nikita Glukhov Reviewed-by: Alexander Korotkov Backpatch-through: 9.4
1 parent d84db2d commit 2da8e56

File tree

5 files changed

+47
-79
lines changed

5 files changed

+47
-79
lines changed

src/backend/access/gist/gistget.c

Lines changed: 27 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,8 @@ gistkillitems(IndexScanDesc scan)
110110
* Similarly, *recheck_distances_p is set to indicate whether the distances
111111
* need to be rechecked, and it is also ignored for non-leaf entries.
112112
*
113-
* If we are doing an ordered scan, so->distancesValues[] and
114-
* so->distancesNulls[] is filled with distance data from the distance()
115-
* functions before returning success.
113+
* If we are doing an ordered scan, so->distances[] is filled with distance
114+
* data from the distance() functions before returning success.
116115
*
117116
* We must decompress the key in the IndexTuple before passing it to the
118117
* sk_funcs (which actually are the opclass Consistent or Distance methods).
@@ -133,8 +132,7 @@ gistindex_keytest(IndexScanDesc scan,
133132
GISTSTATE *giststate = so->giststate;
134133
ScanKey key = scan->keyData;
135134
int keySize = scan->numberOfKeys;
136-
double *distance_value_p;
137-
bool *distance_null_p;
135+
IndexOrderByDistance *distance_p;
138136
Relation r = scan->indexRelation;
139137

140138
*recheck_p = false;
@@ -153,8 +151,8 @@ gistindex_keytest(IndexScanDesc scan,
153151
elog(ERROR, "invalid GiST tuple found on leaf page");
154152
for (i = 0; i < scan->numberOfOrderBys; i++)
155153
{
156-
so->distanceValues[i] = -get_float8_infinity();
157-
so->distanceNulls[i] = false;
154+
so->distances[i].value = -get_float8_infinity();
155+
so->distances[i].isnull = false;
158156
}
159157
return true;
160158
}
@@ -238,8 +236,7 @@ gistindex_keytest(IndexScanDesc scan,
238236

239237
/* OK, it passes --- now let's compute the distances */
240238
key = scan->orderByData;
241-
distance_value_p = so->distanceValues;
242-
distance_null_p = so->distanceNulls;
239+
distance_p = so->distances;
243240
keySize = scan->numberOfOrderBys;
244241
while (keySize > 0)
245242
{
@@ -254,8 +251,8 @@ gistindex_keytest(IndexScanDesc scan,
254251
if ((key->sk_flags & SK_ISNULL) || isNull)
255252
{
256253
/* Assume distance computes as null */
257-
*distance_value_p = 0.0;
258-
*distance_null_p = true;
254+
distance_p->value = 0.0;
255+
distance_p->isnull = true;
259256
}
260257
else
261258
{
@@ -292,13 +289,12 @@ gistindex_keytest(IndexScanDesc scan,
292289
ObjectIdGetDatum(key->sk_subtype),
293290
PointerGetDatum(&recheck));
294291
*recheck_distances_p |= recheck;
295-
*distance_value_p = DatumGetFloat8(dist);
296-
*distance_null_p = false;
292+
distance_p->value = DatumGetFloat8(dist);
293+
distance_p->isnull = false;
297294
}
298295

299296
key++;
300-
distance_value_p++;
301-
distance_null_p++;
297+
distance_p++;
302298
keySize--;
303299
}
304300

@@ -311,8 +307,7 @@ gistindex_keytest(IndexScanDesc scan,
311307
*
312308
* scan: index scan we are executing
313309
* pageItem: search queue item identifying an index page to scan
314-
* myDistanceValues: distances array associated with pageItem, or NULL at the root
315-
* myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
310+
* myDistances: distances array associated with pageItem, or NULL at the root
316311
* tbm: if not NULL, gistgetbitmap's output bitmap
317312
* ntids: if not NULL, gistgetbitmap's output tuple counter
318313
*
@@ -330,8 +325,7 @@ gistindex_keytest(IndexScanDesc scan,
330325
*/
331326
static void
332327
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
333-
double *myDistanceValues, bool *myDistanceNulls,
334-
TIDBitmap *tbm, int64 *ntids)
328+
IndexOrderByDistance *myDistances, TIDBitmap *tbm, int64 *ntids)
335329
{
336330
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
337331
GISTSTATE *giststate = so->giststate;
@@ -367,7 +361,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
367361
GISTSearchItem *item;
368362

369363
/* This can't happen when starting at the root */
370-
Assert(myDistanceValues != NULL && myDistanceNulls != NULL);
364+
Assert(myDistances != NULL);
371365

372366
oldcxt = MemoryContextSwitchTo(so->queueCxt);
373367

@@ -377,10 +371,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
377371
item->data.parentlsn = pageItem->data.parentlsn;
378372

379373
/* Insert it into the queue using same distances as for this page */
380-
memcpy(GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
381-
myDistanceValues, sizeof(double) * scan->numberOfOrderBys);
382-
memcpy(GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
383-
myDistanceNulls, sizeof(bool) * scan->numberOfOrderBys);
374+
memcpy(item->distances, myDistances,
375+
sizeof(item->distances[0]) * scan->numberOfOrderBys);
384376

385377
pairingheap_add(so->queue, &item->phNode);
386378

@@ -510,10 +502,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
510502
}
511503

512504
/* Insert it into the queue using new distance data */
513-
memcpy(GISTSearchItemDistanceValues(item, nOrderBys),
514-
so->distanceValues, sizeof(double) * nOrderBys);
515-
memcpy(GISTSearchItemDistanceNulls(item, nOrderBys),
516-
so->distanceNulls, sizeof(bool) * nOrderBys);
505+
memcpy(item->distances, so->distances,
506+
sizeof(item->distances[0]) * nOrderBys);
517507

518508
pairingheap_add(so->queue, &item->phNode);
519509

@@ -568,8 +558,6 @@ getNextNearest(IndexScanDesc scan)
568558
do
569559
{
570560
GISTSearchItem *item = getNextGISTSearchItem(so);
571-
float8 *distanceValues = GISTSearchItemDistanceValues(item, scan->numberOfOrderBys);
572-
bool *distanceNulls = GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys);
573561

574562
if (!item)
575563
break;
@@ -589,8 +577,8 @@ getNextNearest(IndexScanDesc scan)
589577
if (!scan->xs_orderbynulls[i])
590578
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
591579
#endif
592-
scan->xs_orderbyvals[i] = Float8GetDatum(distanceValues[i]);
593-
scan->xs_orderbynulls[i] = distanceNulls[i];
580+
scan->xs_orderbyvals[i] = item->distances[i].value;
581+
scan->xs_orderbynulls[i] = item->distances[i].isnull;
594582
}
595583
else if (so->orderByTypes[i] == FLOAT4OID)
596584
{
@@ -600,8 +588,8 @@ getNextNearest(IndexScanDesc scan)
600588
if (!scan->xs_orderbynulls[i])
601589
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
602590
#endif
603-
scan->xs_orderbyvals[i] = Float4GetDatum(distanceValues[i]);
604-
scan->xs_orderbynulls[i] = distanceNulls[i];
591+
scan->xs_orderbyvals[i] = Float4GetDatum(item->distances[i].value);
592+
scan->xs_orderbynulls[i] = item->distances[i].isnull;
605593
}
606594
else
607595
{
@@ -629,10 +617,7 @@ getNextNearest(IndexScanDesc scan)
629617
/* visit an index page, extract its items into queue */
630618
CHECK_FOR_INTERRUPTS();
631619

632-
gistScanPage(scan, item,
633-
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
634-
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
635-
NULL, NULL);
620+
gistScanPage(scan, item, item->distances, NULL, NULL);
636621
}
637622

638623
pfree(item);
@@ -670,7 +655,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
670655

671656
fakeItem.blkno = GIST_ROOT_BLKNO;
672657
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
673-
gistScanPage(scan, &fakeItem, NULL, NULL, NULL, NULL);
658+
gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
674659
}
675660

676661
if (scan->numberOfOrderBys > 0)
@@ -764,10 +749,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
764749
* this page, we fall out of the inner "do" and loop around to
765750
* return them.
766751
*/
767-
gistScanPage(scan, item,
768-
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
769-
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
770-
NULL, NULL);
752+
gistScanPage(scan, item, item->distances, NULL, NULL);
771753

772754
pfree(item);
773755
} while (so->nPageData == 0);
@@ -798,7 +780,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
798780

799781
fakeItem.blkno = GIST_ROOT_BLKNO;
800782
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
801-
gistScanPage(scan, &fakeItem, NULL, NULL, tbm, &ntids);
783+
gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
802784

803785
/*
804786
* While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -813,10 +795,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
813795

814796
CHECK_FOR_INTERRUPTS();
815797

816-
gistScanPage(scan, item,
817-
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
818-
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
819-
tbm, &ntids);
798+
gistScanPage(scan, item, item->distances, tbm, &ntids);
820799

821800
pfree(item);
822801
}

src/backend/access/gist/gistscan.c

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,26 +33,23 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
3333
const GISTSearchItem *sb = (const GISTSearchItem *) b;
3434
IndexScanDesc scan = (IndexScanDesc) arg;
3535
int i;
36-
double *da = GISTSearchItemDistanceValues(sa, scan->numberOfOrderBys),
37-
*db = GISTSearchItemDistanceValues(sb, scan->numberOfOrderBys);
38-
bool *na = GISTSearchItemDistanceNulls(sa, scan->numberOfOrderBys),
39-
*nb = GISTSearchItemDistanceNulls(sb, scan->numberOfOrderBys);
4036

4137
/* Order according to distance comparison */
4238
for (i = 0; i < scan->numberOfOrderBys; i++)
4339
{
44-
if (na[i])
40+
if (sa->distances[i].isnull)
4541
{
46-
if (!nb[i])
42+
if (!sb->distances[i].isnull)
4743
return -1;
4844
}
49-
else if (nb[i])
45+
else if (sb->distances[i].isnull)
5046
{
5147
return 1;
5248
}
5349
else
5450
{
55-
int cmp = -float8_cmp_internal(da[i], db[i]);
51+
int cmp = -float8_cmp_internal(sa->distances[i].value,
52+
sb->distances[i].value);
5653

5754
if (cmp != 0)
5855
return cmp;
@@ -100,8 +97,7 @@ gistbeginscan(Relation r, int nkeys, int norderbys)
10097
so->queueCxt = giststate->scanCxt; /* see gistrescan */
10198

10299
/* workspaces with size dependent on numberOfOrderBys: */
103-
so->distanceValues = palloc(sizeof(double) * scan->numberOfOrderBys);
104-
so->distanceNulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
100+
so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys);
105101
so->qual_ok = true; /* in case there are zero keys */
106102
if (scan->numberOfOrderBys > 0)
107103
{

src/include/access/genam.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,13 @@ typedef enum IndexUniqueCheck
117117
} IndexUniqueCheck;
118118

119119

120+
/* Nullable "ORDER BY col op const" distance */
121+
typedef struct IndexOrderByDistance
122+
{
123+
double value;
124+
bool isnull;
125+
} IndexOrderByDistance;
126+
120127
/*
121128
* generalized index_ interface routines (in indexam.c)
122129
*/

src/include/access/gist_private.h

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -137,29 +137,15 @@ typedef struct GISTSearchItem
137137
GISTSearchHeapItem heap; /* heap info, if heap tuple */
138138
} data;
139139

140-
/*
141-
* This data structure is followed by arrays of distance values and
142-
* distance null flags. Size of both arrays is
143-
* IndexScanDesc->numberOfOrderBys. See macros below for accessing those
144-
* arrays.
145-
*/
140+
/* numberOfOrderBys entries */
141+
IndexOrderByDistance distances[FLEXIBLE_ARRAY_MEMBER];
146142
} GISTSearchItem;
147143

148144
#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
149145

150-
#define SizeOfGISTSearchItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchItem)) + \
151-
(sizeof(double) + sizeof(bool)) * (n_distances))
152-
153-
/*
154-
* We actually don't need n_distances compute pointer to distance values.
155-
* Nevertheless take n_distances as argument to have same arguments list for
156-
* GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls().
157-
*/
158-
#define GISTSearchItemDistanceValues(item, n_distances) \
159-
((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem))))
160-
161-
#define GISTSearchItemDistanceNulls(item, n_distances) \
162-
((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem)) + sizeof(double) * (n_distances)))
146+
#define SizeOfGISTSearchItem(n_distances) \
147+
(offsetof(GISTSearchItem, distances) + \
148+
sizeof(IndexOrderByDistance) * (n_distances))
163149

164150
/*
165151
* GISTScanOpaqueData: private state for a scan of a GiST index
@@ -175,8 +161,7 @@ typedef struct GISTScanOpaqueData
175161
bool firstCall; /* true until first gistgettuple call */
176162

177163
/* pre-allocated workspace arrays */
178-
double *distanceValues; /* output area for gistindex_keytest */
179-
bool *distanceNulls;
164+
IndexOrderByDistance *distances; /* output area for gistindex_keytest */
180165

181166
/* info about killed items if any (killedItems is NULL if never used) */
182167
OffsetNumber *killedItems; /* offset numbers of killed items */

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,7 @@ IndexList
991991
IndexOnlyScan
992992
IndexOnlyScanState
993993
IndexOptInfo
994+
IndexOrderByDistance
994995
IndexPath
995996
IndexQualInfo
996997
IndexRuntimeKeyInfo

0 commit comments

Comments
 (0)