Skip to content

Commit 6cae9d2

Browse files
committed
Improve handling of NULLs in KNN-GiST and KNN-SP-GiST
This commit improves subject in two ways: * It removes ugliness of 02f9087, which stores distance values and null flags in two separate arrays after GISTSearchItem struct. Instead we pack both distance value and null flag in IndexOrderByDistance struct. Alignment overhead should be negligible, because we typically deal with at most few "col op const" expressions in ORDER BY clause. * It fixes handling of "col op NULL" expression in KNN-SP-GiST. Now, these expression are not passed to support functions, which can't deal with them. Instead, NULL result is implicitly assumed. It future we may decide to teach support functions to deal with NULL arguments, but current solution is bugfix suitable for backpatch. Reported-by: Nikita Glukhov Discussion: https://postgr.es/m/826f57ee-afc7-8977-c44c-6111d18b02ec%40postgrespro.ru Author: Nikita Glukhov Reviewed-by: Alexander Korotkov Backpatch-through: 9.4
1 parent 0a97edb commit 6cae9d2

File tree

10 files changed

+140
-101
lines changed

10 files changed

+140
-101
lines changed

src/backend/access/gist/gistget.c

Lines changed: 24 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,8 @@ gistkillitems(IndexScanDesc scan)
112112
* Similarly, *recheck_distances_p is set to indicate whether the distances
113113
* need to be rechecked, and it is also ignored for non-leaf entries.
114114
*
115-
* If we are doing an ordered scan, so->distancesValues[] and
116-
* so->distancesNulls[] is filled with distance data from the distance()
117-
* functions before returning success.
115+
* If we are doing an ordered scan, so->distances[] is filled with distance
116+
* data from the distance() functions before returning success.
118117
*
119118
* We must decompress the key in the IndexTuple before passing it to the
120119
* sk_funcs (which actually are the opclass Consistent or Distance methods).
@@ -135,8 +134,7 @@ gistindex_keytest(IndexScanDesc scan,
135134
GISTSTATE *giststate = so->giststate;
136135
ScanKey key = scan->keyData;
137136
int keySize = scan->numberOfKeys;
138-
double *distance_value_p;
139-
bool *distance_null_p;
137+
IndexOrderByDistance *distance_p;
140138
Relation r = scan->indexRelation;
141139

142140
*recheck_p = false;
@@ -155,8 +153,8 @@ gistindex_keytest(IndexScanDesc scan,
155153
elog(ERROR, "invalid GiST tuple found on leaf page");
156154
for (i = 0; i < scan->numberOfOrderBys; i++)
157155
{
158-
so->distanceValues[i] = -get_float8_infinity();
159-
so->distanceNulls[i] = false;
156+
so->distances[i].value = -get_float8_infinity();
157+
so->distances[i].isnull = false;
160158
}
161159
return true;
162160
}
@@ -240,8 +238,7 @@ gistindex_keytest(IndexScanDesc scan,
240238

241239
/* OK, it passes --- now let's compute the distances */
242240
key = scan->orderByData;
243-
distance_value_p = so->distanceValues;
244-
distance_null_p = so->distanceNulls;
241+
distance_p = so->distances;
245242
keySize = scan->numberOfOrderBys;
246243
while (keySize > 0)
247244
{
@@ -256,8 +253,8 @@ gistindex_keytest(IndexScanDesc scan,
256253
if ((key->sk_flags & SK_ISNULL) || isNull)
257254
{
258255
/* Assume distance computes as null */
259-
*distance_value_p = 0.0;
260-
*distance_null_p = true;
256+
distance_p->value = 0.0;
257+
distance_p->isnull = true;
261258
}
262259
else
263260
{
@@ -294,13 +291,12 @@ gistindex_keytest(IndexScanDesc scan,
294291
ObjectIdGetDatum(key->sk_subtype),
295292
PointerGetDatum(&recheck));
296293
*recheck_distances_p |= recheck;
297-
*distance_value_p = DatumGetFloat8(dist);
298-
*distance_null_p = false;
294+
distance_p->value = DatumGetFloat8(dist);
295+
distance_p->isnull = false;
299296
}
300297

301298
key++;
302-
distance_value_p++;
303-
distance_null_p++;
299+
distance_p++;
304300
keySize--;
305301
}
306302

@@ -313,8 +309,7 @@ gistindex_keytest(IndexScanDesc scan,
313309
*
314310
* scan: index scan we are executing
315311
* pageItem: search queue item identifying an index page to scan
316-
* myDistanceValues: distances array associated with pageItem, or NULL at the root
317-
* myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
312+
* myDistances: distances array associated with pageItem, or NULL at the root
318313
* tbm: if not NULL, gistgetbitmap's output bitmap
319314
* ntids: if not NULL, gistgetbitmap's output tuple counter
320315
*
@@ -332,8 +327,7 @@ gistindex_keytest(IndexScanDesc scan,
332327
*/
333328
static void
334329
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
335-
double *myDistanceValues, bool *myDistanceNulls,
336-
TIDBitmap *tbm, int64 *ntids)
330+
IndexOrderByDistance *myDistances, TIDBitmap *tbm, int64 *ntids)
337331
{
338332
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
339333
GISTSTATE *giststate = so->giststate;
@@ -370,7 +364,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
370364
GISTSearchItem *item;
371365

372366
/* This can't happen when starting at the root */
373-
Assert(myDistanceValues != NULL && myDistanceNulls != NULL);
367+
Assert(myDistances != NULL);
374368

375369
oldcxt = MemoryContextSwitchTo(so->queueCxt);
376370

@@ -380,10 +374,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
380374
item->data.parentlsn = pageItem->data.parentlsn;
381375

382376
/* Insert it into the queue using same distances as for this page */
383-
memcpy(GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
384-
myDistanceValues, sizeof(double) * scan->numberOfOrderBys);
385-
memcpy(GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
386-
myDistanceNulls, sizeof(bool) * scan->numberOfOrderBys);
377+
memcpy(item->distances, myDistances,
378+
sizeof(item->distances[0]) * scan->numberOfOrderBys);
387379

388380
pairingheap_add(so->queue, &item->phNode);
389381

@@ -527,10 +519,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
527519
}
528520

529521
/* Insert it into the queue using new distance data */
530-
memcpy(GISTSearchItemDistanceValues(item, nOrderBys),
531-
so->distanceValues, sizeof(double) * nOrderBys);
532-
memcpy(GISTSearchItemDistanceNulls(item, nOrderBys),
533-
so->distanceNulls, sizeof(bool) * nOrderBys);
522+
memcpy(item->distances, so->distances,
523+
sizeof(item->distances[0]) * nOrderBys);
534524

535525
pairingheap_add(so->queue, &item->phNode);
536526

@@ -595,8 +585,7 @@ getNextNearest(IndexScanDesc scan)
595585
scan->xs_recheck = item->data.heap.recheck;
596586

597587
index_store_float8_orderby_distances(scan, so->orderByTypes,
598-
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
599-
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
588+
item->distances,
600589
item->data.heap.recheckDistances);
601590

602591
/* in an index-only scan, also return the reconstructed tuple. */
@@ -609,10 +598,7 @@ getNextNearest(IndexScanDesc scan)
609598
/* visit an index page, extract its items into queue */
610599
CHECK_FOR_INTERRUPTS();
611600

612-
gistScanPage(scan, item,
613-
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
614-
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
615-
NULL, NULL);
601+
gistScanPage(scan, item, item->distances, NULL, NULL);
616602
}
617603

618604
pfree(item);
@@ -650,7 +636,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
650636

651637
fakeItem.blkno = GIST_ROOT_BLKNO;
652638
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
653-
gistScanPage(scan, &fakeItem, NULL, NULL, NULL, NULL);
639+
gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
654640
}
655641

656642
if (scan->numberOfOrderBys > 0)
@@ -744,10 +730,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
744730
* this page, we fall out of the inner "do" and loop around to
745731
* return them.
746732
*/
747-
gistScanPage(scan, item,
748-
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
749-
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
750-
NULL, NULL);
733+
gistScanPage(scan, item, item->distances, NULL, NULL);
751734

752735
pfree(item);
753736
} while (so->nPageData == 0);
@@ -778,7 +761,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
778761

779762
fakeItem.blkno = GIST_ROOT_BLKNO;
780763
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
781-
gistScanPage(scan, &fakeItem, NULL, NULL, tbm, &ntids);
764+
gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
782765

783766
/*
784767
* While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -793,10 +776,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
793776

794777
CHECK_FOR_INTERRUPTS();
795778

796-
gistScanPage(scan, item,
797-
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
798-
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
799-
tbm, &ntids);
779+
gistScanPage(scan, item, item->distances, tbm, &ntids);
800780

801781
pfree(item);
802782
}

src/backend/access/gist/gistscan.c

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,26 +33,23 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
3333
const GISTSearchItem *sb = (const GISTSearchItem *) b;
3434
IndexScanDesc scan = (IndexScanDesc) arg;
3535
int i;
36-
double *da = GISTSearchItemDistanceValues(sa, scan->numberOfOrderBys),
37-
*db = GISTSearchItemDistanceValues(sb, scan->numberOfOrderBys);
38-
bool *na = GISTSearchItemDistanceNulls(sa, scan->numberOfOrderBys),
39-
*nb = GISTSearchItemDistanceNulls(sb, scan->numberOfOrderBys);
4036

4137
/* Order according to distance comparison */
4238
for (i = 0; i < scan->numberOfOrderBys; i++)
4339
{
44-
if (na[i])
40+
if (sa->distances[i].isnull)
4541
{
46-
if (!nb[i])
42+
if (!sb->distances[i].isnull)
4743
return -1;
4844
}
49-
else if (nb[i])
45+
else if (sb->distances[i].isnull)
5046
{
5147
return 1;
5248
}
5349
else
5450
{
55-
int cmp = -float8_cmp_internal(da[i], db[i]);
51+
int cmp = -float8_cmp_internal(sa->distances[i].value,
52+
sb->distances[i].value);
5653

5754
if (cmp != 0)
5855
return cmp;
@@ -100,8 +97,7 @@ gistbeginscan(Relation r, int nkeys, int norderbys)
10097
so->queueCxt = giststate->scanCxt; /* see gistrescan */
10198

10299
/* workspaces with size dependent on numberOfOrderBys: */
103-
so->distanceValues = palloc(sizeof(double) * scan->numberOfOrderBys);
104-
so->distanceNulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
100+
so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys);
105101
so->qual_ok = true; /* in case there are zero keys */
106102
if (scan->numberOfOrderBys > 0)
107103
{

src/backend/access/index/indexam.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -847,14 +847,14 @@ index_getprocinfo(Relation irel,
847847
*/
848848
void
849849
index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
850-
double *distanceValues,
851-
bool *distanceNulls, bool recheckOrderBy)
850+
IndexOrderByDistance *distances,
851+
bool recheckOrderBy)
852852
{
853853
int i;
854854

855855
scan->xs_recheckorderby = recheckOrderBy;
856856

857-
if (!distanceValues)
857+
if (!distances)
858858
{
859859
Assert(!scan->xs_recheckorderby);
860860

@@ -869,20 +869,20 @@ index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
869869

870870
for (i = 0; i < scan->numberOfOrderBys; i++)
871871
{
872-
if (distanceNulls && distanceNulls[i])
873-
{
872+
scan->xs_orderbynulls[i] = distances[i].isnull;
873+
874+
if (scan->xs_orderbynulls[i])
874875
scan->xs_orderbyvals[i] = (Datum) 0;
875-
scan->xs_orderbynulls[i] = true;
876-
}
876+
877877
if (orderByTypes[i] == FLOAT8OID)
878878
{
879879
#ifndef USE_FLOAT8_BYVAL
880880
/* must free any old value to avoid memory leakage */
881881
if (!scan->xs_orderbynulls[i])
882882
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
883883
#endif
884-
scan->xs_orderbyvals[i] = Float8GetDatum(distanceValues[i]);
885-
scan->xs_orderbynulls[i] = false;
884+
if (!scan->xs_orderbynulls[i])
885+
scan->xs_orderbyvals[i] = Float8GetDatum(distances[i].value);
886886
}
887887
else if (orderByTypes[i] == FLOAT4OID)
888888
{
@@ -892,8 +892,8 @@ index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
892892
if (!scan->xs_orderbynulls[i])
893893
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
894894
#endif
895-
scan->xs_orderbyvals[i] = Float4GetDatum((float4) distanceValues[i]);
896-
scan->xs_orderbynulls[i] = false;
895+
if (!scan->xs_orderbynulls[i])
896+
scan->xs_orderbyvals[i] = Float4GetDatum((float4) distances[i].value);
897897
}
898898
else
899899
{

0 commit comments

Comments
 (0)