Skip to content

Commit 2a63683

Browse files
committed
Add support for nearest-neighbor (KNN) searches to SP-GiST
Currently, KNN searches were supported only by GiST. SP-GiST also capable to support them. This commit implements that support. SP-GiST scan stack is replaced with queue, which serves as stack if no ordering is specified. KNN support is provided for three SP-GIST opclasses: quad_point_ops, kd_point_ops and poly_ops (catversion is bumped). Some common parts between GiST and SP-GiST KNNs are extracted into separate functions. Discussion: https://postgr.es/m/570825e8-47d0-4732-2bf6-88d67d2d51c8%40postgrespro.ru Author: Nikita Glukhov, Alexander Korotkov based on GSoC work by Vlad Sterzhanov Review: Andrey Borodin, Alexander Korotkov
1 parent d0cfc3d commit 2a63683

29 files changed

+1669
-416
lines changed

doc/src/sgml/indices.sgml

+7
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,13 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10;
281281
For more information see <xref linkend="spgist"/>.
282282
</para>
283283

284+
<para>
285+
Like GiST, SP-GiST supports <quote>nearest-neighbor</quote> searches.
286+
For SP-GiST operator classes that support distance ordering, the
287+
corresponding operator is specified in the <quote>Ordering Operators</quote>
288+
column in <xref linkend="spgist-builtin-opclasses-table"/>.
289+
</para>
290+
284291
<para>
285292
<indexterm>
286293
<primary>index</primary>

doc/src/sgml/spgist.sgml

+53-5
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,13 @@
6464

6565
<table id="spgist-builtin-opclasses-table">
6666
<title>Built-in <acronym>SP-GiST</acronym> Operator Classes</title>
67-
<tgroup cols="3">
67+
<tgroup cols="4">
6868
<thead>
6969
<row>
7070
<entry>Name</entry>
7171
<entry>Indexed Data Type</entry>
7272
<entry>Indexable Operators</entry>
73+
<entry>Ordering Operators</entry>
7374
</row>
7475
</thead>
7576
<tbody>
@@ -84,6 +85,9 @@
8485
<literal>&gt;^</literal>
8586
<literal>~=</literal>
8687
</entry>
88+
<entry>
89+
<literal>&lt;-&gt;</literal>
90+
</entry>
8791
</row>
8892
<row>
8993
<entry><literal>quad_point_ops</literal></entry>
@@ -96,6 +100,9 @@
96100
<literal>&gt;^</literal>
97101
<literal>~=</literal>
98102
</entry>
103+
<entry>
104+
<literal>&lt;-&gt;</literal>
105+
</entry>
99106
</row>
100107
<row>
101108
<entry><literal>range_ops</literal></entry>
@@ -111,6 +118,8 @@
111118
<literal>&gt;&gt;</literal>
112119
<literal>@&gt;</literal>
113120
</entry>
121+
<entry>
122+
</entry>
114123
</row>
115124
<row>
116125
<entry><literal>box_ops</literal></entry>
@@ -129,6 +138,8 @@
129138
<literal>|&gt;&gt;</literal>
130139
<literal>|&amp;&gt;</literal>
131140
</entry>
141+
<entry>
142+
</entry>
132143
</row>
133144
<row>
134145
<entry><literal>poly_ops</literal></entry>
@@ -147,6 +158,9 @@
147158
<literal>|&gt;&gt;</literal>
148159
<literal>|&amp;&gt;</literal>
149160
</entry>
161+
<entry>
162+
<literal>&lt;-&gt;</literal>
163+
</entry>
150164
</row>
151165
<row>
152166
<entry><literal>text_ops</literal></entry>
@@ -163,6 +177,8 @@
163177
<literal>~&gt;~</literal>
164178
<literal>^@</literal>
165179
</entry>
180+
<entry>
181+
</entry>
166182
</row>
167183
<row>
168184
<entry><literal>inet_ops</literal></entry>
@@ -180,6 +196,8 @@
180196
<literal>&lt;=</literal>
181197
<literal>=</literal>
182198
</entry>
199+
<entry>
200+
</entry>
183201
</row>
184202
</tbody>
185203
</tgroup>
@@ -191,6 +209,12 @@
191209
supports the same operators but uses a different index data structure which
192210
may offer better performance in some applications.
193211
</para>
212+
<para>
213+
The <literal>quad_point_ops</literal>, <literal>kd_point_ops</literal> and
214+
<literal>poly_ops</literal> operator classes support the <literal>&lt;-&gt;</literal>
215+
ordering operator, which enables the k-nearest neighbor (<literal>k-NN</literal>)
216+
search over indexed point or polygon datasets.
217+
</para>
194218

195219
</sect1>
196220

@@ -630,7 +654,10 @@ CREATE FUNCTION my_inner_consistent(internal, internal) RETURNS void ...
630654
typedef struct spgInnerConsistentIn
631655
{
632656
ScanKey scankeys; /* array of operators and comparison values */
633-
int nkeys; /* length of array */
657+
ScanKey orderbys; /* array of ordering operators and comparison
658+
* values */
659+
int nkeys; /* length of scankeys array */
660+
int norderbys; /* length of orderbys array */
634661

635662
Datum reconstructedValue; /* value reconstructed at parent */
636663
void *traversalValue; /* opclass-specific traverse value */
@@ -653,6 +680,7 @@ typedef struct spgInnerConsistentOut
653680
int *levelAdds; /* increment level by this much for each */
654681
Datum *reconstructedValues; /* associated reconstructed values */
655682
void **traversalValues; /* opclass-specific traverse values */
683+
double **distances; /* associated distances */
656684
} spgInnerConsistentOut;
657685
</programlisting>
658686

@@ -667,6 +695,8 @@ typedef struct spgInnerConsistentOut
667695
In particular it is not necessary to check <structfield>sk_flags</structfield> to
668696
see if the comparison value is NULL, because the SP-GiST core code
669697
will filter out such conditions.
698+
The array <structfield>orderbys</structfield>, of length <structfield>norderbys</structfield>,
699+
describes ordering operators (if any) in the same manner.
670700
<structfield>reconstructedValue</structfield> is the value reconstructed for the
671701
parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
672702
<function>inner_consistent</function> function did not provide a value at the
@@ -709,6 +739,10 @@ typedef struct spgInnerConsistentOut
709739
of <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type
710740
reconstructed for each child node to be visited; otherwise, leave
711741
<structfield>reconstructedValues</structfield> as NULL.
742+
If ordered search is performed, set <structfield>distances</structfield>
743+
to an array of distance values according to <structfield>orderbys</structfield>
744+
array (nodes with lowest distances will be processed first). Leave it
745+
NULL otherwise.
712746
If it is desired to pass down additional out-of-band information
713747
(<quote>traverse values</quote>) to lower levels of the tree search,
714748
set <structfield>traversalValues</structfield> to an array of the appropriate
@@ -717,6 +751,7 @@ typedef struct spgInnerConsistentOut
717751
Note that the <function>inner_consistent</function> function is
718752
responsible for palloc'ing the
719753
<structfield>nodeNumbers</structfield>, <structfield>levelAdds</structfield>,
754+
<structfield>distances</structfield>,
720755
<structfield>reconstructedValues</structfield>, and
721756
<structfield>traversalValues</structfield> arrays in the current memory context.
722757
However, any output traverse values pointed to by
@@ -747,7 +782,10 @@ CREATE FUNCTION my_leaf_consistent(internal, internal) RETURNS bool ...
747782
typedef struct spgLeafConsistentIn
748783
{
749784
ScanKey scankeys; /* array of operators and comparison values */
750-
int nkeys; /* length of array */
785+
ScanKey orderbys; /* array of ordering operators and comparison
786+
* values */
787+
int nkeys; /* length of scankeys array */
788+
int norderbys; /* length of orderbys array */
751789

752790
Datum reconstructedValue; /* value reconstructed at parent */
753791
void *traversalValue; /* opclass-specific traverse value */
@@ -759,8 +797,10 @@ typedef struct spgLeafConsistentIn
759797

760798
typedef struct spgLeafConsistentOut
761799
{
762-
Datum leafValue; /* reconstructed original data, if any */
763-
bool recheck; /* set true if operator must be rechecked */
800+
Datum leafValue; /* reconstructed original data, if any */
801+
bool recheck; /* set true if operator must be rechecked */
802+
bool recheckDistances; /* set true if distances must be rechecked */
803+
double *distances; /* associated distances */
764804
} spgLeafConsistentOut;
765805
</programlisting>
766806

@@ -775,6 +815,8 @@ typedef struct spgLeafConsistentOut
775815
In particular it is not necessary to check <structfield>sk_flags</structfield> to
776816
see if the comparison value is NULL, because the SP-GiST core code
777817
will filter out such conditions.
818+
The array <structfield>orderbys</structfield>, of length <structfield>norderbys</structfield>,
819+
describes the ordering operators in the same manner.
778820
<structfield>reconstructedValue</structfield> is the value reconstructed for the
779821
parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
780822
<function>inner_consistent</function> function did not provide a value at the
@@ -803,6 +845,12 @@ typedef struct spgLeafConsistentOut
803845
<structfield>recheck</structfield> may be set to <literal>true</literal> if the match
804846
is uncertain and so the operator(s) must be re-applied to the actual
805847
heap tuple to verify the match.
848+
If ordered search is performed, set <structfield>distances</structfield>
849+
to an array of distance values according to <structfield>orderbys</structfield>
850+
array. Leave it NULL otherwise. If at least one of returned distances
851+
is not exact, set <structfield>recheckDistances</structfield> to true.
852+
In this case, the executor will calculate the exact distances after
853+
fetching the tuple from the heap, and will reorder the tuples if needed.
806854
</para>
807855
</listitem>
808856
</varlistentry>

doc/src/sgml/xindex.sgml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1242,7 +1242,7 @@ SELECT sum(x) OVER (ORDER BY x RANGE BETWEEN 5 PRECEDING AND 10 FOLLOWING)
12421242
<title>Ordering Operators</title>
12431243

12441244
<para>
1245-
Some index access methods (currently, only GiST) support the concept of
1245+
Some index access methods (currently, only GiST and SP-GiST) support the concept of
12461246
<firstterm>ordering operators</firstterm>. What we have been discussing so far
12471247
are <firstterm>search operators</firstterm>. A search operator is one for which
12481248
the index can be searched to find all rows satisfying

src/backend/access/gist/gistget.c

+5-41
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
*/
1515
#include "postgres.h"
1616

17+
#include "access/genam.h"
1718
#include "access/gist_private.h"
1819
#include "access/relscan.h"
19-
#include "catalog/pg_type.h"
2020
#include "miscadmin.h"
2121
#include "storage/lmgr.h"
2222
#include "storage/predicate.h"
@@ -543,7 +543,6 @@ getNextNearest(IndexScanDesc scan)
543543
{
544544
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
545545
bool res = false;
546-
int i;
547546

548547
if (scan->xs_hitup)
549548
{
@@ -564,45 +563,10 @@ getNextNearest(IndexScanDesc scan)
564563
/* found a heap item at currently minimal distance */
565564
scan->xs_ctup.t_self = item->data.heap.heapPtr;
566565
scan->xs_recheck = item->data.heap.recheck;
567-
scan->xs_recheckorderby = item->data.heap.recheckDistances;
568-
for (i = 0; i < scan->numberOfOrderBys; i++)
569-
{
570-
if (so->orderByTypes[i] == FLOAT8OID)
571-
{
572-
#ifndef USE_FLOAT8_BYVAL
573-
/* must free any old value to avoid memory leakage */
574-
if (!scan->xs_orderbynulls[i])
575-
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
576-
#endif
577-
scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]);
578-
scan->xs_orderbynulls[i] = false;
579-
}
580-
else if (so->orderByTypes[i] == FLOAT4OID)
581-
{
582-
/* convert distance function's result to ORDER BY type */
583-
#ifndef USE_FLOAT4_BYVAL
584-
/* must free any old value to avoid memory leakage */
585-
if (!scan->xs_orderbynulls[i])
586-
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
587-
#endif
588-
scan->xs_orderbyvals[i] = Float4GetDatum((float4) item->distances[i]);
589-
scan->xs_orderbynulls[i] = false;
590-
}
591-
else
592-
{
593-
/*
594-
* If the ordering operator's return value is anything
595-
* else, we don't know how to convert the float8 bound
596-
* calculated by the distance function to that. The
597-
* executor won't actually need the order by values we
598-
* return here, if there are no lossy results, so only
599-
* insist on converting if the *recheck flag is set.
600-
*/
601-
if (scan->xs_recheckorderby)
602-
elog(ERROR, "GiST operator family's FOR ORDER BY operator must return float8 or float4 if the distance function is lossy");
603-
scan->xs_orderbynulls[i] = true;
604-
}
605-
}
566+
567+
index_store_float8_orderby_distances(scan, so->orderByTypes,
568+
item->distances,
569+
item->data.heap.recheckDistances);
606570

607571
/* in an index-only scan, also return the reconstructed tuple. */
608572
if (scan->xs_want_itup)

src/backend/access/gist/gistutil.c

+6-31
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "storage/lmgr.h"
2424
#include "utils/float.h"
2525
#include "utils/syscache.h"
26+
#include "utils/lsyscache.h"
2627

2728

2829
/*
@@ -871,12 +872,6 @@ gistproperty(Oid index_oid, int attno,
871872
IndexAMProperty prop, const char *propname,
872873
bool *res, bool *isnull)
873874
{
874-
HeapTuple tuple;
875-
Form_pg_index rd_index PG_USED_FOR_ASSERTS_ONLY;
876-
Form_pg_opclass rd_opclass;
877-
Datum datum;
878-
bool disnull;
879-
oidvector *indclass;
880875
Oid opclass,
881876
opfamily,
882877
opcintype;
@@ -910,41 +905,19 @@ gistproperty(Oid index_oid, int attno,
910905
}
911906

912907
/* First we need to know the column's opclass. */
913-
914-
tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
915-
if (!HeapTupleIsValid(tuple))
908+
opclass = get_index_column_opclass(index_oid, attno);
909+
if (!OidIsValid(opclass))
916910
{
917911
*isnull = true;
918912
return true;
919913
}
920-
rd_index = (Form_pg_index) GETSTRUCT(tuple);
921-
922-
/* caller is supposed to guarantee this */
923-
Assert(attno > 0 && attno <= rd_index->indnatts);
924-
925-
datum = SysCacheGetAttr(INDEXRELID, tuple,
926-
Anum_pg_index_indclass, &disnull);
927-
Assert(!disnull);
928-
929-
indclass = ((oidvector *) DatumGetPointer(datum));
930-
opclass = indclass->values[attno - 1];
931-
932-
ReleaseSysCache(tuple);
933914

934915
/* Now look up the opclass family and input datatype. */
935-
936-
tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
937-
if (!HeapTupleIsValid(tuple))
916+
if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
938917
{
939918
*isnull = true;
940919
return true;
941920
}
942-
rd_opclass = (Form_pg_opclass) GETSTRUCT(tuple);
943-
944-
opfamily = rd_opclass->opcfamily;
945-
opcintype = rd_opclass->opcintype;
946-
947-
ReleaseSysCache(tuple);
948921

949922
/* And now we can check whether the function is provided. */
950923

@@ -967,6 +940,8 @@ gistproperty(Oid index_oid, int attno,
967940
Int16GetDatum(GIST_COMPRESS_PROC));
968941
}
969942

943+
*isnull = false;
944+
970945
return true;
971946
}
972947

0 commit comments

Comments
 (0)