Skip to content

Commit 57dba87

Browse files
committed
Fix GiST index build for NaN values in geometric types.
GiST index build could go into an infinite loop when presented with boxes (or points, circles or polygons) containing NaN component values. This happened essentially because the code assumed that x == x is true for any "double" value x; but it's not true for NaNs. The looping behavior was not the only problem though: we also attempted to sort the items using simple double comparisons. Since NaNs violate the trichotomy law, qsort could (in principle at least) get arbitrarily confused and mess up the sorting of ordinary values as well as NaNs. And we based splitting choices on box size calculations that could produce NaNs, again resulting in undesirable behavior. To fix, replace all comparisons of doubles in this logic with float8_cmp_internal, which is NaN-aware and is careful to sort NaNs consistently, higher than any non-NaN. Also rearrange the box size calculation to not produce NaNs; instead it should produce an infinity for a box with NaN on one side and not-NaN on the other. I don't by any means claim that this solves all problems with NaNs in geometric values, but it should at least make GiST index insertion work reliably with such data. It's likely that the index search side of things still needs some work, and probably regular geometric operations too. But with this patch we're laying down a convention for how such cases ought to behave. Per bug #14238 from Guang-Dih Lei. Back-patch to 9.2; the code used before commit 7f3bd86 is quite different and doesn't lock up on my simple test case, nor on the submitter's dataset. Report: <20160708151747.1426.60150@wrigleys.postgresql.org> Discussion: <28685.1468246504@sss.pgh.pa.us>
1 parent 7d70bf9 commit 57dba87

File tree

3 files changed

+92
-68
lines changed

3 files changed

+92
-68
lines changed

src/backend/access/gist/gistproc.c

Lines changed: 88 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,31 @@
1717
*/
1818
#include "postgres.h"
1919

20+
#include <math.h>
21+
2022
#include "access/gist.h"
2123
#include "access/skey.h"
24+
#include "utils/builtins.h"
2225
#include "utils/geo_decls.h"
2326

2427

2528
static bool gist_box_leaf_consistent(BOX *key, BOX *query,
2629
StrategyNumber strategy);
27-
static double size_box(BOX *box);
2830
static bool rtree_internal_consistent(BOX *key, BOX *query,
2931
StrategyNumber strategy);
3032

3133
/* Minimum accepted ratio of split */
3234
#define LIMIT_RATIO 0.3
3335

36+
/* Convenience macros for NaN-aware comparisons */
37+
#define FLOAT8_EQ(a,b) (float8_cmp_internal(a, b) == 0)
38+
#define FLOAT8_LT(a,b) (float8_cmp_internal(a, b) < 0)
39+
#define FLOAT8_LE(a,b) (float8_cmp_internal(a, b) <= 0)
40+
#define FLOAT8_GT(a,b) (float8_cmp_internal(a, b) > 0)
41+
#define FLOAT8_GE(a,b) (float8_cmp_internal(a, b) >= 0)
42+
#define FLOAT8_MAX(a,b) (FLOAT8_GT(a, b) ? (a) : (b))
43+
#define FLOAT8_MIN(a,b) (FLOAT8_LT(a, b) ? (a) : (b))
44+
3445

3546
/**************************************************
3647
* Box ops
@@ -40,12 +51,53 @@ static bool rtree_internal_consistent(BOX *key, BOX *query,
4051
* Calculates union of two boxes, a and b. The result is stored in *n.
4152
*/
4253
static void
43-
rt_box_union(BOX *n, BOX *a, BOX *b)
54+
rt_box_union(BOX *n, const BOX *a, const BOX *b)
55+
{
56+
n->high.x = FLOAT8_MAX(a->high.x, b->high.x);
57+
n->high.y = FLOAT8_MAX(a->high.y, b->high.y);
58+
n->low.x = FLOAT8_MIN(a->low.x, b->low.x);
59+
n->low.y = FLOAT8_MIN(a->low.y, b->low.y);
60+
}
61+
62+
/*
63+
* Size of a BOX for penalty-calculation purposes.
64+
* The result can be +Infinity, but not NaN.
65+
*/
66+
static double
67+
size_box(const BOX *box)
68+
{
69+
/*
70+
* Check for zero-width cases. Note that we define the size of a zero-
71+
* by-infinity box as zero. It's important to special-case this somehow,
72+
* as naively multiplying infinity by zero will produce NaN.
73+
*
74+
* The less-than cases should not happen, but if they do, say "zero".
75+
*/
76+
if (FLOAT8_LE(box->high.x, box->low.x) ||
77+
FLOAT8_LE(box->high.y, box->low.y))
78+
return 0.0;
79+
80+
/*
81+
* We treat NaN as larger than +Infinity, so any distance involving a NaN
82+
* and a non-NaN is infinite. Note the previous check eliminated the
83+
* possibility that the low fields are NaNs.
84+
*/
85+
if (isnan(box->high.x) || isnan(box->high.y))
86+
return get_float8_infinity();
87+
return (box->high.x - box->low.x) * (box->high.y - box->low.y);
88+
}
89+
90+
/*
91+
* Return amount by which the union of the two boxes is larger than
92+
* the original BOX's area. The result can be +Infinity, but not NaN.
93+
*/
94+
static double
95+
box_penalty(const BOX *original, const BOX *new)
4496
{
45-
n->high.x = Max(a->high.x, b->high.x);
46-
n->high.y = Max(a->high.y, b->high.y);
47-
n->low.x = Min(a->low.x, b->low.x);
48-
n->low.y = Min(a->low.y, b->low.y);
97+
BOX unionbox;
98+
99+
rt_box_union(&unionbox, original, new);
100+
return size_box(&unionbox) - size_box(original);
49101
}
50102

51103
/*
@@ -85,16 +137,19 @@ gist_box_consistent(PG_FUNCTION_ARGS)
85137
strategy));
86138
}
87139

140+
/*
141+
* Increase BOX b to include addon.
142+
*/
88143
static void
89-
adjustBox(BOX *b, BOX *addon)
144+
adjustBox(BOX *b, const BOX *addon)
90145
{
91-
if (b->high.x < addon->high.x)
146+
if (FLOAT8_LT(b->high.x, addon->high.x))
92147
b->high.x = addon->high.x;
93-
if (b->low.x > addon->low.x)
148+
if (FLOAT8_GT(b->low.x, addon->low.x))
94149
b->low.x = addon->low.x;
95-
if (b->high.y < addon->high.y)
150+
if (FLOAT8_LT(b->high.y, addon->high.y))
96151
b->high.y = addon->high.y;
97-
if (b->low.y > addon->low.y)
152+
if (FLOAT8_GT(b->low.y, addon->low.y))
98153
b->low.y = addon->low.y;
99154
}
100155

@@ -164,10 +219,8 @@ gist_box_penalty(PG_FUNCTION_ARGS)
164219
float *result = (float *) PG_GETARG_POINTER(2);
165220
BOX *origbox = DatumGetBoxP(origentry->key);
166221
BOX *newbox = DatumGetBoxP(newentry->key);
167-
BOX unionbox;
168222

169-
rt_box_union(&unionbox, origbox, newbox);
170-
*result = (float) (size_box(&unionbox) - size_box(origbox));
223+
*result = (float) box_penalty(origbox, newbox);
171224
PG_RETURN_POINTER(result);
172225
}
173226

@@ -280,12 +333,7 @@ interval_cmp_lower(const void *i1, const void *i2)
280333
double lower1 = ((const SplitInterval *) i1)->lower,
281334
lower2 = ((const SplitInterval *) i2)->lower;
282335

283-
if (lower1 < lower2)
284-
return -1;
285-
else if (lower1 > lower2)
286-
return 1;
287-
else
288-
return 0;
336+
return float8_cmp_internal(lower1, lower2);
289337
}
290338

291339
/*
@@ -297,16 +345,11 @@ interval_cmp_upper(const void *i1, const void *i2)
297345
double upper1 = ((const SplitInterval *) i1)->upper,
298346
upper2 = ((const SplitInterval *) i2)->upper;
299347

300-
if (upper1 < upper2)
301-
return -1;
302-
else if (upper1 > upper2)
303-
return 1;
304-
else
305-
return 0;
348+
return float8_cmp_internal(upper1, upper2);
306349
}
307350

308351
/*
309-
* Replace negative value with zero.
352+
* Replace negative (or NaN) value with zero.
310353
*/
311354
static inline float
312355
non_negative(float val)
@@ -425,25 +468,9 @@ g_box_consider_split(ConsiderSplitContext *context, int dimNum,
425468
}
426469
}
427470

428-
/*
429-
* Return increase of original BOX area by new BOX area insertion.
430-
*/
431-
static double
432-
box_penalty(BOX *original, BOX *new)
433-
{
434-
double union_width,
435-
union_height;
436-
437-
union_width = Max(original->high.x, new->high.x) -
438-
Min(original->low.x, new->low.x);
439-
union_height = Max(original->high.y, new->high.y) -
440-
Min(original->low.y, new->low.y);
441-
return union_width * union_height - (original->high.x - original->low.x) *
442-
(original->high.y - original->low.y);
443-
}
444-
445471
/*
446472
* Compare common entries by their deltas.
473+
* (We assume the deltas can't be NaN.)
447474
*/
448475
static int
449476
common_entry_cmp(const void *i1, const void *i2)
@@ -605,9 +632,11 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
605632
/*
606633
* Find next lower bound of right group.
607634
*/
608-
while (i1 < nentries && rightLower == intervalsLower[i1].lower)
635+
while (i1 < nentries &&
636+
FLOAT8_EQ(rightLower, intervalsLower[i1].lower))
609637
{
610-
leftUpper = Max(leftUpper, intervalsLower[i1].upper);
638+
if (FLOAT8_LT(leftUpper, intervalsLower[i1].upper))
639+
leftUpper = intervalsLower[i1].upper;
611640
i1++;
612641
}
613642
if (i1 >= nentries)
@@ -618,7 +647,8 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
618647
* Find count of intervals which anyway should be placed to the
619648
* left group.
620649
*/
621-
while (i2 < nentries && intervalsUpper[i2].upper <= leftUpper)
650+
while (i2 < nentries &&
651+
FLOAT8_LE(intervalsUpper[i2].upper, leftUpper))
622652
i2++;
623653

624654
/*
@@ -640,9 +670,10 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
640670
/*
641671
* Find next upper bound of left group.
642672
*/
643-
while (i2 >= 0 && leftUpper == intervalsUpper[i2].upper)
673+
while (i2 >= 0 && FLOAT8_EQ(leftUpper, intervalsUpper[i2].upper))
644674
{
645-
rightLower = Min(rightLower, intervalsUpper[i2].lower);
675+
if (FLOAT8_GT(rightLower, intervalsUpper[i2].lower))
676+
rightLower = intervalsUpper[i2].lower;
646677
i2--;
647678
}
648679
if (i2 < 0)
@@ -653,7 +684,7 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
653684
* Find count of intervals which anyway should be placed to the
654685
* right group.
655686
*/
656-
while (i1 >= 0 && intervalsLower[i1].lower >= rightLower)
687+
while (i1 >= 0 && FLOAT8_GE(intervalsLower[i1].lower, rightLower))
657688
i1--;
658689

659690
/*
@@ -741,10 +772,10 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
741772
upper = box->high.y;
742773
}
743774

744-
if (upper <= context.leftUpper)
775+
if (FLOAT8_LE(upper, context.leftUpper))
745776
{
746777
/* Fits to the left group */
747-
if (lower >= context.rightLower)
778+
if (FLOAT8_GE(lower, context.rightLower))
748779
{
749780
/* Fits also to the right group, so "common entry" */
750781
commonEntries[commonEntriesCount++].index = i;
@@ -762,7 +793,7 @@ gist_box_picksplit(PG_FUNCTION_ARGS)
762793
* entry didn't fit on the left group, it better fit in the right
763794
* group.
764795
*/
765-
Assert(lower >= context.rightLower);
796+
Assert(FLOAT8_GE(lower, context.rightLower));
766797

767798
/* Doesn't fit to the left group, so join to the right group */
768799
PLACE_RIGHT(box, i);
@@ -846,8 +877,10 @@ gist_box_same(PG_FUNCTION_ARGS)
846877
bool *result = (bool *) PG_GETARG_POINTER(2);
847878

848879
if (b1 && b2)
849-
*result = (b1->low.x == b2->low.x && b1->low.y == b2->low.y &&
850-
b1->high.x == b2->high.x && b1->high.y == b2->high.y);
880+
*result = (FLOAT8_EQ(b1->low.x, b2->low.x) &&
881+
FLOAT8_EQ(b1->low.y, b2->low.y) &&
882+
FLOAT8_EQ(b1->high.x, b2->high.x) &&
883+
FLOAT8_EQ(b1->high.y, b2->high.y));
851884
else
852885
*result = (b1 == NULL && b2 == NULL);
853886
PG_RETURN_POINTER(result);
@@ -931,14 +964,6 @@ gist_box_leaf_consistent(BOX *key, BOX *query, StrategyNumber strategy)
931964
return retval;
932965
}
933966

934-
static double
935-
size_box(BOX *box)
936-
{
937-
if (box->high.x <= box->low.x || box->high.y <= box->low.y)
938-
return 0.0;
939-
return (box->high.x - box->low.x) * (box->high.y - box->low.y);
940-
}
941-
942967
/*****************************************
943968
* Common rtree functions (for boxes, polygons, and circles)
944969
*****************************************/

src/backend/utils/adt/float.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,6 @@ do { \
6868
int extra_float_digits = 0; /* Added to DBL_DIG or FLT_DIG */
6969

7070

71-
static int float4_cmp_internal(float4 a, float4 b);
72-
static int float8_cmp_internal(float8 a, float8 b);
73-
7471
#ifndef HAVE_CBRT
7572
/*
7673
* Some machines (in particular, some versions of AIX) have an extern
@@ -931,7 +928,7 @@ float8div(PG_FUNCTION_ARGS)
931928
/*
932929
* float4{eq,ne,lt,le,gt,ge} - float4/float4 comparison operations
933930
*/
934-
static int
931+
int
935932
float4_cmp_internal(float4 a, float4 b)
936933
{
937934
/*
@@ -1045,7 +1042,7 @@ btfloat4sortsupport(PG_FUNCTION_ARGS)
10451042
/*
10461043
* float8{eq,ne,lt,le,gt,ge} - float8/float8 comparison operations
10471044
*/
1048-
static int
1045+
int
10491046
float8_cmp_internal(float8 a, float8 b)
10501047
{
10511048
/*

src/include/utils/builtins.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,8 @@ extern float get_float4_infinity(void);
334334
extern double get_float8_nan(void);
335335
extern float get_float4_nan(void);
336336
extern int is_infinite(double val);
337+
extern int float4_cmp_internal(float4 a, float4 b);
338+
extern int float8_cmp_internal(float8 a, float8 b);
337339

338340
extern Datum float4in(PG_FUNCTION_ARGS);
339341
extern Datum float4out(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)