Skip to content

Commit 6bd567b

Browse files
committed
Fix edge-case crashes and misestimation in range containment selectivity.
When estimating the selectivity of "range_var <@ range_constant" or "range_var @> range_constant", if the upper (or respectively lower) bound of the range_constant was above the last bin of the range_var's histogram, the code would access uninitialized memory and potentially crash (though it seems the probability of a crash is quite low). Handle the endpoint cases explicitly to fix that. While at it, be more paranoid about the possibility of getting NaN or other silly results from the range type's subdiff function. And improve some comments. Ordinarily we'd probably add a regression test case demonstrating the bug in unpatched code. But it's too hard to get it to crash reliably because of the uninitialized-memory dependence, so skip that. Per bug #16122 from Adam Scott. It's been broken from the beginning, apparently, so backpatch to all supported branches. Diagnosis by Michael Paquier, patch by Andrey Borodin and Tom Lane. Discussion: https://postgr.es/m/16122-eb35bc248c806c15@postgresql.org
1 parent 27676e2 commit 6bd567b

File tree

1 file changed

+72
-28
lines changed

1 file changed

+72
-28
lines changed

src/backend/utils/adt/rangetypes_selfuncs.c

+72-28
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
#include "postgres.h"
1919

20+
#include <math.h>
21+
2022
#include "access/htup_details.h"
2123
#include "catalog/pg_operator.h"
2224
#include "catalog/pg_statistic.h"
@@ -405,6 +407,13 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
405407
NULL, NULL)))
406408
return -1.0;
407409

410+
/* check that it's a histogram, not just a dummy entry */
411+
if (nhist < 2)
412+
{
413+
free_attstatsslot(vardata->atttype, hist_values, nhist, NULL, 0);
414+
return -1.0;
415+
}
416+
408417
/*
409418
* Convert histogram of ranges into histograms of its lower and upper
410419
* bounds.
@@ -693,7 +702,8 @@ get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
693702
/*
694703
* Both bounds are finite. Assuming the subtype's comparison function
695704
* works sanely, the value must be finite, too, because it lies
696-
* somewhere between the bounds. If it doesn't, just return something.
705+
* somewhere between the bounds. If it doesn't, arbitrarily return
706+
* 0.5.
697707
*/
698708
if (value->infinite)
699709
return 0.5;
@@ -703,21 +713,22 @@ get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
703713
return 0.5;
704714

705715
/* Calculate relative position using subdiff function. */
706-
bin_width = DatumGetFloat8(FunctionCall2Coll(
707-
&typcache->rng_subdiff_finfo,
716+
bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
708717
typcache->rng_collation,
709718
hist2->val,
710719
hist1->val));
711-
if (bin_width <= 0.0)
712-
return 0.5; /* zero width bin */
720+
if (isnan(bin_width) || bin_width <= 0.0)
721+
return 0.5; /* punt for NaN or zero-width bin */
713722

714-
position = DatumGetFloat8(FunctionCall2Coll(
715-
&typcache->rng_subdiff_finfo,
723+
position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
716724
typcache->rng_collation,
717725
value->val,
718726
hist1->val))
719727
/ bin_width;
720728

729+
if (isnan(position))
730+
return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */
731+
721732
/* Relative position must be in [0,1] range */
722733
position = Max(position, 0.0);
723734
position = Min(position, 1.0);
@@ -809,15 +820,23 @@ get_distance(TypeCacheEntry *typcache, RangeBound *bound1, RangeBound *bound2)
809820
if (!bound1->infinite && !bound2->infinite)
810821
{
811822
/*
812-
* No bounds are infinite, use subdiff function or return default
823+
* Neither bound is infinite, use subdiff function or return default
813824
* value of 1.0 if no subdiff is available.
814825
*/
815826
if (has_subdiff)
816-
return
817-
DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
818-
typcache->rng_collation,
819-
bound2->val,
820-
bound1->val));
827+
{
828+
float8 res;
829+
830+
res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
831+
typcache->rng_collation,
832+
bound2->val,
833+
bound1->val));
834+
/* Reject possible NaN result, also negative result */
835+
if (isnan(res) || res < 0.0)
836+
return 1.0;
837+
else
838+
return res;
839+
}
821840
else
822841
return 1.0;
823842
}
@@ -831,7 +850,7 @@ get_distance(TypeCacheEntry *typcache, RangeBound *bound1, RangeBound *bound2)
831850
}
832851
else
833852
{
834-
/* One bound is infinite, another is not */
853+
/* One bound is infinite, the other is not */
835854
return get_float8_infinity();
836855
}
837856
}
@@ -1027,17 +1046,31 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
10271046
upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
10281047
false);
10291048

1049+
/*
1050+
* If the upper bound value is below the histogram's lower limit, there
1051+
* are no matches.
1052+
*/
1053+
if (upper_index < 0)
1054+
return 0.0;
1055+
1056+
/*
1057+
* If the upper bound value is at or beyond the histogram's upper limit,
1058+
* start our loop at the last actual bin, as though the upper bound were
1059+
* within that bin; get_position will clamp its result to 1.0 anyway.
1060+
* (This corresponds to assuming that the data population above the
1061+
* histogram's upper limit is empty, exactly like what we just assumed for
1062+
* the lower limit.)
1063+
*/
1064+
upper_index = Min(upper_index, hist_nvalues - 2);
1065+
10301066
/*
10311067
* Calculate upper_bin_width, ie. the fraction of the (upper_index,
10321068
* upper_index + 1) bin which is greater than upper bound of query range
10331069
* using linear interpolation of subdiff function.
10341070
*/
1035-
if (upper_index >= 0 && upper_index < hist_nvalues - 1)
1036-
upper_bin_width = get_position(typcache, upper,
1037-
&hist_lower[upper_index],
1038-
&hist_lower[upper_index + 1]);
1039-
else
1040-
upper_bin_width = 0.0;
1071+
upper_bin_width = get_position(typcache, upper,
1072+
&hist_lower[upper_index],
1073+
&hist_lower[upper_index + 1]);
10411074

10421075
/*
10431076
* In the loop, dist and prev_dist are the distance of the "current" bin's
@@ -1110,9 +1143,6 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
11101143
* of ranges that contain the constant lower and upper bounds. This uses
11111144
* the histograms of range lower bounds and range lengths, on the assumption
11121145
* that the range lengths are independent of the lower bounds.
1113-
*
1114-
* Note, this is "var @> const", ie. estimate the fraction of ranges that
1115-
* contain the constant lower and upper bounds.
11161146
*/
11171147
static double
11181148
calc_hist_selectivity_contains(TypeCacheEntry *typcache,
@@ -1131,16 +1161,30 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
11311161
lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
11321162
true);
11331163

1164+
/*
1165+
* If the lower bound value is below the histogram's lower limit, there
1166+
* are no matches.
1167+
*/
1168+
if (lower_index < 0)
1169+
return 0.0;
1170+
1171+
/*
1172+
* If the lower bound value is at or beyond the histogram's upper limit,
1173+
* start our loop at the last actual bin, as though the upper bound were
1174+
* within that bin; get_position will clamp its result to 1.0 anyway.
1175+
* (This corresponds to assuming that the data population above the
1176+
* histogram's upper limit is empty, exactly like what we just assumed for
1177+
* the lower limit.)
1178+
*/
1179+
lower_index = Min(lower_index, hist_nvalues - 2);
1180+
11341181
/*
11351182
* Calculate lower_bin_width, ie. the fraction of the of (lower_index,
11361183
* lower_index + 1) bin which is greater than lower bound of query range
11371184
* using linear interpolation of subdiff function.
11381185
*/
1139-
if (lower_index >= 0 && lower_index < hist_nvalues - 1)
1140-
lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
1141-
&hist_lower[lower_index + 1]);
1142-
else
1143-
lower_bin_width = 0.0;
1186+
lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
1187+
&hist_lower[lower_index + 1]);
11441188

11451189
/*
11461190
* Loop through all the lower bound bins, smaller than the query lower

0 commit comments

Comments
 (0)