17
17
*/
18
18
#include "postgres.h"
19
19
20
+ #include <math.h>
21
+
20
22
#include "access/htup_details.h"
21
23
#include "catalog/pg_operator.h"
22
24
#include "catalog/pg_statistic.h"
@@ -405,6 +407,13 @@ calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
405
407
NULL , NULL )))
406
408
return -1.0 ;
407
409
410
+ /* check that it's a histogram, not just a dummy entry */
411
+ if (nhist < 2 )
412
+ {
413
+ free_attstatsslot (vardata -> atttype , hist_values , nhist , NULL , 0 );
414
+ return -1.0 ;
415
+ }
416
+
408
417
/*
409
418
* Convert histogram of ranges into histograms of its lower and upper
410
419
* bounds.
@@ -693,7 +702,8 @@ get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
693
702
/*
694
703
* Both bounds are finite. Assuming the subtype's comparison function
695
704
* works sanely, the value must be finite, too, because it lies
696
- * somewhere between the bounds. If it doesn't, just return something.
705
+ * somewhere between the bounds. If it doesn't, arbitrarily return
706
+ * 0.5.
697
707
*/
698
708
if (value -> infinite )
699
709
return 0.5 ;
@@ -703,21 +713,22 @@ get_position(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist1,
703
713
return 0.5 ;
704
714
705
715
/* Calculate relative position using subdiff function. */
706
- bin_width = DatumGetFloat8 (FunctionCall2Coll (
707
- & typcache -> rng_subdiff_finfo ,
716
+ bin_width = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
708
717
typcache -> rng_collation ,
709
718
hist2 -> val ,
710
719
hist1 -> val ));
711
- if (bin_width <= 0.0 )
712
- return 0.5 ; /* zero width bin */
720
+ if (isnan ( bin_width ) || bin_width <= 0.0 )
721
+ return 0.5 ; /* punt for NaN or zero- width bin */
713
722
714
- position = DatumGetFloat8 (FunctionCall2Coll (
715
- & typcache -> rng_subdiff_finfo ,
723
+ position = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
716
724
typcache -> rng_collation ,
717
725
value -> val ,
718
726
hist1 -> val ))
719
727
/ bin_width ;
720
728
729
+ if (isnan (position ))
730
+ return 0.5 ; /* punt for NaN from subdiff, Inf/Inf, etc */
731
+
721
732
/* Relative position must be in [0,1] range */
722
733
position = Max (position , 0.0 );
723
734
position = Min (position , 1.0 );
@@ -809,15 +820,23 @@ get_distance(TypeCacheEntry *typcache, RangeBound *bound1, RangeBound *bound2)
809
820
if (!bound1 -> infinite && !bound2 -> infinite )
810
821
{
811
822
/*
812
- * No bounds are infinite, use subdiff function or return default
823
+ * Neither bound is infinite, use subdiff function or return default
813
824
* value of 1.0 if no subdiff is available.
814
825
*/
815
826
if (has_subdiff )
816
- return
817
- DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
818
- typcache -> rng_collation ,
819
- bound2 -> val ,
820
- bound1 -> val ));
827
+ {
828
+ float8 res ;
829
+
830
+ res = DatumGetFloat8 (FunctionCall2Coll (& typcache -> rng_subdiff_finfo ,
831
+ typcache -> rng_collation ,
832
+ bound2 -> val ,
833
+ bound1 -> val ));
834
+ /* Reject possible NaN result, also negative result */
835
+ if (isnan (res ) || res < 0.0 )
836
+ return 1.0 ;
837
+ else
838
+ return res ;
839
+ }
821
840
else
822
841
return 1.0 ;
823
842
}
@@ -831,7 +850,7 @@ get_distance(TypeCacheEntry *typcache, RangeBound *bound1, RangeBound *bound2)
831
850
}
832
851
else
833
852
{
834
- /* One bound is infinite, another is not */
853
+ /* One bound is infinite, the other is not */
835
854
return get_float8_infinity ();
836
855
}
837
856
}
@@ -1027,17 +1046,31 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
1027
1046
upper_index = rbound_bsearch (typcache , upper , hist_lower , hist_nvalues ,
1028
1047
false);
1029
1048
1049
+ /*
1050
+ * If the upper bound value is below the histogram's lower limit, there
1051
+ * are no matches.
1052
+ */
1053
+ if (upper_index < 0 )
1054
+ return 0.0 ;
1055
+
1056
+ /*
1057
+ * If the upper bound value is at or beyond the histogram's upper limit,
1058
+ * start our loop at the last actual bin, as though the upper bound were
1059
+ * within that bin; get_position will clamp its result to 1.0 anyway.
1060
+ * (This corresponds to assuming that the data population above the
1061
+ * histogram's upper limit is empty, exactly like what we just assumed for
1062
+ * the lower limit.)
1063
+ */
1064
+ upper_index = Min (upper_index , hist_nvalues - 2 );
1065
+
1030
1066
/*
1031
1067
* Calculate upper_bin_width, ie. the fraction of the (upper_index,
1032
1068
* upper_index + 1) bin which is greater than upper bound of query range
1033
1069
* using linear interpolation of subdiff function.
1034
1070
*/
1035
- if (upper_index >= 0 && upper_index < hist_nvalues - 1 )
1036
- upper_bin_width = get_position (typcache , upper ,
1037
- & hist_lower [upper_index ],
1038
- & hist_lower [upper_index + 1 ]);
1039
- else
1040
- upper_bin_width = 0.0 ;
1071
+ upper_bin_width = get_position (typcache , upper ,
1072
+ & hist_lower [upper_index ],
1073
+ & hist_lower [upper_index + 1 ]);
1041
1074
1042
1075
/*
1043
1076
* In the loop, dist and prev_dist are the distance of the "current" bin's
@@ -1110,9 +1143,6 @@ calc_hist_selectivity_contained(TypeCacheEntry *typcache,
1110
1143
* of ranges that contain the constant lower and upper bounds. This uses
1111
1144
* the histograms of range lower bounds and range lengths, on the assumption
1112
1145
* that the range lengths are independent of the lower bounds.
1113
- *
1114
- * Note, this is "var @> const", ie. estimate the fraction of ranges that
1115
- * contain the constant lower and upper bounds.
1116
1146
*/
1117
1147
static double
1118
1148
calc_hist_selectivity_contains (TypeCacheEntry * typcache ,
@@ -1131,16 +1161,30 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
1131
1161
lower_index = rbound_bsearch (typcache , lower , hist_lower , hist_nvalues ,
1132
1162
true);
1133
1163
1164
+ /*
1165
+ * If the lower bound value is below the histogram's lower limit, there
1166
+ * are no matches.
1167
+ */
1168
+ if (lower_index < 0 )
1169
+ return 0.0 ;
1170
+
1171
+ /*
1172
+ * If the lower bound value is at or beyond the histogram's upper limit,
1173
+ * start our loop at the last actual bin, as though the lower bound were
1174
+ * within that bin; get_position will clamp its result to 1.0 anyway.
1175
+ * (This corresponds to assuming that the data population above the
1176
+ * histogram's upper limit is empty, exactly like what we just assumed for
1177
+ * the lower limit.)
1178
+ */
1179
+ lower_index = Min (lower_index , hist_nvalues - 2 );
1180
+
1134
1181
/*
1135
1182
* Calculate lower_bin_width, ie. the fraction of the (lower_index,
1136
1183
* lower_index + 1) bin which is greater than lower bound of query range
1137
1184
* using linear interpolation of subdiff function.
1138
1185
*/
1139
- if (lower_index >= 0 && lower_index < hist_nvalues - 1 )
1140
- lower_bin_width = get_position (typcache , lower , & hist_lower [lower_index ],
1141
- & hist_lower [lower_index + 1 ]);
1142
- else
1143
- lower_bin_width = 0.0 ;
1186
+ lower_bin_width = get_position (typcache , lower , & hist_lower [lower_index ],
1187
+ & hist_lower [lower_index + 1 ]);
1144
1188
1145
1189
/*
1146
1190
* Loop through all the lower bound bins, smaller than the query lower
0 commit comments