Further tweaking of width_bucket() edge cases.

tglsfdc · tglsfdc · commit a2a0c7c29e47 · 2023-03-31T16:29:55.000-04:00
I realized that the third overflow case I posited in commit b0e9e4d actually should be handled in a different way: rather than tolerating the idea that the quotient could round to 1, we should clamp so that the output cannot be more than "count" when we know that the operand is less than bound2. That being the case, we don't need an overflow-aware increment in that code path, which leads me to revert the movement of the pg_add_s32_overflow() call. (The diff in width_bucket_float8 might be easier to read by comparing against b0e9e4d^.) What's more, width_bucket_numeric also has this problem of the quotient potentially rounding to 1, so add a clamp there too. As before, I'm not quite convinced that a back-patch is warranted. Discussion: https://postgr.es/m/391415.1680268470@sss.pgh.pa.us
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
@@ -4108,39 +4108,63 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 
 	if (bound1 < bound2)
 	{
-		/* In all cases, we'll add one at the end */
 		if (operand < bound1)
-			result = -1;
+			result = 0;
 		else if (operand >= bound2)
-			result = count;
-		else if (!isinf(bound2 - bound1))
 		{
-			/* Result of division is surely in [0,1], so this can't overflow */
-			result = count * ((operand - bound1) / (bound2 - bound1));
+			if (pg_add_s32_overflow(count, 1, &result))
+				ereport(ERROR,
+						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+						 errmsg("integer out of range")));
 		}
 		else
 		{
-			/*
-			 * We get here if bound2 - bound1 overflows DBL_MAX.  Since both
-			 * bounds are finite, their difference can't exceed twice DBL_MAX;
-			 * so we can perform the computation without overflow by dividing
-			 * all the inputs by 2.  That should be exact, too, except in the
-			 * case where a very small operand underflows to zero, which would
-			 * have negligible impact on the result given such large bounds.
-			 */
-			result = count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2));
+			if (!isinf(bound2 - bound1))
+			{
+				/* The quotient is surely in [0,1], so this can't overflow */
+				result = count * ((operand - bound1) / (bound2 - bound1));
+			}
+			else
+			{
+				/*
+				 * We get here if bound2 - bound1 overflows DBL_MAX.  Since
+				 * both bounds are finite, their difference can't exceed twice
+				 * DBL_MAX; so we can perform the computation without overflow
+				 * by dividing all the inputs by 2.  That should be exact too,
+				 * except in the case where a very small operand underflows to
+				 * zero, which would have negligible impact on the result
+				 * given such large bounds.
+				 */
+				result = count * ((operand / 2 - bound1 / 2) / (bound2 / 2 - bound1 / 2));
+			}
+			/* The quotient could round to 1.0, which would be a lie */
+			if (result >= count)
+				result = count - 1;
+			/* Having done that, we can add 1 without fear of overflow */
+			result++;
 		}
 	}
 	else if (bound1 > bound2)
 	{
 		if (operand > bound1)
-			result = -1;
+			result = 0;
 		else if (operand <= bound2)
-			result = count;
-		else if (!isinf(bound1 - bound2))
-			result = count * ((bound1 - operand) / (bound1 - bound2));
+		{
+			if (pg_add_s32_overflow(count, 1, &result))
+				ereport(ERROR,
+						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+						 errmsg("integer out of range")));
+		}
 		else
-			result = count * ((bound1 / 2 - operand / 2) / (bound1 / 2 - bound2 / 2));
+		{
+			if (!isinf(bound1 - bound2))
+				result = count * ((bound1 - operand) / (bound1 - bound2));
+			else
+				result = count * ((bound1 / 2 - operand / 2) / (bound1 / 2 - bound2 / 2));
+			if (result >= count)
+				result = count - 1;
+			result++;
+		}
 	}
 	else
 	{
@@ -4150,10 +4174,5 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 		result = 0;				/* keep the compiler quiet */
 	}
 
-	if (pg_add_s32_overflow(result, 1, &result))
-		ereport(ERROR,
-				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-				 errmsg("integer out of range")));
-
 	PG_RETURN_INT32(result);
 }
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
@@ -1907,7 +1907,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
 }
 
 /*
- * If 'operand' is not outside the bucket range, determine the correct
+ * 'operand' is inside the bucket range, so determine the correct
  * bucket for it to go. The calculations performed by this function
  * are derived directly from the SQL2003 spec. Note however that we
  * multiply by count before dividing, to avoid unnecessary roundoff error.
@@ -1940,8 +1940,19 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
 			operand_var.dscale + count_var->dscale);
 	div_var(&operand_var, &bound2_var, result_var,
 			select_div_scale(&operand_var, &bound2_var), true);
-	add_var(result_var, &const_one, result_var);
-	floor_var(result_var, result_var);
+
+	/*
+	 * Roundoff in the division could give us a quotient exactly equal to
+	 * "count", which is too large.  Clamp so that we do not emit a result
+	 * larger than "count".
+	 */
+	if (cmp_var(result_var, count_var) >= 0)
+		set_var_from_var(count_var, result_var);
+	else
+	{
+		add_var(result_var, &const_one, result_var);
+		floor_var(result_var, result_var);
+	}
 
 	free_var(&bound1_var);
 	free_var(&bound2_var);
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
@@ -1473,6 +1473,31 @@ FROM generate_series(0, 110, 10) x;
  110 |   0 |   0
 (12 rows)
 
+-- Another roundoff-error hazard
+SELECT width_bucket(0, -1e100::numeric, 1, 10);
+ width_bucket 
+--------------
+           10
+(1 row)
+
+SELECT width_bucket(0, -1e100::float8, 1, 10);
+ width_bucket 
+--------------
+           10
+(1 row)
+
+SELECT width_bucket(1, 1e100::numeric, 0, 10);
+ width_bucket 
+--------------
+           10
+(1 row)
+
+SELECT width_bucket(1, 1e100::float8, 0, 10);
+ width_bucket 
+--------------
+           10
+(1 row)
+
 -- Check cases that could trigger overflow or underflow within the calculation
 SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt)
 FROM
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
@@ -909,6 +909,11 @@ FROM generate_series(0, 110, 10) x;
 SELECT x, width_bucket(x::float8, 100, 10, 9) as flt,
        width_bucket(x::numeric, 100, 10, 9) as num
 FROM generate_series(0, 110, 10) x;
+-- Another roundoff-error hazard
+SELECT width_bucket(0, -1e100::numeric, 1, 10);
+SELECT width_bucket(0, -1e100::float8, 1, 10);
+SELECT width_bucket(1, 1e100::numeric, 0, 10);
+SELECT width_bucket(1, 1e100::float8, 0, 10);
 
 -- Check cases that could trigger overflow or underflow within the calculation
 SELECT oper, low, high, cnt, width_bucket(oper, low, high, cnt)