From d97e0fe88abdd7e271bf36316c370dc62a09d6bd Mon Sep 17 00:00:00 2001 From: Yakov Danishevsky Date: Tue, 21 Jan 2025 10:17:09 +0200 Subject: [PATCH] BUG: Fix ``from_float_positional`` errors for huge pads (#28149) This PR adds graceful error handling for np.format_float_positional when provided pad_left or pad_right arguments are too large. * TST: Added tests for correct handling of overflow * BUG: fixed pad_left and pad_right causing overflow if too large * TST: added overflow test and fixed formatting * BUG: fixed overflow checks and simplified error handling * BUG: rewritten excpetion message and fixed overflow check * TST: split test into smaller tests, added large input value * Apply suggestions from code review --------- Co-authored-by: Sebastian Berg --- numpy/_core/src/multiarray/dragon4.c | 64 +++++++------ numpy/_core/tests/test_scalarprint.py | 129 +++++++++++++++++--------- 2 files changed, 121 insertions(+), 72 deletions(-) diff --git a/numpy/_core/src/multiarray/dragon4.c b/numpy/_core/src/multiarray/dragon4.c index 7cd8afbed6d8..b936f4dc213e 100644 --- a/numpy/_core/src/multiarray/dragon4.c +++ b/numpy/_core/src/multiarray/dragon4.c @@ -1615,7 +1615,8 @@ typedef struct Dragon4_Options { * * See Dragon4_Options for description of remaining arguments. */ -static npy_uint32 + +static npy_int32 FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, DigitMode digit_mode, @@ -1646,7 +1647,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, buffer[pos++] = '-'; has_sign = 1; } - + numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode, cutoff_mode, precision, min_digits, buffer + has_sign, maxPrintLen - has_sign, @@ -1658,14 +1659,14 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, /* if output has a whole number */ if (printExponent >= 0) { /* leave the whole number at the start of the buffer */ - numWholeDigits = printExponent+1; + numWholeDigits = printExponent+1; if (numDigits <= numWholeDigits) { npy_int32 count = numWholeDigits - numDigits; pos += numDigits; - /* don't overflow the buffer */ - if (pos + count > maxPrintLen) { - count = maxPrintLen - pos; + if (count > maxPrintLen - pos) { + PyErr_SetString(PyExc_RuntimeError, "Float formating result too large"); + return -1; } /* add trailing zeros up to the decimal point */ @@ -1767,9 +1768,12 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, pos < maxPrintLen) { /* add trailing zeros up to add_digits length */ /* compute the number of trailing zeros needed */ + npy_int32 count = desiredFractionalDigits - numFractionDigits; - if (pos + count > maxPrintLen) { - count = maxPrintLen - pos; + + if (count > maxPrintLen - pos) { + PyErr_SetString(PyExc_RuntimeError, "Float formating result too large"); + return -1; } numFractionDigits += count; @@ -1802,7 +1806,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, } /* add any whitespace padding to right side */ - if (digits_right >= numFractionDigits) { + if (digits_right >= numFractionDigits) { npy_int32 count = digits_right - numFractionDigits; /* in trim_mode DptZeros, if right padding, add a space for the . */ @@ -1811,8 +1815,9 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, buffer[pos++] = ' '; } - if (pos + count > maxPrintLen) { - count = maxPrintLen - pos; + if (count > maxPrintLen - pos) { + PyErr_SetString(PyExc_RuntimeError, "Float formating result too large"); + return -1; } for ( ; count > 0; count--) { @@ -1823,14 +1828,16 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, if (digits_left > numWholeDigits + has_sign) { npy_int32 shift = digits_left - (numWholeDigits + has_sign); npy_int32 count = pos; - - if (count + shift > maxPrintLen) { - count = maxPrintLen - shift; + + if (count > maxPrintLen - shift) { + PyErr_SetString(PyExc_RuntimeError, "Float formating result too large"); + return -1; } if (count > 0) { memmove(buffer + shift, buffer, count); } + pos = shift + count; for ( ; shift > 0; shift--) { buffer[shift - 1] = ' '; @@ -1860,7 +1867,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, * * See Dragon4_Options for description of remaining arguments. */ -static npy_uint32 +static npy_int32 FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, DigitMode digit_mode, @@ -2158,7 +2165,7 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, * Helper function that takes Dragon4 parameters and options and * calls Dragon4. */ -static npy_uint32 +static npy_int32 Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, Dragon4_Options *opt) @@ -2187,7 +2194,7 @@ Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, * exponent: 5 bits * mantissa: 10 bits */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_IEEE_binary16( npy_half *value, Dragon4_Options *opt) { @@ -2274,7 +2281,7 @@ Dragon4_PrintFloat_IEEE_binary16( * exponent: 8 bits * mantissa: 23 bits */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_IEEE_binary32( npy_float32 *value, Dragon4_Options *opt) @@ -2367,7 +2374,7 @@ Dragon4_PrintFloat_IEEE_binary32( * exponent: 11 bits * mantissa: 52 bits */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_IEEE_binary64( npy_float64 *value, Dragon4_Options *opt) { @@ -2482,7 +2489,7 @@ typedef struct FloatVal128 { * intbit 1 bit, first u64 * mantissa: 63 bits, first u64 */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_Intel_extended( FloatVal128 value, Dragon4_Options *opt) { @@ -2580,7 +2587,7 @@ Dragon4_PrintFloat_Intel_extended( * system. But numpy defines NPY_FLOAT80, so if we come across it, assume it is * an Intel extended format. */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_Intel_extended80( npy_float80 *value, Dragon4_Options *opt) { @@ -2604,7 +2611,7 @@ Dragon4_PrintFloat_Intel_extended80( #ifdef HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE /* Intel's 80-bit IEEE extended precision format, 96-bit storage */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_Intel_extended96( npy_float96 *value, Dragon4_Options *opt) { @@ -2628,7 +2635,7 @@ Dragon4_PrintFloat_Intel_extended96( #ifdef HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE /* Motorola Big-endian equivalent of the Intel-extended 96 fp format */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_Motorola_extended96( npy_float96 *value, Dragon4_Options *opt) { @@ -2665,7 +2672,7 @@ typedef union FloatUnion128 #ifdef HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE /* Intel's 80-bit IEEE extended precision format, 128-bit storage */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_Intel_extended128( npy_float128 *value, Dragon4_Options *opt) { @@ -2694,7 +2701,7 @@ Dragon4_PrintFloat_Intel_extended128( * I am not sure if the arch also supports uint128, and C does not seem to * support int128 literals. So we use uint64 to do manipulation. */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_IEEE_binary128( FloatVal128 val128, Dragon4_Options *opt) { @@ -2779,7 +2786,7 @@ Dragon4_PrintFloat_IEEE_binary128( } #if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_IEEE_binary128_le( npy_float128 *value, Dragon4_Options *opt) { @@ -2799,7 +2806,7 @@ Dragon4_PrintFloat_IEEE_binary128_le( * This function is untested, very few, if any, architectures implement * big endian IEEE binary128 floating point. */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_IEEE_binary128_be( npy_float128 *value, Dragon4_Options *opt) { @@ -2854,7 +2861,7 @@ Dragon4_PrintFloat_IEEE_binary128_be( * https://gcc.gnu.org/wiki/Ieee128PowerPCA * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_71/com.ibm.aix.genprogc/128bit_long_double_floating-point_datatype.htm */ -static npy_uint32 +static npy_int32 Dragon4_PrintFloat_IBM_double_double( npy_float128 *value, Dragon4_Options *opt) { @@ -3041,6 +3048,7 @@ Dragon4_PrintFloat_IBM_double_double( * which goes up to about 10^4932. The Dragon4_scratch struct provides a string * buffer of this size. */ + #define make_dragon4_typefuncs_inner(Type, npy_type, format) \ \ PyObject *\ diff --git a/numpy/_core/tests/test_scalarprint.py b/numpy/_core/tests/test_scalarprint.py index f47542ef779c..b6872c2b482b 100644 --- a/numpy/_core/tests/test_scalarprint.py +++ b/numpy/_core/tests/test_scalarprint.py @@ -8,7 +8,8 @@ from tempfile import TemporaryFile import numpy as np -from numpy.testing import assert_, assert_equal, assert_raises, IS_MUSL +from numpy.testing import ( + assert_, assert_equal, assert_raises, assert_raises_regex, IS_MUSL) class TestRealScalars: def test_str(self): @@ -260,53 +261,93 @@ def test_dragon4(self): assert_equal(fpos64('324', unique=False, precision=5, fractional=False), "324.00") - def test_dragon4_interface(self): - tps = [np.float16, np.float32, np.float64] + available_float_dtypes = [np.float16, np.float32, np.float64, np.float128]\ + if hasattr(np, 'float128') else [np.float16, np.float32, np.float64] + + @pytest.mark.parametrize("tp", available_float_dtypes) + def test_dragon4_positional_interface(self, tp): # test is flaky for musllinux on np.float128 - if hasattr(np, 'float128') and not IS_MUSL: - tps.append(np.float128) - + if IS_MUSL and tp == np.float128: + pytest.skip("Skipping flaky test of float128 on musllinux") + + fpos = np.format_float_positional + + # test padding + assert_equal(fpos(tp('1.0'), pad_left=4, pad_right=4), " 1. ") + assert_equal(fpos(tp('-1.0'), pad_left=4, pad_right=4), " -1. ") + assert_equal(fpos(tp('-10.2'), + pad_left=4, pad_right=4), " -10.2 ") + + # test fixed (non-unique) mode + assert_equal(fpos(tp('1.0'), unique=False, precision=4), "1.0000") + + @pytest.mark.parametrize("tp", available_float_dtypes) + def test_dragon4_positional_interface_trim(self, tp): + # test is flaky for musllinux on np.float128 + if IS_MUSL and tp == np.float128: + pytest.skip("Skipping flaky test of float128 on musllinux") + fpos = np.format_float_positional + # test trimming + # trim of 'k' or '.' only affects non-unique mode, since unique + # mode will not output trailing 0s. + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='k'), + "1.0000") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='.'), + "1.") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='.'), + "1.2" if tp != np.float16 else "1.2002") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='0'), + "1.0") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='0'), + "1.2" if tp != np.float16 else "1.2002") + assert_equal(fpos(tp('1.'), trim='0'), "1.0") + + assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='-'), + "1") + assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='-'), + "1.2" if tp != np.float16 else "1.2002") + assert_equal(fpos(tp('1.'), trim='-'), "1") + assert_equal(fpos(tp('1.001'), precision=1, trim='-'), "1") + + @pytest.mark.parametrize("tp", available_float_dtypes) + @pytest.mark.parametrize("pad_val", [10**5, np.iinfo("int32").max]) + def test_dragon4_positional_interface_overflow(self, tp, pad_val): + # test is flaky for musllinux on np.float128 + if IS_MUSL and tp == np.float128: + pytest.skip("Skipping flaky test of float128 on musllinux") + + fpos = np.format_float_positional + + #gh-28068 + with pytest.raises(RuntimeError, + match="Float formating result too large"): + fpos(tp('1.047'), unique=False, precision=pad_val) + + with pytest.raises(RuntimeError, + match="Float formating result too large"): + fpos(tp('1.047'), precision=2, pad_left=pad_val) + + with pytest.raises(RuntimeError, + match="Float formating result too large"): + fpos(tp('1.047'), precision=2, pad_right=pad_val) + + @pytest.mark.parametrize("tp", available_float_dtypes) + def test_dragon4_scientific_interface(self, tp): + # test is flaky for musllinux on np.float128 + if IS_MUSL and tp == np.float128: + pytest.skip("Skipping flaky test of float128 on musllinux") + fsci = np.format_float_scientific - for tp in tps: - # test padding - assert_equal(fpos(tp('1.0'), pad_left=4, pad_right=4), " 1. ") - assert_equal(fpos(tp('-1.0'), pad_left=4, pad_right=4), " -1. ") - assert_equal(fpos(tp('-10.2'), - pad_left=4, pad_right=4), " -10.2 ") - - # test exp_digits - assert_equal(fsci(tp('1.23e1'), exp_digits=5), "1.23e+00001") - - # test fixed (non-unique) mode - assert_equal(fpos(tp('1.0'), unique=False, precision=4), "1.0000") - assert_equal(fsci(tp('1.0'), unique=False, precision=4), - "1.0000e+00") - - # test trimming - # trim of 'k' or '.' only affects non-unique mode, since unique - # mode will not output trailing 0s. - assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='k'), - "1.0000") - - assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='.'), - "1.") - assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='.'), - "1.2" if tp != np.float16 else "1.2002") - - assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='0'), - "1.0") - assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='0'), - "1.2" if tp != np.float16 else "1.2002") - assert_equal(fpos(tp('1.'), trim='0'), "1.0") - - assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='-'), - "1") - assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='-'), - "1.2" if tp != np.float16 else "1.2002") - assert_equal(fpos(tp('1.'), trim='-'), "1") - assert_equal(fpos(tp('1.001'), precision=1, trim='-'), "1") + # test exp_digits + assert_equal(fsci(tp('1.23e1'), exp_digits=5), "1.23e+00001") + + # test fixed (non-unique) mode + assert_equal(fsci(tp('1.0'), unique=False, precision=4), + "1.0000e+00") @pytest.mark.skipif(not platform.machine().startswith("ppc64"), reason="only applies to ppc float128 values")