From d97e0fe88abdd7e271bf36316c370dc62a09d6bd Mon Sep 17 00:00:00 2001
From: Yakov Danishevsky <yakovdan@gmail.com>
Date: Tue, 21 Jan 2025 10:17:09 +0200
Subject: [PATCH] BUG: Fix ``from_float_positional`` errors for huge pads
 (#28149)

This PR adds graceful error handling for np.format_float_positional  when provided pad_left or pad_right arguments are too large.

* TST: Added tests for correct handling of overflow

* BUG: fixed pad_left and pad_right causing overflow if too large

* TST: added overflow test and fixed formatting

* BUG: fixed overflow checks and simplified error handling

* BUG: rewritten excpetion message and fixed overflow check

* TST: split test into smaller tests, added large input value

* Apply suggestions from code review

---------

Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>
---
 numpy/_core/src/multiarray/dragon4.c  |  64 +++++++------
 numpy/_core/tests/test_scalarprint.py | 129 +++++++++++++++++---------
 2 files changed, 121 insertions(+), 72 deletions(-)

diff --git a/numpy/_core/src/multiarray/dragon4.c b/numpy/_core/src/multiarray/dragon4.c
index 7cd8afbed6d8..b936f4dc213e 100644
--- a/numpy/_core/src/multiarray/dragon4.c
+++ b/numpy/_core/src/multiarray/dragon4.c
@@ -1615,7 +1615,8 @@ typedef struct Dragon4_Options {
  *
  * See Dragon4_Options for description of remaining arguments.
  */
-static npy_uint32
+
+static npy_int32
 FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
                  npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
                  npy_bool hasUnequalMargins, DigitMode digit_mode,
@@ -1646,7 +1647,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
         buffer[pos++] = '-';
         has_sign = 1;
     }
-
+        
     numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
                         digit_mode, cutoff_mode, precision, min_digits,
                         buffer + has_sign, maxPrintLen - has_sign,
@@ -1658,14 +1659,14 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
     /* if output has a whole number */
     if (printExponent >= 0) {
         /* leave the whole number at the start of the buffer */
-        numWholeDigits = printExponent+1;
+        numWholeDigits = printExponent+1;        
         if (numDigits <= numWholeDigits) {
             npy_int32 count = numWholeDigits - numDigits;
             pos += numDigits;
 
-            /* don't overflow the buffer */
-            if (pos + count > maxPrintLen) {
-                count = maxPrintLen - pos;
+            if (count > maxPrintLen - pos) {
+                PyErr_SetString(PyExc_RuntimeError, "Float formating result too large");
+                return -1;
             }
 
             /* add trailing zeros up to the decimal point */
@@ -1767,9 +1768,12 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
              pos < maxPrintLen) {
         /* add trailing zeros up to add_digits length */
         /* compute the number of trailing zeros needed */
+        
         npy_int32 count = desiredFractionalDigits - numFractionDigits;
-        if (pos + count > maxPrintLen) {
-            count = maxPrintLen - pos;
+        
+        if (count > maxPrintLen - pos) {
+            PyErr_SetString(PyExc_RuntimeError, "Float formating result too large");
+            return -1;
         }
         numFractionDigits += count;
 
@@ -1802,7 +1806,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
     }
 
     /* add any whitespace padding to right side */
-    if (digits_right >= numFractionDigits) {
+    if (digits_right >= numFractionDigits) {        
         npy_int32 count = digits_right - numFractionDigits;
 
         /* in trim_mode DptZeros, if right padding, add a space for the . */
@@ -1811,8 +1815,9 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
             buffer[pos++] = ' ';
         }
 
-        if (pos + count > maxPrintLen) {
-            count = maxPrintLen - pos;
+        if (count > maxPrintLen - pos) {
+            PyErr_SetString(PyExc_RuntimeError, "Float formating result too large");
+            return -1;
         }
 
         for ( ; count > 0; count--) {
@@ -1823,14 +1828,16 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
     if (digits_left > numWholeDigits + has_sign) {
         npy_int32 shift = digits_left - (numWholeDigits + has_sign);
         npy_int32 count = pos;
-
-        if (count + shift > maxPrintLen) {
-            count = maxPrintLen - shift;
+                
+        if (count > maxPrintLen - shift) {
+            PyErr_SetString(PyExc_RuntimeError, "Float formating result too large");
+            return -1;
         }
 
         if (count > 0) {
             memmove(buffer + shift, buffer, count);
         }
+
         pos = shift + count;
         for ( ; shift > 0; shift--) {
             buffer[shift - 1] = ' ';
@@ -1860,7 +1867,7 @@ FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
  *
  * See Dragon4_Options for description of remaining arguments.
  */
-static npy_uint32
+static npy_int32
 FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
                   npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
                   npy_bool hasUnequalMargins, DigitMode digit_mode,
@@ -2158,7 +2165,7 @@ PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
  * Helper function that takes Dragon4 parameters and options and
  * calls Dragon4.
  */
-static npy_uint32
+static npy_int32
 Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
                  npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
                  npy_bool hasUnequalMargins, Dragon4_Options *opt)
@@ -2187,7 +2194,7 @@ Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa,
  * exponent:  5 bits
  * mantissa: 10 bits
  */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_IEEE_binary16(
         npy_half *value, Dragon4_Options *opt)
 {
@@ -2274,7 +2281,7 @@ Dragon4_PrintFloat_IEEE_binary16(
  * exponent:  8 bits
  * mantissa: 23 bits
  */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_IEEE_binary32(
         npy_float32 *value,
         Dragon4_Options *opt)
@@ -2367,7 +2374,7 @@ Dragon4_PrintFloat_IEEE_binary32(
  * exponent: 11 bits
  * mantissa: 52 bits
  */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_IEEE_binary64(
         npy_float64 *value, Dragon4_Options *opt)
 {
@@ -2482,7 +2489,7 @@ typedef struct FloatVal128 {
  * intbit     1 bit,  first u64
  * mantissa: 63 bits, first u64
  */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_Intel_extended(
     FloatVal128 value, Dragon4_Options *opt)
 {
@@ -2580,7 +2587,7 @@ Dragon4_PrintFloat_Intel_extended(
  * system. But numpy defines NPY_FLOAT80, so if we come across it, assume it is
  * an Intel extended format.
  */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_Intel_extended80(
     npy_float80 *value, Dragon4_Options *opt)
 {
@@ -2604,7 +2611,7 @@ Dragon4_PrintFloat_Intel_extended80(
 
 #ifdef HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE
 /* Intel's 80-bit IEEE extended precision format, 96-bit storage */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_Intel_extended96(
     npy_float96 *value, Dragon4_Options *opt)
 {
@@ -2628,7 +2635,7 @@ Dragon4_PrintFloat_Intel_extended96(
 
 #ifdef HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE
 /* Motorola Big-endian equivalent of the Intel-extended 96 fp format */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_Motorola_extended96(
     npy_float96 *value, Dragon4_Options *opt)
 {
@@ -2665,7 +2672,7 @@ typedef union FloatUnion128
 
 #ifdef HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE
 /* Intel's 80-bit IEEE extended precision format, 128-bit storage */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_Intel_extended128(
     npy_float128 *value, Dragon4_Options *opt)
 {
@@ -2694,7 +2701,7 @@ Dragon4_PrintFloat_Intel_extended128(
  * I am not sure if the arch also supports uint128, and C does not seem to
  * support int128 literals. So we use uint64 to do manipulation.
  */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_IEEE_binary128(
     FloatVal128 val128, Dragon4_Options *opt)
 {
@@ -2779,7 +2786,7 @@ Dragon4_PrintFloat_IEEE_binary128(
 }
 
 #if defined(HAVE_LDOUBLE_IEEE_QUAD_LE)
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_IEEE_binary128_le(
     npy_float128 *value, Dragon4_Options *opt)
 {
@@ -2799,7 +2806,7 @@ Dragon4_PrintFloat_IEEE_binary128_le(
  * This function is untested, very few, if any, architectures implement
  * big endian IEEE binary128 floating point.
  */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_IEEE_binary128_be(
     npy_float128 *value, Dragon4_Options *opt)
 {
@@ -2854,7 +2861,7 @@ Dragon4_PrintFloat_IEEE_binary128_be(
  * https://gcc.gnu.org/wiki/Ieee128PowerPCA
  * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_71/com.ibm.aix.genprogc/128bit_long_double_floating-point_datatype.htm
  */
-static npy_uint32
+static npy_int32
 Dragon4_PrintFloat_IBM_double_double(
     npy_float128 *value, Dragon4_Options *opt)
 {
@@ -3041,6 +3048,7 @@ Dragon4_PrintFloat_IBM_double_double(
  * which goes up to about 10^4932. The Dragon4_scratch struct provides a string
  * buffer of this size.
  */
+
 #define make_dragon4_typefuncs_inner(Type, npy_type, format) \
 \
 PyObject *\
diff --git a/numpy/_core/tests/test_scalarprint.py b/numpy/_core/tests/test_scalarprint.py
index f47542ef779c..b6872c2b482b 100644
--- a/numpy/_core/tests/test_scalarprint.py
+++ b/numpy/_core/tests/test_scalarprint.py
@@ -8,7 +8,8 @@
 
 from tempfile import TemporaryFile
 import numpy as np
-from numpy.testing import assert_, assert_equal, assert_raises, IS_MUSL
+from numpy.testing import (
+    assert_, assert_equal, assert_raises, assert_raises_regex, IS_MUSL)
 
 class TestRealScalars:
     def test_str(self):
@@ -260,53 +261,93 @@ def test_dragon4(self):
         assert_equal(fpos64('324', unique=False, precision=5,
                                    fractional=False), "324.00")
 
-    def test_dragon4_interface(self):
-        tps = [np.float16, np.float32, np.float64]
+    available_float_dtypes = [np.float16, np.float32, np.float64, np.float128]\
+        if hasattr(np, 'float128') else [np.float16, np.float32, np.float64]
+    
+    @pytest.mark.parametrize("tp", available_float_dtypes)
+    def test_dragon4_positional_interface(self, tp):
         # test is flaky for musllinux on np.float128
-        if hasattr(np, 'float128') and not IS_MUSL:
-            tps.append(np.float128)
-
+        if IS_MUSL and tp == np.float128:
+            pytest.skip("Skipping flaky test of float128 on musllinux")
+                
+        fpos = np.format_float_positional
+        
+        # test padding
+        assert_equal(fpos(tp('1.0'), pad_left=4, pad_right=4), "   1.    ")
+        assert_equal(fpos(tp('-1.0'), pad_left=4, pad_right=4), "  -1.    ")
+        assert_equal(fpos(tp('-10.2'),
+                        pad_left=4, pad_right=4), " -10.2   ")
+        
+        # test fixed (non-unique) mode
+        assert_equal(fpos(tp('1.0'), unique=False, precision=4), "1.0000")
+
+    @pytest.mark.parametrize("tp", available_float_dtypes)
+    def test_dragon4_positional_interface_trim(self, tp):
+        # test is flaky for musllinux on np.float128
+        if IS_MUSL and tp == np.float128:
+            pytest.skip("Skipping flaky test of float128 on musllinux")
+                        
         fpos = np.format_float_positional
+        # test trimming
+        # trim of 'k' or '.' only affects non-unique mode, since unique
+        # mode will not output trailing 0s.
+        assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='k'),
+                        "1.0000")
+
+        assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='.'),
+                        "1.")
+        assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='.'),
+                        "1.2" if tp != np.float16 else "1.2002")
+
+        assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='0'),
+                        "1.0")
+        assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='0'),
+                        "1.2" if tp != np.float16 else "1.2002")
+        assert_equal(fpos(tp('1.'), trim='0'), "1.0")
+
+        assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='-'),
+                        "1")
+        assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='-'),
+                        "1.2" if tp != np.float16 else "1.2002")
+        assert_equal(fpos(tp('1.'), trim='-'), "1")
+        assert_equal(fpos(tp('1.001'), precision=1, trim='-'), "1")
+                
+    @pytest.mark.parametrize("tp", available_float_dtypes)
+    @pytest.mark.parametrize("pad_val", [10**5, np.iinfo("int32").max])
+    def test_dragon4_positional_interface_overflow(self, tp, pad_val):
+        # test is flaky for musllinux on np.float128
+        if IS_MUSL and tp == np.float128:
+            pytest.skip("Skipping flaky test of float128 on musllinux")
+                
+        fpos = np.format_float_positional
+
+        #gh-28068            
+        with pytest.raises(RuntimeError, 
+                           match="Float formating result too large"):
+            fpos(tp('1.047'), unique=False, precision=pad_val)
+
+        with pytest.raises(RuntimeError, 
+                           match="Float formating result too large"):
+            fpos(tp('1.047'), precision=2, pad_left=pad_val)
+
+        with pytest.raises(RuntimeError, 
+                           match="Float formating result too large"):
+            fpos(tp('1.047'), precision=2, pad_right=pad_val)
+
+    @pytest.mark.parametrize("tp", available_float_dtypes)
+    def test_dragon4_scientific_interface(self, tp):
+        # test is flaky for musllinux on np.float128
+        if IS_MUSL and tp == np.float128:
+            pytest.skip("Skipping flaky test of float128 on musllinux")
+                        
         fsci = np.format_float_scientific
 
-        for tp in tps:
-            # test padding
-            assert_equal(fpos(tp('1.0'), pad_left=4, pad_right=4), "   1.    ")
-            assert_equal(fpos(tp('-1.0'), pad_left=4, pad_right=4), "  -1.    ")
-            assert_equal(fpos(tp('-10.2'),
-                         pad_left=4, pad_right=4), " -10.2   ")
-
-            # test exp_digits
-            assert_equal(fsci(tp('1.23e1'), exp_digits=5), "1.23e+00001")
-
-            # test fixed (non-unique) mode
-            assert_equal(fpos(tp('1.0'), unique=False, precision=4), "1.0000")
-            assert_equal(fsci(tp('1.0'), unique=False, precision=4),
-                         "1.0000e+00")
-
-            # test trimming
-            # trim of 'k' or '.' only affects non-unique mode, since unique
-            # mode will not output trailing 0s.
-            assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='k'),
-                         "1.0000")
-
-            assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='.'),
-                         "1.")
-            assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='.'),
-                         "1.2" if tp != np.float16 else "1.2002")
-
-            assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='0'),
-                         "1.0")
-            assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='0'),
-                         "1.2" if tp != np.float16 else "1.2002")
-            assert_equal(fpos(tp('1.'), trim='0'), "1.0")
-
-            assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='-'),
-                         "1")
-            assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='-'),
-                         "1.2" if tp != np.float16 else "1.2002")
-            assert_equal(fpos(tp('1.'), trim='-'), "1")
-            assert_equal(fpos(tp('1.001'), precision=1, trim='-'), "1")
+        # test exp_digits
+        assert_equal(fsci(tp('1.23e1'), exp_digits=5), "1.23e+00001")
+
+        # test fixed (non-unique) mode
+        assert_equal(fsci(tp('1.0'), unique=False, precision=4),
+                        "1.0000e+00")
 
     @pytest.mark.skipif(not platform.machine().startswith("ppc64"),
                         reason="only applies to ppc float128 values")