diff --git a/numpy/core/src/umath/loops_modulo.dispatch.c.src b/numpy/core/src/umath/loops_modulo.dispatch.c.src
index d0ecc016fb97..53b7da2892b0 100644
--- a/numpy/core/src/umath/loops_modulo.dispatch.c.src
+++ b/numpy/core/src/umath/loops_modulo.dispatch.c.src
@@ -12,6 +12,21 @@
 // Provides the various *_LOOP macros
 #include "fast_loop_macros.h"

+
+#define DIVIDEBYZERO_OVERFLOW_CHECK(x, y, min_val, signed) \
+    (NPY_UNLIKELY(                                         \
+        (signed)                                  ?        \
+        ((y == 0) || ((x == min_val) && (y == -1))) :      \
+        (y == 0))                                          \
+    )
+
+#define FLAG_IF_DIVIDEBYZERO(x) do {     \
+    if (NPY_UNLIKELY(x == 0)) {          \
+        npy_set_floatstatus_divbyzero(); \
+    }                                    \
+} while (0)
+
+
 #if NPY_SIMD && defined(NPY_HAVE_VSX4)
 typedef struct {
     npyv_u32x2 hi;
@@ -166,7 +181,6 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
     const int vstep = npyv_nlanes_@sfx@;
 #if @id@ == 2 /* divmod */
     npyv_lanetype_@sfx@ *dst2 = (npyv_lanetype_@sfx@ *) args[3];
-    const npyv_@sfx@ vneg_one = npyv_setall_@sfx@(-1);
     npyv_b@len@ warn = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());

     for (; len >= vstep; len -= vstep, src1 += vstep, src2 += vstep,
@@ -176,11 +190,11 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
         npyv_@sfx@ quo    = vsx4_div_@sfx@(a, b);
         npyv_@sfx@ rem    = npyv_sub_@sfx@(a, vec_mul(b, quo));
         npyv_b@len@ bzero = npyv_cmpeq_@sfx@(b, vzero);
-        // when b is 0, 'cvtozero' forces the modulo to be 0 too
-        npyv_@sfx@ cvtozero = npyv_select_@sfx@(bzero, vzero, vneg_one);
+        // when b is 0, force the remainder to be 0 too
+        rem               = npyv_select_@sfx@(bzero, vzero, rem);
         warn              = npyv_or_@sfx@(bzero, warn);
         npyv_store_@sfx@(dst1, quo);
-        npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
+        npyv_store_@sfx@(dst2, rem);
     }

     if (!vec_all_eq(warn, vzero)) {
@@ -290,7 +304,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
     npyv_lanetype_@sfx@ *dst2 = (npyv_lanetype_@sfx@ *) args[3];
     const npyv_@sfx@ vneg_one = npyv_setall_@sfx@(-1);
     const npyv_@sfx@ vmin     = npyv_setall_@sfx@(NPY_MIN_INT@len@);
-    npyv_b@len@ warn          = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
+    npyv_b@len@ warn_zero     = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
+    npyv_b@len@ warn_overflow = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());

     for (; len >= vstep; len -= vstep, src1 += vstep, src2 += vstep,
          dst1 += vstep, dst2 += vstep) {
@@ -310,10 +325,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
         npyv_b@len@ amin     = npyv_cmpeq_@sfx@(a, vmin);
         npyv_b@len@ bneg_one = npyv_cmpeq_@sfx@(b, vneg_one);
         npyv_b@len@ overflow = npyv_and_@sfx@(bneg_one, amin);
-        npyv_b@len@ error    = npyv_or_@sfx@(bzero, overflow);
-        // in case of overflow or b = 0, 'cvtozero' forces quo/rem to be 0
-        npyv_@sfx@ cvtozero  = npyv_select_@sfx@(error, vzero, vneg_one);
-        warn                 = npyv_or_@sfx@(error, warn);
+        warn_zero            = npyv_or_@sfx@(bzero, warn_zero);
+        warn_overflow        = npyv_or_@sfx@(overflow, warn_overflow);
 #endif
 #if @id@ >= 1 /* remainder and divmod */
         // handle mixed case the way Python does
@@ -329,8 +342,14 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
 #if @id@ == 2 /* divmod */
         npyv_@sfx@ to_sub = npyv_select_@sfx@(or, vzero, vneg_one);
         quo               = npyv_add_@sfx@(quo, to_sub);
-        npyv_store_@sfx@(dst1, npyv_and_@sfx@(cvtozero, quo));
-        npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
+        // Divide by zero
+        quo               = npyv_select_@sfx@(bzero, vzero, quo);
+        rem               = npyv_select_@sfx@(bzero, vzero, rem);
+        // Overflow
+        quo               = npyv_select_@sfx@(overflow, vmin, quo);
+        rem               = npyv_select_@sfx@(overflow, vzero, rem);
+        npyv_store_@sfx@(dst1, quo);
+        npyv_store_@sfx@(dst2, rem);
 #else /* fmod and remainder */
         npyv_store_@sfx@(dst1, rem);
         if (NPY_UNLIKELY(vec_any_eq(b, vzero))) {
@@ -340,17 +359,27 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
     }

 #if @id@ == 2 /* divmod */
-    if (!vec_all_eq(warn, vzero)) {
+    if (!vec_all_eq(warn_zero, vzero)) {
         npy_set_floatstatus_divbyzero();
     }
+    if (!vec_all_eq(warn_overflow, vzero)) {
+        npy_set_floatstatus_overflow();
+    }

     for (; len > 0; --len, ++src1, ++src2, ++dst1, ++dst2) {
         const npyv_lanetype_@sfx@ a = *src1;
         const npyv_lanetype_@sfx@ b = *src2;
-        if (b == 0 || (a == NPY_MIN_INT@len@ && b == -1)) {
-            npy_set_floatstatus_divbyzero();
-            *dst1 = 0;
-            *dst2 = 0;
+        if (DIVIDEBYZERO_OVERFLOW_CHECK(a, b, NPY_MIN_INT@len@, NPY_TRUE)) {
+            if (b == 0) {
+                npy_set_floatstatus_divbyzero();
+                *dst1 = 0;
+                *dst2 = 0;
+            }
+            else {
+                npy_set_floatstatus_overflow();
+                *dst1 = NPY_MIN_INT@len@;
+                *dst2 = 0;
+            }
         }
         else {
             *dst1 = a / b;
@@ -365,8 +394,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
     for (; len > 0; --len, ++src1, ++src2, ++dst1) {
         const npyv_lanetype_@sfx@ a = *src1;
         const npyv_lanetype_@sfx@ b = *src2;
-        if (NPY_UNLIKELY(b == 0)) {
-            npy_set_floatstatus_divbyzero();
+        if (DIVIDEBYZERO_OVERFLOW_CHECK(a, b, NPY_MIN_INT@len@, NPY_TRUE)) {
+            FLAG_IF_DIVIDEBYZERO(b);
             *dst1 = 0;
         } else{
             *dst1 = a % b;
@@ -415,8 +444,6 @@ vsx4_simd_@func@_by_scalar_contig_@sfx@(char **args, npy_intp len)
         // (a == NPY_MIN_INT@len@ && b == -1)
         npyv_b@len@ amin     = npyv_cmpeq_@sfx@(a, vmin);
         npyv_b@len@ overflow = npyv_and_@sfx@(bneg_one, amin);
-        // in case of overflow, 'cvtozero' forces quo/rem to be 0
-        npyv_@sfx@ cvtozero  = npyv_select_@sfx@(overflow, vzero, vneg_one);
         warn                 = npyv_or_@sfx@(overflow, warn);
 #endif
 #if @id@ >= 1 /* remainder and divmod */
@@ -432,8 +459,11 @@ vsx4_simd_@func@_by_scalar_contig_@sfx@(char **args, npy_intp len)
 #if @id@ == 2 /* divmod */
         npyv_@sfx@ to_sub = npyv_select_@sfx@(or, vzero, vneg_one);
         quo               = npyv_add_@sfx@(quo, to_sub);
-        npyv_store_@sfx@(dst1, npyv_and_@sfx@(cvtozero, quo));
-        npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
+        // Overflow: set quo to minimum and rem to 0
+        quo               = npyv_select_@sfx@(overflow, vmin, quo);
+        rem               = npyv_select_@sfx@(overflow, vzero, rem);
+        npyv_store_@sfx@(dst1, quo);
+        npyv_store_@sfx@(dst2, rem);
 #else /* fmod and remainder */
         npyv_store_@sfx@(dst1, rem);
 #endif
@@ -441,14 +471,14 @@ vsx4_simd_@func@_by_scalar_contig_@sfx@(char **args, npy_intp len)

 #if @id@ == 2 /* divmod */
     if (!vec_all_eq(warn, vzero)) {
-        npy_set_floatstatus_divbyzero();
+        npy_set_floatstatus_overflow();
     }

     for (; len > 0; --len, ++src1, ++dst1, ++dst2) {
         const npyv_lanetype_@sfx@ a = *src1;
-        if (a == NPY_MIN_INT@len@ && scalar == -1) {
-            npy_set_floatstatus_divbyzero();
-            *dst1 = 0;
+        if (NPY_UNLIKELY(a == NPY_MIN_INT@len@ && scalar == -1)) {
+            npy_set_floatstatus_overflow();
+            *dst1 = NPY_MIN_INT@len@;
             *dst2 = 0;
         }
         else {
@@ -524,8 +554,12 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_fmod)
     BINARY_LOOP {
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
-        if (NPY_UNLIKELY(in2 == 0)) {
-            npy_set_floatstatus_divbyzero();
+#if @signed@
+        if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
+#else
+        if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
+#endif
+            FLAG_IF_DIVIDEBYZERO(in2);
             *((@type@ *)op1) = 0;
         } else{
             *((@type@ *)op1)= in1 % in2;
@@ -552,8 +586,12 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_remainder)
     BINARY_LOOP {
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
-        if (NPY_UNLIKELY(in2 == 0)) {
-            npy_set_floatstatus_divbyzero();
+#if @signed@
+        if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
+#else
+        if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
+#endif
+            FLAG_IF_DIVIDEBYZERO(in2);
             *((@type@ *)op1) = 0;
         } else{
 #if @signed@
@@ -593,10 +631,17 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divmod)
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
         /* see FIXME note for divide above */
-        if (NPY_UNLIKELY(in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1))) {
-            npy_set_floatstatus_divbyzero();
-            *((@type@ *)op1) = 0;
-            *((@type@ *)op2) = 0;
+        if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
+            if (in2 == 0) {
+                npy_set_floatstatus_divbyzero();
+                *((@type@ *)op1) = 0;
+                *((@type@ *)op2) = 0;
+            }
+            else {
+                npy_set_floatstatus_overflow();
+                *((@type@ *)op1) = NPY_MIN_@TYPE@;
+                *((@type@ *)op2) = 0;
+            }
         }
         else {
             /* handle mixed case the way Python does */
@@ -616,7 +661,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divmod)
     BINARY_LOOP_TWO_OUT {
         const @type@ in1 = *(@type@ *)ip1;
         const @type@ in2 = *(@type@ *)ip2;
-        if (NPY_UNLIKELY(in2 == 0)) {
+        if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
             npy_set_floatstatus_divbyzero();
             *((@type@ *)op1) = 0;
             *((@type@ *)op2) = 0;
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index c322ca33d1be..e50032bc0b42 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -161,6 +161,13 @@ static NPY_INLINE int
  * #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
  *         LONG, ULONG, LONGLONG, ULONGLONG#
  */
+
+#if @neg@
+    #define DIVIDEBYZERO_CHECK (b == 0 || (a == NPY_MIN_@NAME@ && b == -1))
+#else
+    #define DIVIDEBYZERO_CHECK (b == 0)
+#endif
+
 static NPY_INLINE int
 @name@_ctype_divide(@type@ a, @type@ b, @type@ *out) {
     if (b == 0) {
@@ -169,7 +176,7 @@ static NPY_INLINE int
     }
 #if @neg@
     else if (b == -1 && a == NPY_MIN_@NAME@) {
-        *out = a / b;
+        *out = NPY_MIN_@NAME@;
         return NPY_FPE_OVERFLOW;
     }
 #endif
@@ -192,7 +199,7 @@ static NPY_INLINE int

 static NPY_INLINE int
 @name@_ctype_remainder(@type@ a, @type@ b, @type@ *out) {
-    if (a == 0 || b == 0) {
+    if (DIVIDEBYZERO_CHECK) {
         *out = 0;
         if (b == 0) {
             return NPY_FPE_DIVIDEBYZERO;
@@ -213,6 +220,7 @@ static NPY_INLINE int
 #endif
     return 0;
 }
+#undef DIVIDEBYZERO_CHECK
 /**end repeat**/

 /**begin repeat
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index a696fceb8a1b..5b7abd4c62b9 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -5,8 +5,10 @@
 import pytest
 import sys
 import os
+import operator
 from fractions import Fraction
 from functools import reduce
+from collections import namedtuple

 import numpy.core.umath as ncu
 from numpy.core import _umath_tests as ncu_tests
@@ -20,6 +22,62 @@
 from numpy.testing._private.utils import _glibc_older_than


+def interesting_binop_operands(val1, val2, dtype):
+    """
+    Helper to create "interesting" operands to cover common code paths:
+    * scalar inputs
+    * only the first "value" is an array (e.g. scalar division fast-paths)
+    * Longer array (SIMD) placing the value of interest at different positions
+    * Oddly strided arrays which may not be SIMD compatible
+
+    It does not attempt to cover unaligned access or mixed dtypes.
+    These are normally handled by the casting/buffering machinery.
+
+    This is not a fixture (currently), since a fixture would normally
+    only yield once.
+    """
+    fill_value = 1  # could be a parameter, but maybe not an optional one?
+
+    arr1 = np.full(10003, dtype=dtype, fill_value=fill_value)
+    arr2 = np.full(10003, dtype=dtype, fill_value=fill_value)
+
+    arr1[0] = val1
+    arr2[0] = val2
+
+    extractor = lambda res: res
+    yield arr1[0], arr2[0], extractor, "scalars"
+
+    extractor = lambda res: res
+    yield arr1[0, ...], arr2[0, ...], extractor, "scalar-arrays"
+
+    # reset array values to fill_value:
+    arr1[0] = fill_value
+    arr2[0] = fill_value
+
+    for pos in [0, 1, 2, 3, 4, 5, -1, -2, -3, -4]:
+        arr1[pos] = val1
+        arr2[pos] = val2
+
+        extractor = lambda res: res[pos]
+        yield arr1, arr2, extractor, f"off-{pos}"
+        yield arr1, arr2[pos], extractor, f"off-{pos}-with-scalar"
+
+        arr1[pos] = fill_value
+        arr2[pos] = fill_value
+
+    for stride in [-1, 113]:
+        op1 = arr1[::stride]
+        op2 = arr2[::stride]
+        op1[10] = val1
+        op2[10] = val2
+
+        extractor = lambda res: res[10]
+        yield op1, op2, extractor, f"stride-{stride}"
+
+        op1[10] = fill_value
+        op2[10] = fill_value
+
+
 def on_powerpc():
     """ True if we are running on a Power PC platform."""
     return platform.processor() == 'powerpc' or \
@@ -740,6 +798,140 @@ def test_float_remainder_corner_cases(self):
                 assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, rem))


+class TestDivisionIntegerOverflowsAndDivideByZero:
+    result_type = namedtuple('result_type',
+            ['nocast', 'casted'])
+    helper_lambdas = {
+        'zero': lambda dtype: 0,
+        'min': lambda dtype: np.iinfo(dtype).min,
+        'neg_min': lambda dtype: -np.iinfo(dtype).min,
+        'min-zero': lambda dtype: (np.iinfo(dtype).min, 0),
+        'neg_min-zero': lambda dtype: (-np.iinfo(dtype).min, 0),
+    }
+    overflow_results = {
+        np.remainder: result_type(
+            helper_lambdas['zero'], helper_lambdas['zero']),
+        np.fmod: result_type(
+            helper_lambdas['zero'], helper_lambdas['zero']),
+        operator.mod: result_type(
+            helper_lambdas['zero'], helper_lambdas['zero']),
+        operator.floordiv: result_type(
+            helper_lambdas['min'], helper_lambdas['neg_min']),
+        np.floor_divide: result_type(
+            helper_lambdas['min'], helper_lambdas['neg_min']),
+        np.divmod: result_type(
+            helper_lambdas['min-zero'], helper_lambdas['neg_min-zero'])
+    }
+
+    @pytest.mark.parametrize("dtype", np.typecodes["Integer"])
+    def test_signed_division_overflow(self, dtype):
+        to_check = interesting_binop_operands(np.iinfo(dtype).min, -1, dtype)
+        for op1, op2, extractor, operand_identifier in to_check:
+            with pytest.warns(RuntimeWarning, match="overflow encountered"):
+                res = op1 // op2
+
+            assert res.dtype == op1.dtype
+            assert extractor(res) == np.iinfo(op1.dtype).min
+
+            # Remainder is well defined though, and does not warn:
+            res = op1 % op2
+            assert res.dtype == op1.dtype
+            assert extractor(res) == 0
+            # Check fmod as well:
+            res = np.fmod(op1, op2)
+            assert extractor(res) == 0
+
+            # Divmod warns for the division part:
+            with pytest.warns(RuntimeWarning, match="overflow encountered"):
+                res1, res2 = np.divmod(op1, op2)
+
+            assert res1.dtype == res2.dtype == op1.dtype
+            assert extractor(res1) == np.iinfo(op1.dtype).min
+            assert extractor(res2) == 0
+
+    @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"])
+    def test_divide_by_zero(self, dtype):
+        # Note that the return value cannot be well defined here, but NumPy
+        # currently uses 0 consistently.  This could be changed.
+        to_check = interesting_binop_operands(1, 0, dtype)
+        for op1, op2, extractor, operand_identifier in to_check:
+            with pytest.warns(RuntimeWarning, match="divide by zero"):
+                res = op1 // op2
+
+            assert res.dtype == op1.dtype
+            assert extractor(res) == 0
+
+            with pytest.warns(RuntimeWarning, match="divide by zero"):
+                res1, res2 = np.divmod(op1, op2)
+
+            assert res1.dtype == res2.dtype == op1.dtype
+            assert extractor(res1) == 0
+            assert extractor(res2) == 0
+
+    @pytest.mark.parametrize("dividend_dtype",
+                             np.sctypes['int'])
+    @pytest.mark.parametrize("divisor_dtype",
+                             np.sctypes['int'])
+    @pytest.mark.parametrize("operation",
+                             [np.remainder, np.fmod, np.divmod,
+                              np.floor_divide, operator.mod,
+                              operator.floordiv])
+    @np.errstate(divide='warn', over='warn')
+    def test_overflows(self, dividend_dtype, divisor_dtype, operation):
+        # The SIMD loop processes the operands in blocks whose length is
+        # a multiple of the register size; the leftover elements fall back
+        # to the scalar implementation.
+        # We try to cover all paths here.
+        arrays = [np.array([np.iinfo(dividend_dtype).min]*i,
+                           dtype=dividend_dtype) for i in range(1, 129)]
+        divisor = np.array([-1], dtype=divisor_dtype)
+        # If the divisor is a larger type than the dividend (`else` case),
+        # the result is computed in the larger type and cannot overflow
+        # for `divmod` and `floor_divide`.
+        if np.dtype(dividend_dtype).itemsize >= np.dtype(
+                divisor_dtype).itemsize and operation in (
+                    np.divmod, np.floor_divide, operator.floordiv):
+            with pytest.warns(
+                    RuntimeWarning,
+                    match="overflow encountered in"):
+                result = operation(
+                            dividend_dtype(np.iinfo(dividend_dtype).min),
+                            divisor_dtype(-1)
+                         )
+                assert result == self.overflow_results[operation].nocast(
+                        dividend_dtype)
+
+            # Arrays
+            for a in arrays:
+                # In case of divmod, we need to flatten the result
+                # column first as we get a column vector of quotient and
+                # remainder and a normal flatten of the expected result.
+                with pytest.warns(
+                        RuntimeWarning,
+                        match="overflow encountered in"):
+                    result = np.array(operation(a, divisor)).flatten('f')
+                    expected_array = np.array(
+                            [self.overflow_results[operation].nocast(
+                                dividend_dtype)]*len(a)).flatten()
+                    assert_array_equal(result, expected_array)
+        else:
+            # Scalars
+            result = operation(
+                        dividend_dtype(np.iinfo(dividend_dtype).min),
+                        divisor_dtype(-1)
+                     )
+            assert result == self.overflow_results[operation].casted(
+                    dividend_dtype)
+
+            # Arrays
+            for a in arrays:
+                # See above comment on flatten
+                result = np.array(operation(a, divisor)).flatten('f')
+                expected_array = np.array(
+                        [self.overflow_results[operation].casted(
+                            dividend_dtype)]*len(a)).flatten()
+                assert_array_equal(result, expected_array)
+
+
 class TestCbrt:
     def test_cbrt_scalar(self):
        assert_almost_equal((np.cbrt(np.float32(-2.5)**3)), -2.5)
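For reviewers, a quick demonstration of the semantics this patch establishes. This is an illustrative sketch, not part of the patch: it mirrors the assertions in `test_signed_division_overflow` and `test_divide_by_zero` above, and the `int32` choice is arbitrary (any signed integer dtype behaves the same).

```python
import numpy as np

int_min = np.iinfo(np.int32).min  # -2**31

with np.errstate(over='warn', divide='warn'):
    # INT_MIN // -1 is not representable: the result now wraps to INT_MIN
    # and the *overflow* flag is raised (previously the divide-by-zero
    # flag was raised and the result was forced to 0).
    q = np.floor_divide(np.int32(int_min), np.int32(-1))
    assert q == int_min  # warns: "overflow encountered ..."

    # The remainder is well defined (0) and does not warn.
    r = np.remainder(np.int32(int_min), np.int32(-1))
    assert r == 0

    # divmod warns for the quotient part only and returns (INT_MIN, 0).
    q, r = np.divmod(np.int32(int_min), np.int32(-1))
    assert (q, r) == (int_min, 0)

    # Division by zero still sets the divide-by-zero flag; the value 0 is
    # returned by convention (see the note in test_divide_by_zero).
    z = np.floor_divide(np.int32(1), np.int32(0))
    assert z == 0
```

The same applies elementwise on arrays, where the VSX4 SIMD loops accumulate `warn_zero` and `warn_overflow` separately so that each condition raises its own flag exactly once per call.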