diff --git a/doc/release/upcoming_changes/29569.new_feature.rst b/doc/release/upcoming_changes/29569.new_feature.rst new file mode 100644 index 000000000000..ac014c07c7a0 --- /dev/null +++ b/doc/release/upcoming_changes/29569.new_feature.rst @@ -0,0 +1,27 @@ +``ndmax`` option for `numpy.array` +---------------------------------------------------- +The ``ndmax`` option is now available for `numpy.array`. +It explicitly limits the maximum number of dimensions created from nested sequences. + +This is particularly useful when creating arrays of list-like objects with ``dtype=object``. +By default, NumPy recurses through all nesting levels to create the highest-dimensional +array possible, but this behavior may not be desired when the intent is to preserve +nested structures as objects. The ``ndmax`` parameter provides explicit control over +this recursion depth. + +.. code-block:: python + + # Default behavior: Creates a 2D array + >>> a = np.array([[1, 2], [3, 4]], dtype=object) + >>> a + array([[1, 2], + [3, 4]], dtype=object) + >>> a.shape + (2, 2) + + # With ndmax=1: Creates a 1D array + >>> b = np.array([[1, 2], [3, 4]], dtype=object, ndmax=1) + >>> b + array([list([1, 2]), list([3, 4])], dtype=object) + >>> b.shape + (2,) diff --git a/numpy/_core/_add_newdocs.py b/numpy/_core/_add_newdocs.py index ed8cf50ee360..e3009a490bd3 100644 --- a/numpy/_core/_add_newdocs.py +++ b/numpy/_core/_add_newdocs.py @@ -806,7 +806,7 @@ add_newdoc('numpy._core.multiarray', 'array', """ array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0, - like=None) + ndmax=None, like=None) Create an array. @@ -855,6 +855,15 @@ Specifies the minimum number of dimensions that the resulting array should have. Ones will be prepended to the shape as needed to meet this requirement. + ndmax : int, optional + Specifies the maximum number of dimensions to create when inferring + shape from nested sequences. 
By default, NumPy recurses through all + nesting levels (up to the compile-time constant ``NPY_MAXDIMS``). + Setting ``ndmax`` stops recursion at the specified depth, preserving + deeper nested structures as objects instead of promoting them to + higher-dimensional arrays. In this case, ``dtype=object`` is required. + + .. versionadded:: 2.4.0 ${ARRAY_FUNCTION_LIKE} .. versionadded:: 1.20.0 @@ -926,6 +935,21 @@ matrix([[1, 2], [3, 4]]) + Limiting the maximum dimensions with ``ndmax``: + + >>> a = np.array([[1, 2], [3, 4]], dtype=object, ndmax=2) + >>> a + array([[1, 2], + [3, 4]], dtype=object) + >>> a.shape + (2, 2) + + >>> b = np.array([[1, 2], [3, 4]], dtype=object, ndmax=1) + >>> b + array([list([1, 2]), list([3, 4])], dtype=object) + >>> b.shape + (2,) + """) add_newdoc('numpy._core.multiarray', 'asarray', diff --git a/numpy/_core/src/multiarray/array_converter.c b/numpy/_core/src/multiarray/array_converter.c index 496173038954..10dc83ac657f 100644 --- a/numpy/_core/src/multiarray/array_converter.c +++ b/numpy/_core/src/multiarray/array_converter.c @@ -83,7 +83,7 @@ array_converter_new( } else { item->array = (PyArrayObject *)PyArray_FromAny_int( - item->object, NULL, NULL, 0, 0, 0, NULL, + item->object, NULL, NULL, 0, NPY_MAXDIMS, 0, NULL, &item->scalar_input); if (item->array == NULL) { goto fail; diff --git a/numpy/_core/src/multiarray/ctors.c b/numpy/_core/src/multiarray/ctors.c index 498fa78118b3..38da6f314848 100644 --- a/numpy/_core/src/multiarray/ctors.c +++ b/numpy/_core/src/multiarray/ctors.c @@ -1508,6 +1508,16 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, return NULL; } + /* + * The internal implementation treats 0 as actually wanting a zero-dimensional + * array, but the API for this function has typically treated it as + * "anything is fine", so convert here. + * TODO: should we use another value as a placeholder instead? 
+ */ + if (max_depth == 0 || max_depth > NPY_MAXDIMS) { + max_depth = NPY_MAXDIMS; + } + int was_scalar; PyObject* ret = PyArray_FromAny_int( op, dt_info.descr, dt_info.dtype, @@ -1563,7 +1573,7 @@ PyArray_FromAny_int(PyObject *op, PyArray_Descr *in_descr, Py_BEGIN_CRITICAL_SECTION(op); ndim = PyArray_DiscoverDTypeAndShape( - op, NPY_MAXDIMS, dims, &cache, in_DType, in_descr, &dtype, + op, max_depth, dims, &cache, in_DType, in_descr, &dtype, copy, &was_copied_by__array__); if (ndim < 0) { @@ -1583,7 +1593,7 @@ PyArray_FromAny_int(PyObject *op, PyArray_Descr *in_descr, npy_free_coercion_cache(cache); goto cleanup; } - if (max_depth != 0 && ndim > max_depth) { + if (ndim > max_depth && (in_DType == NULL || in_DType->type_num != NPY_OBJECT)) { PyErr_SetString(PyExc_ValueError, "object too deep for desired array"); npy_free_coercion_cache(cache); @@ -1798,6 +1808,11 @@ PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int min_depth, return NULL; } + /* See comment in PyArray_FromAny for rationale */ + if (max_depth == 0 || max_depth > NPY_MAXDIMS) { + max_depth = NPY_MAXDIMS; + } + PyObject* ret = PyArray_CheckFromAny_int( op, dt_info.descr, dt_info.dtype, min_depth, max_depth, requires, context); diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 3d82e6c7f448..a7fdf3efba17 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -1560,7 +1560,7 @@ _prepend_ones(PyArrayObject *arr, int nd, int ndmin, NPY_ORDER order) static inline PyObject * _array_fromobject_generic( PyObject *op, PyArray_Descr *in_descr, PyArray_DTypeMeta *in_DType, - NPY_COPYMODE copy, NPY_ORDER order, npy_bool subok, int ndmin) + NPY_COPYMODE copy, NPY_ORDER order, npy_bool subok, int ndmin, int ndmax) { PyArrayObject *oparr = NULL, *ret = NULL; PyArray_Descr *oldtype = NULL; @@ -1570,10 +1570,9 @@ _array_fromobject_generic( Py_XINCREF(in_descr); PyArray_Descr *dtype = 
in_descr; - if (ndmin > NPY_MAXDIMS) { + if (ndmin > ndmax) { PyErr_Format(PyExc_ValueError, - "ndmin bigger than allowable number of dimensions " - "NPY_MAXDIMS (=%d)", NPY_MAXDIMS); + "ndmin must be <= ndmax (%d)", ndmax); goto finish; } /* fast exit if simple call */ @@ -1682,7 +1681,7 @@ _array_fromobject_generic( flags |= NPY_ARRAY_FORCECAST; ret = (PyArrayObject *)PyArray_CheckFromAny_int( - op, dtype, in_DType, 0, 0, flags, NULL); + op, dtype, in_DType, 0, ndmax, flags, NULL); finish: Py_XDECREF(dtype); @@ -1713,6 +1712,7 @@ array_array(PyObject *NPY_UNUSED(ignored), npy_bool subok = NPY_FALSE; NPY_COPYMODE copy = NPY_COPY_ALWAYS; int ndmin = 0; + int ndmax = NPY_MAXDIMS; npy_dtype_info dt_info = {NULL, NULL}; NPY_ORDER order = NPY_KEEPORDER; PyObject *like = Py_None; @@ -1726,6 +1726,7 @@ array_array(PyObject *NPY_UNUSED(ignored), "$order", &PyArray_OrderConverter, &order, "$subok", &PyArray_BoolConverter, &subok, "$ndmin", &PyArray_PythonPyIntFromInt, &ndmin, + "$ndmax", &PyArray_PythonPyIntFromInt, &ndmax, "$like", NULL, &like, NULL, NULL, NULL) < 0) { Py_XDECREF(dt_info.descr); @@ -1747,8 +1748,15 @@ array_array(PyObject *NPY_UNUSED(ignored), op = args[0]; } + if (ndmax > NPY_MAXDIMS || ndmax < 0) { + PyErr_Format(PyExc_ValueError, "ndmax must be in the range [0, NPY_MAXDIMS (%d)] ", NPY_MAXDIMS); + Py_XDECREF(dt_info.descr); + Py_XDECREF(dt_info.dtype); + return NULL; + } + PyObject *res = _array_fromobject_generic( - op, dt_info.descr, dt_info.dtype, copy, order, subok, ndmin); + op, dt_info.descr, dt_info.dtype, copy, order, subok, ndmin, ndmax); Py_XDECREF(dt_info.descr); Py_XDECREF(dt_info.dtype); return res; @@ -1794,7 +1802,7 @@ array_asarray(PyObject *NPY_UNUSED(ignored), } PyObject *res = _array_fromobject_generic( - op, dt_info.descr, dt_info.dtype, copy, order, NPY_FALSE, 0); + op, dt_info.descr, dt_info.dtype, copy, order, NPY_FALSE, 0, NPY_MAXDIMS); Py_XDECREF(dt_info.descr); Py_XDECREF(dt_info.dtype); return res; @@ -1840,7 +1848,7 @@ 
array_asanyarray(PyObject *NPY_UNUSED(ignored), } PyObject *res = _array_fromobject_generic( - op, dt_info.descr, dt_info.dtype, copy, order, NPY_TRUE, 0); + op, dt_info.descr, dt_info.dtype, copy, order, NPY_TRUE, 0, NPY_MAXDIMS); Py_XDECREF(dt_info.descr); Py_XDECREF(dt_info.dtype); return res; @@ -1882,7 +1890,7 @@ array_ascontiguousarray(PyObject *NPY_UNUSED(ignored), PyObject *res = _array_fromobject_generic( op, dt_info.descr, dt_info.dtype, NPY_COPY_IF_NEEDED, NPY_CORDER, NPY_FALSE, - 1); + 1, NPY_MAXDIMS); Py_XDECREF(dt_info.descr); Py_XDECREF(dt_info.dtype); return res; @@ -1924,7 +1932,7 @@ array_asfortranarray(PyObject *NPY_UNUSED(ignored), PyObject *res = _array_fromobject_generic( op, dt_info.descr, dt_info.dtype, NPY_COPY_IF_NEEDED, NPY_FORTRANORDER, - NPY_FALSE, 1); + NPY_FALSE, 1, NPY_MAXDIMS); Py_XDECREF(dt_info.descr); Py_XDECREF(dt_info.dtype); return res; diff --git a/numpy/_core/src/multiarray/scalartypes.c.src b/numpy/_core/src/multiarray/scalartypes.c.src index 5e3a3ba71d3e..a6170936a5f3 100644 --- a/numpy/_core/src/multiarray/scalartypes.c.src +++ b/numpy/_core/src/multiarray/scalartypes.c.src @@ -226,7 +226,7 @@ find_binary_operation_path( */ int was_scalar; PyArrayObject *arr = (PyArrayObject *)PyArray_FromAny_int( - other, NULL, NULL, 0, 0, 0, NULL, &was_scalar); + other, NULL, NULL, 0, NPY_MAXDIMS, 0, NULL, &was_scalar); if (arr == NULL) { return -1; } diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py index cf2f899b7991..da4eeb91cfc2 100644 --- a/numpy/_core/tests/test_multiarray.py +++ b/numpy/_core/tests/test_multiarray.py @@ -1279,6 +1279,79 @@ def test_creation_from_dtypemeta(self, func): assert_array_equal(arr1, arr2) assert arr2.dtype == dtype + def test_ndmax_less_than_actual_dims_dtype_object(self): + data = [[1, 2, 3], [4, 5, 6]] + arr = np.array(data, ndmax=1, dtype=object) + assert arr.ndim == 1 + assert arr.shape == (2,) + assert arr.dtype == object + + data = [[1, 2, 3], [4, 5]] + arr = 
np.array(data, ndmax=1, dtype=object) + assert arr.ndim == 1 + assert arr.shape == (2,) + assert arr.dtype == object + + data = [[[1], [2]], [[3], [4]]] + arr = np.array(data, ndmax=2, dtype=object) + assert arr.ndim == 2 + assert arr.shape == (2, 2) + assert arr.dtype == object + + def test_ndmax_equal_to_actual_dims(self): + data = [[1, 2], [3, 4]] + arr = np.array(data, ndmax=2) + assert arr.ndim == 2 + assert_array_equal(arr, np.array(data)) + + def test_ndmax_greater_than_actual_dims(self): + data = [[1, 2], [3, 4]] + arr = np.array(data, ndmax=3) + assert arr.ndim == 2 + assert_array_equal(arr, np.array(data)) + + def test_ndmax_less_than_actual_dims(self): + data = [[[1], [2]], [[3], [4]]] + with pytest.raises(ValueError, + match="setting an array element with a sequence. " + "The requested array would exceed the maximum number of dimension of 2."): + np.array(data, ndmax=2) + + def test_ndmax_is_zero(self): + data = [1, 2, 3] + arr = np.array(data, ndmax=0, dtype=object) + assert arr.ndim == 0 + assert arr.shape == () + assert arr.dtype == object + + data = [[1, 2, 3], [4, 5, 6]] + arr = np.array(data, ndmax=0, dtype=object) + assert arr.ndim == 0 + assert arr.shape == () + assert arr.dtype == object + + data = [[1, 2, 3], [4, 5]] + arr = np.array(data, ndmax=0, dtype=object) + assert arr.ndim == 0 + assert arr.shape == () + assert arr.dtype == object + + def test_ndmax_less_than_ndmin(self): + data = [[[1], [2]], [[3], [4]]] + with pytest.raises(ValueError, match="ndmin must be <= ndmax"): + np.array(data, ndmax=1, ndmin=2) + + def test_ndmax_is_negative(self): + data = [1, 2, 3] + with pytest.raises(ValueError, match="ndmax must be in the range"): + np.array(data, ndmax=-1) + + def test_ndmax_greather_than_NPY_MAXDIMS(self): + data = [1, 2, 3] + # current NPY_MAXDIMS is 64 + with pytest.raises(ValueError, match="ndmax must be in the range"): + np.array(data, ndmax=65) + class TestStructured: def test_subarray_field_access(self):