diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 198a7155e1a1e..1119117c411d3 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -27,6 +27,14 @@ Copy-on-Write improvements of those Index objects for the columns of the DataFrame (:issue:`52947`) - Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`) +- Trying to operate inplace on a temporary column selection + (for example, ``df["a"].fillna(100, inplace=True)``) + will now always raise a warning when Copy-on-Write is enabled. In this mode, + operating inplace like this will never work, since the selection behaves + as a temporary copy. This holds true for: + + - DataFrame.fillna / Series.fillna + .. _whatsnew_210.enhancements.enhancement2: ``map(func, na_action="ignore")`` now works for all array types diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f049e9d479b26..68e5fbd696ab9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9,6 +9,7 @@ import operator import pickle import re +import sys from typing import ( TYPE_CHECKING, Any, @@ -89,13 +90,19 @@ WriteExcelBuffer, npt, ) +from pandas.compat import ( + PY311, + PYPY, +) from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.errors import ( AbstractMethodError, + ChainedAssignmentError, InvalidIndexError, SettingWithCopyError, SettingWithCopyWarning, + _chained_assignment_method_msg, ) from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -7083,6 +7090,16 @@ def fillna( Note that column D is not affected since it is not present in df2. """ inplace = validate_bool_kwarg(inplace, "inplace") + if inplace: + if not PYPY and using_copy_on_write(): + refcount = 2 if PY311 else 3 + if sys.getrefcount(self) <= refcount: + warnings.warn( + _chained_assignment_method_msg, + ChainedAssignmentError, + stacklevel=2, + ) + value, method = validate_fillna_kwargs(value, method) if method is not None: warnings.warn( diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 438f504968b2d..0c5b8caeaba6e 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -393,6 +393,17 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment.\n" + "When using the Copy-on-Write mode, such chained assignment never works " + "to update the original DataFrame or Series, because the intermediate " + "object on which we are setting values always behaves as a copy.\n\n" + "Try using 'df.method({col: value}, inplace=True)' instead, to perform " + "the operation inplace.\n\n" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 5a4b958b2148d..eb10e3b4e716a 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -344,3 +344,16 @@ def test_fillna_inplace_ea_noop_shares_memory( assert not np.shares_memory(get_array(df, "b"), get_array(view, "b")) df.iloc[0, 1] = 100 tm.assert_frame_equal(df_orig, view) + + +def test_fillna_chained_assignment(using_copy_on_write): + df = DataFrame({"a": [1, np.nan, 2], "b": 1}) + df_orig = df.copy() + if using_copy_on_write: + with tm.raises_chained_assignment_error(): + df["a"].fillna(100, inplace=True) + tm.assert_frame_equal(df, df_orig) + + with tm.raises_chained_assignment_error(): + df[["a"]].fillna(100, inplace=True) + tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 109520859af4d..40fe7d2ce9af5 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from pandas.errors import ChainedAssignmentError import pandas.util._test_decorators as td from pandas import ( @@ -49,11 +50,12 @@ def test_fillna_on_column_view(self, using_copy_on_write): arr = np.full((40, 50), np.nan) df = DataFrame(arr, copy=False) - # TODO(CoW): This should raise a chained assignment error - df[0].fillna(-1, inplace=True) if using_copy_on_write: + with tm.assert_produces_warning(ChainedAssignmentError): + df[0].fillna(-1, inplace=True) assert np.isnan(arr[:, 0]).all() else: + df[0].fillna(-1, inplace=True) assert (arr[:, 0] == -1).all() # i.e. we didn't create a new 49-column block @@ -105,7 +107,9 @@ def test_fillna_mixed_float(self, mixed_float_frame): result = mf.fillna(method="pad") _check_mixed_float(result, dtype={"C": None}) - def test_fillna_empty(self): + def test_fillna_empty(self, using_copy_on_write): + if using_copy_on_write: + pytest.skip("condition is unnecessary complex and is deprecated anyway") # empty frame (GH#2778) df = DataFrame(columns=["x"]) for m in ["pad", "backfill"]: diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 6b6c1f6f64ff7..335901c457240 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning +from pandas.errors import ( + ChainedAssignmentError, + PerformanceWarning, +) import pandas.util._test_decorators as td import pandas as pd @@ -410,7 +413,11 @@ def test_update_inplace_sets_valid_block_values(using_copy_on_write): df = DataFrame({"a": Series([1, 2, None], dtype="category")}) # inplace update of a single column - df["a"].fillna(1, inplace=True) + if using_copy_on_write: + with tm.assert_produces_warning(ChainedAssignmentError): + df["a"].fillna(1, inplace=True) + else: + df["a"].fillna(1, inplace=True) # check we haven't put a Series into any block.values assert isinstance(df._mgr.blocks[0].values, Categorical) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index e0868745a480a..d27a2cde9417e 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError +from pandas.errors import ( + ChainedAssignmentError, + SettingWithCopyError, +) import pandas.util._test_decorators as td from pandas import ( @@ -30,7 +33,8 @@ def test_detect_chained_assignment(using_copy_on_write): zed = DataFrame(events, index=["a", "b"], columns=multiind) if using_copy_on_write: - zed["eyes"]["right"].fillna(value=555, inplace=True) + with tm.assert_produces_warning(ChainedAssignmentError): + zed["eyes"]["right"].fillna(value=555, inplace=True) else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index cffae7d18bee1..466419bf5093e 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -51,6 +51,7 @@ "_arrow_dtype_mapping", "_global_config", "_chained_assignment_msg", + "_chained_assignment_method_msg", }