From 6a9b47a466907879a63672cea404f669e90e0ef1 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 21 Jun 2023 22:30:08 +0200 Subject: [PATCH 1/9] CoW: Add warning for chained assignment with fillna --- doc/source/whatsnew/v2.1.0.rst | 8 ++++++++ pandas/core/generic.py | 13 +++++++++++++ pandas/errors/__init__.py | 11 +++++++++++ pandas/tests/copy_view/test_interp_fillna.py | 15 +++++++++++++++ scripts/validate_unwanted_patterns.py | 1 + 5 files changed, 48 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 511e5793608bc..41f77ced34cbb 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -26,6 +26,14 @@ Copy-on-Write improvements of Index objects and specifying ``copy=False``, will now use a lazy copy of those Index objects for the columns of the DataFrame (:issue:`52947`) +- Trying to operate inplace on a temporary column selection + (for example, ``df["a"].fillna(100, inplace=True)``) + will now always raise a warning when Copy-on-Write is enabled. In this mode, + operating inplace like this will never work, since the selection behaves + as a temporary copy. This holds true for: + + - DataFrame.fillna / Series.fillna + .. _whatsnew_210.enhancements.enhancement2: ``map(func, na_action="ignore")`` now works for all array types diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5145a929247bd..557d8e4258994 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9,6 +9,7 @@ import operator import pickle import re +import sys from typing import ( TYPE_CHECKING, Any, @@ -87,13 +88,16 @@ WriteExcelBuffer, npt, ) +from pandas.compat import PYPY from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.errors import ( AbstractMethodError, + ChainedAssignmentError, InvalidIndexError, SettingWithCopyError, SettingWithCopyWarning, + _chained_assignment_method_msg, ) from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -7050,6 +7054,15 @@ def fillna( Note that column D is not affected since it is not present in df2. """ inplace = validate_bool_kwarg(inplace, "inplace") + if inplace: + if not PYPY and using_copy_on_write(): + if sys.getrefcount(self) <= 3: + warnings.warn( + _chained_assignment_method_msg, + ChainedAssignmentError, + stacklevel=2, + ) + value, method = validate_fillna_kwargs(value, method) if method is not None: warnings.warn( diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 438f504968b2d..0c5b8caeaba6e 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -393,6 +393,17 @@ class ChainedAssignmentError(Warning): ) +_chained_assignment_method_msg = ( + "A value is trying to be set on a copy of a DataFrame or Series " + "through chained assignment.\n" + "When using the Copy-on-Write mode, such chained assignment never works " + "to update the original DataFrame or Series, because the intermediate " + "object on which we are setting values always behaves as a copy.\n\n" + "Try using 'df.method({col: value}, inplace=True)' instead, to perform " + "the operation inplace.\n\n" +) + + class NumExprClobberingError(NameError): """ Exception raised when trying to use a built-in numexpr name as a variable name. diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 71023a538a2c3..4560b3074f361 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.errors import ChainedAssignmentError + from pandas import ( NA, ArrowDtype, @@ -339,3 +341,16 @@ def test_fillna_inplace_ea_noop_shares_memory( assert not np.shares_memory(get_array(df, "b"), get_array(view, "b")) df.iloc[0, 1] = 100 tm.assert_frame_equal(df_orig, view) + + +def test_fillna_chained_assignment(using_copy_on_write): + df = DataFrame({"a": [1, np.nan, 2], "b": 1}) + df_orig = df.copy() + if using_copy_on_write: + with tm.assert_produces_warning(ChainedAssignmentError): + df["a"].fillna(100, inplace=True) + tm.assert_frame_equal(df, df_orig) + + with tm.assert_produces_warning(ChainedAssignmentError): + df[["a"]].fillna(100, inplace=True) + tm.assert_frame_equal(df, df_orig) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index e171d1825ac48..c142d11f1348e 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -51,6 +51,7 @@ "_arrow_dtype_mapping", "_global_config", "_chained_assignment_msg", + "_chained_assignment_method_msg", } From 15880ae00eb8cd95a506ae50d899a381f2bf11b8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 21 Jun 2023 22:33:24 +0200 Subject: [PATCH 2/9] Fix tests --- pandas/tests/frame/methods/test_fillna.py | 14 ++++++++++---- pandas/tests/frame/test_block_internals.py | 11 +++++++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index d340e048994a9..f163ac60d5384 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from pandas.errors import ChainedAssignmentError import pandas.util._test_decorators as td from pandas import ( @@ -49,11 +50,12 @@ def test_fillna_on_column_view(self, using_copy_on_write): arr = np.full((40, 50), np.nan) df = DataFrame(arr, copy=False) - # TODO(CoW): This should raise a chained assignment error - df[0].fillna(-1, inplace=True) if using_copy_on_write: + with tm.assert_produces_warning(ChainedAssignmentError): + df[0].fillna(-1, inplace=True) assert np.isnan(arr[:, 0]).all() else: + df[0].fillna(-1, inplace=True) assert (arr[:, 0] == -1).all() # i.e. we didn't create a new 49-column block @@ -105,13 +107,17 @@ def test_fillna_mixed_float(self, mixed_float_frame): result = mf.fillna(method="pad") _check_mixed_float(result, dtype={"C": None}) - def test_fillna_empty(self): + def test_fillna_empty(self, using_copy_on_write): # empty frame (GH#2778) df = DataFrame(columns=["x"]) for m in ["pad", "backfill"]: msg = "Series.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): - df.x.fillna(method=m, inplace=True) + if using_copy_on_write: + with tm.assert_produces_warning(ChainedAssignmentError): + df.x.fillna(method=m, inplace=True) + else: + df.x.fillna(method=m, inplace=True) df.x.fillna(method=m) def test_fillna_different_dtype(self): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 6b6c1f6f64ff7..335901c457240 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning +from pandas.errors import ( + ChainedAssignmentError, + PerformanceWarning, +) import pandas.util._test_decorators as td import pandas as pd @@ -410,7 +413,11 @@ def test_update_inplace_sets_valid_block_values(using_copy_on_write): df = DataFrame({"a": Series([1, 2, None], dtype="category")}) # inplace update of a single column - df["a"].fillna(1, inplace=True) + if using_copy_on_write: + with tm.assert_produces_warning(ChainedAssignmentError): + df["a"].fillna(1, inplace=True) + else: + df["a"].fillna(1, inplace=True) # check we haven't put a Series into any block.values assert isinstance(df._mgr.blocks[0].values, Categorical) From 7a974302a383e3b2c19fe1e42f36734c2005eedc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 21 Jun 2023 23:41:44 +0200 Subject: [PATCH 3/9] Fix --- pandas/tests/frame/methods/test_fillna.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index f163ac60d5384..60c91ebfe7781 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -108,16 +108,14 @@ def test_fillna_mixed_float(self, mixed_float_frame): _check_mixed_float(result, dtype={"C": None}) def test_fillna_empty(self, using_copy_on_write): + if using_copy_on_write: + pytest.skip("condition is unnecessary complex and is deprecated anyway") # empty frame (GH#2778) df = DataFrame(columns=["x"]) for m in ["pad", "backfill"]: msg = "Series.fillna with 'method' is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg): - if using_copy_on_write: - with tm.assert_produces_warning(ChainedAssignmentError): - df.x.fillna(method=m, inplace=True) - else: - df.x.fillna(method=m, inplace=True) + df.x.fillna(method=m, inplace=True) df.x.fillna(method=m) def test_fillna_different_dtype(self): From c9f948acb66454ded81d32ed54e65b170c8a5b43 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 2 Jul 2023 23:37:43 +0200 Subject: [PATCH 4/9] Fix --- .../indexing/multiindex/test_chaining_and_caching.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 932457eebcd8e..c545a77291a0d 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError +from pandas.errors import ( + ChainedAssignmentError, + SettingWithCopyError, +) import pandas.util._test_decorators as td from pandas import ( @@ -30,7 +33,8 @@ def test_detect_chained_assignment(using_copy_on_write): zed = DataFrame(events, index=["a", "b"], columns=multiind) if using_copy_on_write: - zed["eyes"]["right"].fillna(value=555, inplace=True) + with tm.assert_produces_warning(ChainedAssignmentError): + zed["eyes"]["right"].fillna(value=555, inplace=True) else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): From c881ca73b23836f852cf710f61437f11b306303c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 3 Jul 2023 00:02:51 +0200 Subject: [PATCH 5/9] Raise --- pandas/core/generic.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9d9ae120d43e8..6abf32a8e5555 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -95,11 +95,9 @@ from pandas.compat.numpy import function as nv from pandas.errors import ( AbstractMethodError, - ChainedAssignmentError, InvalidIndexError, SettingWithCopyError, SettingWithCopyWarning, - _chained_assignment_method_msg, ) from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -7089,12 +7087,13 @@ def fillna( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: if not PYPY and using_copy_on_write(): - if sys.getrefcount(self) <= 3: - warnings.warn( - _chained_assignment_method_msg, - ChainedAssignmentError, - stacklevel=2, - ) + raise ValueError(sys.getrefcount(self)) + # if sys.getrefcount(self) <= 3: + # warnings.warn( + # _chained_assignment_method_msg, + # ChainedAssignmentError, + # stacklevel=2, + # ) value, method = validate_fillna_kwargs(value, method) if method is not None: From 3f4b76380edfe03a217dc592d4bcba5ab09bcafe Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 3 Jul 2023 15:56:42 +0200 Subject: [PATCH 6/9] Revert "Raise" This reverts commit c881ca73b23836f852cf710f61437f11b306303c. --- pandas/core/generic.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6abf32a8e5555..9d9ae120d43e8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -95,9 +95,11 @@ from pandas.compat.numpy import function as nv from pandas.errors import ( AbstractMethodError, + ChainedAssignmentError, InvalidIndexError, SettingWithCopyError, SettingWithCopyWarning, + _chained_assignment_method_msg, ) from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -7087,13 +7089,12 @@ def fillna( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: if not PYPY and using_copy_on_write(): - raise ValueError(sys.getrefcount(self)) - # if sys.getrefcount(self) <= 3: - # warnings.warn( - # _chained_assignment_method_msg, - # ChainedAssignmentError, - # stacklevel=2, - # ) + if sys.getrefcount(self) <= 3: + warnings.warn( + _chained_assignment_method_msg, + ChainedAssignmentError, + stacklevel=2, + ) value, method = validate_fillna_kwargs(value, method) if method is not None: From 590ef69bf4d0419484fc79bc449682efc487d27f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 3 Jul 2023 16:31:20 +0200 Subject: [PATCH 7/9] Fix --- pandas/core/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9d9ae120d43e8..2ef39e04f6d52 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -90,7 +90,10 @@ WriteExcelBuffer, npt, ) -from pandas.compat import PYPY +from pandas.compat import ( + PY311, + PYPY, +) from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.errors import ( @@ -7089,7 +7092,8 @@ def fillna( inplace = validate_bool_kwarg(inplace, "inplace") if inplace: if not PYPY and using_copy_on_write(): - if sys.getrefcount(self) <= 3: + refcount = 2 if PY311 else 3 + if sys.getrefcount(self) <= refcount: warnings.warn( _chained_assignment_method_msg, ChainedAssignmentError, From 10cea6ee38468047e26760326e5b03a052e2b036 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 5 Jul 2023 22:25:13 +0200 Subject: [PATCH 8/9] Update --- pandas/tests/copy_view/test_interp_fillna.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 5252872482271..604d582a8178d 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -352,10 +352,10 @@ def test_fillna_chained_assignment(using_copy_on_write): df = DataFrame({"a": [1, np.nan, 2], "b": 1}) df_orig = df.copy() if using_copy_on_write: - with tm.assert_produces_warning(ChainedAssignmentError): + with tm.raises_chained_assignment_error(ChainedAssignmentError): df["a"].fillna(100, inplace=True) tm.assert_frame_equal(df, df_orig) - with tm.assert_produces_warning(ChainedAssignmentError): + with tm.raises_chained_assignment_error(ChainedAssignmentError): df[["a"]].fillna(100, inplace=True) tm.assert_frame_equal(df, df_orig) From dd538f012d0a438a59ae42f8628bd0f69a071d9d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 5 Jul 2023 23:54:16 +0200 Subject: [PATCH 9/9] Fix --- pandas/tests/copy_view/test_interp_fillna.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 604d582a8178d..eb10e3b4e716a 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.errors import ChainedAssignmentError - from pandas import ( NA, ArrowDtype, @@ -352,10 +350,10 @@ def test_fillna_chained_assignment(using_copy_on_write): df = DataFrame({"a": [1, np.nan, 2], "b": 1}) df_orig = df.copy() if using_copy_on_write: - with tm.raises_chained_assignment_error(ChainedAssignmentError): + with tm.raises_chained_assignment_error(): df["a"].fillna(100, inplace=True) tm.assert_frame_equal(df, df_orig) - with tm.raises_chained_assignment_error(ChainedAssignmentError): + with tm.raises_chained_assignment_error(): df[["a"]].fillna(100, inplace=True) tm.assert_frame_equal(df, df_orig)