From 45ccb3997e808bf8d66515ed2208efe6bfdbfd47 Mon Sep 17 00:00:00 2001 From: Max van Deursen Date: Mon, 4 Mar 2019 15:26:00 +0100 Subject: [PATCH 1/8] ENH: GH13473 Add errors parameter to DataFrame.rename --- pandas/core/frame.py | 8 +++++++- pandas/core/generic.py | 15 ++++++++++++++- pandas/tests/frame/test_alter_axes.py | 19 +++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b4d95055d06d..eeff1ab530d14 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3940,7 +3940,8 @@ def rename(self, *args, **kwargs): level : int or level name, default None In case of a MultiIndex, only rename labels in the specified level. - + errors : {'ignore', 'raise'}, default 'ignore' + If 'ignore', suppress error and existing labels are renamed. Returns ------- DataFrame @@ -3949,6 +3950,11 @@ def rename(self, *args, **kwargs): -------- DataFrame.rename_axis + Raises + ------ + KeyError + If any of the labels is not found in the selected axis. + Examples -------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ee8f9cba951b3..fd5615d3640ed 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -981,11 +981,18 @@ def rename(self, *args, **kwargs): level : int or level name, default None In case of a MultiIndex, only rename labels in the specified level. + errors : {'ignore', 'raise'}, default 'ignore' + If 'ignore', suppress error and existing labels are renamed. Returns ------- renamed : %(klass)s (new object) + Raises + ------ + KeyError + If any of the labels is not found in the selected axis. 
+ See Also -------- NDFrame.rename_axis @@ -1065,6 +1072,7 @@ def rename(self, *args, **kwargs): inplace = kwargs.pop('inplace', False) level = kwargs.pop('level', None) axis = kwargs.pop('axis', None) + errors = kwargs.pop('errors', 'ignore') if axis is not None: # Validate the axis self._get_axis_number(axis) @@ -1085,10 +1093,15 @@ def rename(self, *args, **kwargs): if v is None: continue f = com._get_rename_function(v) - baxis = self._get_block_manager_axis(axis) if level is not None: level = self.axes[axis]._get_level_number(level) + + # GH 13473 + labels_missing = (self.axes[axis].get_indexer_for(v) == -1).any() + if errors == 'raise' and labels_missing: + raise KeyError('{} not found in axis'.format(v)) + result._data = result._data.rename_axis(f, axis=baxis, copy=copy, level=level) result._clear_item_cache() diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index a25e893e08900..4abbaeb152674 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -871,6 +871,25 @@ def test_rename_bug2(self): columns=["a"]) tm.assert_frame_equal(df, expected) + def test_rename_errors(self): + # GH 13473 + # rename now works with errors parameter + + # Error has to be thrown and is thrown + df = DataFrame(columns=['A', 'B', 'C', 'D']) + with pytest.raises(KeyError): + df.rename(columns={'A': 'a', 'E': 'e'}, errors='raise') + + # Error should be ignored + renamed = df.rename(columns={'A': 'a', 'E': 'e'}) + expected = DataFrame(columns=['a', 'B', 'C', 'D']) + tm.assert_frame_equal(renamed, expected) + + # Correct behaviour with raising errors. 
+ renamed = df.rename(columns={'A': 'a'}, errors='raise') + expected = DataFrame(columns=['a', 'B', 'C', 'D']) + tm.assert_frame_equal(renamed, expected) + def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], From 011e2c156c3a58ade6c5be5b6f4c6a28b071573a Mon Sep 17 00:00:00 2001 From: Max van Deursen Date: Mon, 4 Mar 2019 15:49:13 +0100 Subject: [PATCH 2/8] ENH: GH13473 Change KeyError to only include missing labels --- pandas/core/generic.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fd5615d3640ed..a24d6fe416ef2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1098,9 +1098,11 @@ def rename(self, *args, **kwargs): level = self.axes[axis]._get_level_number(level) # GH 13473 - labels_missing = (self.axes[axis].get_indexer_for(v) == -1).any() - if errors == 'raise' and labels_missing: - raise KeyError('{} not found in axis'.format(v)) + indexer = self.axes[axis].get_indexer_for(v) + missing_labels = [label for index, label in enumerate(v) + if indexer[index] == -1] + if errors == 'raise' and len(missing_labels) > 0: + raise KeyError('{} not found in axis'.format(missing_labels)) result._data = result._data.rename_axis(f, axis=baxis, copy=copy, level=level) From 383bc5974fb98c27b909a36d38132eceaa1bd54c Mon Sep 17 00:00:00 2001 From: Max van Deursen Date: Mon, 4 Mar 2019 16:25:13 +0100 Subject: [PATCH 3/8] ENH: Check that v is not a callable (#13473) This commit fixes a bug that raised a TypeError when a function, rather than a dictionary, was passed to rename. 
--- pandas/core/generic.py | 12 +++++++----- pandas/tests/frame/test_alter_axes.py | 4 ++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a24d6fe416ef2..84bd23870291c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1098,11 +1098,13 @@ def rename(self, *args, **kwargs): level = self.axes[axis]._get_level_number(level) # GH 13473 - indexer = self.axes[axis].get_indexer_for(v) - missing_labels = [label for index, label in enumerate(v) - if indexer[index] == -1] - if errors == 'raise' and len(missing_labels) > 0: - raise KeyError('{} not found in axis'.format(missing_labels)) + if not callable(v): + indexer = self.axes[axis].get_indexer_for(v) + missing_labels = [label for index, label in enumerate(v) + if indexer[index] == -1] + if errors == 'raise' and len(missing_labels) > 0: + raise KeyError('{} not found in axis' + .format(missing_labels)) result._data = result._data.rename_axis(f, axis=baxis, copy=copy, level=level) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 4abbaeb152674..8d7526ee17eea 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -890,6 +890,10 @@ def test_rename_errors(self): expected = DataFrame(columns=['a', 'B', 'C', 'D']) tm.assert_frame_equal(renamed, expected) + renamed = df.rename(columns=str.lower, errors='raise') + expected = DataFrame(columns=['a', 'b', 'c', 'd']) + tm.assert_frame_equal(renamed, expected) + def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], From 79ace76ac5c08174d8aa6c10cca41e5534b03dc8 Mon Sep 17 00:00:00 2001 From: Max van Deursen Date: Mon, 4 Mar 2019 17:25:38 +0100 Subject: [PATCH 4/8] ENH: Correct documentation of DataFrame.rename (#13473) --- pandas/core/frame.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff 
--git a/pandas/core/frame.py b/pandas/core/frame.py index eeff1ab530d14..c6f75195df70b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3911,7 +3911,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None, @rewrite_axis_style_signature('mapper', [('copy', True), ('inplace', False), - ('level', None)]) + ('level', None), + ('errors', 'ignore')]) def rename(self, *args, **kwargs): """ Alter axes labels. @@ -3924,17 +3925,23 @@ def rename(self, *args, **kwargs): Parameters ---------- - mapper, index, columns : dict-like or function, optional - dict-like or functions transformations to apply to + mapper : dict-like or function + Dict-like or functions transformations to apply to that axis' values. Use either ``mapper`` and ``axis`` to specify the axis to target with ``mapper``, or ``index`` and ``columns``. - axis : int or str, optional + index : dict-like or function + Alternative to specifying axis (``mapper, axis=0`` + is equivalent to ``index=mapper``). + columns : dict-like or function + Alternative to specifying axis (``mapper, axis=1`` + is equivalent to ``columns=mapper``). + axis : int or str Axis to target with ``mapper``. Can be either the axis name ('index', 'columns') or number (0, 1). The default is 'index'. - copy : boolean, default True - Also copy underlying data - inplace : boolean, default False + copy : bool, default True + Also copy underlying data. + inplace : bool, default False Whether to return a new DataFrame. If True then value of copy is ignored. level : int or level name, default None @@ -3942,19 +3949,22 @@ def rename(self, *args, **kwargs): level. errors : {'ignore', 'raise'}, default 'ignore' If 'ignore', suppress error and existing labels are renamed. + Returns ------- DataFrame - - See Also - -------- - DataFrame.rename_axis + DataFrame with the renamed axis labels. Raises ------ KeyError If any of the labels is not found in the selected axis. 
+ See Also + -------- + DataFrame.rename_axis: Set the name of the axis for the index or + columns. + Examples -------- From 3095c5f84497755a02b23b8ff15f84d3f5fd7a53 Mon Sep 17 00:00:00 2001 From: Max van Deursen Date: Mon, 4 Mar 2019 18:06:52 +0100 Subject: [PATCH 5/8] ENH: Change signature assertion (#13473) --- pandas/tests/frame/test_alter_axes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 8d7526ee17eea..ecceb8c927946 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1351,7 +1351,7 @@ def test_rename_signature(self): sig = inspect.signature(DataFrame.rename) parameters = set(sig.parameters) assert parameters == {"self", "mapper", "index", "columns", "axis", - "inplace", "copy", "level"} + "inplace", "copy", "level", "errors"} @pytest.mark.skipif(PY2, reason="inspect.signature") def test_reindex_signature(self): From e67223d9ac7b483ec955e2a5df8541baa908d077 Mon Sep 17 00:00:00 2001 From: Max van Deursen Date: Tue, 5 Mar 2019 18:05:13 +0100 Subject: [PATCH 6/8] ENH: Implement feedback (#13473) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/frame.py | 9 ++++++-- pandas/core/generic.py | 3 ++- pandas/tests/frame/test_alter_axes.py | 30 +++++++++++---------------- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 124ec8f4ab92c..67474918159d5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -25,7 +25,7 @@ Other Enhancements - ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`) - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) - :meth:`DatetimeIndex.union` now supports the ``sort`` 
argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) -- +- :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`) .. _whatsnew_0250.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c6f75195df70b..ea06116e7dd9c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3958,11 +3958,12 @@ def rename(self, *args, **kwargs): Raises ------ KeyError - If any of the labels is not found in the selected axis. + If any of the labels is not found in the selected axis and + "errors='raise'". See Also -------- - DataFrame.rename_axis: Set the name of the axis for the index or + DataFrame.rename_axis : Set the name of the axis for the index or columns. Examples @@ -3989,6 +3990,10 @@ def rename(self, *args, **kwargs): 1 2 5 2 3 6 + >>> df.rename(index=str, columns={"A": "a", "C": "c"}, errors="raise") + Traceback (most recent call last): + KeyError: ['C'] not found in axis + Using axis-style parameters >>> df.rename(str.lower, axis='columns') diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 84bd23870291c..e54fec9e14525 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -991,7 +991,8 @@ def rename(self, *args, **kwargs): Raises ------ KeyError - If any of the labels is not found in the selected axis. + If any of the labels is not found in the selected axis and + "errors='raise'". 
See Also -------- diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index ecceb8c927946..571455515e2a9 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -871,27 +871,21 @@ def test_rename_bug2(self): columns=["a"]) tm.assert_frame_equal(df, expected) - def test_rename_errors(self): - # GH 13473 - # rename now works with errors parameter - - # Error has to be thrown and is thrown + def test_rename_errors_raises(self): df = DataFrame(columns=['A', 'B', 'C', 'D']) - with pytest.raises(KeyError): + with pytest.raises(KeyError, match='\'E\'] not found in axis'): df.rename(columns={'A': 'a', 'E': 'e'}, errors='raise') - # Error should be ignored - renamed = df.rename(columns={'A': 'a', 'E': 'e'}) - expected = DataFrame(columns=['a', 'B', 'C', 'D']) - tm.assert_frame_equal(renamed, expected) - - # Correct behaviour with raising errors. - renamed = df.rename(columns={'A': 'a'}, errors='raise') - expected = DataFrame(columns=['a', 'B', 'C', 'D']) - tm.assert_frame_equal(renamed, expected) - - renamed = df.rename(columns=str.lower, errors='raise') - expected = DataFrame(columns=['a', 'b', 'c', 'd']) + @pytest.mark.parametrize('mapper, errors, expected_columns', [ + ({'A': 'a', 'E': 'e'}, 'ignore', ['a', 'B', 'C', 'D']), + ({'A': 'a'}, 'raise', ['a', 'B', 'C', 'D']), + (str.lower, 'raise', ['a', 'b', 'c', 'd'])]) + def test_rename_errors(self, mapper, errors, expected_columns): + # GH 13473 + # rename now works with errors parameter + df = DataFrame(columns=['A', 'B', 'C', 'D']) + renamed = df.rename(columns=mapper, errors=errors) + expected = DataFrame(columns=expected_columns) tm.assert_frame_equal(renamed, expected) def test_reorder_levels(self): From 525f8c5730638e677ed72493de42e85d15d44804 Mon Sep 17 00:00:00 2001 From: Max van Deursen Date: Tue, 5 Mar 2019 20:30:12 +0100 Subject: [PATCH 7/8] ENH: Implement feedback (#13473) --- pandas/core/frame.py | 9 ++++++--- 
pandas/core/generic.py | 12 ++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ea06116e7dd9c..eadffb779734f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3948,7 +3948,11 @@ def rename(self, *args, **kwargs): In case of a MultiIndex, only rename labels in the specified level. errors : {'ignore', 'raise'}, default 'ignore' - If 'ignore', suppress error and existing labels are renamed. + If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, + or `columns` contains labels that are not present in the Index + being transformed. + If 'ignore', existing keys will be renamed and extra keys will be + ignored. Returns ------- @@ -3963,8 +3967,7 @@ def rename(self, *args, **kwargs): See Also -------- - DataFrame.rename_axis : Set the name of the axis for the index or - columns. + DataFrame.rename_axis : Set the name of the axis. Examples -------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e54fec9e14525..7915d98662c9e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -982,7 +982,11 @@ def rename(self, *args, **kwargs): In case of a MultiIndex, only rename labels in the specified level. errors : {'ignore', 'raise'}, default 'ignore' - If 'ignore', suppress error and existing labels are renamed. + If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, + or `columns` contains labels that are not present in the Index + being transformed. + If 'ignore', existing keys will be renamed and extra keys will be + ignored. 
Returns ------- @@ -1101,9 +1105,9 @@ def rename(self, *args, **kwargs): # GH 13473 if not callable(v): indexer = self.axes[axis].get_indexer_for(v) - missing_labels = [label for index, label in enumerate(v) - if indexer[index] == -1] - if errors == 'raise' and len(missing_labels) > 0: + if errors == 'raise' and len(indexer[indexer == -1]): + missing_labels = [label for index, label in enumerate(v) + if indexer[index] == -1] raise KeyError('{} not found in axis' .format(missing_labels)) From 97746e87063303e917a680659849d5178a198db7 Mon Sep 17 00:00:00 2001 From: Max van Deursen Date: Tue, 5 Mar 2019 21:59:37 +0100 Subject: [PATCH 8/8] ENH: Rename renamed to result in testcase (#13473) --- pandas/tests/frame/test_alter_axes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 571455515e2a9..f01b86f727fee 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -884,9 +884,9 @@ def test_rename_errors(self, mapper, errors, expected_columns): # GH 13473 # rename now works with errors parameter df = DataFrame(columns=['A', 'B', 'C', 'D']) - renamed = df.rename(columns=mapper, errors=errors) + result = df.rename(columns=mapper, errors=errors) expected = DataFrame(columns=expected_columns) - tm.assert_frame_equal(renamed, expected) + tm.assert_frame_equal(result, expected) def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],