Correct KeyError

pandas-dev · TomAugspurger · Dec 18, 2017 · Dec 11, 2017 · Dec 11, 2017 · Dec 15, 2017
commit d8c20e85ea9363bbdbe58c16b51fe9b76eb4e24d
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -2861,13 +2861,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
     is_tuple = isinstance(key, tuple)
     all_hashable = is_tuple and all(is_hashable(x) for x in key)
 
+    original_grouper = None
     if is_tuple:
         if not all_hashable or key not in obj:
             msg = ("Interpreting tuple 'by' as a list of keys, rather than "
-                   "a single key. Use 'by={!r}' instead of 'by={!r}'. In the "
-                   "future, a tuple will always mean a single key.".format(
-                       list(key), key))
+                   "a single key. Use 'by=[...]' instead of 'by=(...)'. In "
+                   "the future, a tuple will always mean a single key.")
             warnings.warn(msg, FutureWarning, stacklevel=5)
+            original_grouper = key
             key = list(key)
 
     if not isinstance(key, list):
@@ -2939,6 +2940,11 @@ def is_in_obj(gpr):
             elif obj._is_level_reference(gpr):
                 in_axis, name, level, gpr = False, None, gpr, None
             else:
+                # `key` may have been converted from a tuple to a list
+                # above (deprecation in #18731); report the tuple the
+                # caller passed so the KeyError names the right key.
+                if original_grouper is not None:
+                    gpr = original_grouper
                 raise KeyError(gpr)
         elif isinstance(gpr, Grouper) and gpr.key is not None:
             # Add key to exclusions

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2755,6 +2755,14 @@ def test_tuple_warns_unhashable(self):
 
         assert "Interpreting tuple 'by' as a list" in str(w[0].message)
 
+    def test_tuple_correct_keyerror(self):
+        # The KeyError should name the tuple as it was passed to groupby
+        mi = pd.MultiIndex.from_product([[1, 2], [3, 4]])
+        df = pd.DataFrame(1, index=range(3), columns=mi)
+        with tm.assert_produces_warning(FutureWarning):  # just silence
+            with tm.assert_raises_regex(KeyError, r"\(7, 8\)"):
+                df.groupby((7, 8)).mean()
+
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)