From 3fa684db0e44c5d3213566875f4011dd8083cfba Mon Sep 17 00:00:00 2001
From: Andreas Mueller <amueller@nyu.edu>
Date: Fri, 19 Feb 2016 15:52:55 -0500
Subject: [PATCH 1/4] add get_feature_names to PolynomialFeatures

---
 sklearn/preprocessing/data.py            | 29 ++++++++++++++++++++++++
 sklearn/preprocessing/tests/test_data.py | 22 ++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 094100cd7024c..5494ab1dd7a13 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -1182,6 +1182,35 @@ def powers_(self):
         return np.vstack(np.bincount(c, minlength=self.n_input_features_)
                          for c in combinations)
 
+    def get_feature_names(self, input_features=None):
+        """
+        Return feature names for output features
+
+        Parameters
+        ----------
+        input_features : list of string, length n_features, optional
+            String names for input features if available. By default,
+            "x0", "x1", ... "x(n_features - 1)" are used.
+
+        Returns
+        -------
+        output_feature_names : list of string, length n_output_features
+
+        """
+        powers = self.powers_
+        if input_features is None:
+            input_features = ['x%d' % i for i in range(len(powers))]
+        feature_names = []
+        for row in powers:
+            inds = np.where(row)[0]
+            if len(inds):
+                name = " ".join("%s^%d" % (input_features[ind], exp)
+                                for ind, exp in zip(inds, row[inds]))
+            else:
+                name = "1"
+            feature_names.append(name)
+        return feature_names
+
     def fit(self, X, y=None):
         """
         Compute number of output features.
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index baa62cbbaffe2..e3631564810d4 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -10,6 +10,7 @@
 import numpy.linalg as la
 from scipy import sparse
 from distutils.version import LooseVersion
+from sklearn.externals.six import u
 
 from sklearn.utils import gen_batches
 
@@ -119,6 +120,27 @@ def test_polynomial_features():
     assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]])
 
 
+def test_polynomial_feature_names():
+    X = np.arange(30).reshape(10, 3)
+    poly = PolynomialFeatures(degree=2, include_bias=True).fit(X)
+    feature_names = poly.get_feature_names()
+    assert_array_equal(['1', 'x0^1', 'x1^1', 'x2^1', 'x0^2', 'x0^1 x1^1',
+                        'x0^1 x2^1', 'x1^2', 'x1^1 x2^1', 'x2^2'],
+                       feature_names)
+
+    poly = PolynomialFeatures(degree=3, include_bias=False).fit(X)
+    feature_names = poly.get_feature_names(["a", "b", "c"])
+    assert_array_equal(['a^1', 'b^1', 'c^1', 'a^2', 'a^1 b^1', 'a^1 c^1', 'b^2',
+                        'b^1 c^1', 'c^2', 'a^3', 'a^2 b^1', 'a^2 c^1',
+                        'a^1 b^2', 'a^1 b^1 c^1', 'a^1 c^2', 'b^3', 'b^2 c^1',
+                        'b^1 c^2', 'c^3'], feature_names)
+    # test some unicode
+    poly = PolynomialFeatures(degree=1, include_bias=True).fit(X)
+    feature_names = poly.get_feature_names([u"\u0001F40D", u"\u262E", u"\u05D0"])
+    assert_array_equal([u"1", u"\u0001F40D^1", u"\u262E^1", u"\u05D0^1"],
+                       feature_names)
+
+
 def test_standard_scaler_1d():
     # Test scaling of dataset along single axis
     for X in [X_1row, X_1col, X_list_1row, X_list_1row]:

From ddc1740207acbcd989352dd3f9a5c94e5ece9634 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <amueller@nyu.edu>
Date: Fri, 19 Feb 2016 15:53:37 -0500
Subject: [PATCH 2/4] fix PolynomialFeatures.powers_ with numpy 1.6.1

---
 sklearn/preprocessing/data.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 5494ab1dd7a13..0c950bcf66610 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -20,6 +20,7 @@
 from ..utils.extmath import row_norms
 from ..utils.extmath import _incremental_mean_and_var
 from ..utils.fixes import combinations_with_replacement as combinations_w_r
+from ..utils.fixes import bincount
 from ..utils.sparsefuncs_fast import (inplace_csr_row_normalize_l1,
                                       inplace_csr_row_normalize_l2)
 from ..utils.sparsefuncs import (inplace_column_scale,
@@ -1179,7 +1180,7 @@ def powers_(self):
         combinations = self._combinations(self.n_input_features_, self.degree,
                                           self.interaction_only,
                                           self.include_bias)
-        return np.vstack(np.bincount(c, minlength=self.n_input_features_)
+        return np.vstack(bincount(c, minlength=self.n_input_features_)
                          for c in combinations)
 
     def get_feature_names(self, input_features=None):

From 897f8b6cc6b347eff252185769fb7e9562fa9ace Mon Sep 17 00:00:00 2001
From: Andreas Mueller <amueller@nyu.edu>
Date: Mon, 22 Feb 2016 15:09:47 -0500
Subject: [PATCH 3/4] don't append "^1" to first-power terms in feature names

---
 sklearn/preprocessing/data.py            |  1 +
 sklearn/preprocessing/tests/test_data.py | 14 +++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 0c950bcf66610..0d0c4167f55cb 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -1206,6 +1206,7 @@ def get_feature_names(self, input_features=None):
             inds = np.where(row)[0]
             if len(inds):
                 name = " ".join("%s^%d" % (input_features[ind], exp)
+                                if exp != 1 else input_features[ind]
                                 for ind, exp in zip(inds, row[inds]))
             else:
                 name = "1"
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index e3631564810d4..195e80c30d428 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -124,20 +124,20 @@ def test_polynomial_feature_names():
     X = np.arange(30).reshape(10, 3)
     poly = PolynomialFeatures(degree=2, include_bias=True).fit(X)
     feature_names = poly.get_feature_names()
-    assert_array_equal(['1', 'x0^1', 'x1^1', 'x2^1', 'x0^2', 'x0^1 x1^1',
-                        'x0^1 x2^1', 'x1^2', 'x1^1 x2^1', 'x2^2'],
+    assert_array_equal(['1', 'x0', 'x1', 'x2', 'x0^2', 'x0 x1',
+                        'x0 x2', 'x1^2', 'x1 x2', 'x2^2'],
                        feature_names)
 
     poly = PolynomialFeatures(degree=3, include_bias=False).fit(X)
     feature_names = poly.get_feature_names(["a", "b", "c"])
-    assert_array_equal(['a^1', 'b^1', 'c^1', 'a^2', 'a^1 b^1', 'a^1 c^1', 'b^2',
-                        'b^1 c^1', 'c^2', 'a^3', 'a^2 b^1', 'a^2 c^1',
-                        'a^1 b^2', 'a^1 b^1 c^1', 'a^1 c^2', 'b^3', 'b^2 c^1',
-                        'b^1 c^2', 'c^3'], feature_names)
+    assert_array_equal(['a', 'b', 'c', 'a^2', 'a b', 'a c', 'b^2',
+                        'b c', 'c^2', 'a^3', 'a^2 b', 'a^2 c',
+                        'a b^2', 'a b c', 'a c^2', 'b^3', 'b^2 c',
+                        'b c^2', 'c^3'], feature_names)
     # test some unicode
     poly = PolynomialFeatures(degree=1, include_bias=True).fit(X)
     feature_names = poly.get_feature_names([u"\u0001F40D", u"\u262E", u"\u05D0"])
-    assert_array_equal([u"1", u"\u0001F40D^1", u"\u262E^1", u"\u05D0^1"],
+    assert_array_equal([u"1", u"\u0001F40D", u"\u262E", u"\u05D0"],
                        feature_names)
 
 

From 8fb928d6442fa92161db9b2a97a616e2625384b6 Mon Sep 17 00:00:00 2001
From: Andreas Mueller <amueller@nyu.edu>
Date: Wed, 24 Feb 2016 17:08:15 -0500
Subject: [PATCH 4/4] fixed doc for powers, added test

---
 sklearn/preprocessing/data.py            | 4 ++--
 sklearn/preprocessing/tests/test_data.py | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py
index 0d0c4167f55cb..2a6c66446c3a2 100644
--- a/sklearn/preprocessing/data.py
+++ b/sklearn/preprocessing/data.py
@@ -1141,7 +1141,7 @@ class PolynomialFeatures(BaseEstimator, TransformerMixin):
 
     Attributes
     ----------
-    powers_ : array, shape (n_input_features, n_output_features)
+    powers_ : array, shape (n_output_features, n_input_features)
         powers_[i, j] is the exponent of the jth input in the ith output.
 
     n_input_features_ : int
@@ -1200,7 +1200,7 @@ def get_feature_names(self, input_features=None):
         """
         powers = self.powers_
         if input_features is None:
-            input_features = ['x%d' % i for i in range(len(powers))]
+            input_features = ['x%d' % i for i in range(powers.shape[1])]
         feature_names = []
         for row in powers:
             inds = np.where(row)[0]
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index 195e80c30d428..c741526de03ad 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -119,6 +119,9 @@ def test_polynomial_features():
     X_poly = interact.fit_transform(X)
     assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]])
 
+    assert_equal(interact.powers_.shape, (interact.n_output_features_,
+                 interact.n_input_features_))
+
 
 def test_polynomial_feature_names():
     X = np.arange(30).reshape(10, 3)