From a33909f574b3f8afb47c9e54b3af0ba677949c40 Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sat, 25 May 2019 12:11:43 -0500
Subject: [PATCH 01/12] Relaxed criterion specification so plot_tree no longer
 writes entropy for any criterion in decision tree

---
 sklearn/tree/export.py            | 10 +++++-----
 sklearn/tree/tests/test_export.py |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py
index 636ef03689a79..5320305478d36 100644
--- a/sklearn/tree/export.py
+++ b/sklearn/tree/export.py
@@ -547,15 +547,15 @@ def __init__(self, max_depth=None, feature_names=None,
 
         self.arrow_args = dict(arrowstyle="<-")
 
-    def _make_tree(self, node_id, et, depth=0):
+    def _make_tree(self, node_id, et, criterion, depth=0):
         # traverses _tree.Tree recursively, builds intermediate
         # "_reingold_tilford.Tree" object
-        name = self.node_to_str(et, node_id, criterion='entropy')
+        name = self.node_to_str(et, node_id, criterion=criterion)
         if (et.children_left[node_id] != _tree.TREE_LEAF
                 and (self.max_depth is None or depth <= self.max_depth)):
-            children = [self._make_tree(et.children_left[node_id], et,
+            children = [self._make_tree(et.children_left[node_id], et, criterion,
                                         depth=depth + 1),
-                        self._make_tree(et.children_right[node_id], et,
+                        self._make_tree(et.children_right[node_id], et, criterion,
                                         depth=depth + 1)]
         else:
             return Tree(name, node_id)
@@ -568,7 +568,7 @@ def export(self, decision_tree, ax=None):
             ax = plt.gca()
         ax.clear()
         ax.set_axis_off()
-        my_tree = self._make_tree(0, decision_tree.tree_)
+        my_tree = self._make_tree(0, decision_tree.tree_, decision_tree.criterion)
         draw_tree = buchheim(my_tree)
 
         # important to make sure we're still
diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py
index eed9be7bcb5d9..7fce48a5cb022 100644
--- a/sklearn/tree/tests/test_export.py
+++ b/sklearn/tree/tests/test_export.py
@@ -412,7 +412,7 @@ def test_plot_tree(pyplot):
     feature_names = ['first feat', 'sepal_width']
     nodes = plot_tree(clf, feature_names=feature_names)
     assert len(nodes) == 3
-    assert nodes[0].get_text() == ("first feat <= 0.0\nentropy = 0.5\n"
+    assert nodes[0].get_text() == ("first feat <= 0.0\ngini = 0.5\n"
                                    "samples = 6\nvalue = [3, 3]")
-    assert nodes[1].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [3, 0]"
+    assert nodes[1].get_text() == "gini = 0.0\nsamples = 3\nvalue = [3, 0]"
-    assert nodes[2].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [0, 3]"
+    assert nodes[2].get_text() == "gini = 0.0\nsamples = 3\nvalue = [0, 3]"
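[Reviewer note] The patch above threads the estimator's own `criterion` attribute through `_make_tree` instead of hard-coding `'entropy'` in the call to `node_to_str`. A minimal usage sketch of the behavior being fixed (an illustrative toy snippet, not part of the patch; it assumes matplotlib is available):

```python
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(criterion="gini", random_state=0).fit(X, y)

# Before this patch every node annotation read "entropy = ...", no matter
# which criterion the tree was grown with; with the fix the labels follow
# clf.criterion ("gini" here).
plot_tree(clf)
plt.show()
```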
From 9a7ba92d60203a8751d59258ff2c1d1aec1e49a1 Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sat, 25 May 2019 12:21:23 -0500
Subject: [PATCH 02/12]  Please enter the commit message for your changes.
 Lines starting

---
 sklearn/tree/export.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py
index 5320305478d36..f1d70bf2558af 100644
--- a/sklearn/tree/export.py
+++ b/sklearn/tree/export.py
@@ -553,10 +553,10 @@ def _make_tree(self, node_id, et, criterion, depth=0):
         name = self.node_to_str(et, node_id, criterion=criterion)
         if (et.children_left[node_id] != _tree.TREE_LEAF
                 and (self.max_depth is None or depth <= self.max_depth)):
-            children = [self._make_tree(et.children_left[node_id], et, criterion,
-                                        depth=depth + 1),
-                        self._make_tree(et.children_right[node_id], et, criterion,
-                                        depth=depth + 1)]
+            children = [self._make_tree(et.children_left[node_id], et,
+                                        criterion, depth=depth + 1),
+                        self._make_tree(et.children_right[node_id], et,
+                                        criterion, depth=depth + 1)]
         else:
             return Tree(name, node_id)
         return Tree(name, node_id, *children)

From 6881f8555d7b7b4a3d26eede032c94191b97be4e Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sat, 25 May 2019 12:22:22 -0500
Subject: [PATCH 03/12] Revert " Please enter the commit message for your
 changes. Lines starting"

This reverts commit 9a7ba92d
---
 sklearn/tree/export.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py
index f1d70bf2558af..5320305478d36 100644
--- a/sklearn/tree/export.py
+++ b/sklearn/tree/export.py
@@ -553,10 +553,10 @@ def _make_tree(self, node_id, et, criterion, depth=0):
         name = self.node_to_str(et, node_id, criterion=criterion)
         if (et.children_left[node_id] != _tree.TREE_LEAF
                 and (self.max_depth is None or depth <= self.max_depth)):
-            children = [self._make_tree(et.children_left[node_id], et,
-                                        criterion, depth=depth + 1),
-                        self._make_tree(et.children_right[node_id], et,
-                                        criterion, depth=depth + 1)]
+            children = [self._make_tree(et.children_left[node_id], et, criterion,
+                                        depth=depth + 1),
+                        self._make_tree(et.children_right[node_id], et, criterion,
+                                        depth=depth + 1)]
         else:
             return Tree(name, node_id)
         return Tree(name, node_id, *children)

From 9d1083922ef350e62146223c1668314759a9f565 Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sat, 25 May 2019 12:26:45 -0500
Subject: [PATCH 04/12] Ensure changes conform to flake8 character limits

---
 sklearn/tree/export.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py
index 5320305478d36..f1d70bf2558af 100644
--- a/sklearn/tree/export.py
+++ b/sklearn/tree/export.py
@@ -553,10 +553,10 @@ def _make_tree(self, node_id, et, criterion, depth=0):
         name = self.node_to_str(et, node_id, criterion=criterion)
         if (et.children_left[node_id] != _tree.TREE_LEAF
                 and (self.max_depth is None or depth <= self.max_depth)):
-            children = [self._make_tree(et.children_left[node_id], et, criterion,
-                                        depth=depth + 1),
-                        self._make_tree(et.children_right[node_id], et, criterion,
-                                        depth=depth + 1)]
+            children = [self._make_tree(et.children_left[node_id], et,
+                                        criterion, depth=depth + 1),
+                        self._make_tree(et.children_right[node_id], et,
+                                        criterion, depth=depth + 1)]
         else:
             return Tree(name, node_id)
         return Tree(name, node_id, *children)
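[Reviewer note] Patches 02-04 only rewrap the recursive calls so the lines fit flake8's default 79-column limit (the E501 check). A standalone sketch of a quick way to spot offending lines before pushing (the path is an assumption; any file works):

```python
# Flag lines longer than flake8's default 79-column limit (E501), the
# violation the rewrapping in patches 02-04 addresses.
def long_lines(path, limit=79):
    with open(path) as fh:
        for lineno, line in enumerate(fh, start=1):
            width = len(line.rstrip("\n"))
            if width > limit:
                yield lineno, width

for lineno, width in long_lines("sklearn/tree/export.py"):
    print("sklearn/tree/export.py:%d: %d chars" % (lineno, width))
```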
From 8c0fb6b5af71009774e238bd1acffeac08b21c0b Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sat, 25 May 2019 12:31:19 -0500
Subject: [PATCH 05/12] Resolved last line that violated PEP 8

---
 sklearn/tree/export.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py
index f1d70bf2558af..c012632d455a5 100644
--- a/sklearn/tree/export.py
+++ b/sklearn/tree/export.py
@@ -568,7 +568,8 @@ def export(self, decision_tree, ax=None):
             ax = plt.gca()
         ax.clear()
         ax.set_axis_off()
-        my_tree = self._make_tree(0, decision_tree.tree_, decision_tree.criterion)
+        my_tree = self._make_tree(0, decision_tree.tree_,
+                                  decision_tree.criterion)
         draw_tree = buchheim(my_tree)
 
         # important to make sure we're still

From 40f8ddf8adc93199ea4417a2849f6cf69abb86bc Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sat, 25 May 2019 13:44:23 -0500
Subject: [PATCH 06/12] Add test that entropy is calculated and displayed in
 the tree diagram when the decision tree classifier uses the entropy
 criterion

---
 sklearn/tree/tests/test_export.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py
index 7fce48a5cb022..06ca9e446fdcc 100644
--- a/sklearn/tree/tests/test_export.py
+++ b/sklearn/tree/tests/test_export.py
@@ -399,9 +399,28 @@ def test_export_text():
     assert export_text(reg, decimals=1, show_weights=True) == expected_report
 
 
-def test_plot_tree(pyplot):
+def test_plot_tree_entropy(pyplot):
     # mostly smoke tests
-    # Check correctness of export_graphviz
+    # Check correctness of export_graphviz for criterion = entropy
+    clf = DecisionTreeClassifier(max_depth=3,
+                                 min_samples_split=2,
+                                 criterion="entropy",
+                                 random_state=2)
+    clf.fit(X, y)
+
+    # Test export code
+    feature_names = ['first feat', 'sepal_width']
+    nodes = plot_tree(clf, feature_names=feature_names)
+    assert len(nodes) == 3
+    assert nodes[0].get_text() == ("first feat <= 0.0\nentropy = 1.0\n"
+                                   "samples = 6\nvalue = [3, 3]")
+    assert nodes[1].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [3, 0]"
+    assert nodes[2].get_text() == "entropy = 0.0\nsamples = 3\nvalue = [0, 3]"
+
+
+def test_plot_tree_gini(pyplot):
+    # mostly smoke tests
+    # Check correctness of export_graphviz for criterion = gini
     clf = DecisionTreeClassifier(max_depth=3,
                                  min_samples_split=2,
                                  criterion="gini",
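[Reviewer note] The root-node impurities asserted in the two tests follow directly from the class counts: with value = [3, 3] each class has probability 1/2, so the entropy is 1.0 and the Gini impurity is 0.5. A quick check:

```python
from math import log2

# Impurity of the balanced root node (value = [3, 3]) asserted above.
p = [3 / 6, 3 / 6]
entropy = -sum(p_i * log2(p_i) for p_i in p)  # 1.0, as in the entropy test
gini = 1 - sum(p_i ** 2 for p_i in p)         # 0.5, as in the gini test
print(entropy, gini)
```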
From a6823c9f36bea570f9ff5a3e1ead9d73b1c23a7e Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sat, 25 May 2019 22:29:40 -0500
Subject: [PATCH 07/12] Update v0.21.rst

Added #13947 to the change log
---
 doc/whats_new/v0.21.rst | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index bf650629e43fc..f23b21485c315 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -885,6 +885,10 @@ Support for Python 3.4 and below has been officially dropped.
   ``max_depth`` were both specified by the user. Please note that this also
   affects all ensemble methods using decision trees.
   :pr:`12344` by :user:`Adrin Jalali `.
+
+- |Fix| Fixed an issue with :class:`MLPTreeExporter` where it display
+  entropy calculations even for `gini` criterion in DecisionTreeClassifiers.
+  :pr:`13947` by :user:`Frank Hoang `.
 
 :mod:`sklearn.utils`
 ....................
@@ -965,8 +969,8 @@ Baibak, daten-kieker, Denis Kataev, Didi Bar-Zev, Dillon Gardner, Dmitry Mottl,
 Dmitry Vukolov, Dougal J. Sutherland, Dowon, drewmjohnston, Dror Atariah,
 Edward J Brown, Ekaterina Krivich, Elizabeth Sander, Emmanuel Arias, Eric
 Chang, Eric Larson, Erich Schubert, esvhd, Falak, Feda Curic, Federico Caselli,
-Fibinse Xavier`, Finn O'Shea, Gabriel Marzinotto, Gabriel Vacaliuc, Gabriele
-Calvo, Gael Varoquaux, GauravAhlawat, Giuseppe Vettigli, Greg Gandenberger,
+Frank Hoang, Fibinse Xavier`, Finn O'Shea, Gabriel Marzinotto, Gabriel Vacaliuc,
+Gabriele Calvo, Gael Varoquaux, GauravAhlawat, Giuseppe Vettigli, Greg Gandenberger,
 Guillaume Fournier, Guillaume Lemaitre, Gustavo De Mari Pereira, Hanmin Qin,
 haroldfox, hhu-luqi, Hunter McGushion, Ian Sanders, JackLangerman, Jacopo
 Notarstefano, jakirkham, James Bourbeau, Jan Koch, Jan S, janvanrijn, Jarrod

From d66c9d4f7ffeb3c4314d722d4c4a6e32779edcd5 Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sun, 26 May 2019 20:48:51 -0500
Subject: [PATCH 08/12] Update splitting.pyx

Initialize the gain variable with the parent node's negative loss and remove
that term from the _split_gain function in order to improve the performance
of splitting.
---
 sklearn/ensemble/_hist_gradient_boosting/splitting.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
index 2f7c7d3453326..4815803de85cd 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
@@ -435,6 +435,7 @@ cdef class Splitter:
         best_split.gain = -1.
         sum_gradient_left, sum_hessian_left = 0., 0.
         n_samples_left = 0
+        gain = -negative_loss(sum_gradients, sum_hessians, l2_regularization)
 
         for bin_idx in range(self.actual_n_bins[feature_idx]):
             n_samples_left += histograms[feature_idx, bin_idx].count
@@ -462,7 +463,7 @@ cdef class Splitter:
                 # won't get any better (hessians are > 0 since loss is convex)
                 break
 
-            gain = _split_gain(sum_gradient_left, sum_hessian_left,
+            gain += _split_gain(sum_gradient_left, sum_hessian_left,
                                sum_gradient_right, sum_hessian_right,
                                sum_gradients, sum_hessians,
                                self.l2_regularization)
@@ -500,11 +501,10 @@ cdef inline Y_DTYPE_C _split_gain(
     """
    cdef:
        Y_DTYPE_C gain
-    gain = negative_loss(sum_gradient_left, sum_hessian_left,
+    gain += negative_loss(sum_gradient_left, sum_hessian_left,
                          l2_regularization)
     gain += negative_loss(sum_gradient_right, sum_hessian_right,
                           l2_regularization)
-    gain -= negative_loss(sum_gradients, sum_hessians, l2_regularization)
     return gain
 
 cdef inline Y_DTYPE_C negative_loss(
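[Reviewer note] Patch 08 hoists the parent term of the split gain out of the per-bin loop, since that term does not depend on the candidate split point. A plain-Python sketch of the intended refactor, assuming the usual XGBoost-style proxy negative_loss(G, H) = G**2 / (H + l2) (the Cython types and histogram layout are omitted):

```python
def negative_loss(sum_gradients, sum_hessians, l2):
    # Proxy for the loss reduction achievable in a node; higher is better.
    return sum_gradients ** 2 / (sum_hessians + l2)

def best_gain(bins, sum_gradients, sum_hessians, l2):
    # The parent term is constant across candidate splits: compute it once.
    parent = negative_loss(sum_gradients, sum_hessians, l2)
    g_left = h_left = 0.0
    best = -1.0
    for g, h in bins:  # per-bin gradient/hessian sums, scanned left to right
        g_left += g
        h_left += h
        g_right = sum_gradients - g_left
        h_right = sum_hessians - h_left
        gain = (negative_loss(g_left, h_left, l2)
                + negative_loss(g_right, h_right, l2) - parent)
        best = max(best, gain)
    return best

print(best_gain([(2.0, 1.0), (-2.0, 1.0)], 0.0, 2.0, 1.0))  # 4.0
```

Note that the patch as written leaves `_split_gain` starting with `gain += ...` on an uninitialized variable, and the caller now accumulates into `gain` across loop iterations instead of recomputing it per bin; patches 10 and 11 revert the change.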
From 7011bb5b00b46d8e39f380b044f95c9ede293189 Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sun, 26 May 2019 20:57:19 -0500
Subject: [PATCH 09/12] Fix global variable accessing

---
 sklearn/ensemble/_hist_gradient_boosting/splitting.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
index 4815803de85cd..aa133be4159c5 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
@@ -435,7 +435,7 @@ cdef class Splitter:
         best_split.gain = -1.
         sum_gradient_left, sum_hessian_left = 0., 0.
         n_samples_left = 0
-        gain = -negative_loss(sum_gradients, sum_hessians, l2_regularization)
+        gain = -negative_loss(sum_gradients, sum_hessians, self.l2_regularization)
 
         for bin_idx in range(self.actual_n_bins[feature_idx]):
             n_samples_left += histograms[feature_idx, bin_idx].count

From 8d822846ce1baeeb9a367d93027aee9769fac852 Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sun, 26 May 2019 21:03:31 -0500
Subject: [PATCH 10/12] Revert "Fix global variable accessing"

This reverts commit 7011bb5b00b46d8e39f380b044f95c9ede293189.
---
 sklearn/ensemble/_hist_gradient_boosting/splitting.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
index aa133be4159c5..4815803de85cd 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
@@ -435,7 +435,7 @@ cdef class Splitter:
         best_split.gain = -1.
         sum_gradient_left, sum_hessian_left = 0., 0.
         n_samples_left = 0
-        gain = -negative_loss(sum_gradients, sum_hessians, self.l2_regularization)
+        gain = -negative_loss(sum_gradients, sum_hessians, l2_regularization)
 
         for bin_idx in range(self.actual_n_bins[feature_idx]):
             n_samples_left += histograms[feature_idx, bin_idx].count
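[Reviewer note] Patch 09 replaces the bare name `l2_regularization` with the attribute `self.l2_regularization`, and patch 10 reverts it together with the rest of the experiment. The scoping rule behind the attempted fix, in a toy sketch (a hypothetical class, not the real Cython `Splitter`):

```python
# Inside a method, a bare name resolves to a local or module-level binding;
# per-instance state must be read through self, otherwise NameError.
class TinySplitter:
    def __init__(self, l2_regularization):
        self.l2_regularization = l2_regularization

    def parent_term(self, sum_gradients, sum_hessians):
        # Writing just l2_regularization here would raise NameError.
        return sum_gradients ** 2 / (sum_hessians + self.l2_regularization)

print(TinySplitter(1.0).parent_term(4.0, 3.0))  # 4.0
```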
From 61f0e7f8090d4688b04dd0ee7ed60643cf1ec393 Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Sun, 26 May 2019 21:04:14 -0500
Subject: [PATCH 11/12] Revert "Update splitting.pyx"

This reverts commit d66c9d4f7ffeb3c4314d722d4c4a6e32779edcd5.
---
 sklearn/ensemble/_hist_gradient_boosting/splitting.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
index 4815803de85cd..2f7c7d3453326 100644
--- a/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
+++ b/sklearn/ensemble/_hist_gradient_boosting/splitting.pyx
@@ -435,7 +435,6 @@ cdef class Splitter:
         best_split.gain = -1.
         sum_gradient_left, sum_hessian_left = 0., 0.
         n_samples_left = 0
-        gain = -negative_loss(sum_gradients, sum_hessians, l2_regularization)
 
         for bin_idx in range(self.actual_n_bins[feature_idx]):
             n_samples_left += histograms[feature_idx, bin_idx].count
@@ -463,7 +462,7 @@ cdef class Splitter:
                 # won't get any better (hessians are > 0 since loss is convex)
                 break
 
-            gain += _split_gain(sum_gradient_left, sum_hessian_left,
+            gain = _split_gain(sum_gradient_left, sum_hessian_left,
                                sum_gradient_right, sum_hessian_right,
                                sum_gradients, sum_hessians,
                                self.l2_regularization)
@@ -501,10 +500,11 @@ cdef inline Y_DTYPE_C _split_gain(
     """
    cdef:
        Y_DTYPE_C gain
-    gain += negative_loss(sum_gradient_left, sum_hessian_left,
+    gain = negative_loss(sum_gradient_left, sum_hessian_left,
                          l2_regularization)
     gain += negative_loss(sum_gradient_right, sum_hessian_right,
                           l2_regularization)
+    gain -= negative_loss(sum_gradients, sum_hessians, l2_regularization)
     return gain
 
 cdef inline Y_DTYPE_C negative_loss(

From 2a1aa7f20a4aab36ea13370934e0188627ae95cd Mon Sep 17 00:00:00 2001
From: Frank Hoang
Date: Mon, 27 May 2019 01:04:02 -0500
Subject: [PATCH 12/12] Move change log update into proper spot and reference
 public API

---
 doc/whats_new/v0.21.rst | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst
index f23b21485c315..e6e0e6cf620dc 100644
--- a/doc/whats_new/v0.21.rst
+++ b/doc/whats_new/v0.21.rst
@@ -44,6 +44,13 @@ Changelog
   `drop` parameter was not reflected in `get_feature_names`. :pr:`13894`
   by :user:`James Myatt `.
 
+:mod:`sklearn.tree`
+................................
+
+- |Fix| Fixed an issue with :func:`plot_tree` where it displayed entropy
+  calculations even for the `gini` criterion in DecisionTreeClassifiers.
+  :pr:`13947` by :user:`Frank Hoang `.
+
 :mod:`sklearn.utils.sparsefuncs`
 ................................
 
@@ -885,10 +892,6 @@ Support for Python 3.4 and below has been officially dropped.
   ``max_depth`` were both specified by the user. Please note that this also
   affects all ensemble methods using decision trees.
   :pr:`12344` by :user:`Adrin Jalali `.
-
-- |Fix| Fixed an issue with :class:`MLPTreeExporter` where it display
-  entropy calculations even for `gini` criterion in DecisionTreeClassifiers.
-  :pr:`13947` by :user:`Frank Hoang `.
 
 :mod:`sklearn.utils`
 ....................
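[Reviewer note] With `test_plot_tree_entropy` and `test_plot_tree_gini` now differing only in the criterion and the expected root impurity, one possible follow-up (a sketch only, not part of this series; it assumes the module-level `X`, `y` data and the `pyplot` fixture of test_export.py) is to fold them into a single parametrized test:

```python
import pytest
from sklearn.tree import DecisionTreeClassifier, plot_tree

@pytest.mark.parametrize("criterion, impurity",
                         [("gini", 0.5), ("entropy", 1.0)])
def test_plot_tree(pyplot, criterion, impurity):
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2,
                                 criterion=criterion, random_state=2)
    clf.fit(X, y)

    nodes = plot_tree(clf, feature_names=['first feat', 'sepal_width'])
    assert len(nodes) == 3
    assert nodes[0].get_text() == ("first feat <= 0.0\n%s = %s\n"
                                   "samples = 6\nvalue = [3, 3]"
                                   % (criterion, impurity))
```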