From 7e75b73d964e8457a66030a22a51b447f246c0fb Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Mon, 24 Jun 2019 22:02:58 -0400 Subject: [PATCH 01/81] ENH Adds export_html --- sklearn/inspection/__init__.py | 2 + sklearn/inspection/_plot_estimators.py | 252 +++++++++++++ .../inspection/tests/test_plot_estimators.py | 341 ++++++++++++++++++ 3 files changed, 595 insertions(+) create mode 100644 sklearn/inspection/_plot_estimators.py create mode 100644 sklearn/inspection/tests/test_plot_estimators.py diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index 2bf3fe14c0023..d3b6a32430663 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,9 +1,11 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" from .partial_dependence import partial_dependence from .partial_dependence import plot_partial_dependence +from ._plot_estimators import export_html __all__ = [ 'partial_dependence', 'plot_partial_dependence', + 'export_html' ] diff --git a/sklearn/inspection/_plot_estimators.py b/sklearn/inspection/_plot_estimators.py new file mode 100644 index 0000000000000..3e63f1752367f --- /dev/null +++ b/sklearn/inspection/_plot_estimators.py @@ -0,0 +1,252 @@ +from collections import namedtuple +from contextlib import closing +from io import StringIO + +from .._config import config_context +from ..base import BaseEstimator +from ..pipeline import Pipeline +from ..pipeline import FeatureUnion +from ..compose import ColumnTransformer +from ..ensemble import VotingClassifier, VotingRegressor + + +def _estimator_tool_tip(estimator): + """Replace newlines to allow for css content: attr(...) to properly + display tooltips. + """ + return str(estimator).replace('\n', ' ') + + +def _write_label_html(out, name, tool_tip): + """Write label to html""" + out.write('
' + '{}
'.format(tool_tip, name)) + + +_EstHTMLInfo = namedtuple('_EstHTMLInfo', + 'type, estimators, names, name_tips') + + +def _type_of_html_estimator(estimator): + """Generate information about how to display an estimator. + """ + if isinstance(estimator, str): + return _EstHTMLInfo('single', estimator, estimator, estimator) + + elif estimator is None: + return _EstHTMLInfo('single', estimator, 'None', 'None') + + elif isinstance(estimator, Pipeline): + estimators = [step[1] for step in estimator.steps] + names = [step[0] for step in estimator.steps] + name_tips = [_estimator_tool_tip(est) for est in estimators] + return _EstHTMLInfo('serial', estimators, names, name_tips) + + elif isinstance(estimator, ColumnTransformer): + estimators = [trans[1] for trans in estimator.transformers] + names = [trans[0] for trans in estimator.transformers] + name_tips = [trans[2] for trans in estimator.transformers] + return _EstHTMLInfo('parallel', estimators, names, name_tips) + + elif isinstance(estimator, FeatureUnion): + estimators = [trans[1] for trans in estimator.transformer_list] + names = [trans[0] for trans in estimator.transformer_list] + name_tips = [_estimator_tool_tip(est) for est in estimators] + return _EstHTMLInfo('parallel', estimators, names, name_tips) + + elif isinstance(estimator, (VotingClassifier, VotingRegressor)): + estimators = [est[1] for est in estimator.estimators] + names = [est[0] for est in estimator.estimators] + name_tips = [_estimator_tool_tip(est) for est in estimators] + return _EstHTMLInfo('parallel', estimators, names, name_tips) + + elif isinstance(estimator, BaseEstimator): + name = estimator.__class__.__name__ + tool_tip = _estimator_tool_tip(estimator) + return _EstHTMLInfo('single', estimator, name, tool_tip) + + else: + raise ValueError("Invalid estimator") + + +def _write_estimator_html(out, estimator, name): + """Write estimator to html in serial, parallel, or by itself (single). 
+ """ + est_html_info = _type_of_html_estimator(estimator) + + if est_html_info.type == 'serial': + out.write('
') + est_infos = zip(est_html_info.estimators, est_html_info.names, + est_html_info.name_tips) + for est, name, tool_tip in est_infos: + _write_estimator_html(out, est, name) + out.write('
') # sk-serial + + elif est_html_info.type == 'parallel': + out.write('
') + if name: + tool_tip = _estimator_tool_tip(estimator) + _write_label_html(out, name, tool_tip) + out.write('
') + + est_infos = zip(est_html_info.estimators, est_html_info.names, + est_html_info.name_tips) + for est, name, tool_tip in est_infos: + out.write('
') + _write_label_html(out, name, tool_tip) + out.write('
') + _write_estimator_html(out, est, name) + out.write('
') # sk-parallel-item sk-serial + out.write('
') # sk-parallel sk-serial-item + + elif est_html_info.type == 'single': + out.write('
' + '
' + '{}
'.format(est_html_info.name_tips, + est_html_info.names)) + + +_STYLE = """ +.sk-estimator { + font-family: monospace; + background-color: #f0f8ff; + padding: 0.5em; + margin: 0.25em 0.25em; + border: 1px dotted black; + text-align: center; +} +.sk-parallel-item::after { + content: ""; + width: 100%; + border-bottom: 1px solid gray; + flex-grow: 1; +} +.sk-serial::before { + content: ""; + position: absolute; + border-left: 1px solid gray; + top: 2em; + bottom: 0; + left: 50%; +} +.sk-serial { + display: flex; + flex-direction: column; + align-items: center; + float: left; + background: white; +} +.sk-parallel { + display: flex; + align-items: stretch; +} +.sk-parallel-item { + display: flex; + flex-direction: column; + position: relative; +} +.sk-parallel-item:first-child::after { + align-self: flex-end; + width: 50%; +} +.sk-parallel-item:last-child::after { + align-self: flex-start; + width: 50%; +} +.sk-final-spacer { + visibility: hidden; + font-family: monospace; + white-space: pre; +} +.sk-dashed-wrapped { + border: 1px dashed gray; + padding: 0.25em; +} +.sk-label { + text-align: center; + font-family: monospace; + font-weight: bold; + margin: 0; + background: white; +} +.sk-serial-item { + margin-bottom: 0.25em; +} +.sk-container { + display: flex; + flex-direction: column; + align-items: flex-start; +} +.sk-container > .sk-serial::before { + border-left: 0; +} +[sk-data-tooltip] { + position: relative; + cursor: pointer; +} +[sk-data-tooltip]:before { + visibility: hidden; + opacity: 0; + pointer-events: none; + font-weight: 400; +} +[sk-data-tooltip]:before { + position: absolute; + top: 0; + left: 0; + padding: 0.5em; + overflow: hidden; + background-color: #f0f8ff; + border: 1px solid gray; + white-space: pre; + content: attr(sk-data-tooltip); + text-align: left; +} +[sk-data-tooltip]:hover:before { + visibility: visible; + opacity: 1; + z-index: 2; +} +""" + + +def export_html(estimator, print_changed_only=True): + """Build a html representation of an 
estimator + + Parameters + ---------- + estimator : estimator object + The estimator to visualize. + + print_changed_only : bool, optional (default=True) + If True, only the parameters that were set to non-default + values will be printed when printing an estimator. + """ + + with config_context(print_changed_only=print_changed_only), \ + closing(StringIO()) as out: + + if not isinstance(estimator, Pipeline): + estimator = Pipeline([('', estimator)]) + + out.write('') + + out.write('
') + _write_estimator_html(out, estimator, '') + out.write('
') # sk-container + + # Adds whitespace at the end to allow space for hover info + out.write('
') + out.write(_estimator_tool_tip(estimator.steps[-1])) + out.write('
') # sk-final-spacer + out.write("") + + html_output = out.getvalue() + + try: + from IPython.display import HTML + return HTML(html_output) + except ImportError: + return html_output diff --git a/sklearn/inspection/tests/test_plot_estimators.py b/sklearn/inspection/tests/test_plot_estimators.py new file mode 100644 index 0000000000000..4d83b30ab6d63 --- /dev/null +++ b/sklearn/inspection/tests/test_plot_estimators.py @@ -0,0 +1,341 @@ +from contextlib import closing +from io import StringIO + +import pytest + +from sklearn.linear_model import LogisticRegression +from sklearn.neural_network import MLPClassifier +from sklearn.impute import SimpleImputer +from sklearn.decomposition import PCA +from sklearn.decomposition import TruncatedSVD +from sklearn.pipeline import Pipeline +from sklearn.pipeline import FeatureUnion +from sklearn.compose import ColumnTransformer +from sklearn.ensemble import VotingClassifier +from sklearn.feature_selection import SelectPercentile +from sklearn.preprocessing import OneHotEncoder +from sklearn.inspection._plot_estimators import _write_label_html +from sklearn.inspection._plot_estimators import _estimator_tool_tip +from sklearn.inspection._plot_estimators import _type_of_html_estimator +from sklearn.inspection._plot_estimators import export_html +from sklearn.inspection._plot_estimators import _STYLE + + +@pytest.mark.parametrize('est, expected', [ + ('None', 'None'), + ('passthrough', 'passthrough'), + ('hello\nworld', 'hello world') +]) +def test_estimator_tool_tip(est, expected): + assert expected == _estimator_tool_tip(est) + + +def test_write_label_html(): + name = "LogisticRegression" + tool_tip = "hello-world" + + expected = ('
' + 'LogisticRegression
') + + with closing(StringIO()) as out: + _write_label_html(out, name, tool_tip) + html_label = out.getvalue() + assert html_label == expected + + +def test_type_of_html_estimator_error(): + with pytest.raises(ValueError, match="Invalid estimator"): + _type_of_html_estimator(100) + + +@pytest.mark.parametrize('est', ['passthrough', 'drop', None]) +def test_type_of_html_estimator_single_str_none(est): + est_html_info = _type_of_html_estimator(est) + assert est_html_info.type == 'single' + assert est_html_info.estimators == est + assert est_html_info.names == str(est) + assert est_html_info.name_tips == str(est) + + +def test_type_of_html_estimator_single_estimator(): + est = LogisticRegression(C=10.0) + est_html_info = _type_of_html_estimator(est) + assert est_html_info.type == 'single' + assert est_html_info.estimators == est + assert est_html_info.names == est.__class__.__name__ + assert est_html_info.name_tips == _estimator_tool_tip(est) + + +def test_type_of_html_estimator_pipeline(): + pipe = Pipeline([ + ('imputer', SimpleImputer()), + ('classifier', LogisticRegression()) + ]) + est_html_info = _type_of_html_estimator(pipe) + assert est_html_info.type == 'serial' + assert est_html_info.estimators == [step[1] for step in pipe.steps] + assert est_html_info.names == ['imputer', 'classifier'] + assert est_html_info.name_tips == [_estimator_tool_tip(step[1]) + for step in pipe.steps] + + +def test_type_of_html_estimator_feature_union(): + f_union = FeatureUnion([ + ('pca', PCA()), ('svd', TruncatedSVD()) + ]) + est_html_info = _type_of_html_estimator(f_union) + assert est_html_info.type == 'parallel' + assert est_html_info.names == ['pca', 'svd'] + assert est_html_info.estimators == [trans[1] + for trans in f_union.transformer_list] + assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) + for trans in f_union.transformer_list] + + +def test_type_of_html_estimator_voting(): + clf = VotingClassifier([ + ('log_reg', LogisticRegression()), + ('mlp', 
MLPClassifier()) + ]) + est_html_info = _type_of_html_estimator(clf) + assert est_html_info.type == 'parallel' + assert est_html_info.estimators == [trans[1] + for trans in clf.estimators] + assert est_html_info.names == ['log_reg', 'mlp'] + assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) + for trans in clf.estimators] + + +def test_type_of_html_estimator_column_transformer(): + ct = ColumnTransformer([ + ('pca', PCA(), ['num1', 'num2']), + ('svd', TruncatedSVD, [0, 3]) + ]) + est_html_info = _type_of_html_estimator(ct) + assert est_html_info.type == 'parallel' + assert est_html_info.estimators == [trans[1] + for trans in ct.transformers] + assert est_html_info.names == ['pca', 'svd'] + assert est_html_info.name_tips == [['num1', 'num2'], [0, 3]] + + +expected_export_html = """ + +
+
+
+
+ preprocessor +
+
+
+
+ num +
+
+
+
+
+ passthrough +
+
+
+
+ SimpleImputer +
+
+
+
+
+
+
cat
+
+
+
+
+ SimpleImputer +
+
+
+
+ OneHotEncoder +
+
+
+
+
+
+
+
+
+ feat_u +
+
+
+
+ pca +
+
+
+
+ PCA +
+
+
+
+
+
+ tsvd +
+
+
+
+
+ TruncatedSVD +
+
+
+
+ SelectPercentile +
+
+
+
+
+
+
+
+
+ classifier +
+
+
+
+ lr +
+
+
+
+ LogisticRegression +
+
+
+
+
+
+ mlp +
+
+
+
+ MLPClassifier +
+
+
+
+
+
+
+
+
+ (\'classifier\', VotingClassifier(estimators=[(\'lr\', + LogisticRegression(random_state=1)), (\'mlp\', + MLPClassifier(alpha=0.001))])) +
+ +""".format(style=_STYLE).replace('\n', '').replace(' ', '') + + +def test_export_html(): + num_trans = Pipeline(steps=[ + ('pass', 'passthrough'), + ('imputer', SimpleImputer(strategy='median')) + ]) + + cat_trans = Pipeline(steps=[ + ('imputer', SimpleImputer(strategy='constant', + missing_values='empty')), + ('one-hot', OneHotEncoder()) + ]) + + preprocess = ColumnTransformer([ + ('num', num_trans, ['a', 'b', 'c', 'd', 'e']), + ('cat', cat_trans, [0, 1, 2, 3]) + ]) + + feat_u = FeatureUnion([ + ('pca', PCA(n_components=1)), + ('tsvd', Pipeline([('first', TruncatedSVD(n_components=3)), + ('select', SelectPercentile())])) + ]) + + clf = VotingClassifier([ + ('lr', LogisticRegression(solver='lbfgs', random_state=1)), + ('mlp', MLPClassifier(alpha=0.001)) + ]) + + pipe = Pipeline([ + ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) + ]) + html_output = export_html(pipe) + + # IPython HTML + if hasattr(html_output, 'data'): + html_output = html_output.data + assert expected_export_html == html_output.replace(' ', '') From 2ce355218a0f191f93e5031f64841709c00eb9a0 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Tue, 25 Jun 2019 09:09:20 -0400 Subject: [PATCH 02/81] CLN Checks for jupyter context --- sklearn/inspection/_plot_estimators.py | 15 +++++++++++++-- sklearn/inspection/tests/test_plot_estimators.py | 3 --- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/sklearn/inspection/_plot_estimators.py b/sklearn/inspection/_plot_estimators.py index 3e63f1752367f..61a329cda74b4 100644 --- a/sklearn/inspection/_plot_estimators.py +++ b/sklearn/inspection/_plot_estimators.py @@ -211,7 +211,7 @@ def _write_estimator_html(out, estimator, name): def export_html(estimator, print_changed_only=True): - """Build a html representation of an estimator + """Build a HTML representation of an estimator Parameters ---------- @@ -221,6 +221,12 @@ def export_html(estimator, print_changed_only=True): print_changed_only : bool, optional (default=True) If 
True, only the parameters that were set to non-default values will be printed when printing an estimator. + + Returns + ------- + html: str or iPython HTML object + HTML representation of estimator. When called in jupyter notebook or + lab, a iPython HTML object is returned. """ with config_context(print_changed_only=print_changed_only), \ @@ -245,8 +251,13 @@ def export_html(estimator, print_changed_only=True): html_output = out.getvalue() + # wrap in iPython HTML if in a notebook context try: + cls_name = get_ipython().__class__.__name__ + if cls_name != 'ZMQInteractiveShell': + return html_output + from IPython.display import HTML return HTML(html_output) - except ImportError: + except (ImportError, NameError): return html_output diff --git a/sklearn/inspection/tests/test_plot_estimators.py b/sklearn/inspection/tests/test_plot_estimators.py index 4d83b30ab6d63..0f4c6ad74907c 100644 --- a/sklearn/inspection/tests/test_plot_estimators.py +++ b/sklearn/inspection/tests/test_plot_estimators.py @@ -335,7 +335,4 @@ def test_export_html(): ]) html_output = export_html(pipe) - # IPython HTML - if hasattr(html_output, 'data'): - html_output = html_output.data assert expected_export_html == html_output.replace(' ', '') From 8b015e1f5f67011091d780c25133b1b08dac7122 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Fri, 28 Jun 2019 16:06:40 -0400 Subject: [PATCH 03/81] ENH Updates style --- sklearn/inspection/_plot_estimators.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/inspection/_plot_estimators.py b/sklearn/inspection/_plot_estimators.py index 61a329cda74b4..313a774c4c418 100644 --- a/sklearn/inspection/_plot_estimators.py +++ b/sklearn/inspection/_plot_estimators.py @@ -176,9 +176,8 @@ def _write_estimator_html(out, estimator, name): display: flex; flex-direction: column; align-items: flex-start; -} -.sk-container > .sk-serial::before { - border-left: 0; + position: relative; + float: left; } [sk-data-tooltip] { position: relative; 
From 893dbfc68533899727076385c994030ef973ce4b Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sat, 29 Jun 2019 22:28:29 -0400 Subject: [PATCH 04/81] TST refactor test_numeric_stability (#14221) --- sklearn/preprocessing/tests/test_discretization.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py index 102b789eb093d..6dd0abdb99e9f 100644 --- a/sklearn/preprocessing/tests/test_discretization.py +++ b/sklearn/preprocessing/tests/test_discretization.py @@ -135,15 +135,15 @@ def test_transform_1d_behavior(): assert_raises(ValueError, est.transform, X) -def test_numeric_stability(): +@pytest.mark.parametrize('i', range(1, 9)) +def test_numeric_stability(i): X_init = np.array([2., 4., 6., 8., 10.]).reshape(-1, 1) Xt_expected = np.array([0, 0, 1, 1, 1]).reshape(-1, 1) # Test up to discretizing nano units - for i in range(1, 9): - X = X_init / 10**i - Xt = KBinsDiscretizer(n_bins=2, encode='ordinal').fit_transform(X) - assert_array_equal(Xt_expected, Xt) + X = X_init / 10**i + Xt = KBinsDiscretizer(n_bins=2, encode='ordinal').fit_transform(X) + assert_array_equal(Xt_expected, Xt) def test_invalid_encode_option(): From 9ccdbd06bcf3d3f4d84883f2229e8e0d02ed5171 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Mon, 8 Jul 2019 11:07:07 -0500 Subject: [PATCH 05/81] CLN Renames function --- sklearn/inspection/__init__.py | 4 ++-- sklearn/inspection/_plot_estimators.py | 2 +- sklearn/inspection/tests/test_plot_estimators.py | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index d3b6a32430663..767b14a2cce63 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,11 +1,11 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" from .partial_dependence import partial_dependence from .partial_dependence 
import plot_partial_dependence -from ._plot_estimators import export_html +from ._plot_estimators import display_estimator __all__ = [ 'partial_dependence', 'plot_partial_dependence', - 'export_html' + 'display_estimator' ] diff --git a/sklearn/inspection/_plot_estimators.py b/sklearn/inspection/_plot_estimators.py index 313a774c4c418..73fb6a8ef89cc 100644 --- a/sklearn/inspection/_plot_estimators.py +++ b/sklearn/inspection/_plot_estimators.py @@ -209,7 +209,7 @@ def _write_estimator_html(out, estimator, name): """ -def export_html(estimator, print_changed_only=True): +def display_estimator(estimator, print_changed_only=True): """Build a HTML representation of an estimator Parameters diff --git a/sklearn/inspection/tests/test_plot_estimators.py b/sklearn/inspection/tests/test_plot_estimators.py index 0f4c6ad74907c..0bf2169e0a33f 100644 --- a/sklearn/inspection/tests/test_plot_estimators.py +++ b/sklearn/inspection/tests/test_plot_estimators.py @@ -17,7 +17,7 @@ from sklearn.inspection._plot_estimators import _write_label_html from sklearn.inspection._plot_estimators import _estimator_tool_tip from sklearn.inspection._plot_estimators import _type_of_html_estimator -from sklearn.inspection._plot_estimators import export_html +from sklearn.inspection._plot_estimators import display_estimator from sklearn.inspection._plot_estimators import _STYLE @@ -120,7 +120,7 @@ def test_type_of_html_estimator_column_transformer(): assert est_html_info.name_tips == [['num1', 'num2'], [0, 3]] -expected_export_html = """ @@ -302,7 +302,7 @@ def test_type_of_html_estimator_column_transformer(): """.format(style=_STYLE).replace('\n', '').replace(' ', '') -def test_export_html(): +def test_display_estimator(): num_trans = Pipeline(steps=[ ('pass', 'passthrough'), ('imputer', SimpleImputer(strategy='median')) @@ -333,6 +333,6 @@ def test_export_html(): pipe = Pipeline([ ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) ]) - html_output = export_html(pipe) + 
html_output = display_estimator(pipe) - assert expected_export_html == html_output.replace(' ', '') + assert expected_display_estimator == html_output.replace(' ', '') From 4df33f82f55036f0c9bcdd825d3a64bd49db552e Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Thu, 18 Jul 2019 16:23:21 -0400 Subject: [PATCH 06/81] ENH Adds sphinx extension to visiualize --- doc/conf.py | 3 +- doc/modules/compose.rst | 40 +++++++++++++++++ doc/sphinxext/display_html.py | 44 +++++++++++++++++++ .../plot_column_transformer_mixed_types.py | 5 ++- sklearn/inspection/_plot_estimators.py | 39 +++++++++------- 5 files changed, 114 insertions(+), 17 deletions(-) create mode 100644 doc/sphinxext/display_html.py diff --git a/doc/conf.py b/doc/conf.py index 251b25838d57b..7d7cbf6936a42 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -37,7 +37,8 @@ 'sphinx.ext.imgconverter', 'sphinx_gallery.gen_gallery', 'sphinx_issues', - 'custom_references_resolver' + 'custom_references_resolver', + 'display_html' ] # this is needed for some reason... diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 0ac33ce7a4d4a..ae0f203011406 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -512,6 +512,46 @@ above example would be:: ('countvectorizer', CountVectorizer(), 'title')]) +.. _visualizing_composite_estimators: + +Visualizing Composite Estimators +================================ + +:func:`sklearn.inspection.display_estimator` outputs a html representation of +composite estimators. This can be useful to diagnose or visualize a Pipeline +with may estimators. For example, the estimator defined in +The composite estimator defined in +:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` +can be visualized as: + +.. 
display_html:: + + from sklearn.compose import ColumnTransformer + from sklearn.pipeline import Pipeline + from sklearn.impute import SimpleImputer + from sklearn.preprocessing import StandardScaler, OneHotEncoder + from sklearn.linear_model import LogisticRegression + from sklearn.inspection import display_estimator + + numeric_features = ['age', 'fare'] + numeric_transformer = Pipeline(steps=[ + ('imputer', SimpleImputer(strategy='median')), + ('scaler', StandardScaler())]) + + categorical_features = ['embarked', 'sex', 'pclass'] + categorical_transformer = Pipeline(steps=[ + ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), + ('onehot', OneHotEncoder(handle_unknown='ignore'))]) + + preprocessor = ColumnTransformer( + transformers=[ + ('num', numeric_transformer, numeric_features), + ('cat', categorical_transformer, categorical_features)]) + + clf = Pipeline(steps=[('preprocessor', preprocessor), + ('classifier', LogisticRegression())]) + print(display_estimator(clf)) + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py` diff --git a/doc/sphinxext/display_html.py b/doc/sphinxext/display_html.py new file mode 100644 index 0000000000000..31bcaf83280d0 --- /dev/null +++ b/doc/sphinxext/display_html.py @@ -0,0 +1,44 @@ +""" +Primary used to display the html output `sklearn.inspection.display_estimator` +in sphinx. +""" +import sys +from docutils.parsers.rst import Directive +from docutils import nodes +from io import StringIO + + +class ExecuteHTML(Directive): + + has_content = True + required_arguments = 0 + optional_arguments = 0 + + @classmethod + def execute(cls, code): + orig_stdout, orig_stderr = sys.stdout, sys.stderr + + output, err = StringIO(), StringIO() + + sys.stdout, sys.stderr = output, err + exec(code) + sys.stdout, sys.stderr = orig_stdout, orig_stderr + + return "".join(['
', + output.getvalue(), err.getvalue(), "
"]) + + def run(self): + output = [] + code = "\n".join(self.content) + code_results = self.execute(code) + + input_code = nodes.literal_block(code, code) + input_code['language'] = 'python' + output.append(input_code) + code_results = nodes.raw('', code_results, format='html') + output.append(code_results) + return output + + +def setup(app): + app.add_directive('display_html', ExecuteHTML) diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 264ae7495296c..ddbc5fe128d78 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -79,10 +79,13 @@ clf.fit(X_train, y_train) print("model score: %.3f" % clf.score(X_test, y_test)) +############################################################################### +# A HTML visualization of this classifier can be found in +# :ref:`visualizing_composite_estimators`. ############################################################################### # Using the prediction pipeline in a grid search -############################################################################### +# ---------------------------------------------- # Grid search can also be performed on the different preprocessing steps # defined in the ``ColumnTransformer`` object, together with the classifier's # hyperparameters as part of the ``Pipeline``. 
diff --git a/sklearn/inspection/_plot_estimators.py b/sklearn/inspection/_plot_estimators.py index 73fb6a8ef89cc..f4c150362dca8 100644 --- a/sklearn/inspection/_plot_estimators.py +++ b/sklearn/inspection/_plot_estimators.py @@ -206,6 +206,9 @@ def _write_estimator_html(out, estimator, name): opacity: 1; z-index: 2; } +.sk-top-container { + display: flex; +} """ @@ -232,31 +235,37 @@ def display_estimator(estimator, print_changed_only=True): closing(StringIO()) as out: if not isinstance(estimator, Pipeline): - estimator = Pipeline([('', estimator)]) + estimator = Pipeline([(estimator.__class__.__name__, estimator)]) - out.write('') - out.write('
') + out.write('
') _write_estimator_html(out, estimator, '') - out.write('
') # sk-container - - # Adds whitespace at the end to allow space for hover info - out.write('
') - out.write(_estimator_tool_tip(estimator.steps[-1])) - out.write('
') # sk-final-spacer - out.write("") - - html_output = out.getvalue() + out.write('
') # sk-top-container # sk-container # wrap in iPython HTML if in a notebook context try: cls_name = get_ipython().__class__.__name__ if cls_name != 'ZMQInteractiveShell': - return html_output + out.write("") + return out.getvalue() + + # Adds whitespace at the end to allow space for hover info + # in jupyter notebook or lab + largest_est_repr = "" + for est in estimator.steps: + est_repr = _estimator_tool_tip(est) + if len(est_repr) > len(largest_est_repr): + largest_est_repr = est_repr + out.write('
') + out.write(largest_est_repr) + out.write('
') # sk-final-spacer from IPython.display import HTML - return HTML(html_output) + out.write("") + return HTML(out.getvalue()) except (ImportError, NameError): - return html_output + out.write("") + return out.getvalue() From 8f57191c3dec5250c52d3a4c7d5e3dbc7cca460c Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Fri, 19 Jul 2019 10:08:10 -0400 Subject: [PATCH 07/81] MNT Sets font color --- sklearn/inspection/_plot_estimators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/inspection/_plot_estimators.py b/sklearn/inspection/_plot_estimators.py index f4c150362dca8..5303ee872910a 100644 --- a/sklearn/inspection/_plot_estimators.py +++ b/sklearn/inspection/_plot_estimators.py @@ -208,6 +208,7 @@ def _write_estimator_html(out, estimator, name): } .sk-top-container { display: flex; + color: black; } """ From 677e35a8bfb6458b242ac71ba47921bccfee79b7 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Sat, 20 Jul 2019 20:50:46 -0400 Subject: [PATCH 08/81] ENH Update styling --- sklearn/inspection/__init__.py | 2 +- ...ot_estimators.py => _display_estimator.py} | 53 ++- .../tests/test_display_estimator.py | 367 ++++++++++++++++++ .../inspection/tests/test_plot_estimators.py | 338 ---------------- 4 files changed, 390 insertions(+), 370 deletions(-) rename sklearn/inspection/{_plot_estimators.py => _display_estimator.py} (87%) create mode 100644 sklearn/inspection/tests/test_display_estimator.py delete mode 100644 sklearn/inspection/tests/test_plot_estimators.py diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index 767b14a2cce63..d890bff76f8f4 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,7 +1,7 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" from .partial_dependence import partial_dependence from .partial_dependence import plot_partial_dependence -from ._plot_estimators import display_estimator +from ._display_estimator import display_estimator __all__ = [ diff 
--git a/sklearn/inspection/_plot_estimators.py b/sklearn/inspection/_display_estimator.py similarity index 87% rename from sklearn/inspection/_plot_estimators.py rename to sklearn/inspection/_display_estimator.py index 5303ee872910a..165cb97029889 100644 --- a/sklearn/inspection/_plot_estimators.py +++ b/sklearn/inspection/_display_estimator.py @@ -19,8 +19,9 @@ def _estimator_tool_tip(estimator): def _write_label_html(out, name, tool_tip): """Write label to html""" - out.write('
' - '{}
'.format(tool_tip, name)) + out.write('
' + '
' + '{}
'.format(tool_tip, name)) _EstHTMLInfo = namedtuple('_EstHTMLInfo', @@ -113,6 +114,7 @@ def _write_estimator_html(out, estimator, name): padding: 0.5em; margin: 0.25em 0.25em; border: 1px dotted black; + border-radius: 0.25em; text-align: center; } .sk-parallel-item::after { @@ -144,6 +146,7 @@ def _write_estimator_html(out, estimator, name): display: flex; flex-direction: column; position: relative; + background: white; } .sk-parallel-item:first-child::after { align-self: flex-end; @@ -153,11 +156,6 @@ def _write_estimator_html(out, estimator, name): align-self: flex-start; width: 50%; } -.sk-final-spacer { - visibility: hidden; - font-family: monospace; - white-space: pre; -} .sk-dashed-wrapped { border: 1px dashed gray; padding: 0.25em; @@ -166,11 +164,20 @@ def _write_estimator_html(out, estimator, name): text-align: center; font-family: monospace; font-weight: bold; - margin: 0; background: white; + display: inline-block; + border: 1px dotted rgb(171, 171, 171); + border-radius: 0.25em; + padding: 0.2em 0.5em; + margin: 0.1em; +} +.sk-label-container { + text-align: center; + border: #f0f8ff solid red; } .sk-serial-item { margin-bottom: 0.25em; + background: white; } .sk-container { display: flex; @@ -186,12 +193,9 @@ def _write_estimator_html(out, estimator, name): [sk-data-tooltip]:before { visibility: hidden; opacity: 0; - pointer-events: none; font-weight: 400; -} -[sk-data-tooltip]:before { position: absolute; - top: 0; + top: 100%; left: 0; padding: 0.5em; overflow: hidden; @@ -238,35 +242,22 @@ def display_estimator(estimator, print_changed_only=True): if not isinstance(estimator, Pipeline): estimator = Pipeline([(estimator.__class__.__name__, estimator)]) - out.write('') out.write('
') _write_estimator_html(out, estimator, '') out.write('
') # sk-top-container # sk-container + out.write('') + html_output = out.getvalue() # wrap in iPython HTML if in a notebook context try: cls_name = get_ipython().__class__.__name__ if cls_name != 'ZMQInteractiveShell': - out.write("") - return out.getvalue() - - # Adds whitespace at the end to allow space for hover info - # in jupyter notebook or lab - largest_est_repr = "" - for est in estimator.steps: - est_repr = _estimator_tool_tip(est) - if len(est_repr) > len(largest_est_repr): - largest_est_repr = est_repr - out.write('
') - out.write(largest_est_repr) - out.write('
') # sk-final-spacer - + return html_output from IPython.display import HTML - out.write("") - return HTML(out.getvalue()) + return HTML(html_output) except (ImportError, NameError): - out.write("") - return out.getvalue() + return html_output diff --git a/sklearn/inspection/tests/test_display_estimator.py b/sklearn/inspection/tests/test_display_estimator.py new file mode 100644 index 0000000000000..d02f2eb45aed1 --- /dev/null +++ b/sklearn/inspection/tests/test_display_estimator.py @@ -0,0 +1,367 @@ +from contextlib import closing +from io import StringIO + +import pytest + +from sklearn.linear_model import LogisticRegression +from sklearn.neural_network import MLPClassifier +from sklearn.impute import SimpleImputer +from sklearn.decomposition import PCA +from sklearn.decomposition import TruncatedSVD +from sklearn.pipeline import Pipeline +from sklearn.pipeline import FeatureUnion +from sklearn.compose import ColumnTransformer +from sklearn.ensemble import VotingClassifier +from sklearn.feature_selection import SelectPercentile +from sklearn.preprocessing import OneHotEncoder +from sklearn.inspection._display_estimator import _write_label_html +from sklearn.inspection._display_estimator import _estimator_tool_tip +from sklearn.inspection._display_estimator import _type_of_html_estimator +from sklearn.inspection._display_estimator import display_estimator +from sklearn.inspection._display_estimator import _STYLE + + +@pytest.mark.parametrize('est, expected', [ + ('None', 'None'), + ('passthrough', 'passthrough'), + ('hello\nworld', 'hello world') +]) +def test_estimator_tool_tip(est, expected): + assert expected == _estimator_tool_tip(est) + + +def test_write_label_html(): + name = "LogisticRegression" + tool_tip = "hello-world" + + expected = ('
' + 'LogisticRegression
') + + with closing(StringIO()) as out: + _write_label_html(out, name, tool_tip) + html_label = out.getvalue() + assert html_label == expected + + +def test_type_of_html_estimator_error(): + with pytest.raises(ValueError, match="Invalid estimator"): + _type_of_html_estimator(100) + + +@pytest.mark.parametrize('est', ['passthrough', 'drop', None]) +def test_type_of_html_estimator_single_str_none(est): + est_html_info = _type_of_html_estimator(est) + assert est_html_info.type == 'single' + assert est_html_info.estimators == est + assert est_html_info.names == str(est) + assert est_html_info.name_tips == str(est) + + +def test_type_of_html_estimator_single_estimator(): + est = LogisticRegression(C=10.0) + est_html_info = _type_of_html_estimator(est) + assert est_html_info.type == 'single' + assert est_html_info.estimators == est + assert est_html_info.names == est.__class__.__name__ + assert est_html_info.name_tips == _estimator_tool_tip(est) + + +def test_type_of_html_estimator_pipeline(): + pipe = Pipeline([ + ('imputer', SimpleImputer()), + ('classifier', LogisticRegression()) + ]) + est_html_info = _type_of_html_estimator(pipe) + assert est_html_info.type == 'serial' + assert est_html_info.estimators == [step[1] for step in pipe.steps] + assert est_html_info.names == ['imputer', 'classifier'] + assert est_html_info.name_tips == [_estimator_tool_tip(step[1]) + for step in pipe.steps] + + +def test_type_of_html_estimator_feature_union(): + f_union = FeatureUnion([ + ('pca', PCA()), ('svd', TruncatedSVD()) + ]) + est_html_info = _type_of_html_estimator(f_union) + assert est_html_info.type == 'parallel' + assert est_html_info.names == ['pca', 'svd'] + assert est_html_info.estimators == [trans[1] + for trans in f_union.transformer_list] + assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) + for trans in f_union.transformer_list] + + +def test_type_of_html_estimator_voting(): + clf = VotingClassifier([ + ('log_reg', LogisticRegression()), + ('mlp', 
MLPClassifier()) + ]) + est_html_info = _type_of_html_estimator(clf) + assert est_html_info.type == 'parallel' + assert est_html_info.estimators == [trans[1] + for trans in clf.estimators] + assert est_html_info.names == ['log_reg', 'mlp'] + assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) + for trans in clf.estimators] + + +def test_type_of_html_estimator_column_transformer(): + ct = ColumnTransformer([ + ('pca', PCA(), ['num1', 'num2']), + ('svd', TruncatedSVD, [0, 3]) + ]) + est_html_info = _type_of_html_estimator(ct) + assert est_html_info.type == 'parallel' + assert est_html_info.estimators == [trans[1] + for trans in ct.transformers] + assert est_html_info.names == ['pca', 'svd'] + assert est_html_info.name_tips == [['num1', 'num2'], [0, 3]] + + +expected_display_estimator = """ + +
+
+
+
+
+
+ preprocessor +
+
+
+
+
+
+ num +
+
+
+
+
+
+ passthrough +
+
+
+
+ SimpleImputer +
+
+
+
+
+
+
+
cat
+
+
+
+
+
+ SimpleImputer +
+
+
+
+ OneHotEncoder +
+
+
+
+
+
+
+
+
+
+ feat_u +
+
+
+
+
+
+ pca +
+
+
+
+
+ PCA +
+
+
+
+
+
+
+ tsvd +
+
+
+
+
+
+ TruncatedSVD +
+
+
+
+ SelectPercentile +
+
+
+
+
+
+
+
+
+
+ classifier +
+
+
+
+
+
+ lr +
+
+
+
+
+ LogisticRegression +
+
+
+
+
+
+
+ mlp +
+
+
+
+
+ MLPClassifier +
+
+
+
+
+
+
+
+
+ + +""".format(style=_STYLE).replace('\n', '').replace(' ', '') + + +def test_display_estimator(): + num_trans = Pipeline(steps=[ + ('pass', 'passthrough'), + ('imputer', SimpleImputer(strategy='median')) + ]) + + cat_trans = Pipeline(steps=[ + ('imputer', SimpleImputer(strategy='constant', + missing_values='empty')), + ('one-hot', OneHotEncoder()) + ]) + + preprocess = ColumnTransformer([ + ('num', num_trans, ['a', 'b', 'c', 'd', 'e']), + ('cat', cat_trans, [0, 1, 2, 3]) + ]) + + feat_u = FeatureUnion([ + ('pca', PCA(n_components=1)), + ('tsvd', Pipeline([('first', TruncatedSVD(n_components=3)), + ('select', SelectPercentile())])) + ]) + + clf = VotingClassifier([ + ('lr', LogisticRegression(solver='lbfgs', random_state=1)), + ('mlp', MLPClassifier(alpha=0.001)) + ]) + + pipe = Pipeline([ + ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) + ]) + html_output = display_estimator(pipe) + + assert expected_display_estimator == html_output.replace(' ', '') diff --git a/sklearn/inspection/tests/test_plot_estimators.py b/sklearn/inspection/tests/test_plot_estimators.py deleted file mode 100644 index 0bf2169e0a33f..0000000000000 --- a/sklearn/inspection/tests/test_plot_estimators.py +++ /dev/null @@ -1,338 +0,0 @@ -from contextlib import closing -from io import StringIO - -import pytest - -from sklearn.linear_model import LogisticRegression -from sklearn.neural_network import MLPClassifier -from sklearn.impute import SimpleImputer -from sklearn.decomposition import PCA -from sklearn.decomposition import TruncatedSVD -from sklearn.pipeline import Pipeline -from sklearn.pipeline import FeatureUnion -from sklearn.compose import ColumnTransformer -from sklearn.ensemble import VotingClassifier -from sklearn.feature_selection import SelectPercentile -from sklearn.preprocessing import OneHotEncoder -from sklearn.inspection._plot_estimators import _write_label_html -from sklearn.inspection._plot_estimators import _estimator_tool_tip -from 
sklearn.inspection._plot_estimators import _type_of_html_estimator -from sklearn.inspection._plot_estimators import display_estimator -from sklearn.inspection._plot_estimators import _STYLE - - -@pytest.mark.parametrize('est, expected', [ - ('None', 'None'), - ('passthrough', 'passthrough'), - ('hello\nworld', 'hello world') -]) -def test_estimator_tool_tip(est, expected): - assert expected == _estimator_tool_tip(est) - - -def test_write_label_html(): - name = "LogisticRegression" - tool_tip = "hello-world" - - expected = ('
' - 'LogisticRegression
') - - with closing(StringIO()) as out: - _write_label_html(out, name, tool_tip) - html_label = out.getvalue() - assert html_label == expected - - -def test_type_of_html_estimator_error(): - with pytest.raises(ValueError, match="Invalid estimator"): - _type_of_html_estimator(100) - - -@pytest.mark.parametrize('est', ['passthrough', 'drop', None]) -def test_type_of_html_estimator_single_str_none(est): - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' - assert est_html_info.estimators == est - assert est_html_info.names == str(est) - assert est_html_info.name_tips == str(est) - - -def test_type_of_html_estimator_single_estimator(): - est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' - assert est_html_info.estimators == est - assert est_html_info.names == est.__class__.__name__ - assert est_html_info.name_tips == _estimator_tool_tip(est) - - -def test_type_of_html_estimator_pipeline(): - pipe = Pipeline([ - ('imputer', SimpleImputer()), - ('classifier', LogisticRegression()) - ]) - est_html_info = _type_of_html_estimator(pipe) - assert est_html_info.type == 'serial' - assert est_html_info.estimators == [step[1] for step in pipe.steps] - assert est_html_info.names == ['imputer', 'classifier'] - assert est_html_info.name_tips == [_estimator_tool_tip(step[1]) - for step in pipe.steps] - - -def test_type_of_html_estimator_feature_union(): - f_union = FeatureUnion([ - ('pca', PCA()), ('svd', TruncatedSVD()) - ]) - est_html_info = _type_of_html_estimator(f_union) - assert est_html_info.type == 'parallel' - assert est_html_info.names == ['pca', 'svd'] - assert est_html_info.estimators == [trans[1] - for trans in f_union.transformer_list] - assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) - for trans in f_union.transformer_list] - - -def test_type_of_html_estimator_voting(): - clf = VotingClassifier([ - ('log_reg', LogisticRegression()), - ('mlp', 
MLPClassifier()) - ]) - est_html_info = _type_of_html_estimator(clf) - assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in clf.estimators] - assert est_html_info.names == ['log_reg', 'mlp'] - assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) - for trans in clf.estimators] - - -def test_type_of_html_estimator_column_transformer(): - ct = ColumnTransformer([ - ('pca', PCA(), ['num1', 'num2']), - ('svd', TruncatedSVD, [0, 3]) - ]) - est_html_info = _type_of_html_estimator(ct) - assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in ct.transformers] - assert est_html_info.names == ['pca', 'svd'] - assert est_html_info.name_tips == [['num1', 'num2'], [0, 3]] - - -expected_display_estimator = """ - -
-
-
-
- preprocessor -
-
-
-
- num -
-
-
-
-
- passthrough -
-
-
-
- SimpleImputer -
-
-
-
-
-
-
cat
-
-
-
-
- SimpleImputer -
-
-
-
- OneHotEncoder -
-
-
-
-
-
-
-
-
- feat_u -
-
-
-
- pca -
-
-
-
- PCA -
-
-
-
-
-
- tsvd -
-
-
-
-
- TruncatedSVD -
-
-
-
- SelectPercentile -
-
-
-
-
-
-
-
-
- classifier -
-
-
-
- lr -
-
-
-
- LogisticRegression -
-
-
-
-
-
- mlp -
-
-
-
- MLPClassifier -
-
-
-
-
-
-
-
-
- (\'classifier\', VotingClassifier(estimators=[(\'lr\', - LogisticRegression(random_state=1)), (\'mlp\', - MLPClassifier(alpha=0.001))])) -
- -""".format(style=_STYLE).replace('\n', '').replace(' ', '') - - -def test_display_estimator(): - num_trans = Pipeline(steps=[ - ('pass', 'passthrough'), - ('imputer', SimpleImputer(strategy='median')) - ]) - - cat_trans = Pipeline(steps=[ - ('imputer', SimpleImputer(strategy='constant', - missing_values='empty')), - ('one-hot', OneHotEncoder()) - ]) - - preprocess = ColumnTransformer([ - ('num', num_trans, ['a', 'b', 'c', 'd', 'e']), - ('cat', cat_trans, [0, 1, 2, 3]) - ]) - - feat_u = FeatureUnion([ - ('pca', PCA(n_components=1)), - ('tsvd', Pipeline([('first', TruncatedSVD(n_components=3)), - ('select', SelectPercentile())])) - ]) - - clf = VotingClassifier([ - ('lr', LogisticRegression(solver='lbfgs', random_state=1)), - ('mlp', MLPClassifier(alpha=0.001)) - ]) - - pipe = Pipeline([ - ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) - ]) - html_output = display_estimator(pipe) - - assert expected_display_estimator == html_output.replace(' ', '') From 343773a41877fb855c00f8fda58a3f9461a5f824 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Mon, 22 Jul 2019 15:09:51 -0400 Subject: [PATCH 09/81] STY Update styling --- sklearn/inspection/_display_estimator.py | 28 +- .../tests/test_display_estimator.py | 343 ++++++++++-------- 2 files changed, 210 insertions(+), 161 deletions(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index 165cb97029889..f3c2e2c6c3561 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -61,6 +61,18 @@ def _type_of_html_estimator(estimator): name_tips = [_estimator_tool_tip(est) for est in estimators] return _EstHTMLInfo('parallel', estimators, names, name_tips) + elif hasattr(estimator, "estimator"): + name = estimator.__class__.__name__ + name_tip = _estimator_tool_tip(estimator) + inner_estimator = estimator.estimator + return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) + + elif hasattr(estimator, 
"base_estimator"): + name = estimator.__class__.__name__ + name_tip = _estimator_tool_tip(estimator) + inner_estimator = estimator.base_estimator + return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) + elif isinstance(estimator, BaseEstimator): name = estimator.__class__.__name__ tool_tip = _estimator_tool_tip(estimator) @@ -99,7 +111,12 @@ def _write_estimator_html(out, estimator, name): _write_estimator_html(out, est, name) out.write('') # sk-parallel-item sk-serial out.write('') # sk-parallel sk-serial-item - + elif est_html_info.type == 'single-meta': + out.write('
') + _write_label_html(out, est_html_info.names, est_html_info.name_tips) + _write_estimator_html(out, est_html_info.estimators, + est_html_info.estimators.__class__.__name__) + out.write('
') # sk-serial-item # sk-serial elif est_html_info.type == 'single': out.write('
' '
' @@ -158,7 +175,7 @@ def _write_estimator_html(out, estimator, name): } .sk-dashed-wrapped { border: 1px dashed gray; - padding: 0.25em; + padding: 0 0.25em 0.25em 0.25em; } .sk-label { text-align: center; @@ -166,10 +183,8 @@ def _write_estimator_html(out, estimator, name): font-weight: bold; background: white; display: inline-block; - border: 1px dotted rgb(171, 171, 171); - border-radius: 0.25em; - padding: 0.2em 0.5em; - margin: 0.1em; + text-decoration: underline; + margin: 0 0.5em; } .sk-label-container { text-align: center; @@ -213,6 +228,7 @@ def _write_estimator_html(out, estimator, name): .sk-top-container { display: flex; color: black; + padding-bottom: 1em; } """ diff --git a/sklearn/inspection/tests/test_display_estimator.py b/sklearn/inspection/tests/test_display_estimator.py index d02f2eb45aed1..63f9489dc69ef 100644 --- a/sklearn/inspection/tests/test_display_estimator.py +++ b/sklearn/inspection/tests/test_display_estimator.py @@ -14,6 +14,8 @@ from sklearn.ensemble import VotingClassifier from sklearn.feature_selection import SelectPercentile from sklearn.preprocessing import OneHotEncoder +from sklearn.svm import LinearSVC +from sklearn.multiclass import OneVsOneClassifier from sklearn.inspection._display_estimator import _write_label_html from sklearn.inspection._display_estimator import _estimator_tool_tip from sklearn.inspection._display_estimator import _type_of_html_estimator @@ -122,201 +124,201 @@ def test_type_of_html_estimator_column_transformer(): expected_display_estimator = """ - -
-
-
-
-
-
- preprocessor + +
+
+
+
+
+
+ preprocessor +
-
-
-
-
-
- num +
+
+
+
+ num +
-
-
-
-
- passthrough +
+
+
+ passthrough +
-
-
-
- SimpleImputer +
+
+ SimpleImputer +
-
-
-
-
cat
-
-
+
+
+
cat +
+
-
-
- SimpleImputer +
+
+
+ SimpleImputer +
-
-
-
- OneHotEncoder +
+
+ OneHotEncoder +
-
-
-
-
- feat_u -
-
-
-
-
-
- pca -
-
-
-
-
- PCA -
-
+
+
+
+ feat_u
-
-
-
- tsvd +
+
+
+
+ pca +
-
-
- TruncatedSVD + PCA
-
-
- SelectPercentile +
+
+
+
+
+ tsvd +
+
+
+
+
+
+ TruncatedSVD +
+
+
+
+ SelectPercentile +
-
-
-
-
- classifier -
-
-
-
-
-
- lr -
+
+
+
+ classifier
-
-
+
+
+
+
- LogisticRegression + lr
-
-
-
-
-
- mlp +
+
+
+ LogisticRegression +
+
-
-
+
+
- MLPClassifier + mlp +
+
+
+
+
+ MLPClassifier +
@@ -325,9 +327,7 @@ def test_type_of_html_estimator_column_transformer():
-
- - + """.format(style=_STYLE).replace('\n', '').replace(' ', '') @@ -365,3 +365,36 @@ def test_display_estimator(): html_output = display_estimator(pipe) assert expected_display_estimator == html_output.replace(' ', '') + + +expected_display_estimator_ovo = """ + +
+
+
+
+
+
+ OneVsOneClassifier +
+
+
+
+ LinearSVC +
+
+
+
+
+
+ +""".format(style=_STYLE).replace('\n', '').replace(' ', '') + + +def test_display_estimator_ovo_classifier(): + ovo = OneVsOneClassifier(LinearSVC()) + html_output = display_estimator(ovo) + assert expected_display_estimator_ovo == html_output.replace(' ', '') From 50ed9f339f11ba060eae88276e1c47312a4ce715 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Thu, 25 Jul 2019 15:19:39 -0400 Subject: [PATCH 10/81] STY Removes underline --- sklearn/inspection/_display_estimator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index f3c2e2c6c3561..70ae509673c38 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -183,7 +183,6 @@ def _write_estimator_html(out, estimator, name): font-weight: bold; background: white; display: inline-block; - text-decoration: underline; margin: 0 0.5em; } .sk-label-container { From 1598cade87ff6ec4b0191274a0adf4814091a3b1 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Wed, 31 Jul 2019 11:32:18 -0400 Subject: [PATCH 11/81] ENH Updates style --- sklearn/inspection/_display_estimator.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index 70ae509673c38..6d55cf09e38ae 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -67,11 +67,11 @@ def _type_of_html_estimator(estimator): inner_estimator = estimator.estimator return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) - elif hasattr(estimator, "base_estimator"): - name = estimator.__class__.__name__ - name_tip = _estimator_tool_tip(estimator) - inner_estimator = estimator.base_estimator - return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) + # elif hasattr(estimator, "base_estimator"): + # name = estimator.__class__.__name__ + # name_tip = 
_estimator_tool_tip(estimator) + # inner_estimator = estimator.base_estimator + # return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) elif isinstance(estimator, BaseEstimator): name = estimator.__class__.__name__ @@ -158,6 +158,7 @@ def _write_estimator_html(out, estimator, name): .sk-parallel { display: flex; align-items: stretch; + justify-content: center; } .sk-parallel-item { display: flex; @@ -173,17 +174,21 @@ def _write_estimator_html(out, estimator, name): align-self: flex-start; width: 50%; } +.sk-parallel-item:only-child::after { + width: 0; +} .sk-dashed-wrapped { border: 1px dashed gray; padding: 0 0.25em 0.25em 0.25em; } .sk-label { - text-align: center; + min-width: 70%; font-family: monospace; font-weight: bold; background: white; display: inline-block; margin: 0 0.5em; + line-height: 1.4em; } .sk-label-container { text-align: center; @@ -227,7 +232,7 @@ def _write_estimator_html(out, estimator, name): .sk-top-container { display: flex; color: black; - padding-bottom: 1em; + padding-bottom: 2em; } """ From 464f6d86f7c95630563da1ea4054e9fc4f56dc5a Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Thu, 1 Aug 2019 14:42:53 -0400 Subject: [PATCH 12/81] ENH Update style --- sklearn/inspection/_display_estimator.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index 6d55cf09e38ae..25c2be2039454 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -67,12 +67,6 @@ def _type_of_html_estimator(estimator): inner_estimator = estimator.estimator return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) - # elif hasattr(estimator, "base_estimator"): - # name = estimator.__class__.__name__ - # name_tip = _estimator_tool_tip(estimator) - # inner_estimator = estimator.base_estimator - # return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) - elif isinstance(estimator, BaseEstimator): 
name = estimator.__class__.__name__ tool_tip = _estimator_tool_tip(estimator) @@ -182,17 +176,14 @@ def _write_estimator_html(out, estimator, name): padding: 0 0.25em 0.25em 0.25em; } .sk-label { - min-width: 70%; font-family: monospace; font-weight: bold; background: white; display: inline-block; margin: 0 0.5em; - line-height: 1.4em; } .sk-label-container { text-align: center; - border: #f0f8ff solid red; } .sk-serial-item { margin-bottom: 0.25em; From f4d882c0f837f287c32e091cae5731e451106e9c Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Mon, 5 Aug 2019 15:10:39 -0400 Subject: [PATCH 13/81] STY Update styling --- sklearn/inspection/_display_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index 25c2be2039454..1c533db0f6440 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -205,7 +205,7 @@ def _write_estimator_html(out, estimator, name): opacity: 0; font-weight: 400; position: absolute; - top: 100%; + top: 0; left: 0; padding: 0.5em; overflow: hidden; From c4cfe63b4d1d1aadc9c277858f1bb52173561b9e Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 11 Mar 2020 10:38:02 -0400 Subject: [PATCH 14/81] CLN Address comments --- doc/sphinxext/display_html.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinxext/display_html.py b/doc/sphinxext/display_html.py index 31bcaf83280d0..ed175a84aee5b 100644 --- a/doc/sphinxext/display_html.py +++ b/doc/sphinxext/display_html.py @@ -9,6 +9,7 @@ class ExecuteHTML(Directive): + "Execute Python code and includes stdout as HTML" has_content = True required_arguments = 0 From 92be3e9bdd519025ce9dae13ce144c02ba54d9ea Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Thu, 12 Mar 2020 21:48:14 -0400 Subject: [PATCH 15/81] ENH Makes display_estimator privatte --- doc/conf.py | 1 - .../{inspection => }/_display_estimator.py | 167 +++++--- sklearn/base.py | 4 + 
sklearn/inspection/__init__.py | 2 - .../tests/test_display_estimator.py | 400 ------------------ sklearn/tests/test_display_estimator.py | 176 ++++++++ 6 files changed, 288 insertions(+), 462 deletions(-) rename sklearn/{inspection => }/_display_estimator.py (62%) delete mode 100644 sklearn/inspection/tests/test_display_estimator.py create mode 100644 sklearn/tests/test_display_estimator.py diff --git a/doc/conf.py b/doc/conf.py index ee8bdb3768eb5..778dad7554a70 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -39,7 +39,6 @@ 'sphinx.ext.imgconverter', 'sphinx_gallery.gen_gallery', 'sphinx_issues', - 'display_html' ] # this is needed for some reason... diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/_display_estimator.py similarity index 62% rename from sklearn/inspection/_display_estimator.py rename to sklearn/_display_estimator.py index 1c533db0f6440..06755b8f9c019 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,76 +1,93 @@ from collections import namedtuple from contextlib import closing from io import StringIO +import uuid -from .._config import config_context -from ..base import BaseEstimator -from ..pipeline import Pipeline -from ..pipeline import FeatureUnion -from ..compose import ColumnTransformer -from ..ensemble import VotingClassifier, VotingRegressor - -def _estimator_tool_tip(estimator): +def _estimator_details(estimator): """Replace newlines to allow for css content: attr(...) to properly - display tooltips. + display estimator details. """ return str(estimator).replace('\n', ' ') +def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class): + out.write( + f'
' + f'
') + + if tool_tip is not None: + est_id = uuid.uuid4() + out.write(f'' + f'' + f'
{tool_tip}'
+                  f'
') + else: + out.write(f'') + out.write('
') # outer_class inner_class + + def _write_label_html(out, name, tool_tip): """Write label to html""" - out.write('
' - '
' - '{}
'.format(tool_tip, name)) + _write_dropdown_html(out, name, tool_tip, "sk-label-container", "sk-label") _EstHTMLInfo = namedtuple('_EstHTMLInfo', - 'type, estimators, names, name_tips') + 'type, estimators, names, name_details') def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. """ + from sklearn.base import BaseEstimator + from sklearn.pipeline import Pipeline + from sklearn.pipeline import FeatureUnion + from sklearn.compose import ColumnTransformer + from sklearn.ensemble import VotingClassifier, VotingRegressor + if isinstance(estimator, str): - return _EstHTMLInfo('single', estimator, estimator, estimator) + return _EstHTMLInfo('single', [estimator], [estimator], [estimator]) elif estimator is None: - return _EstHTMLInfo('single', estimator, 'None', 'None') + return _EstHTMLInfo('single', [estimator], ['None'], ['None']) elif isinstance(estimator, Pipeline): estimators = [step[1] for step in estimator.steps] names = [step[0] for step in estimator.steps] - name_tips = [_estimator_tool_tip(est) for est in estimators] - return _EstHTMLInfo('serial', estimators, names, name_tips) + name_details = [_estimator_details(est) for est in estimators] + return _EstHTMLInfo('serial', estimators, names, name_details) elif isinstance(estimator, ColumnTransformer): estimators = [trans[1] for trans in estimator.transformers] names = [trans[0] for trans in estimator.transformers] - name_tips = [trans[2] for trans in estimator.transformers] - return _EstHTMLInfo('parallel', estimators, names, name_tips) + name_details = [trans[2] for trans in estimator.transformers] + return _EstHTMLInfo('parallel', estimators, names, name_details) elif isinstance(estimator, FeatureUnion): estimators = [trans[1] for trans in estimator.transformer_list] names = [trans[0] for trans in estimator.transformer_list] - name_tips = [_estimator_tool_tip(est) for est in estimators] - return _EstHTMLInfo('parallel', estimators, names, name_tips) + 
name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) elif isinstance(estimator, (VotingClassifier, VotingRegressor)): estimators = [est[1] for est in estimator.estimators] names = [est[0] for est in estimator.estimators] - name_tips = [_estimator_tool_tip(est) for est in estimators] - return _EstHTMLInfo('parallel', estimators, names, name_tips) + name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) elif hasattr(estimator, "estimator"): - name = estimator.__class__.__name__ - name_tip = _estimator_tool_tip(estimator) + names = [estimator.__class__.__name__] + name_details = [_estimator_details(estimator)] inner_estimator = estimator.estimator - return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) + return _EstHTMLInfo('single-meta', [inner_estimator], names, + name_details) elif isinstance(estimator, BaseEstimator): - name = estimator.__class__.__name__ - tool_tip = _estimator_tool_tip(estimator) - return _EstHTMLInfo('single', estimator, name, tool_tip) + names = [estimator.__class__.__name__] + tool_tips = [_estimator_details(estimator)] + return _EstHTMLInfo('single', [estimator], names, tool_tips) else: raise ValueError("Invalid estimator") @@ -84,7 +101,7 @@ def _write_estimator_html(out, estimator, name): if est_html_info.type == 'serial': out.write('
') est_infos = zip(est_html_info.estimators, est_html_info.names, - est_html_info.name_tips) + est_html_info.name_details) for est, name, tool_tip in est_infos: _write_estimator_html(out, est, name) out.write('
') # sk-serial @@ -92,12 +109,12 @@ def _write_estimator_html(out, estimator, name): elif est_html_info.type == 'parallel': out.write('
') if name: - tool_tip = _estimator_tool_tip(estimator) + tool_tip = _estimator_details(estimator) _write_label_html(out, name, tool_tip) out.write('
') est_infos = zip(est_html_info.estimators, est_html_info.names, - est_html_info.name_tips) + est_html_info.name_details) for est, name, tool_tip in est_infos: out.write('
') _write_label_html(out, name, tool_tip) @@ -105,20 +122,58 @@ def _write_estimator_html(out, estimator, name): _write_estimator_html(out, est, name) out.write('
') # sk-parallel-item sk-serial out.write('
') # sk-parallel sk-serial-item + elif est_html_info.type == 'single-meta': out.write('
') - _write_label_html(out, est_html_info.names, est_html_info.name_tips) - _write_estimator_html(out, est_html_info.estimators, + _write_label_html(out, est_html_info.names[0], + est_html_info.name_details[0]) + _write_estimator_html(out, est_html_info.estimators[0], est_html_info.estimators.__class__.__name__) out.write('
') # sk-serial-item # sk-serial + elif est_html_info.type == 'single': - out.write('
' - '
' - '{}
'.format(est_html_info.name_tips, - est_html_info.names)) + _write_dropdown_html(out, est_html_info.names[0], + est_html_info.name_details[0], + "sk-serial-item", "sk-estimator") _STYLE = """ +.sk-toggleable { + background-color: white; +} +.sk-toggleable__label { + cursor: pointer; + display: block; + width: 100%; + margin-bottom: 0; +} +.sk-toggleable__content { + max-height: 0; + max-width: 0; + overflow: hidden; + text-align: left; + background-color: #f0f8ff; +} +div.sk-toggleable__content pre { + margin: 0.5em; + border-radius: 0.25em; +} +.sk-toggleable__control:checked~.sk-toggleable__content { + max-height: 200px; + max-width: 100%; + overflow: auto; +} +.sk-hidden--visually { + border: 0; + clip: rect(1px 1px 1px 1px); + clip: rect(1px, 1px, 1px, 1px); + height: 1px; + margin: -1px; + overflow: hidden; + padding: 0; + position: absolute; + width: 1px; +} .sk-estimator { font-family: monospace; background-color: #f0f8ff; @@ -149,11 +204,15 @@ def _write_estimator_html(out, estimator, name): float: left; background: white; } +.sk-serial-item { + z-index: 1; +} .sk-parallel { display: flex; align-items: stretch; justify-content: center; } + .sk-parallel-item { display: flex; flex-direction: column; @@ -175,19 +234,19 @@ def _write_estimator_html(out, estimator, name): border: 1px dashed gray; padding: 0 0.25em 0.25em 0.25em; } -.sk-label { +.sk-label label { font-family: monospace; font-weight: bold; background: white; display: inline-block; margin: 0 0.5em; + line-height: 1.4em; + width: 97%; } .sk-label-container { text-align: center; -} -.sk-serial-item { - margin-bottom: 0.25em; - background: white; + border: #f0f8ff solid red; + z-index: 1; } .sk-container { display: flex; @@ -220,12 +279,7 @@ def _write_estimator_html(out, estimator, name): opacity: 1; z-index: 2; } -.sk-top-container { - display: flex; - color: black; - padding-bottom: 2em; -} -""" +""".replace('\n', '').replace(' ', '') def display_estimator(estimator, print_changed_only=True): @@ 
-246,15 +300,18 @@ def display_estimator(estimator, print_changed_only=True): HTML representation of estimator. When called in jupyter notebook or lab, a iPython HTML object is returned. """ + from sklearn._config import config_context + from sklearn.pipeline import Pipeline with config_context(print_changed_only=print_changed_only), \ closing(StringIO()) as out: + # This forces estimators to always be serial at the first layer if not isinstance(estimator, Pipeline): estimator = Pipeline([(estimator.__class__.__name__, estimator)]) out.write('') out.write('
') @@ -263,12 +320,4 @@ def display_estimator(estimator, print_changed_only=True): out.write('') html_output = out.getvalue() - # wrap in iPython HTML if in a notebook context - try: - cls_name = get_ipython().__class__.__name__ - if cls_name != 'ZMQInteractiveShell': - return html_output - from IPython.display import HTML - return HTML(html_output) - except (ImportError, NameError): - return html_output + return html_output diff --git a/sklearn/base.py b/sklearn/base.py index 70dec8c030418..73b1c95bb3f08 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -20,6 +20,7 @@ from .utils import _IS_32BIT from .utils.validation import check_X_y from .utils.validation import check_array +from ._display_estimator import display_estimator _DEFAULT_TAGS = { 'non_deterministic': False, @@ -411,6 +412,9 @@ def _validate_data(self, X, y=None, reset=True, **check_params): return out + def _repr_html_(self): + return display_estimator(self) + class ClassifierMixin: """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index ca64f3147d8f2..bfa28f2b3a4f8 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -13,7 +13,6 @@ from .partial_dependence import partial_dependence from ._permutation_importance import permutation_importance # noqa -from ._display_estimator import display_estimator # noqa from ._plot.partial_dependence import plot_partial_dependence # noqa from ._plot.partial_dependence import PartialDependenceDisplay # noqa @@ -24,5 +23,4 @@ 'plot_partial_dependence', 'permutation_importance', 'PartialDependenceDisplay', - 'display_estimator', ] diff --git a/sklearn/inspection/tests/test_display_estimator.py b/sklearn/inspection/tests/test_display_estimator.py deleted file mode 100644 index 63f9489dc69ef..0000000000000 --- a/sklearn/inspection/tests/test_display_estimator.py +++ /dev/null @@ -1,400 +0,0 @@ -from contextlib import closing -from io import StringIO - 
-import pytest - -from sklearn.linear_model import LogisticRegression -from sklearn.neural_network import MLPClassifier -from sklearn.impute import SimpleImputer -from sklearn.decomposition import PCA -from sklearn.decomposition import TruncatedSVD -from sklearn.pipeline import Pipeline -from sklearn.pipeline import FeatureUnion -from sklearn.compose import ColumnTransformer -from sklearn.ensemble import VotingClassifier -from sklearn.feature_selection import SelectPercentile -from sklearn.preprocessing import OneHotEncoder -from sklearn.svm import LinearSVC -from sklearn.multiclass import OneVsOneClassifier -from sklearn.inspection._display_estimator import _write_label_html -from sklearn.inspection._display_estimator import _estimator_tool_tip -from sklearn.inspection._display_estimator import _type_of_html_estimator -from sklearn.inspection._display_estimator import display_estimator -from sklearn.inspection._display_estimator import _STYLE - - -@pytest.mark.parametrize('est, expected', [ - ('None', 'None'), - ('passthrough', 'passthrough'), - ('hello\nworld', 'hello world') -]) -def test_estimator_tool_tip(est, expected): - assert expected == _estimator_tool_tip(est) - - -def test_write_label_html(): - name = "LogisticRegression" - tool_tip = "hello-world" - - expected = ('
' - 'LogisticRegression
') - - with closing(StringIO()) as out: - _write_label_html(out, name, tool_tip) - html_label = out.getvalue() - assert html_label == expected - - -def test_type_of_html_estimator_error(): - with pytest.raises(ValueError, match="Invalid estimator"): - _type_of_html_estimator(100) - - -@pytest.mark.parametrize('est', ['passthrough', 'drop', None]) -def test_type_of_html_estimator_single_str_none(est): - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' - assert est_html_info.estimators == est - assert est_html_info.names == str(est) - assert est_html_info.name_tips == str(est) - - -def test_type_of_html_estimator_single_estimator(): - est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' - assert est_html_info.estimators == est - assert est_html_info.names == est.__class__.__name__ - assert est_html_info.name_tips == _estimator_tool_tip(est) - - -def test_type_of_html_estimator_pipeline(): - pipe = Pipeline([ - ('imputer', SimpleImputer()), - ('classifier', LogisticRegression()) - ]) - est_html_info = _type_of_html_estimator(pipe) - assert est_html_info.type == 'serial' - assert est_html_info.estimators == [step[1] for step in pipe.steps] - assert est_html_info.names == ['imputer', 'classifier'] - assert est_html_info.name_tips == [_estimator_tool_tip(step[1]) - for step in pipe.steps] - - -def test_type_of_html_estimator_feature_union(): - f_union = FeatureUnion([ - ('pca', PCA()), ('svd', TruncatedSVD()) - ]) - est_html_info = _type_of_html_estimator(f_union) - assert est_html_info.type == 'parallel' - assert est_html_info.names == ['pca', 'svd'] - assert est_html_info.estimators == [trans[1] - for trans in f_union.transformer_list] - assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) - for trans in f_union.transformer_list] - - -def test_type_of_html_estimator_voting(): - clf = VotingClassifier([ - ('log_reg', LogisticRegression()), - ('mlp', 
MLPClassifier()) - ]) - est_html_info = _type_of_html_estimator(clf) - assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in clf.estimators] - assert est_html_info.names == ['log_reg', 'mlp'] - assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) - for trans in clf.estimators] - - -def test_type_of_html_estimator_column_transformer(): - ct = ColumnTransformer([ - ('pca', PCA(), ['num1', 'num2']), - ('svd', TruncatedSVD, [0, 3]) - ]) - est_html_info = _type_of_html_estimator(ct) - assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in ct.transformers] - assert est_html_info.names == ['pca', 'svd'] - assert est_html_info.name_tips == [['num1', 'num2'], [0, 3]] - - -expected_display_estimator = """ -
-
-
-
-
-
- preprocessor -
-
-
-
-
-
- num -
-
-
-
-
-
- passthrough -
-
-
-
- SimpleImputer -
-
-
-
-
-
-
-
cat -
-
-
-
-
-
- SimpleImputer -
-
-
-
- OneHotEncoder -
-
-
-
-
-
-
-
-
-
- feat_u -
-
-
-
-
-
- pca -
-
-
-
-
- PCA -
-
-
-
-
-
-
- tsvd -
-
-
-
-
-
- TruncatedSVD -
-
-
-
- SelectPercentile -
-
-
-
-
-
-
-
-
-
- classifier -
-
-
-
-
-
- lr -
-
-
-
-
- LogisticRegression -
-
-
-
-
-
-
- mlp -
-
-
-
-
- MLPClassifier -
-
-
-
-
-
-
-
-
- -""".format(style=_STYLE).replace('\n', '').replace(' ', '') - - -def test_display_estimator(): - num_trans = Pipeline(steps=[ - ('pass', 'passthrough'), - ('imputer', SimpleImputer(strategy='median')) - ]) - - cat_trans = Pipeline(steps=[ - ('imputer', SimpleImputer(strategy='constant', - missing_values='empty')), - ('one-hot', OneHotEncoder()) - ]) - - preprocess = ColumnTransformer([ - ('num', num_trans, ['a', 'b', 'c', 'd', 'e']), - ('cat', cat_trans, [0, 1, 2, 3]) - ]) - - feat_u = FeatureUnion([ - ('pca', PCA(n_components=1)), - ('tsvd', Pipeline([('first', TruncatedSVD(n_components=3)), - ('select', SelectPercentile())])) - ]) - - clf = VotingClassifier([ - ('lr', LogisticRegression(solver='lbfgs', random_state=1)), - ('mlp', MLPClassifier(alpha=0.001)) - ]) - - pipe = Pipeline([ - ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) - ]) - html_output = display_estimator(pipe) - - assert expected_display_estimator == html_output.replace(' ', '') - - -expected_display_estimator_ovo = """ - -
-
-
-
-
-
- OneVsOneClassifier -
-
-
-
- LinearSVC -
-
-
-
-
-
- -""".format(style=_STYLE).replace('\n', '').replace(' ', '') - - -def test_display_estimator_ovo_classifier(): - ovo = OneVsOneClassifier(LinearSVC()) - html_output = display_estimator(ovo) - assert expected_display_estimator_ovo == html_output.replace(' ', '') diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py new file mode 100644 index 0000000000000..2f5a4cffe5711 --- /dev/null +++ b/sklearn/tests/test_display_estimator.py @@ -0,0 +1,176 @@ +from contextlib import closing +from io import StringIO + +import pytest + +from sklearn.linear_model import LogisticRegression +from sklearn.neural_network import MLPClassifier +from sklearn.impute import SimpleImputer +from sklearn.decomposition import PCA +from sklearn.decomposition import TruncatedSVD +from sklearn.pipeline import Pipeline +from sklearn.pipeline import FeatureUnion +from sklearn.compose import ColumnTransformer +from sklearn.ensemble import VotingClassifier +from sklearn.feature_selection import SelectPercentile +from sklearn.preprocessing import OneHotEncoder +from sklearn.svm import LinearSVC +from sklearn.multiclass import OneVsOneClassifier +from sklearn._display_estimator import _write_label_html +from sklearn._display_estimator import _estimator_details +from sklearn._display_estimator import _type_of_html_estimator +from sklearn._display_estimator import display_estimator + + +@pytest.mark.parametrize('est, expected', [ + ('None', 'None'), + ('passthrough', 'passthrough'), + ('hello\nworld', 'hello world') +]) +def test_estimator_tool_tip(est, expected): + assert expected == _estimator_details(est) + + +def test_write_label_html(): + name = "LogisticRegression" + tool_tip = "hello-world" + + with closing(StringIO()) as out: + _write_label_html(out, name, tool_tip) + html_label = out.getvalue() + assert 'LogisticRegression' in html_label + assert html_label.startswith('
') + assert '
hello-world
' in html_label + + +def test_type_of_html_estimator_error(): + with pytest.raises(ValueError, match="Invalid estimator"): + _type_of_html_estimator(100) + + +@pytest.mark.parametrize('est', ['passthrough', 'drop', None]) +def test_type_of_html_estimator_single_str_none(est): + est_html_info = _type_of_html_estimator(est) + assert est_html_info.type == 'single' + assert est_html_info.estimators[0] == est + assert est_html_info.names[0] == str(est) + assert est_html_info.name_details[0] == str(est) + + +def test_type_of_html_estimator_single_estimator(): + est = LogisticRegression(C=10.0) + est_html_info = _type_of_html_estimator(est) + assert est_html_info.type == 'single' + assert est_html_info.estimators[0] == est + assert est_html_info.names[0] == est.__class__.__name__ + assert est_html_info.name_details[0] == _estimator_details(est) + + +def test_type_of_html_estimator_pipeline(): + pipe = Pipeline([ + ('imputer', SimpleImputer()), + ('classifier', LogisticRegression()) + ]) + est_html_info = _type_of_html_estimator(pipe) + assert est_html_info.type == 'serial' + assert est_html_info.estimators == [step[1] for step in pipe.steps] + assert est_html_info.names == ['imputer', 'classifier'] + assert est_html_info.name_details == [_estimator_details(step[1]) + for step in pipe.steps] + + +def test_type_of_html_estimator_feature_union(): + f_union = FeatureUnion([ + ('pca', PCA()), ('svd', TruncatedSVD()) + ]) + est_html_info = _type_of_html_estimator(f_union) + assert est_html_info.type == 'parallel' + assert est_html_info.names == ['pca', 'svd'] + assert est_html_info.estimators == [trans[1] + for trans in f_union.transformer_list] + assert est_html_info.name_details == [None, None] + + +def test_type_of_html_estimator_voting(): + clf = VotingClassifier([ + ('log_reg', LogisticRegression()), + ('mlp', MLPClassifier()) + ]) + est_html_info = _type_of_html_estimator(clf) + assert est_html_info.type == 'parallel' + assert est_html_info.estimators == [trans[1] + for 
trans in clf.estimators] + assert est_html_info.names == ['log_reg', 'mlp'] + assert est_html_info.name_details == [None, None] + + +def test_type_of_html_estimator_column_transformer(): + ct = ColumnTransformer([ + ('pca', PCA(), ['num1', 'num2']), + ('svd', TruncatedSVD, [0, 3]) + ]) + est_html_info = _type_of_html_estimator(ct) + assert est_html_info.type == 'parallel' + assert est_html_info.estimators == [trans[1] + for trans in ct.transformers] + assert est_html_info.names == ['pca', 'svd'] + assert est_html_info.name_details == [['num1', 'num2'], [0, 3]] + + +def test_display_estimator_pipeline(): + num_trans = Pipeline(steps=[ + ('pass', 'passthrough'), + ('imputer', SimpleImputer(strategy='median')) + ]) + + cat_trans = Pipeline(steps=[ + ('imputer', SimpleImputer(strategy='constant', + missing_values='empty')), + ('one-hot', OneHotEncoder()) + ]) + + preprocess = ColumnTransformer([ + ('num', num_trans, ['a', 'b', 'c', 'd', 'e']), + ('cat', cat_trans, [0, 1, 2, 3]) + ]) + + feat_u = FeatureUnion([ + ('pca', PCA(n_components=1)), + ('tsvd', Pipeline([('first', TruncatedSVD(n_components=3)), + ('select', SelectPercentile())])) + ]) + + clf = VotingClassifier([ + ('lr', LogisticRegression(solver='lbfgs', random_state=1)), + ('mlp', MLPClassifier(alpha=0.001)) + ]) + + pipe = Pipeline([ + ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) + ]) + html_output = display_estimator(pipe) + + expected_strings = [ + 'passthrough', + 'div class=\"sk-toggleable__content\">
SimpleImputer'
+      '(strategy=\'median\')',
+      '
SimpleImputer(missing_values=\'empty\', strategy=\'constant\')'
+      '
', + '(\'one-hot\', OneHotEncoder())', + 'preprocessor', + '
[\'a\', \'b\', \'c\', \'d\', \'e\']
', + '
LogisticRegression(random_state=1)
', + '
SelectPercentile()
', + '>TruncatedSVD', + '
TruncatedSVD(n_components=3)',
+    ]
+
+    for expected_string in expected_strings:
+        assert expected_string in html_output
+
+
+def test_display_estimator_ovo_classifier():
+    ovo = OneVsOneClassifier(LinearSVC())
+    html_output = display_estimator(ovo)
+    assert "
OneVsOneClassifier(estimator=LinearSVC())
" in html_output + assert "LinearSVC" in html_output From 1b471702c2bd98af82427f18c37b2be42f9f8dde Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Thu, 12 Mar 2020 23:21:12 -0400 Subject: [PATCH 16/81] ENN Major visual changes --- doc/conf.py | 1 + doc/modules/compose.rst | 7 +- ...splay_html.py => display_est_repr_html.py} | 16 ++-- sklearn/_display_estimator.py | 82 ++++++++++--------- sklearn/base.py | 4 +- sklearn/tests/test_display_estimator.py | 11 +-- 6 files changed, 62 insertions(+), 59 deletions(-) rename doc/sphinxext/{display_html.py => display_est_repr_html.py} (66%) diff --git a/doc/conf.py b/doc/conf.py index 778dad7554a70..ef36d077435ed 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -39,6 +39,7 @@ 'sphinx.ext.imgconverter', 'sphinx_gallery.gen_gallery', 'sphinx_issues', + 'display_est_repr_html' ] # this is needed for some reason... diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 8cfaff2c0c263..fd76eb4b90176 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -538,21 +538,20 @@ above example would be:: Visualizing Composite Estimators ================================ -:func:`sklearn.inspection.display_estimator` outputs a html representation of +In by default a jupyter notebook outputs a html representation of composite estimators. This can be useful to diagnose or visualize a Pipeline with may estimators. For example, the estimator defined in The composite estimator defined in :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` can be visualized as: -.. display_html:: +.. 
display_estimator_repr_html:: from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline from sklearn.impute import SimpleImputer from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.linear_model import LogisticRegression - from sklearn.inspection import display_estimator numeric_features = ['age', 'fare'] numeric_transformer = Pipeline(steps=[ @@ -571,7 +570,7 @@ can be visualized as: clf = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', LogisticRegression())]) - print(display_estimator(clf)) + clf .. topic:: Examples: diff --git a/doc/sphinxext/display_html.py b/doc/sphinxext/display_est_repr_html.py similarity index 66% rename from doc/sphinxext/display_html.py rename to doc/sphinxext/display_est_repr_html.py index ed175a84aee5b..dc72e71390c6f 100644 --- a/doc/sphinxext/display_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -1,6 +1,5 @@ """ -Primary used to display the html output `sklearn.inspection.display_estimator` -in sphinx. +Primary used to display the html output of `_repr_html_` of estimators """ import sys from docutils.parsers.rst import Directive @@ -8,15 +7,18 @@ from io import StringIO -class ExecuteHTML(Directive): +class DisplayReprEstimator(Directive): "Execute Python code and includes stdout as HTML" has_content = True required_arguments = 0 optional_arguments = 0 - @classmethod - def execute(cls, code): + def execute(self, code): + code_parts = code.split('\n') + final_output = code_parts[-1] + code_parts[-1] = f'print({final_output}._repr_html_())' + code = '\n'.join(code_parts) orig_stdout, orig_stderr = sys.stdout, sys.stderr output, err = StringIO(), StringIO() @@ -25,7 +27,7 @@ def execute(cls, code): exec(code) sys.stdout, sys.stderr = orig_stdout, orig_stderr - return "".join(['
', + return "".join(['
', output.getvalue(), err.getvalue(), "
"]) def run(self): @@ -42,4 +44,4 @@ def run(self): def setup(app): - app.add_directive('display_html', ExecuteHTML) + app.add_directive('display_estimator_repr_html', DisplayReprEstimator) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 06755b8f9c019..6d70660d0ef17 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -41,7 +41,7 @@ def _write_label_html(out, name, tool_tip): def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. """ - from sklearn.base import BaseEstimator + # import here to avoid circular import from base.py from sklearn.pipeline import Pipeline from sklearn.pipeline import FeatureUnion from sklearn.compose import ColumnTransformer @@ -83,14 +83,10 @@ def _type_of_html_estimator(estimator): inner_estimator = estimator.estimator return _EstHTMLInfo('single-meta', [inner_estimator], names, name_details) - - elif isinstance(estimator, BaseEstimator): - names = [estimator.__class__.__name__] - tool_tips = [_estimator_details(estimator)] - return _EstHTMLInfo('single', [estimator], names, tool_tips) - - else: - raise ValueError("Invalid estimator") + # Base estimator + names = [estimator.__class__.__name__] + tool_tips = [_estimator_details(estimator)] + return _EstHTMLInfo('single', [estimator], names, tool_tips) def _write_estimator_html(out, estimator, name): @@ -138,16 +134,19 @@ def _write_estimator_html(out, estimator, name): _STYLE = """ -.sk-toggleable { +div.sk-toggleable { background-color: white; } -.sk-toggleable__label { +label.sk-toggleable__label { cursor: pointer; display: block; width: 100%; margin-bottom: 0; + padding: 0.2em 0.3em; + box-sizing: border-box; + text-align: center; } -.sk-toggleable__content { +div.sk-toggleable__content { max-height: 0; max-width: 0; overflow: hidden; @@ -155,15 +154,15 @@ def _write_estimator_html(out, estimator, name): background-color: #f0f8ff; } div.sk-toggleable__content pre { - margin: 0.5em; 
+ margin: 0.2em; border-radius: 0.25em; } -.sk-toggleable__control:checked~.sk-toggleable__content { +input.sk-toggleable__control:checked~div.sk-toggleable__content { max-height: 200px; max-width: 100%; overflow: auto; } -.sk-hidden--visually { +input.sk-hidden--visually { border: 0; clip: rect(1px 1px 1px 1px); clip: rect(1px, 1px, 1px, 1px); @@ -174,81 +173,86 @@ def _write_estimator_html(out, estimator, name): position: absolute; width: 1px; } -.sk-estimator { +div.sk-estimator { font-family: monospace; background-color: #f0f8ff; - padding: 0.5em; margin: 0.25em 0.25em; border: 1px dotted black; border-radius: 0.25em; - text-align: center; + box-sizing: border-box; +} +div.sk-estimator:hover { + background-color: #a3d4ff; } -.sk-parallel-item::after { +div.sk-parallel-item::after { content: ""; width: 100%; border-bottom: 1px solid gray; flex-grow: 1; } -.sk-serial::before { +div.sk-label:hover label.sk-toggleable__label { + color: #0087fe; + background-color: rgb(246, 246, 246); + border-radius: 0.25em; +} +div.sk-serial::before { content: ""; position: absolute; border-left: 1px solid gray; + box-sizing: border-box; top: 2em; bottom: 0; left: 50%; } -.sk-serial { +div.sk-serial { display: flex; flex-direction: column; align-items: center; float: left; background: white; } -.sk-serial-item { +div.sk-serial-item { z-index: 1; } -.sk-parallel { +div.sk-parallel { display: flex; align-items: stretch; justify-content: center; } - -.sk-parallel-item { +div.sk-parallel-item { display: flex; flex-direction: column; position: relative; background: white; } -.sk-parallel-item:first-child::after { +div.sk-parallel-item:first-child::after { align-self: flex-end; width: 50%; } -.sk-parallel-item:last-child::after { +div.sk-parallel-item:last-child::after { align-self: flex-start; width: 50%; } -.sk-parallel-item:only-child::after { +div.sk-parallel-item:only-child::after { width: 0; } -.sk-dashed-wrapped { +div.sk-dashed-wrapped { border: 1px dashed gray; - padding: 0 
0.25em 0.25em 0.25em; + padding: 0 0.3em 0.3em 0.3em; + box-sizing: border-box; } -.sk-label label { +div.sk-label label { font-family: monospace; font-weight: bold; background: white; display: inline-block; - margin: 0 0.5em; line-height: 1.4em; - width: 97%; } -.sk-label-container { +div.sk-label-container { text-align: center; - border: #f0f8ff solid red; z-index: 1; } -.sk-container { +div.sk-container { display: flex; flex-direction: column; align-items: flex-start; @@ -270,6 +274,7 @@ def _write_estimator_html(out, estimator, name): overflow: hidden; background-color: #f0f8ff; border: 1px solid gray; + box-sizing: border-box; white-space: pre; content: attr(sk-data-tooltip); text-align: left; @@ -279,10 +284,10 @@ def _write_estimator_html(out, estimator, name): opacity: 1; z-index: 2; } -""".replace('\n', '').replace(' ', '') +""".replace(' ', '').replace('\n', '') -def display_estimator(estimator, print_changed_only=True): +def _estimator_repr_html(estimator, print_changed_only=True): """Build a HTML representation of an estimator Parameters @@ -300,6 +305,7 @@ def display_estimator(estimator, print_changed_only=True): HTML representation of estimator. When called in jupyter notebook or lab, a iPython HTML object is returned. 
""" + # import here to avoid circular import from base.py from sklearn._config import config_context from sklearn.pipeline import Pipeline diff --git a/sklearn/base.py b/sklearn/base.py index 73b1c95bb3f08..08b0fc820705d 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -20,7 +20,7 @@ from .utils import _IS_32BIT from .utils.validation import check_X_y from .utils.validation import check_array -from ._display_estimator import display_estimator +from ._display_estimator import _estimator_repr_html _DEFAULT_TAGS = { 'non_deterministic': False, @@ -413,7 +413,7 @@ def _validate_data(self, X, y=None, reset=True, **check_params): return out def _repr_html_(self): - return display_estimator(self) + return _estimator_repr_html(self) class ClassifierMixin: diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 2f5a4cffe5711..74a7b4b540a9c 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -19,7 +19,7 @@ from sklearn._display_estimator import _write_label_html from sklearn._display_estimator import _estimator_details from sklearn._display_estimator import _type_of_html_estimator -from sklearn._display_estimator import display_estimator +from sklearn._display_estimator import _estimator_repr_html @pytest.mark.parametrize('est, expected', [ @@ -43,11 +43,6 @@ def test_write_label_html(): assert '
hello-world
' in html_label -def test_type_of_html_estimator_error(): - with pytest.raises(ValueError, match="Invalid estimator"): - _type_of_html_estimator(100) - - @pytest.mark.parametrize('est', ['passthrough', 'drop', None]) def test_type_of_html_estimator_single_str_none(est): est_html_info = _type_of_html_estimator(est) @@ -148,7 +143,7 @@ def test_display_estimator_pipeline(): pipe = Pipeline([ ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) ]) - html_output = display_estimator(pipe) + html_output = _estimator_repr_html(pipe) expected_strings = [ 'passthrough', @@ -171,6 +166,6 @@ def test_display_estimator_pipeline(): def test_display_estimator_ovo_classifier(): ovo = OneVsOneClassifier(LinearSVC()) - html_output = display_estimator(ovo) + html_output = _estimator_repr_html(ovo) assert "
OneVsOneClassifier(estimator=LinearSVC())
" in html_output assert "LinearSVC" in html_output From 733bade7aaf6ea4d27d77d98a6e3bb9c65c41b92 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Thu, 12 Mar 2020 23:31:27 -0400 Subject: [PATCH 17/81] ENH Update viz --- sklearn/_display_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 6d70660d0ef17..dc3711ba00bc8 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -182,7 +182,7 @@ def _write_estimator_html(out, estimator, name): box-sizing: border-box; } div.sk-estimator:hover { - background-color: #a3d4ff; + background-color: #c1e2ff; } div.sk-parallel-item::after { content: ""; From ae98ae9892f83a0aab49bede07e437b9e9cc1c6d Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 11:39:22 -0400 Subject: [PATCH 18/81] STY Update --- sklearn/_display_estimator.py | 1 + sklearn/tests/test_display_estimator.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index dc3711ba00bc8..25fa76b2c4971 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -156,6 +156,7 @@ def _write_estimator_html(out, estimator, name): div.sk-toggleable__content pre { margin: 0.2em; border-radius: 0.25em; + background-color: #f0f8ff; } input.sk-toggleable__control:checked~div.sk-toggleable__content { max-height: 200px; diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 74a7b4b540a9c..63aaf74f3eb56 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -167,5 +167,5 @@ def test_display_estimator_pipeline(): def test_display_estimator_ovo_classifier(): ovo = OneVsOneClassifier(LinearSVC()) html_output = _estimator_repr_html(ovo) - assert "
OneVsOneClassifier(estimator=LinearSVC())
" in html_output + assert "pre>OneVsOneClassifier(estimator=LinearSVC())
" in html_output assert "LinearSVC" in html_output From 7b1de5fe87be4aa2eaaec8a0f6bc68686e3ad89e Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 14:29:32 -0400 Subject: [PATCH 19/81] STY Update --- sklearn/_display_estimator.py | 84 ++++++++++++------------- sklearn/tests/test_display_estimator.py | 11 ++-- 2 files changed, 49 insertions(+), 46 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 25fa76b2c4971..a0df605d12387 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -4,22 +4,26 @@ import uuid -def _estimator_details(estimator): +def _estimator_details(estimator, print_changed_only=True): """Replace newlines to allow for css content: attr(...) to properly display estimator details. """ - return str(estimator).replace('\n', ' ') + from sklearn._config import config_context + with config_context(print_changed_only=print_changed_only): + return str(estimator).replace('\n', ' ') -def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class): +def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class, + checked=False): out.write( f'
' f'
') if tool_tip is not None: + checked_str = 'checked' if checked else '' est_id = uuid.uuid4() out.write(f'' + f'id="{est_id}" type="checkbox" {checked_str}>' f'' f'
{tool_tip}'
@@ -29,16 +33,17 @@ def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class):
     out.write('
') # outer_class inner_class -def _write_label_html(out, name, tool_tip): +def _write_label_html(out, name, tool_tip, checked=False): """Write label to html""" - _write_dropdown_html(out, name, tool_tip, "sk-label-container", "sk-label") + _write_dropdown_html(out, name, tool_tip, "sk-label-container", "sk-label", + checked=checked) _EstHTMLInfo = namedtuple('_EstHTMLInfo', 'type, estimators, names, name_details') -def _type_of_html_estimator(estimator): +def _type_of_html_estimator(estimator, first_call=False): """Generate information about how to display an estimator. """ # import here to avoid circular import from base.py @@ -48,7 +53,8 @@ def _type_of_html_estimator(estimator): from sklearn.ensemble import VotingClassifier, VotingRegressor if isinstance(estimator, str): - return _EstHTMLInfo('single', [estimator], [estimator], [estimator]) + return _EstHTMLInfo('single', [estimator], [estimator], + [estimator]) elif estimator is None: return _EstHTMLInfo('single', [estimator], ['None'], ['None']) @@ -83,16 +89,20 @@ def _type_of_html_estimator(estimator): inner_estimator = estimator.estimator return _EstHTMLInfo('single-meta', [inner_estimator], names, name_details) - # Base estimator + + # Base estimator, if this is the first call, then all parameters are + # printed names = [estimator.__class__.__name__] - tool_tips = [_estimator_details(estimator)] + tool_tips = [_estimator_details(estimator, + print_changed_only=not first_call)] return _EstHTMLInfo('single', [estimator], names, tool_tips) -def _write_estimator_html(out, estimator, name): +def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ - est_html_info = _type_of_html_estimator(estimator) + est_html_info = _type_of_html_estimator(estimator, + first_call=first_call) if est_html_info.type == 'serial': out.write('
') @@ -130,7 +140,8 @@ def _write_estimator_html(out, estimator, name): elif est_html_info.type == 'single': _write_dropdown_html(out, est_html_info.names[0], est_html_info.name_details[0], - "sk-serial-item", "sk-estimator") + "sk-serial-item", "sk-estimator", + checked=first_call) _STYLE = """ @@ -163,6 +174,12 @@ def _write_estimator_html(out, estimator, name): max-width: 100%; overflow: auto; } +div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label { + background-color: #d4ebff; +} +div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label { + background-color: #d4ebff; +} input.sk-hidden--visually { border: 0; clip: rect(1px 1px 1px 1px); @@ -183,7 +200,7 @@ def _write_estimator_html(out, estimator, name): box-sizing: border-box; } div.sk-estimator:hover { - background-color: #c1e2ff; + background-color: #d4ebff; } div.sk-parallel-item::after { content: ""; @@ -192,9 +209,7 @@ def _write_estimator_html(out, estimator, name): flex-grow: 1; } div.sk-label:hover label.sk-toggleable__label { - color: #0087fe; - background-color: rgb(246, 246, 246); - border-radius: 0.25em; + background-color: #d4ebff; } div.sk-serial::before { content: ""; @@ -212,6 +227,7 @@ def _write_estimator_html(out, estimator, name): float: left; background: white; } + div.sk-serial-item { z-index: 1; } @@ -239,7 +255,7 @@ def _write_estimator_html(out, estimator, name): } div.sk-dashed-wrapped { border: 1px dashed gray; - padding: 0 0.3em 0.3em 0.3em; + margin: 0 0.3em 0.3em 0.3em; box-sizing: border-box; } div.sk-label label { @@ -285,10 +301,10 @@ def _write_estimator_html(out, estimator, name): opacity: 1; z-index: 2; } -""".replace(' ', '').replace('\n', '') +""".replace(' ', '').replace('\n', '') # noqa -def _estimator_repr_html(estimator, print_changed_only=True): +def _estimator_repr_html(estimator): """Build a HTML representation of an estimator Parameters @@ -296,35 +312,19 @@ def _estimator_repr_html(estimator, 
print_changed_only=True): estimator : estimator object The estimator to visualize. - print_changed_only : bool, optional (default=True) - If True, only the parameters that were set to non-default - values will be printed when printing an estimator. - Returns ------- html: str or iPython HTML object HTML representation of estimator. When called in jupyter notebook or lab, a iPython HTML object is returned. """ - # import here to avoid circular import from base.py - from sklearn._config import config_context - from sklearn.pipeline import Pipeline - - with config_context(print_changed_only=print_changed_only), \ - closing(StringIO()) as out: - - # This forces estimators to always be serial at the first layer - if not isinstance(estimator, Pipeline): - estimator = Pipeline([(estimator.__class__.__name__, estimator)]) - - out.write('') + with closing(StringIO()) as out: - out.write('
') - _write_estimator_html(out, estimator, '') - out.write('
') # sk-top-container # sk-container - out.write('') + out.write(f'' + f'
') + _write_estimator_html(out, estimator, estimator.__class__.__name__, + first_call=True) + out.write('
') html_output = out.getvalue() return html_output diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 63aaf74f3eb56..6739a8c9533e3 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -31,16 +31,19 @@ def test_estimator_tool_tip(est, expected): assert expected == _estimator_details(est) -def test_write_label_html(): +@pytest.mark.parametrize("checked", [True, False]) +def test_write_label_html(checked): name = "LogisticRegression" tool_tip = "hello-world" with closing(StringIO()) as out: - _write_label_html(out, name, tool_tip) + _write_label_html(out, name, tool_tip, checked=checked) html_label = out.getvalue() assert 'LogisticRegression' in html_label assert html_label.startswith('
') assert '
hello-world
' in html_label + if checked: + assert 'checked>' in html_label @pytest.mark.parametrize('est', ['passthrough', 'drop', None]) @@ -151,7 +154,7 @@ def test_display_estimator_pipeline(): '(strategy=\'median\')', '
SimpleImputer(missing_values=\'empty\', strategy=\'constant\')'
       '
', - '(\'one-hot\', OneHotEncoder())', + '(\'one-hot\', OneHotEncoder', 'preprocessor', '
[\'a\', \'b\', \'c\', \'d\', \'e\']
', '
LogisticRegression(random_state=1)
', @@ -167,5 +170,5 @@ def test_display_estimator_pipeline(): def test_display_estimator_ovo_classifier(): ovo = OneVsOneClassifier(LinearSVC()) html_output = _estimator_repr_html(ovo) - assert "pre>OneVsOneClassifier(estimator=LinearSVC())
" in html_output + assert "pre>OneVsOneClassifier(estimator=LinearSVC" in html_output assert "LinearSVC" in html_output From 741bc138238a3c82ed7e7b5d463fd0428b538f82 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 15:06:54 -0400 Subject: [PATCH 20/81] CLN Cleaner code --- sklearn/_display_estimator.py | 88 +++++++++++++------------ sklearn/tests/test_display_estimator.py | 11 +++- 2 files changed, 53 insertions(+), 46 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index a0df605d12387..1600cd54e26d9 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -8,9 +8,7 @@ def _estimator_details(estimator, print_changed_only=True): """Replace newlines to allow for css content: attr(...) to properly display estimator details. """ - from sklearn._config import config_context - with config_context(print_changed_only=print_changed_only): - return str(estimator).replace('\n', ' ') + return str(estimator).replace('\n', ' ') def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class, @@ -51,56 +49,59 @@ def _type_of_html_estimator(estimator, first_call=False): from sklearn.pipeline import FeatureUnion from sklearn.compose import ColumnTransformer from sklearn.ensemble import VotingClassifier, VotingRegressor + from sklearn._config import config_context - if isinstance(estimator, str): - return _EstHTMLInfo('single', [estimator], [estimator], - [estimator]) - - elif estimator is None: - return _EstHTMLInfo('single', [estimator], ['None'], ['None']) - - elif isinstance(estimator, Pipeline): - estimators = [step[1] for step in estimator.steps] - names = [step[0] for step in estimator.steps] - name_details = [_estimator_details(est) for est in estimators] - return _EstHTMLInfo('serial', estimators, names, name_details) - - elif isinstance(estimator, ColumnTransformer): - estimators = [trans[1] for trans in estimator.transformers] - names = [trans[0] for trans in estimator.transformers] 
- name_details = [trans[2] for trans in estimator.transformers] - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, FeatureUnion): - estimators = [trans[1] for trans in estimator.transformer_list] - names = [trans[0] for trans in estimator.transformer_list] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, (VotingClassifier, VotingRegressor)): - estimators = [est[1] for est in estimator.estimators] - names = [est[0] for est in estimator.estimators] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif hasattr(estimator, "estimator"): - names = [estimator.__class__.__name__] - name_details = [_estimator_details(estimator)] - inner_estimator = estimator.estimator - return _EstHTMLInfo('single-meta', [inner_estimator], names, - name_details) + with config_context(print_changed_only=True): + if isinstance(estimator, str): + return _EstHTMLInfo('single', [estimator], [estimator], + [estimator]) + + elif estimator is None: + return _EstHTMLInfo('single', [estimator], ['None'], ['None']) + + elif isinstance(estimator, Pipeline): + estimators = [step[1] for step in estimator.steps] + names = [step[0] for step in estimator.steps] + name_details = [_estimator_details(est) for est in estimators] + return _EstHTMLInfo('serial', estimators, names, name_details) + + elif isinstance(estimator, ColumnTransformer): + estimators = [trans[1] for trans in estimator.transformers] + names = [trans[0] for trans in estimator.transformers] + name_details = [trans[2] for trans in estimator.transformers] + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif isinstance(estimator, FeatureUnion): + estimators = [trans[1] for trans in estimator.transformer_list] + names = [trans[0] for trans in estimator.transformer_list] + name_details = [None] * len(names) + return 
_EstHTMLInfo('parallel', estimators, names, name_details) + + elif isinstance(estimator, (VotingClassifier, VotingRegressor)): + estimators = [est[1] for est in estimator.estimators] + names = [est[0] for est in estimator.estimators] + name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif hasattr(estimator, "estimator"): + estimators = [estimator.estimator] + names = [estimator.__class__.__name__] + name_details = [_estimator_details(estimator)] + return _EstHTMLInfo('single-meta', estimators, names, + name_details) # Base estimator, if this is the first call, then all parameters are # printed names = [estimator.__class__.__name__] - tool_tips = [_estimator_details(estimator, - print_changed_only=not first_call)] + with config_context(print_changed_only=not first_call): + tool_tips = [_estimator_details(estimator)] return _EstHTMLInfo('single', [estimator], names, tool_tips) def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ + from sklearn._config import config_context est_html_info = _type_of_html_estimator(estimator, first_call=first_call) @@ -115,7 +116,8 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'parallel': out.write('
') if name: - tool_tip = _estimator_details(estimator) + with config_context(print_changed_only=True): + tool_tip = _estimator_details(estimator) _write_label_html(out, name, tool_tip) out.write('
') diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 6739a8c9533e3..acaa3ba9c9948 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -3,6 +3,7 @@ import pytest +from sklearn import config_context from sklearn.linear_model import LogisticRegression from sklearn.neural_network import MLPClassifier from sklearn.impute import SimpleImputer @@ -56,8 +57,9 @@ def test_type_of_html_estimator_single_str_none(est): def test_type_of_html_estimator_single_estimator(): + # single estimator prints all the details est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est) + est_html_info = _type_of_html_estimator(est, first_call=True) assert est_html_info.type == 'single' assert est_html_info.estimators[0] == est assert est_html_info.names[0] == est.__class__.__name__ @@ -65,6 +67,7 @@ def test_type_of_html_estimator_single_estimator(): def test_type_of_html_estimator_pipeline(): + # multiple estimators in a pipeline prints only the changes pipe = Pipeline([ ('imputer', SimpleImputer()), ('classifier', LogisticRegression()) @@ -73,8 +76,10 @@ def test_type_of_html_estimator_pipeline(): assert est_html_info.type == 'serial' assert est_html_info.estimators == [step[1] for step in pipe.steps] assert est_html_info.names == ['imputer', 'classifier'] - assert est_html_info.name_details == [_estimator_details(step[1]) - for step in pipe.steps] + + with config_context(print_changed_only=True): + assert est_html_info.name_details == [_estimator_details(step[1]) + for step in pipe.steps] def test_type_of_html_estimator_feature_union(): From b0dd3f2d75191e07313a680cd239fba4fe433b4f Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 15:32:03 -0400 Subject: [PATCH 21/81] CLN Improves logic --- sklearn/_display_estimator.py | 38 ++++++++++++------------- sklearn/tests/test_display_estimator.py | 6 +--- 2 files changed, 20 insertions(+), 24 deletions(-) 
diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 1600cd54e26d9..cec6663d3ec9f 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -11,30 +11,30 @@ def _estimator_details(estimator, print_changed_only=True): return str(estimator).replace('\n', ' ') -def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class, +def _write_dropdown_html(out, name, name_details, outer_class, inner_class, checked=False): out.write( f'
' f'
') - if tool_tip is not None: + if name_details is not None: checked_str = 'checked' if checked else '' est_id = uuid.uuid4() out.write(f'' f'' - f'
{tool_tip}'
+                  f'
{name_details}'
                   f'
') else: out.write(f'') out.write('
') # outer_class inner_class -def _write_label_html(out, name, tool_tip, checked=False): +def _write_label_html(out, name, name_details, checked=False): """Write label to html""" - _write_dropdown_html(out, name, tool_tip, "sk-label-container", "sk-label", - checked=checked) + _write_dropdown_html(out, name, name_details, "sk-label-container", + "sk-label", checked=checked) _EstHTMLInfo = namedtuple('_EstHTMLInfo', @@ -62,7 +62,7 @@ def _type_of_html_estimator(estimator, first_call=False): elif isinstance(estimator, Pipeline): estimators = [step[1] for step in estimator.steps] names = [step[0] for step in estimator.steps] - name_details = [_estimator_details(est) for est in estimators] + name_details = [None] * len(names) return _EstHTMLInfo('serial', estimators, names, name_details) elif isinstance(estimator, ColumnTransformer): @@ -83,7 +83,8 @@ def _type_of_html_estimator(estimator, first_call=False): name_details = [None] * len(names) return _EstHTMLInfo('parallel', estimators, names, name_details) - elif hasattr(estimator, "estimator"): + elif (hasattr(estimator, "estimator") and + hasattr(estimator.estimator, 'get_params')): estimators = [estimator.estimator] names = [estimator.__class__.__name__] name_details = [_estimator_details(estimator)] @@ -94,8 +95,8 @@ def _type_of_html_estimator(estimator, first_call=False): # printed names = [estimator.__class__.__name__] with config_context(print_changed_only=not first_call): - tool_tips = [_estimator_details(estimator)] - return _EstHTMLInfo('single', [estimator], names, tool_tips) + name_details = [_estimator_details(estimator)] + return _EstHTMLInfo('single', [estimator], names, name_details) def _write_estimator_html(out, estimator, name, first_call=False): @@ -107,9 +108,8 @@ def _write_estimator_html(out, estimator, name, first_call=False): if est_html_info.type == 'serial': out.write('
') - est_infos = zip(est_html_info.estimators, est_html_info.names, - est_html_info.name_details) - for est, name, tool_tip in est_infos: + est_infos = zip(est_html_info.estimators, est_html_info.names) + for est, name in est_infos: _write_estimator_html(out, est, name) out.write('
') # sk-serial @@ -117,17 +117,17 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('
') if name: with config_context(print_changed_only=True): - tool_tip = _estimator_details(estimator) - _write_label_html(out, name, tool_tip) + name_details = _estimator_details(estimator) + _write_label_html(out, name, name_details) out.write('
') est_infos = zip(est_html_info.estimators, est_html_info.names, est_html_info.name_details) - for est, name, tool_tip in est_infos: + for est, name, name_details in est_infos: out.write('
') - _write_label_html(out, name, tool_tip) + _write_label_html(out, name, name_details) out.write('
') - _write_estimator_html(out, est, name) + _write_estimator_html(out, est, '') out.write('
') # sk-parallel-item sk-serial out.write('
') # sk-parallel sk-serial-item @@ -137,7 +137,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): est_html_info.name_details[0]) _write_estimator_html(out, est_html_info.estimators[0], est_html_info.estimators.__class__.__name__) - out.write('
') # sk-serial-item # sk-serial + out.write('
') # sk-serial-item elif est_html_info.type == 'single': _write_dropdown_html(out, est_html_info.names[0], diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index acaa3ba9c9948..b6ac4067b3a40 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -67,7 +67,6 @@ def test_type_of_html_estimator_single_estimator(): def test_type_of_html_estimator_pipeline(): - # multiple estimators in a pipeline prints only the changes pipe = Pipeline([ ('imputer', SimpleImputer()), ('classifier', LogisticRegression()) @@ -76,10 +75,7 @@ def test_type_of_html_estimator_pipeline(): assert est_html_info.type == 'serial' assert est_html_info.estimators == [step[1] for step in pipe.steps] assert est_html_info.names == ['imputer', 'classifier'] - - with config_context(print_changed_only=True): - assert est_html_info.name_details == [_estimator_details(step[1]) - for step in pipe.steps] + assert est_html_info.name_details == [None, None] def test_type_of_html_estimator_feature_union(): From 1b14ce2a94fee8afe95ce8d926343e76c075bf16 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 17:34:27 -0400 Subject: [PATCH 22/81] CLN More polish --- sklearn/_display_estimator.py | 161 ++++++++++-------------- sklearn/tests/test_display_estimator.py | 22 ++-- 2 files changed, 79 insertions(+), 104 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index cec6663d3ec9f..8fa01769bd373 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -8,11 +8,16 @@ def _estimator_details(estimator, print_changed_only=True): """Replace newlines to allow for css content: attr(...) to properly display estimator details. 
""" - return str(estimator).replace('\n', ' ') + from sklearn._config import config_context + with config_context(print_changed_only=print_changed_only): + return str(estimator).replace('\n', ' ') -def _write_dropdown_html(out, name, name_details, outer_class, inner_class, - checked=False): +def _write_label_html(out, name, name_details, + outer_class="sk-label-container", + inner_class="sk-label", + checked=False): + """Write labeled html with or without a dropdown with named details""" out.write( f'
' f'
') @@ -31,17 +36,13 @@ def _write_dropdown_html(out, name, name_details, outer_class, inner_class, out.write('
') # outer_class inner_class -def _write_label_html(out, name, name_details, checked=False): - """Write label to html""" - _write_dropdown_html(out, name, name_details, "sk-label-container", - "sk-label", checked=checked) - - +# if type == 'single' then estimators, names, and name_details represent +# repsent the single _EstHTMLInfo = namedtuple('_EstHTMLInfo', 'type, estimators, names, name_details') -def _type_of_html_estimator(estimator, first_call=False): +def _type_of_html_estimator(estimator, print_changed_only=True): """Generate information about how to display an estimator. """ # import here to avoid circular import from base.py @@ -49,62 +50,56 @@ def _type_of_html_estimator(estimator, first_call=False): from sklearn.pipeline import FeatureUnion from sklearn.compose import ColumnTransformer from sklearn.ensemble import VotingClassifier, VotingRegressor - from sklearn._config import config_context - with config_context(print_changed_only=True): - if isinstance(estimator, str): - return _EstHTMLInfo('single', [estimator], [estimator], - [estimator]) - - elif estimator is None: - return _EstHTMLInfo('single', [estimator], ['None'], ['None']) - - elif isinstance(estimator, Pipeline): - estimators = [step[1] for step in estimator.steps] - names = [step[0] for step in estimator.steps] - name_details = [None] * len(names) - return _EstHTMLInfo('serial', estimators, names, name_details) - - elif isinstance(estimator, ColumnTransformer): - estimators = [trans[1] for trans in estimator.transformers] - names = [trans[0] for trans in estimator.transformers] - name_details = [trans[2] for trans in estimator.transformers] - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, FeatureUnion): - estimators = [trans[1] for trans in estimator.transformer_list] - names = [trans[0] for trans in estimator.transformer_list] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif 
isinstance(estimator, (VotingClassifier, VotingRegressor)): - estimators = [est[1] for est in estimator.estimators] - names = [est[0] for est in estimator.estimators] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif (hasattr(estimator, "estimator") and - hasattr(estimator.estimator, 'get_params')): - estimators = [estimator.estimator] - names = [estimator.__class__.__name__] - name_details = [_estimator_details(estimator)] - return _EstHTMLInfo('single-meta', estimators, names, - name_details) + if isinstance(estimator, str): + return _EstHTMLInfo('single', estimator, estimator, estimator) + + elif estimator is None: + return _EstHTMLInfo('single', estimator, 'None', 'None') + + elif isinstance(estimator, Pipeline): + estimators = [step[1] for step in estimator.steps] + names = [step[0] for step in estimator.steps] + name_details = [None] * len(names) + return _EstHTMLInfo('serial', estimators, names, name_details) + + elif isinstance(estimator, ColumnTransformer): + estimators = [trans[1] for trans in estimator.transformers] + names = [trans[0] for trans in estimator.transformers] + name_details = [trans[2] for trans in estimator.transformers] + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif isinstance(estimator, FeatureUnion): + estimators = [trans[1] for trans in estimator.transformer_list] + names = [trans[0] for trans in estimator.transformer_list] + name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif isinstance(estimator, (VotingClassifier, VotingRegressor)): + estimators = [est[1] for est in estimator.estimators] + names = [est[0] for est in estimator.estimators] + name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif (hasattr(estimator, "estimator") and + hasattr(estimator.estimator, 'get_params')): + inner_estimator = estimator.estimator + inner_name = 
inner_estimator.__class__.__name__ + return _EstHTMLInfo('single-meta', inner_estimator, inner_name, None) # Base estimator, if this is the first call, then all parameters are # printed - names = [estimator.__class__.__name__] - with config_context(print_changed_only=not first_call): - name_details = [_estimator_details(estimator)] - return _EstHTMLInfo('single', [estimator], names, name_details) + name = estimator.__class__.__name__ + name_detail = _estimator_details(estimator, + print_changed_only=print_changed_only) + return _EstHTMLInfo('single', estimator, name, name_detail) def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ - from sklearn._config import config_context est_html_info = _type_of_html_estimator(estimator, - first_call=first_call) + print_changed_only=not first_call) if est_html_info.type == 'serial': out.write('
') @@ -116,8 +111,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'parallel': out.write('
') if name: - with config_context(print_changed_only=True): - name_details = _estimator_details(estimator) + name_details = _estimator_details(estimator) _write_label_html(out, name, name_details) out.write('
') @@ -133,17 +127,20 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'single-meta': out.write('
') - _write_label_html(out, est_html_info.names[0], - est_html_info.name_details[0]) - _write_estimator_html(out, est_html_info.estimators[0], - est_html_info.estimators.__class__.__name__) - out.write('
') # sk-serial-item + if name: + name_details = _estimator_details(estimator) + _write_label_html(out, name, name_details) + out.write('
') + _write_estimator_html(out, est_html_info.estimators, + est_html_info.names) + out.write('
') elif est_html_info.type == 'single': - _write_dropdown_html(out, est_html_info.names[0], - est_html_info.name_details[0], - "sk-serial-item", "sk-estimator", - checked=first_call) + _write_label_html(out, est_html_info.names, + est_html_info.name_details, + outer_class="sk-serial-item", + inner_class="sk-estimator", + checked=first_call) _STYLE = """ @@ -278,31 +275,6 @@ def _write_estimator_html(out, estimator, name, first_call=False): position: relative; float: left; } -[sk-data-tooltip] { - position: relative; - cursor: pointer; -} -[sk-data-tooltip]:before { - visibility: hidden; - opacity: 0; - font-weight: 400; - position: absolute; - top: 0; - left: 0; - padding: 0.5em; - overflow: hidden; - background-color: #f0f8ff; - border: 1px solid gray; - box-sizing: border-box; - white-space: pre; - content: attr(sk-data-tooltip); - text-align: left; -} -[sk-data-tooltip]:hover:before { - visibility: visible; - opacity: 1; - z-index: 2; -} """.replace(' ', '').replace('\n', '') # noqa @@ -321,8 +293,9 @@ def _estimator_repr_html(estimator): lab, a iPython HTML object is returned. """ with closing(StringIO()) as out: - - out.write(f'' + out.write(f'' + f'sklearn-viz' + f'' f'
') _write_estimator_html(out, estimator, estimator.__class__.__name__, first_call=True) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index b6ac4067b3a40..08ad2f6107255 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -3,7 +3,6 @@ import pytest -from sklearn import config_context from sklearn.linear_model import LogisticRegression from sklearn.neural_network import MLPClassifier from sklearn.impute import SimpleImputer @@ -21,6 +20,7 @@ from sklearn._display_estimator import _estimator_details from sklearn._display_estimator import _type_of_html_estimator from sklearn._display_estimator import _estimator_repr_html +from sklearn._config import config_context @pytest.mark.parametrize('est, expected', [ @@ -51,19 +51,21 @@ def test_write_label_html(checked): def test_type_of_html_estimator_single_str_none(est): est_html_info = _type_of_html_estimator(est) assert est_html_info.type == 'single' - assert est_html_info.estimators[0] == est - assert est_html_info.names[0] == str(est) - assert est_html_info.name_details[0] == str(est) + assert est_html_info.estimators == est + assert est_html_info.names == str(est) + assert est_html_info.name_details == str(est) -def test_type_of_html_estimator_single_estimator(): - # single estimator prints all the details +@pytest.mark.parametrize('print_changed_only', [True, False]) +def test_type_of_html_estimator_single_estimator(print_changed_only): est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est, first_call=True) + est_html_info = _type_of_html_estimator( + est, print_changed_only=print_changed_only) assert est_html_info.type == 'single' - assert est_html_info.estimators[0] == est - assert est_html_info.names[0] == est.__class__.__name__ - assert est_html_info.name_details[0] == _estimator_details(est) + assert est_html_info.estimators == est + assert est_html_info.names == est.__class__.__name__ + 
assert (est_html_info.name_details == + _estimator_details(est, print_changed_only=print_changed_only)) def test_type_of_html_estimator_pipeline(): From e03362f6a3e369a69added5f9e1a28e8bcbdacdc Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 17:44:17 -0400 Subject: [PATCH 23/81] CLN More polish --- sklearn/_display_estimator.py | 17 ++++++++++++----- sklearn/tests/test_display_estimator.py | 1 - 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 8fa01769bd373..8e65091a1b080 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -36,10 +36,16 @@ def _write_label_html(out, name, name_details, out.write('
') # outer_class inner_class -# if type == 'single' then estimators, names, and name_details represent -# repsent the single _EstHTMLInfo = namedtuple('_EstHTMLInfo', 'type, estimators, names, name_details') +# In this section, the parameters mean estimators, names, and name_details +# if type == 'single', then the parameters are single items representing the +# single estimator +# if type == 'parallel', then the paramters are list representing the +# parallel estimators +# if type == 'serial', then the parameters are list representing the serial +# estimators +# if type == 'single-meta', then parameters represent the wrapped estimator def _type_of_html_estimator(estimator, print_changed_only=True): @@ -83,9 +89,10 @@ def _type_of_html_estimator(estimator, print_changed_only=True): elif (hasattr(estimator, "estimator") and hasattr(estimator.estimator, 'get_params')): - inner_estimator = estimator.estimator - inner_name = inner_estimator.__class__.__name__ - return _EstHTMLInfo('single-meta', inner_estimator, inner_name, None) + wrapped_estimator = estimator.estimator + wrapped_name = wrapped_estimator.__class__.__name__ + return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, + None) # Base estimator, if this is the first call, then all parameters are # printed diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 08ad2f6107255..922f55fb0fd5e 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -20,7 +20,6 @@ from sklearn._display_estimator import _estimator_details from sklearn._display_estimator import _type_of_html_estimator from sklearn._display_estimator import _estimator_repr_html -from sklearn._config import config_context @pytest.mark.parametrize('est, expected', [ From 975c8233ce92509c01287f8f0c791b0db99de5b7 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 18:38:22 -0400 Subject: [PATCH 24/81] STY Minor adjustment --- 
sklearn/_display_estimator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 8e65091a1b080..eae05415783a8 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -261,8 +261,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): } div.sk-dashed-wrapped { border: 1px dashed gray; - margin: 0 0.3em 0.3em 0.3em; + margin: 0 0.2em 0.2em 0.2em; box-sizing: border-box; + padding-bottom: 0.2em; } div.sk-label label { font-family: monospace; From ecb3ae63968812d7153fbe1806acb9034b55aafb Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 20:31:41 -0400 Subject: [PATCH 25/81] ENH Adds a _sk_rep_html method --- sklearn/_display_estimator.py | 72 ++++++++----------------- sklearn/base.py | 6 +++ sklearn/compose/_column_transformer.py | 5 ++ sklearn/ensemble/_voting.py | 5 ++ sklearn/pipeline.py | 10 ++++ sklearn/tests/test_display_estimator.py | 39 +++++++------- 6 files changed, 65 insertions(+), 72 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index eae05415783a8..e1d98a7090af3 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,16 +1,15 @@ +from sklearn._config import config_context from collections import namedtuple from contextlib import closing from io import StringIO import uuid -def _estimator_details(estimator, print_changed_only=True): +def _estimator_details(estimator): """Replace newlines to allow for css content: attr(...) to properly display estimator details. 
""" - from sklearn._config import config_context - with config_context(print_changed_only=print_changed_only): - return str(estimator).replace('\n', ' ') + return str(estimator).replace('\n', ' ') def _write_label_html(out, name, name_details, @@ -48,65 +47,29 @@ def _write_label_html(out, name, name_details, # if type == 'single-meta', then parameters represent the wrapped estimator -def _type_of_html_estimator(estimator, print_changed_only=True): +def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. """ - # import here to avoid circular import from base.py - from sklearn.pipeline import Pipeline - from sklearn.pipeline import FeatureUnion - from sklearn.compose import ColumnTransformer - from sklearn.ensemble import VotingClassifier, VotingRegressor - if isinstance(estimator, str): return _EstHTMLInfo('single', estimator, estimator, estimator) - elif estimator is None: return _EstHTMLInfo('single', estimator, 'None', 'None') - elif isinstance(estimator, Pipeline): - estimators = [step[1] for step in estimator.steps] - names = [step[0] for step in estimator.steps] - name_details = [None] * len(names) - return _EstHTMLInfo('serial', estimators, names, name_details) - - elif isinstance(estimator, ColumnTransformer): - estimators = [trans[1] for trans in estimator.transformers] - names = [trans[0] for trans in estimator.transformers] - name_details = [trans[2] for trans in estimator.transformers] - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, FeatureUnion): - estimators = [trans[1] for trans in estimator.transformer_list] - names = [trans[0] for trans in estimator.transformer_list] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, (VotingClassifier, VotingRegressor)): - estimators = [est[1] for est in estimator.estimators] - names = [est[0] for est in estimator.estimators] - name_details 
= [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif (hasattr(estimator, "estimator") and - hasattr(estimator.estimator, 'get_params')): - wrapped_estimator = estimator.estimator + # looks like a meta estimator + if (hasattr(estimator, 'estimator') and + hasattr(getattr(estimator, 'estimator'), 'get_params')): + wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, None) - - # Base estimator, if this is the first call, then all parameters are - # printed - name = estimator.__class__.__name__ - name_detail = _estimator_details(estimator, - print_changed_only=print_changed_only) - return _EstHTMLInfo('single', estimator, name, name_detail) + return estimator._sk_repr_html() def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ - est_html_info = _type_of_html_estimator(estimator, - print_changed_only=not first_call) + with config_context(print_changed_only=not first_call): + est_html_info = _type_of_html_estimator(estimator) if est_html_info.type == 'serial': out.write('
') @@ -118,12 +81,18 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'parallel': out.write('
') if name: - name_details = _estimator_details(estimator) + with config_context(print_changed_only=True): + name_details = _estimator_details(estimator) _write_label_html(out, name, name_details) out.write('
') + if est_html_info.name_details is None: + name_details = (None,) * len(est_html_info.estimators) + else: + name_details = est_html_info.name_details + est_infos = zip(est_html_info.estimators, est_html_info.names, - est_html_info.name_details) + name_details) for est, name, name_details in est_infos: out.write('
') _write_label_html(out, name, name_details) @@ -135,7 +104,8 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'single-meta': out.write('
') if name: - name_details = _estimator_details(estimator) + with config_context(print_changed_only=True): + name_details = _estimator_details(estimator) _write_label_html(out, name, name_details) out.write('
') _write_estimator_html(out, est_html_info.estimators, diff --git a/sklearn/base.py b/sklearn/base.py index 08b0fc820705d..0e695ee712640 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -415,6 +415,12 @@ def _validate_data(self, X, y=None, reset=True, **check_params): def _repr_html_(self): return _estimator_repr_html(self) + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + return _EstHTMLInfo('single', self, + self.__class__.__name__, + str(self).replace('\n', ' ')) + class ClassifierMixin: """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index e94757bca6993..aa7b0db3d1b7a 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -628,6 +628,11 @@ def _hstack(self, Xs): Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs] return np.hstack(Xs) + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + names, transformers, name_details = zip(*self.transformers) + return _EstHTMLInfo('parallel', transformers, names, name_details) + def _check_X(X): """Use check_array only on lists and other non-array-likes / sparse""" diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 0da6dc86c30fa..7bc36f10164a8 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -103,6 +103,11 @@ def n_features_in_(self): return self.estimators_[0].n_features_in_ + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + names, estimators = zip(*self.estimators) + return _EstHTMLInfo('parallel', estimators, names, None) + class VotingClassifier(ClassifierMixin, _BaseVoting): """Soft Voting/Majority Rule classifier for unfitted estimators. 
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 64d2de70df531..1c7b407dadc3f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -633,6 +633,11 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + names, estimators = zip(*self.steps) + return _EstHTMLInfo('serial', estimators, names, None) + def _name_estimators(estimators): """Generate names for estimators.""" @@ -1010,6 +1015,11 @@ def n_features_in_(self): # X is passed to all transformers so we just delegate to the first one return self.transformer_list[0][1].n_features_in_ + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + names, transformers = zip(*self.transformer_list) + return _EstHTMLInfo('parallel', transformers, names, None) + def make_union(*transformers, **kwargs): """ diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 922f55fb0fd5e..0cfacd00fb98f 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -55,16 +55,13 @@ def test_type_of_html_estimator_single_str_none(est): assert est_html_info.name_details == str(est) -@pytest.mark.parametrize('print_changed_only', [True, False]) -def test_type_of_html_estimator_single_estimator(print_changed_only): +def test_type_of_html_estimator_single_estimator(): est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator( - est, print_changed_only=print_changed_only) + est_html_info = _type_of_html_estimator(est) assert est_html_info.type == 'single' assert est_html_info.estimators == est assert est_html_info.names == est.__class__.__name__ - assert (est_html_info.name_details == - _estimator_details(est, print_changed_only=print_changed_only)) + assert (est_html_info.name_details == _estimator_details(est)) def 
test_type_of_html_estimator_pipeline(): @@ -74,9 +71,9 @@ def test_type_of_html_estimator_pipeline(): ]) est_html_info = _type_of_html_estimator(pipe) assert est_html_info.type == 'serial' - assert est_html_info.estimators == [step[1] for step in pipe.steps] - assert est_html_info.names == ['imputer', 'classifier'] - assert est_html_info.name_details == [None, None] + assert est_html_info.estimators == tuple(step[1] for step in pipe.steps) + assert est_html_info.names == ('imputer', 'classifier') + assert est_html_info.name_details is None def test_type_of_html_estimator_feature_union(): @@ -85,10 +82,10 @@ def test_type_of_html_estimator_feature_union(): ]) est_html_info = _type_of_html_estimator(f_union) assert est_html_info.type == 'parallel' - assert est_html_info.names == ['pca', 'svd'] - assert est_html_info.estimators == [trans[1] - for trans in f_union.transformer_list] - assert est_html_info.name_details == [None, None] + assert est_html_info.names == ('pca', 'svd') + assert est_html_info.estimators == tuple( + trans[1] for trans in f_union.transformer_list) + assert est_html_info.name_details is None def test_type_of_html_estimator_voting(): @@ -98,10 +95,10 @@ def test_type_of_html_estimator_voting(): ]) est_html_info = _type_of_html_estimator(clf) assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in clf.estimators] - assert est_html_info.names == ['log_reg', 'mlp'] - assert est_html_info.name_details == [None, None] + assert est_html_info.estimators == tuple(trans[1] + for trans in clf.estimators) + assert est_html_info.names == ('log_reg', 'mlp') + assert est_html_info.name_details is None def test_type_of_html_estimator_column_transformer(): @@ -111,10 +108,10 @@ def test_type_of_html_estimator_column_transformer(): ]) est_html_info = _type_of_html_estimator(ct) assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in ct.transformers] - assert 
est_html_info.names == ['pca', 'svd'] - assert est_html_info.name_details == [['num1', 'num2'], [0, 3]] + assert est_html_info.estimators == tuple( + trans[1] for trans in ct.transformers) + assert est_html_info.names == ('pca', 'svd') + assert est_html_info.name_details == (['num1', 'num2'], [0, 3]) def test_display_estimator_pipeline(): From 3451ab009df95b632c7f383a6782d7bc41d2fc12 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 21:20:42 -0400 Subject: [PATCH 26/81] CLN Less diffs --- examples/compose/plot_column_transformer_mixed_types.py | 2 +- sklearn/inspection/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index b1aebb3252505..037ff3fbc147a 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -136,7 +136,7 @@ ############################################################################### # Using the prediction pipeline in a grid search -# ---------------------------------------------- +############################################################################### # Grid search can also be performed on the different preprocessing steps # defined in the ``ColumnTransformer`` object, together with the classifier's # hyperparameters as part of the ``Pipeline``. 
diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index bfa28f2b3a4f8..5940ac22a2ef2 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -22,5 +22,5 @@ 'partial_dependence', 'plot_partial_dependence', 'permutation_importance', - 'PartialDependenceDisplay', + 'PartialDependenceDisplay' ] From 407cfff7271ab7d2af6cd0663cf38bd262b1c5a5 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 21:23:11 -0400 Subject: [PATCH 27/81] CLN Imports higher --- sklearn/base.py | 5 ++--- sklearn/compose/_column_transformer.py | 2 +- sklearn/ensemble/_voting.py | 2 +- sklearn/pipeline.py | 3 +-- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 0e695ee712640..70f6cb7914dca 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -21,6 +21,7 @@ from .utils.validation import check_X_y from .utils.validation import check_array from ._display_estimator import _estimator_repr_html +from ._display_estimator import _EstHTMLInfo _DEFAULT_TAGS = { 'non_deterministic': False, @@ -416,9 +417,7 @@ def _repr_html_(self): return _estimator_repr_html(self) def _sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo - return _EstHTMLInfo('single', self, - self.__class__.__name__, + return _EstHTMLInfo('single', self, self.__class__.__name__, str(self).replace('\n', ' ')) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index aa7b0db3d1b7a..12d8ad18b2d35 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,6 +15,7 @@ from joblib import Parallel, delayed from ..base import clone, TransformerMixin +from .._display_estimator import _EstHTMLInfo from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch @@ -629,7 +630,6 @@ def _hstack(self, Xs): return np.hstack(Xs) def 
_sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo names, transformers, name_details = zip(*self.transformers) return _EstHTMLInfo('parallel', transformers, names, name_details) diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 7bc36f10164a8..cfaedfb18edc2 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -31,6 +31,7 @@ from ..utils.multiclass import check_classification_targets from ..utils.validation import column_or_1d from ..exceptions import NotFittedError +from .._display_estimator import _EstHTMLInfo class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -104,7 +105,6 @@ def n_features_in_(self): return self.estimators_[0].n_features_in_ def _sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo names, estimators = zip(*self.estimators) return _EstHTMLInfo('parallel', estimators, names, None) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 1c7b407dadc3f..3be08e32ad31c 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,6 +18,7 @@ from joblib import Parallel, delayed from .base import clone, TransformerMixin +from ._display_estimator import _EstHTMLInfo from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -634,7 +635,6 @@ def n_features_in_(self): return self.steps[0][1].n_features_in_ def _sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo names, estimators = zip(*self.steps) return _EstHTMLInfo('serial', estimators, names, None) @@ -1016,7 +1016,6 @@ def n_features_in_(self): return self.transformer_list[0][1].n_features_in_ def _sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo names, transformers = zip(*self.transformer_list) return _EstHTMLInfo('parallel', transformers, names, None) From 80d9b10dbf20a2968debe7535b24b165a9c4d776 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: 
Fri, 13 Mar 2020 21:36:25 -0400 Subject: [PATCH 28/81] ENH Better support for dark themes --- sklearn/_display_estimator.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index e1d98a7090af3..f4dae57d2f6ad 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -121,6 +121,10 @@ def _write_estimator_html(out, estimator, name, first_call=False): _STYLE = """ +div.sk-top-container { + color: black; + background-color: white; +} div.sk-toggleable { background-color: white; } @@ -142,6 +146,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): } div.sk-toggleable__content pre { margin: 0.2em; + color: black; border-radius: 0.25em; background-color: #f0f8ff; } @@ -201,7 +206,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): flex-direction: column; align-items: center; float: left; - background: white; + background-color: white; } div.sk-serial-item { @@ -211,12 +216,13 @@ def _write_estimator_html(out, estimator, name, first_call=False): display: flex; align-items: stretch; justify-content: center; + background-color: white; } div.sk-parallel-item { display: flex; flex-direction: column; position: relative; - background: white; + background-color: white; } div.sk-parallel-item:first-child::after { align-self: flex-end; @@ -234,11 +240,12 @@ def _write_estimator_html(out, estimator, name, first_call=False): margin: 0 0.2em 0.2em 0.2em; box-sizing: border-box; padding-bottom: 0.2em; + background-color: white; } div.sk-label label { font-family: monospace; font-weight: bold; - background: white; + background-color: white; display: inline-block; line-height: 1.4em; } From 791374beb9043c463ddb9c3f260f0adf1bec941d Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:16:21 -0400 Subject: [PATCH 29/81] DOC Includes note about html --- doc/modules/compose.rst | 14 ++++++++------ 1 file changed, 8 
insertions(+), 6 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index fd76eb4b90176..aeb8a553737f9 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -538,12 +538,14 @@ above example would be:: Visualizing Composite Estimators ================================ -In by default a jupyter notebook outputs a html representation of -composite estimators. This can be useful to diagnose or visualize a Pipeline -with may estimators. For example, the estimator defined in -The composite estimator defined in -:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` -can be visualized as: +By default, estimators are displayed with a HTML representation when shown in a +jupyter notebook. This can be useful to diagnose or visualize a Pipeline with +many estimators. An example of the HTML output can been seen below. + +.. note:: + + The HTML output of this code snippet can only been seen on the HTML version + of the docs. .. display_estimator_repr_html:: From d297bc70190a58b0cff7b19af5fefd864dc9f040 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:33:01 -0400 Subject: [PATCH 30/81] STY Update --- sklearn/_display_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index f4dae57d2f6ad..a94a022331184 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -247,7 +247,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): font-weight: bold; background-color: white; display: inline-block; - line-height: 1.4em; + line-height: 1em; } div.sk-label-container { text-align: center; From 9fde84ab44bb44175851f7c32dacacc7062a4d98 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:35:38 -0400 Subject: [PATCH 31/81] STY Update --- sklearn/_display_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/_display_estimator.py 
b/sklearn/_display_estimator.py index a94a022331184..03aecf9106ae9 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -247,7 +247,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): font-weight: bold; background-color: white; display: inline-block; - line-height: 1em; + line-height: 1.2em; } div.sk-label-container { text-align: center; From f254e1d824a88830a6ff21a397ed889d05e67d6c Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:41:55 -0400 Subject: [PATCH 32/81] CLN --- sklearn/_display_estimator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 03aecf9106ae9..a82eb6408b104 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -54,9 +54,8 @@ def _type_of_html_estimator(estimator): return _EstHTMLInfo('single', estimator, estimator, estimator) elif estimator is None: return _EstHTMLInfo('single', estimator, 'None', 'None') - # looks like a meta estimator - if (hasattr(estimator, 'estimator') and + elif (hasattr(estimator, 'estimator') and hasattr(getattr(estimator, 'estimator'), 'get_params')): wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ From 48aebee175a5c470249a2b6a8ae368333a99296c Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:45:50 -0400 Subject: [PATCH 33/81] CLN Moves code around --- sklearn/_display_estimator.py | 58 +++++++++++++++++------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index a82eb6408b104..db08ea66dba0d 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -5,6 +5,35 @@ import uuid +_EstHTMLInfo = namedtuple('_EstHTMLInfo', + 'type, estimators, names, name_details') +# In this section, the parameters mean estimators, names, and name_details +# if type == 
'single', then the parameters are single items representing the +# single estimator +# if type == 'parallel', then the paramters are list representing the +# parallel estimators +# if type == 'serial', then the parameters are list representing the serial +# estimators +# if type == 'single-meta', then parameters represent the wrapped estimator + + +def _type_of_html_estimator(estimator): + """Generate information about how to display an estimator. + """ + if isinstance(estimator, str): + return _EstHTMLInfo('single', estimator, estimator, estimator) + elif estimator is None: + return _EstHTMLInfo('single', estimator, 'None', 'None') + # looks like a meta estimator + elif (hasattr(estimator, 'estimator') and + hasattr(getattr(estimator, 'estimator'), 'get_params')): + wrapped_estimator = getattr(estimator, 'estimator') + wrapped_name = wrapped_estimator.__class__.__name__ + return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, + None) + return estimator._sk_repr_html() + + def _estimator_details(estimator): """Replace newlines to allow for css content: attr(...) to properly display estimator details. @@ -35,35 +64,6 @@ def _write_label_html(out, name, name_details, out.write('
') # outer_class inner_class -_EstHTMLInfo = namedtuple('_EstHTMLInfo', - 'type, estimators, names, name_details') -# In this section, the parameters mean estimators, names, and name_details -# if type == 'single', then the parameters are single items representing the -# single estimator -# if type == 'parallel', then the paramters are list representing the -# parallel estimators -# if type == 'serial', then the parameters are list representing the serial -# estimators -# if type == 'single-meta', then parameters represent the wrapped estimator - - -def _type_of_html_estimator(estimator): - """Generate information about how to display an estimator. - """ - if isinstance(estimator, str): - return _EstHTMLInfo('single', estimator, estimator, estimator) - elif estimator is None: - return _EstHTMLInfo('single', estimator, 'None', 'None') - # looks like a meta estimator - elif (hasattr(estimator, 'estimator') and - hasattr(getattr(estimator, 'estimator'), 'get_params')): - wrapped_estimator = getattr(estimator, 'estimator') - wrapped_name = wrapped_estimator.__class__.__name__ - return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, - None) - return estimator._sk_repr_html() - - def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). 
""" From d44c38ea2689ac2cc7a55231b6f31ec7a0837aec Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 23:32:58 -0400 Subject: [PATCH 34/81] ENH Adds stacking viz --- sklearn/_display_estimator.py | 15 +++----- sklearn/base.py | 2 +- sklearn/ensemble/_stacking.py | 22 ++++++++++++ sklearn/tests/test_display_estimator.py | 47 +++++++++++++++++++------ 4 files changed, 64 insertions(+), 22 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index db08ea66dba0d..5e1b09f5fb294 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -20,7 +20,9 @@ def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. """ - if isinstance(estimator, str): + if isinstance(estimator, _EstHTMLInfo): + return estimator + elif isinstance(estimator, str): return _EstHTMLInfo('single', estimator, estimator, estimator) elif estimator is None: return _EstHTMLInfo('single', estimator, 'None', 'None') @@ -34,13 +36,6 @@ def _type_of_html_estimator(estimator): return estimator._sk_repr_html() -def _estimator_details(estimator): - """Replace newlines to allow for css content: attr(...) to properly - display estimator details. - """ - return str(estimator).replace('\n', ' ') - - def _write_label_html(out, name, name_details, outer_class="sk-label-container", inner_class="sk-label", @@ -81,7 +76,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('
') if name: with config_context(print_changed_only=True): - name_details = _estimator_details(estimator) + name_details = str(estimator) _write_label_html(out, name, name_details) out.write('
') @@ -104,7 +99,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('
') if name: with config_context(print_changed_only=True): - name_details = _estimator_details(estimator) + name_details = str(estimator) _write_label_html(out, name, name_details) out.write('
') _write_estimator_html(out, est_html_info.estimators, diff --git a/sklearn/base.py b/sklearn/base.py index 70f6cb7914dca..77832cfc3e0ab 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -418,7 +418,7 @@ def _repr_html_(self): def _sk_repr_html(self): return _EstHTMLInfo('single', self, self.__class__.__name__, - str(self).replace('\n', ' ')) + str(self)) class ClassifierMixin: diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index ba817613523f6..18976e4b6a7b8 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,6 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor +from .._display_estimator import _EstHTMLInfo from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble @@ -232,6 +233,13 @@ def predict(self, X, **predict_params): self.transform(X), **predict_params ) + def _sk_repr_html(self, final_estimator): + names, estimators = zip(*self.estimators) + parallel = _EstHTMLInfo('parallel', estimators, names, None) + serial = _EstHTMLInfo('serial', (parallel, final_estimator), + ('', ''), None) + return _EstHTMLInfo('single-meta', serial, str(self), None) + class StackingClassifier(ClassifierMixin, _BaseStacking): """Stack of estimators with a final classifier. @@ -494,6 +502,13 @@ def transform(self, X): """ return self._transform(X) + def _sk_repr_html(self): + if self.final_estimator is None: + final_estimator = LogisticRegression() + else: + final_estimator = self.final_estimator + return super()._sk_repr_html(final_estimator) + class StackingRegressor(RegressorMixin, _BaseStacking): """Stack of estimators with a final regressor. @@ -662,3 +677,10 @@ def transform(self, X): Prediction outputs for each estimator. 
""" return self._transform(X) + + def _sk_repr_html(self): + if self.final_estimator is None: + final_estimator = RidgeCV() + else: + final_estimator = self.final_estimator + return super()._sk_repr_html(final_estimator) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 0cfacd00fb98f..a9013afddeccb 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -15,22 +15,16 @@ from sklearn.feature_selection import SelectPercentile from sklearn.preprocessing import OneHotEncoder from sklearn.svm import LinearSVC +from sklearn.svm import LinearSVR +from sklearn.tree import DecisionTreeClassifier from sklearn.multiclass import OneVsOneClassifier +from sklearn.ensemble import StackingClassifier +from sklearn.ensemble import StackingRegressor from sklearn._display_estimator import _write_label_html -from sklearn._display_estimator import _estimator_details from sklearn._display_estimator import _type_of_html_estimator from sklearn._display_estimator import _estimator_repr_html -@pytest.mark.parametrize('est, expected', [ - ('None', 'None'), - ('passthrough', 'passthrough'), - ('hello\nworld', 'hello world') -]) -def test_estimator_tool_tip(est, expected): - assert expected == _estimator_details(est) - - @pytest.mark.parametrize("checked", [True, False]) def test_write_label_html(checked): name = "LogisticRegression" @@ -61,7 +55,7 @@ def test_type_of_html_estimator_single_estimator(): assert est_html_info.type == 'single' assert est_html_info.estimators == est assert est_html_info.names == est.__class__.__name__ - assert (est_html_info.name_details == _estimator_details(est)) + assert est_html_info.name_details == str(est) def test_type_of_html_estimator_pipeline(): @@ -171,3 +165,34 @@ def test_display_estimator_ovo_classifier(): html_output = _estimator_repr_html(ovo) assert "pre>OneVsOneClassifier(estimator=LinearSVC" in html_output assert "LinearSVC" in html_output + + 
+@pytest.mark.parametrize("final_estimator", [None, LinearSVC()]) +def test_stacking_classsifer(final_estimator): + estimators = [('mlp', MLPClassifier(alpha=0.001)), + ('tree', DecisionTreeClassifier())] + clf = StackingClassifier( + estimators=estimators, final_estimator=final_estimator) + + html_output = _estimator_repr_html(clf) + + assert "('mlp', MLPClassifier(alpha=0.001)" in html_output + if final_estimator is None: + assert "LogisticRegression()" in html_output + else: + assert final_estimator.__class__.__name__ in html_output + + +@pytest.mark.parametrize("final_estimator", [None, LinearSVR()]) +def test_stacking_regressor(final_estimator): + reg = StackingRegressor( + estimators=[('svr', LinearSVR())], final_estimator=final_estimator) + + html_output = _estimator_repr_html(reg) + + assert "('svr', LinearSVR()" in html_output + print(html_output) + if final_estimator is None: + assert "RidgeCV" in html_output + else: + assert final_estimator.__class__.__name__ in html_output From 3811190df4782fe499425ac4026e8b273fc948bd Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 17 Mar 2020 16:53:58 -0400 Subject: [PATCH 35/81] ENH Better viz --- sklearn/_display_estimator.py | 122 +++++++++++++----------- sklearn/base.py | 6 +- sklearn/compose/_column_transformer.py | 4 +- sklearn/ensemble/_stacking.py | 11 ++- sklearn/ensemble/_voting.py | 4 +- sklearn/pipeline.py | 7 +- sklearn/tests/test_display_estimator.py | 2 +- 7 files changed, 86 insertions(+), 70 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 5e1b09f5fb294..90fed34b944fb 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,38 +1,45 @@ from sklearn._config import config_context -from collections import namedtuple from contextlib import closing from io import StringIO import uuid -_EstHTMLInfo = namedtuple('_EstHTMLInfo', - 'type, estimators, names, name_details') -# In this section, the parameters mean estimators, names, 
and name_details -# if type == 'single', then the parameters are single items representing the -# single estimator -# if type == 'parallel', then the paramters are list representing the -# parallel estimators -# if type == 'serial', then the parameters are list representing the serial -# estimators -# if type == 'single-meta', then parameters represent the wrapped estimator +class _EstHTMLBlock: + """HTML Representation of Estimator + + If type == 'single', then the parameters are single items representing the + single estimator + if type == 'parallel', then the paramters are list representing the + parallel estimators + if type == 'serial', then the parameters are list representing the serial + estimators + if type == 'single-meta', then parameters represent the wrapped estimator + """ + def __init__(self, type, estimators, names, name_details, + dash_wrapped=True): + self.type = type + self.estimators = estimators + self.names = names + self.name_details = name_details + self.dash_wrapped = dash_wrapped def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. 
""" - if isinstance(estimator, _EstHTMLInfo): + if isinstance(estimator, _EstHTMLBlock): return estimator elif isinstance(estimator, str): - return _EstHTMLInfo('single', estimator, estimator, estimator) + return _EstHTMLBlock('single', estimator, estimator, estimator) elif estimator is None: - return _EstHTMLInfo('single', estimator, 'None', 'None') + return _EstHTMLBlock('single', estimator, 'None', 'None') # looks like a meta estimator elif (hasattr(estimator, 'estimator') and hasattr(getattr(estimator, 'estimator'), 'get_params')): wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ - return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, - None) + return _EstHTMLBlock('single-meta', wrapped_estimator, wrapped_name, + None) return estimator._sk_repr_html() @@ -41,9 +48,8 @@ def _write_label_html(out, name, name_details, inner_class="sk-label", checked=False): """Write labeled html with or without a dropdown with named details""" - out.write( - f'
' - f'
') + out.write(f'
' + f'
') if name_details is not None: checked_str = 'checked' if checked else '' @@ -59,58 +65,67 @@ def _write_label_html(out, name, name_details, out.write('
') # outer_class inner_class +def _write_named_label_html(out, estimator, name): + if not name or isinstance(estimator, _EstHTMLBlock): + return + with config_context(print_changed_only=True): + name_details = str(estimator) + _write_label_html(out, name, name_details) + + +def _write_sk_item(out, dash_wrapped=True): + dash_cls = " sk-dashed-wrapped" if dash_wrapped else "" + out.write(f'
') + + def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ with config_context(print_changed_only=not first_call): - est_html_info = _type_of_html_estimator(estimator) + est_block = _type_of_html_estimator(estimator) + + if est_block.type == 'serial': + _write_sk_item(out, dash_wrapped=first_call or est_block.dash_wrapped) + _write_named_label_html(out, estimator, name) - if est_html_info.type == 'serial': out.write('
') - est_infos = zip(est_html_info.estimators, est_html_info.names) + est_infos = zip(est_block.estimators, est_block.names) for est, name in est_infos: + if name and not isinstance(est, _EstHTMLBlock): + name = f"{name}: {est.__class__.__name__}" _write_estimator_html(out, est, name) - out.write('
') # sk-serial + out.write('
') # sk-serial sk-item - elif est_html_info.type == 'parallel': - out.write('
') - if name: - with config_context(print_changed_only=True): - name_details = str(estimator) - _write_label_html(out, name, name_details) + elif est_block.type == 'parallel': + _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) + _write_named_label_html(out, estimator, name) out.write('
') - if est_html_info.name_details is None: - name_details = (None,) * len(est_html_info.estimators) + if est_block.name_details is None: + name_details = (None,) * len(est_block.estimators) else: - name_details = est_html_info.name_details + name_details = est_block.name_details - est_infos = zip(est_html_info.estimators, est_html_info.names, - name_details) + est_infos = zip(est_block.estimators, est_block.names, name_details) for est, name, name_details in est_infos: out.write('
') _write_label_html(out, name, name_details) out.write('
') _write_estimator_html(out, est, '') out.write('
') # sk-parallel-item sk-serial - out.write('
') # sk-parallel sk-serial-item + out.write('
') # sk-parallel sk-item - elif est_html_info.type == 'single-meta': - out.write('
') - if name: - with config_context(print_changed_only=True): - name_details = str(estimator) - _write_label_html(out, name, name_details) + elif est_block.type == 'single-meta': + _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) + _write_named_label_html(out, estimator, name) out.write('
') - _write_estimator_html(out, est_html_info.estimators, - est_html_info.names) + _write_estimator_html(out, est_block.estimators, est_block.names) + # sk-parallel sk-parallel-item sk-item out.write('
') - elif est_html_info.type == 'single': - _write_label_html(out, est_html_info.names, - est_html_info.name_details, - outer_class="sk-serial-item", - inner_class="sk-estimator", + elif est_block.type == 'single': + _write_label_html(out, est_block.names, est_block.name_details, + outer_class="sk-item", inner_class="sk-estimator", checked=first_call) @@ -199,11 +214,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): display: flex; flex-direction: column; align-items: center; - float: left; background-color: white; } - -div.sk-serial-item { +div.sk-item { z-index: 1; } div.sk-parallel { @@ -231,9 +244,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): } div.sk-dashed-wrapped { border: 1px dashed gray; - margin: 0 0.2em 0.2em 0.2em; + margin: 0.2em; box-sizing: border-box; - padding-bottom: 0.2em; + padding-bottom: 0.1em; background-color: white; } div.sk-label label { @@ -244,8 +257,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): line-height: 1.2em; } div.sk-label-container { + position: relative; + z-index: 2; text-align: center; - z-index: 1; } div.sk-container { display: flex; diff --git a/sklearn/base.py b/sklearn/base.py index 77832cfc3e0ab..c0aeed9f620fe 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -21,7 +21,7 @@ from .utils.validation import check_X_y from .utils.validation import check_array from ._display_estimator import _estimator_repr_html -from ._display_estimator import _EstHTMLInfo +from ._display_estimator import _EstHTMLBlock _DEFAULT_TAGS = { 'non_deterministic': False, @@ -417,8 +417,8 @@ def _repr_html_(self): return _estimator_repr_html(self) def _sk_repr_html(self): - return _EstHTMLInfo('single', self, self.__class__.__name__, - str(self)) + return _EstHTMLBlock('single', self, self.__class__.__name__, + str(self)) class ClassifierMixin: diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index dbba05ff21078..c9462bc6cd58e 
100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,7 +15,7 @@ from joblib import Parallel, delayed from ..base import clone, TransformerMixin -from .._display_estimator import _EstHTMLInfo +from .._display_estimator import _EstHTMLBlock from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch @@ -633,7 +633,7 @@ def _hstack(self, Xs): def _sk_repr_html(self): names, transformers, name_details = zip(*self.transformers) - return _EstHTMLInfo('parallel', transformers, names, name_details) + return _EstHTMLBlock('parallel', transformers, names, name_details) def _check_X(X): diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 18976e4b6a7b8..67da335566780 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,7 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor -from .._display_estimator import _EstHTMLInfo +from .._display_estimator import _EstHTMLBlock from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble @@ -235,10 +235,11 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) - parallel = _EstHTMLInfo('parallel', estimators, names, None) - serial = _EstHTMLInfo('serial', (parallel, final_estimator), - ('', ''), None) - return _EstHTMLInfo('single-meta', serial, str(self), None) + parallel = _EstHTMLBlock('parallel', estimators, names, None, + dash_wrapped=False) + serial = _EstHTMLBlock('serial', (parallel, final_estimator), + ('', ''), None, dash_wrapped=False) + return _EstHTMLBlock('single-meta', serial, str(self), None) class StackingClassifier(ClassifierMixin, _BaseStacking): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 
cfaedfb18edc2..f87bdd6c4151a 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -31,7 +31,7 @@ from ..utils.multiclass import check_classification_targets from ..utils.validation import column_or_1d from ..exceptions import NotFittedError -from .._display_estimator import _EstHTMLInfo +from .._display_estimator import _EstHTMLBlock class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -106,7 +106,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - return _EstHTMLInfo('parallel', estimators, names, None) + return _EstHTMLBlock('parallel', estimators, names, None) class VotingClassifier(ClassifierMixin, _BaseVoting): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 3be08e32ad31c..b299d36f93887 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,7 +18,7 @@ from joblib import Parallel, delayed from .base import clone, TransformerMixin -from ._display_estimator import _EstHTMLInfo +from ._display_estimator import _EstHTMLBlock from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -636,7 +636,8 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.steps) - return _EstHTMLInfo('serial', estimators, names, None) + return _EstHTMLBlock('serial', estimators, names, None, + dash_wrapped=False) def _name_estimators(estimators): @@ -1017,7 +1018,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - return _EstHTMLInfo('parallel', transformers, names, None) + return _EstHTMLBlock('parallel', transformers, names, None) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index a9013afddeccb..7c35da0a6268e 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ 
-148,7 +148,7 @@ def test_display_estimator_pipeline(): '
SimpleImputer(missing_values=\'empty\', strategy=\'constant\')'
       '
', '(\'one-hot\', OneHotEncoder', - 'preprocessor', + 'preprocessor: ColumnTransformer', '
[\'a\', \'b\', \'c\', \'d\', \'e\']
', '
LogisticRegression(random_state=1)
', '
SelectPercentile()
', From 9df8a4b970b2acf1aef6173c37753ab49d4ad1b1 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 17 Mar 2020 18:56:23 -0400 Subject: [PATCH 36/81] CLN Improves code quality --- sklearn/_display_estimator.py | 64 ++++++++++++++----------- sklearn/ensemble/_stacking.py | 6 +-- sklearn/ensemble/_voting.py | 5 +- sklearn/pipeline.py | 17 +++++-- sklearn/tests/test_display_estimator.py | 12 +++-- 5 files changed, 66 insertions(+), 38 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 90fed34b944fb..78dfe66b879e0 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -7,22 +7,44 @@ class _EstHTMLBlock: """HTML Representation of Estimator - If type == 'single', then the parameters are single items representing the - single estimator - if type == 'parallel', then the paramters are list representing the - parallel estimators - if type == 'serial', then the parameters are list representing the serial - estimators - if type == 'single-meta', then parameters represent the wrapped estimator + Parameters + ---------- + type : {'serial', 'parallel', 'single'} + Type of HTML block + + estimators : list of estimators or _EstHTMLBlock + or a single estimator + If type is in ('parallel', 'serial'), then `estimators` is a list of + estimators. + If type == 'single', then `estimators` is a single estimator. + + names : list of str + If type in ('parallel', 'serial'), then `names` corresponds to + estimators + If type is 'single', then `names` is a single string corresponding to + the single estimator. + + name_details : list of str, str, or None, default=None + If type == 'parallel', then `name_details` corresponds to `names`. + If type == 'single', then `name_details` is a single string + corresponding to the single estimator. + `name_details` is not used when type == 'single'. + + dash_wrapped : bool, default=True + If true, wrapped HTML element will be wrapped with a dashed boarder. 
""" - def __init__(self, type, estimators, names, name_details, + def __init__(self, type, estimators, names, name_details=None, dash_wrapped=True): self.type = type self.estimators = estimators self.names = names - self.name_details = name_details self.dash_wrapped = dash_wrapped + if self.type == 'parallel' and name_details is None: + name_details = (None, ) * len(names) + + self.name_details = name_details + def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. @@ -38,8 +60,7 @@ def _type_of_html_estimator(estimator): hasattr(getattr(estimator, 'estimator'), 'get_params')): wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ - return _EstHTMLBlock('single-meta', wrapped_estimator, wrapped_name, - None) + return _EstHTMLBlock('serial', [wrapped_estimator], [wrapped_name]) return estimator._sk_repr_html() @@ -66,6 +87,7 @@ def _write_label_html(out, name, name_details, def _write_named_label_html(out, estimator, name): + """Write label with details based on name""" if not name or isinstance(estimator, _EstHTMLBlock): return with config_context(print_changed_only=True): @@ -74,6 +96,7 @@ def _write_named_label_html(out, estimator, name): def _write_sk_item(out, dash_wrapped=True): + """Write sk-item with or without sk-dashed-wrapped""" dash_cls = " sk-dashed-wrapped" if dash_wrapped else "" out.write(f'
') @@ -91,8 +114,6 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('
') est_infos = zip(est_block.estimators, est_block.names) for est, name in est_infos: - if name and not isinstance(est, _EstHTMLBlock): - name = f"{name}: {est.__class__.__name__}" _write_estimator_html(out, est, name) out.write('
') # sk-serial sk-item @@ -101,12 +122,8 @@ def _write_estimator_html(out, estimator, name, first_call=False): _write_named_label_html(out, estimator, name) out.write('
') - if est_block.name_details is None: - name_details = (None,) * len(est_block.estimators) - else: - name_details = est_block.name_details - - est_infos = zip(est_block.estimators, est_block.names, name_details) + est_infos = zip(est_block.estimators, est_block.names, + est_block.name_details) for est, name, name_details in est_infos: out.write('
') _write_label_html(out, name, name_details) @@ -115,14 +132,6 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('
') # sk-parallel-item sk-serial out.write('
') # sk-parallel sk-item - elif est_block.type == 'single-meta': - _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) - _write_named_label_html(out, estimator, name) - out.write('
') - _write_estimator_html(out, est_block.estimators, est_block.names) - # sk-parallel sk-parallel-item sk-item - out.write('
') - elif est_block.type == 'single': _write_label_html(out, est_block.names, est_block.name_details, outer_class="sk-item", inner_class="sk-estimator", @@ -248,6 +257,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): box-sizing: border-box; padding-bottom: 0.1em; background-color: white; + position: relative; } div.sk-label label { font-family: monospace; diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 67da335566780..750935bff8b8c 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -235,11 +235,11 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) - parallel = _EstHTMLBlock('parallel', estimators, names, None, + parallel = _EstHTMLBlock('parallel', estimators, names, dash_wrapped=False) serial = _EstHTMLBlock('serial', (parallel, final_estimator), - ('', ''), None, dash_wrapped=False) - return _EstHTMLBlock('single-meta', serial, str(self), None) + ('', ''), dash_wrapped=False) + return _EstHTMLBlock('serial', [serial], [str(self)]) class StackingClassifier(ClassifierMixin, _BaseStacking): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index f87bdd6c4151a..7391096e17e32 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -32,6 +32,7 @@ from ..utils.validation import column_or_1d from ..exceptions import NotFittedError from .._display_estimator import _EstHTMLBlock +from .._config import config_context class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -106,7 +107,9 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - return _EstHTMLBlock('parallel', estimators, names, None) + with config_context(print_changed_only=True): + name_details = [str(trans) for trans in estimators] + return _EstHTMLBlock('parallel', estimators, names, name_details) class VotingClassifier(ClassifierMixin, _BaseVoting): 
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index b299d36f93887..7bf00b4131657 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -19,6 +19,7 @@ from .base import clone, TransformerMixin from ._display_estimator import _EstHTMLBlock +from ._config import config_context from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -635,9 +636,15 @@ def n_features_in_(self): return self.steps[0][1].n_features_in_ def _sk_repr_html(self): - names, estimators = zip(*self.steps) - return _EstHTMLBlock('serial', estimators, names, None, - dash_wrapped=False) + _, estimators = zip(*self.steps) + + def _get_name(name, est): + if est is None or est == 'passthrough': + return f'{name}: passthrough' + # Is an estimator + return f'{name}: {est.__class__.__name__}' + names = [_get_name(name, est) for name, est in self.steps] + return _EstHTMLBlock('serial', estimators, names, dash_wrapped=False) def _name_estimators(estimators): @@ -1018,7 +1025,9 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - return _EstHTMLBlock('parallel', transformers, names, None) + with config_context(print_changed_only=True): + name_details = [str(trans) for trans in transformers] + return _EstHTMLBlock('parallel', transformers, names, name_details) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 7c35da0a6268e..720ffb860d8f4 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -61,12 +61,17 @@ def test_type_of_html_estimator_single_estimator(): def test_type_of_html_estimator_pipeline(): pipe = Pipeline([ ('imputer', SimpleImputer()), + ('do_nothing', 'passthrough'), + ('do_nothing_more', None), ('classifier', LogisticRegression()) ]) est_html_info = _type_of_html_estimator(pipe) assert 
est_html_info.type == 'serial' assert est_html_info.estimators == tuple(step[1] for step in pipe.steps) - assert est_html_info.names == ('imputer', 'classifier') + assert est_html_info.names == ['imputer: SimpleImputer', + 'do_nothing: passthrough', + 'do_nothing_more: passthrough', + 'classifier: LogisticRegression'] assert est_html_info.name_details is None @@ -79,7 +84,7 @@ def test_type_of_html_estimator_feature_union(): assert est_html_info.names == ('pca', 'svd') assert est_html_info.estimators == tuple( trans[1] for trans in f_union.transformer_list) - assert est_html_info.name_details is None + assert est_html_info.name_details == ['PCA()', 'TruncatedSVD()'] def test_type_of_html_estimator_voting(): @@ -92,7 +97,8 @@ def test_type_of_html_estimator_voting(): assert est_html_info.estimators == tuple(trans[1] for trans in clf.estimators) assert est_html_info.names == ('log_reg', 'mlp') - assert est_html_info.name_details is None + assert est_html_info.name_details == ['LogisticRegression()', + 'MLPClassifier()'] def test_type_of_html_estimator_column_transformer(): From 50ee0b4e635214750e74033cd942b3309f35babb Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 17 Mar 2020 20:10:12 -0400 Subject: [PATCH 37/81] STY Update --- sklearn/ensemble/_voting.py | 5 +---- sklearn/pipeline.py | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 7391096e17e32..5ce24ccae41ea 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -32,7 +32,6 @@ from ..utils.validation import column_or_1d from ..exceptions import NotFittedError from .._display_estimator import _EstHTMLBlock -from .._config import config_context class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -107,9 +106,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - with config_context(print_changed_only=True): - name_details = [str(trans) for 
trans in estimators] - return _EstHTMLBlock('parallel', estimators, names, name_details) + return _EstHTMLBlock('parallel', estimators, names) class VotingClassifier(ClassifierMixin, _BaseVoting): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 7bf00b4131657..7ad7dc87b05af 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -19,7 +19,6 @@ from .base import clone, TransformerMixin from ._display_estimator import _EstHTMLBlock -from ._config import config_context from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -1025,9 +1024,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - with config_context(print_changed_only=True): - name_details = [str(trans) for trans in transformers] - return _EstHTMLBlock('parallel', transformers, names, name_details) + return _EstHTMLBlock('parallel', transformers, names) def make_union(*transformers, **kwargs): From 212ba2112f82b261262be41dbd6c341f0d520099 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 17 Mar 2020 21:55:40 -0400 Subject: [PATCH 38/81] TST Fix --- sklearn/tests/test_display_estimator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 720ffb860d8f4..047ffd75d16d5 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -84,7 +84,7 @@ def test_type_of_html_estimator_feature_union(): assert est_html_info.names == ('pca', 'svd') assert est_html_info.estimators == tuple( trans[1] for trans in f_union.transformer_list) - assert est_html_info.name_details == ['PCA()', 'TruncatedSVD()'] + assert est_html_info.name_details == (None, None) def test_type_of_html_estimator_voting(): @@ -97,8 +97,7 @@ def test_type_of_html_estimator_voting(): assert est_html_info.estimators == tuple(trans[1] for 
trans in clf.estimators) assert est_html_info.names == ('log_reg', 'mlp') - assert est_html_info.name_details == ['LogisticRegression()', - 'MLPClassifier()'] + assert est_html_info.name_details == (None, None) def test_type_of_html_estimator_column_transformer(): From 2a81f160c4e176e6f3fd0c370aca2b12db315046 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 18 Mar 2020 13:38:37 -0400 Subject: [PATCH 39/81] WIP --- doc/modules/compose.rst | 2 +- doc/sphinxext/display_est_repr_html.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index aeb8a553737f9..366da54bd7d67 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -545,7 +545,7 @@ many estimators. An example of the HTML output can been seen below. .. note:: The HTML output of this code snippet can only been seen on the HTML version - of the docs. + of the documentation. .. display_estimator_repr_html:: diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index dc72e71390c6f..9a1cc5a7ec571 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -5,9 +5,10 @@ from docutils.parsers.rst import Directive from docutils import nodes from io import StringIO +from sphinx import addnodes -class DisplayReprEstimator(Directive): +class DisplayEstimatorRepr(Directive): "Execute Python code and includes stdout as HTML" has_content = True @@ -38,10 +39,20 @@ def run(self): input_code = nodes.literal_block(code, code) input_code['language'] = 'python' output.append(input_code) - code_results = nodes.raw('', code_results, format='html') - output.append(code_results) + + onlynode_html = addnodes.only(expr='html') + onlynode_html += nodes.raw('', code_results, format='html') + output.append(onlynode_html) + + onlynode_latex = addnodes.only(expr='latex') + onlynode_latex += nodes.raw('', code_results, format='html') + onlynode_latex += 
nodes.note('The HTML output of this code snippet ' + 'can only been seen on the HTML version ' + 'of the docs.') + output.append(onlynode_latex) + return output def setup(app): - app.add_directive('display_estimator_repr_html', DisplayReprEstimator) + app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr) From 6c11293b2ddf430d3962056828200860c854e593 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 18 Mar 2020 14:14:35 -0400 Subject: [PATCH 40/81] WIP --- sklearn/_display_estimator.py | 52 ++++++++++++------------- sklearn/base.py | 4 +- sklearn/compose/_column_transformer.py | 4 +- sklearn/ensemble/_stacking.py | 8 ++-- sklearn/ensemble/_voting.py | 4 +- sklearn/pipeline.py | 6 +-- sklearn/tests/test_display_estimator.py | 38 +++++++++--------- 7 files changed, 57 insertions(+), 59 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 78dfe66b879e0..d2b0db0792d92 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -4,63 +4,62 @@ import uuid -class _EstHTMLBlock: +class _VisualBlock: """HTML Representation of Estimator Parameters ---------- - type : {'serial', 'parallel', 'single'} - Type of HTML block + kind : {'serial', 'parallel', 'single'} + kind of HTML block - estimators : list of estimators or _EstHTMLBlock - or a single estimator - If type is in ('parallel', 'serial'), then `estimators` is a list of + estimators : list of estimators or `_VisualBlock`s or a single estimator + If kind is in ('parallel', 'serial'), then `estimators` is a list of estimators. - If type == 'single', then `estimators` is a single estimator. + If kind == 'single', then `estimators` is a single estimator. 
names : list of str - If type in ('parallel', 'serial'), then `names` corresponds to + If kind in ('parallel', 'serial'), then `names` corresponds to estimators - If type is 'single', then `names` is a single string corresponding to + If kind is 'single', then `names` is a single string corresponding to the single estimator. name_details : list of str, str, or None, default=None - If type == 'parallel', then `name_details` corresponds to `names`. - If type == 'single', then `name_details` is a single string + If kind == 'parallel', then `name_details` corresponds to `names`. + If kind == 'single', then `name_details` is a single string corresponding to the single estimator. - `name_details` is not used when type == 'single'. + `name_details` is not used when kind == 'single'. dash_wrapped : bool, default=True - If true, wrapped HTML element will be wrapped with a dashed boarder. + If true, wrapped HTML element will be wrapped with a dashed border. """ - def __init__(self, type, estimators, names, name_details=None, + def __init__(self, kind, estimators, names, name_details=None, dash_wrapped=True): - self.type = type + self.kind = kind self.estimators = estimators self.names = names self.dash_wrapped = dash_wrapped - if self.type == 'parallel' and name_details is None: + if self.kind == 'parallel' and name_details is None: name_details = (None, ) * len(names) self.name_details = name_details -def _type_of_html_estimator(estimator): +def _get_visual_block(estimator): """Generate information about how to display an estimator. 
""" - if isinstance(estimator, _EstHTMLBlock): + if isinstance(estimator, _VisualBlock): return estimator elif isinstance(estimator, str): - return _EstHTMLBlock('single', estimator, estimator, estimator) + return _VisualBlock('single', estimator, estimator, estimator) elif estimator is None: - return _EstHTMLBlock('single', estimator, 'None', 'None') + return _VisualBlock('single', estimator, 'None', 'None') # looks like a meta estimator elif (hasattr(estimator, 'estimator') and hasattr(getattr(estimator, 'estimator'), 'get_params')): wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ - return _EstHTMLBlock('serial', [wrapped_estimator], [wrapped_name]) + return _VisualBlock('serial', [wrapped_estimator], [wrapped_name]) return estimator._sk_repr_html() @@ -88,7 +87,7 @@ def _write_label_html(out, name, name_details, def _write_named_label_html(out, estimator, name): """Write label with details based on name""" - if not name or isinstance(estimator, _EstHTMLBlock): + if not name or isinstance(estimator, _VisualBlock): return with config_context(print_changed_only=True): name_details = str(estimator) @@ -105,9 +104,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ with config_context(print_changed_only=not first_call): - est_block = _type_of_html_estimator(estimator) + est_block = _get_visual_block(estimator) - if est_block.type == 'serial': + if est_block.kind == 'serial': _write_sk_item(out, dash_wrapped=first_call or est_block.dash_wrapped) _write_named_label_html(out, estimator, name) @@ -117,7 +116,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): _write_estimator_html(out, est, name) out.write('
') # sk-serial sk-item - elif est_block.type == 'parallel': + elif est_block.kind == 'parallel': _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) _write_named_label_html(out, estimator, name) out.write('
') @@ -132,7 +131,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('
') # sk-parallel-item sk-serial out.write('
') # sk-parallel sk-item - elif est_block.type == 'single': + elif est_block.kind == 'single': _write_label_html(out, est_block.names, est_block.name_details, outer_class="sk-item", inner_class="sk-estimator", checked=first_call) @@ -276,7 +275,6 @@ def _write_estimator_html(out, estimator, name, first_call=False): flex-direction: column; align-items: flex-start; position: relative; - float: left; } """.replace(' ', '').replace('\n', '') # noqa diff --git a/sklearn/base.py b/sklearn/base.py index c0aeed9f620fe..fb328076e4f8f 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -21,7 +21,7 @@ from .utils.validation import check_X_y from .utils.validation import check_array from ._display_estimator import _estimator_repr_html -from ._display_estimator import _EstHTMLBlock +from ._display_estimator import _VisualBlock _DEFAULT_TAGS = { 'non_deterministic': False, @@ -417,7 +417,7 @@ def _repr_html_(self): return _estimator_repr_html(self) def _sk_repr_html(self): - return _EstHTMLBlock('single', self, self.__class__.__name__, + return _VisualBlock('single', self, self.__class__.__name__, str(self)) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index c9462bc6cd58e..ac24c14262538 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,7 +15,7 @@ from joblib import Parallel, delayed from ..base import clone, TransformerMixin -from .._display_estimator import _EstHTMLBlock +from .._display_estimator import _VisualBlock from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch @@ -633,7 +633,7 @@ def _hstack(self, Xs): def _sk_repr_html(self): names, transformers, name_details = zip(*self.transformers) - return _EstHTMLBlock('parallel', transformers, names, name_details) + return _VisualBlock('parallel', transformers, names, name_details) def _check_X(X): diff --git 
a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 750935bff8b8c..6a2f8adad5c8e 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,7 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor -from .._display_estimator import _EstHTMLBlock +from .._display_estimator import _VisualBlock from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble @@ -235,11 +235,11 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) - parallel = _EstHTMLBlock('parallel', estimators, names, + parallel = _VisualBlock('parallel', estimators, names, dash_wrapped=False) - serial = _EstHTMLBlock('serial', (parallel, final_estimator), + serial = _VisualBlock('serial', (parallel, final_estimator), ('', ''), dash_wrapped=False) - return _EstHTMLBlock('serial', [serial], [str(self)]) + return _VisualBlock('serial', [serial], [str(self)]) class StackingClassifier(ClassifierMixin, _BaseStacking): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 5ce24ccae41ea..8665dea15884e 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -31,7 +31,7 @@ from ..utils.multiclass import check_classification_targets from ..utils.validation import column_or_1d from ..exceptions import NotFittedError -from .._display_estimator import _EstHTMLBlock +from .._display_estimator import _VisualBlock class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -106,7 +106,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - return _EstHTMLBlock('parallel', estimators, names) + return _VisualBlock('parallel', estimators, names) class VotingClassifier(ClassifierMixin, _BaseVoting): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 7ad7dc87b05af..37a7fc9c406a4 100644 
--- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,7 +18,7 @@ from joblib import Parallel, delayed from .base import clone, TransformerMixin -from ._display_estimator import _EstHTMLBlock +from ._display_estimator import _VisualBlock from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -643,7 +643,7 @@ def _get_name(name, est): # Is an estimator return f'{name}: {est.__class__.__name__}' names = [_get_name(name, est) for name, est in self.steps] - return _EstHTMLBlock('serial', estimators, names, dash_wrapped=False) + return _VisualBlock('serial', estimators, names, dash_wrapped=False) def _name_estimators(estimators): @@ -1024,7 +1024,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - return _EstHTMLBlock('parallel', transformers, names) + return _VisualBlock('parallel', transformers, names) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 047ffd75d16d5..aa36e469f277c 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -21,7 +21,7 @@ from sklearn.ensemble import StackingClassifier from sklearn.ensemble import StackingRegressor from sklearn._display_estimator import _write_label_html -from sklearn._display_estimator import _type_of_html_estimator +from sklearn._display_estimator import _get_visual_block from sklearn._display_estimator import _estimator_repr_html @@ -41,32 +41,32 @@ def test_write_label_html(checked): @pytest.mark.parametrize('est', ['passthrough', 'drop', None]) -def test_type_of_html_estimator_single_str_none(est): - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' +def test_get_visual_block_single_str_none(est): + est_html_info = _get_visual_block(est) + assert est_html_info.kind == 'single' assert 
est_html_info.estimators == est assert est_html_info.names == str(est) assert est_html_info.name_details == str(est) -def test_type_of_html_estimator_single_estimator(): +def test_get_visual_block_single_estimator(): est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' + est_html_info = _get_visual_block(est) + assert est_html_info.kind == 'single' assert est_html_info.estimators == est assert est_html_info.names == est.__class__.__name__ assert est_html_info.name_details == str(est) -def test_type_of_html_estimator_pipeline(): +def test_get_visual_block_pipeline(): pipe = Pipeline([ ('imputer', SimpleImputer()), ('do_nothing', 'passthrough'), ('do_nothing_more', None), ('classifier', LogisticRegression()) ]) - est_html_info = _type_of_html_estimator(pipe) - assert est_html_info.type == 'serial' + est_html_info = _get_visual_block(pipe) + assert est_html_info.kind == 'serial' assert est_html_info.estimators == tuple(step[1] for step in pipe.steps) assert est_html_info.names == ['imputer: SimpleImputer', 'do_nothing: passthrough', @@ -75,38 +75,38 @@ def test_type_of_html_estimator_pipeline(): assert est_html_info.name_details is None -def test_type_of_html_estimator_feature_union(): +def test_get_visual_block_feature_union(): f_union = FeatureUnion([ ('pca', PCA()), ('svd', TruncatedSVD()) ]) - est_html_info = _type_of_html_estimator(f_union) - assert est_html_info.type == 'parallel' + est_html_info = _get_visual_block(f_union) + assert est_html_info.kind == 'parallel' assert est_html_info.names == ('pca', 'svd') assert est_html_info.estimators == tuple( trans[1] for trans in f_union.transformer_list) assert est_html_info.name_details == (None, None) -def test_type_of_html_estimator_voting(): +def test_get_visual_block_voting(): clf = VotingClassifier([ ('log_reg', LogisticRegression()), ('mlp', MLPClassifier()) ]) - est_html_info = _type_of_html_estimator(clf) - assert est_html_info.type == 
'parallel' + est_html_info = _get_visual_block(clf) + assert est_html_info.kind == 'parallel' assert est_html_info.estimators == tuple(trans[1] for trans in clf.estimators) assert est_html_info.names == ('log_reg', 'mlp') assert est_html_info.name_details == (None, None) -def test_type_of_html_estimator_column_transformer(): +def test_get_visual_block_column_transformer(): ct = ColumnTransformer([ ('pca', PCA(), ['num1', 'num2']), ('svd', TruncatedSVD, [0, 3]) ]) - est_html_info = _type_of_html_estimator(ct) - assert est_html_info.type == 'parallel' + est_html_info = _get_visual_block(ct) + assert est_html_info.kind == 'parallel' assert est_html_info.estimators == tuple( trans[1] for trans in ct.transformers) assert est_html_info.names == ('pca', 'svd') From da83a68bd9ac4ad95a9a11fd087b6a43ed9f5c1d Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 18 Mar 2020 14:16:29 -0400 Subject: [PATCH 41/81] CLN Address comments --- sklearn/_display_estimator.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index d2b0db0792d92..258487d521f45 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -94,12 +94,6 @@ def _write_named_label_html(out, estimator, name): _write_label_html(out, name, name_details) -def _write_sk_item(out, dash_wrapped=True): - """Write sk-item with or without sk-dashed-wrapped""" - dash_cls = " sk-dashed-wrapped" if dash_wrapped else "" - out.write(f'
') - - def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ @@ -107,7 +101,10 @@ def _write_estimator_html(out, estimator, name, first_call=False): est_block = _get_visual_block(estimator) if est_block.kind == 'serial': - _write_sk_item(out, dash_wrapped=first_call or est_block.dash_wrapped) + dashed_wrapped = first_call or est_block.dash_wrapped + dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" + out.write(f'
') + _write_named_label_html(out, estimator, name) out.write('
') @@ -117,7 +114,10 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('
') # sk-serial sk-item elif est_block.kind == 'parallel': - _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) + dashed_wrapped = first_call or est_block.dash_wrapped + dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" + out.write(f'
') + _write_named_label_html(out, estimator, name) out.write('
') From 55a20e7ee3cad3e66949d16396ee31f76c2d3061 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 20 Mar 2020 14:06:54 -0400 Subject: [PATCH 42/81] ENH Update sphinx extension --- doc/modules/compose.rst | 5 ----- doc/sphinxext/display_est_repr_html.py | 25 ++++++++++++------------- sklearn/ensemble/_stacking.py | 6 +++--- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 366da54bd7d67..4f82d0bd8da8a 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -542,11 +542,6 @@ By default, estimators are displayed with a HTML representation when shown in a jupyter notebook. This can be useful to diagnose or visualize a Pipeline with many estimators. An example of the HTML output can been seen below. -.. note:: - - The HTML output of this code snippet can only been seen on the HTML version - of the documentation. - .. display_estimator_repr_html:: from sklearn.compose import ColumnTransformer diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index 9a1cc5a7ec571..d93b783d5a1d4 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -28,8 +28,7 @@ def execute(self, code): exec(code) sys.stdout, sys.stderr = orig_stdout, orig_stderr - return "".join(['
', - output.getvalue(), err.getvalue(), "
"]) + return f"{output.getvalue()}{err.getvalue()}" def run(self): output = [] @@ -40,17 +39,17 @@ def run(self): input_code['language'] = 'python' output.append(input_code) - onlynode_html = addnodes.only(expr='html') - onlynode_html += nodes.raw('', code_results, format='html') - output.append(onlynode_html) - - onlynode_latex = addnodes.only(expr='latex') - onlynode_latex += nodes.raw('', code_results, format='html') - onlynode_latex += nodes.note('The HTML output of this code snippet ' - 'can only been seen on the HTML version ' - 'of the docs.') - output.append(onlynode_latex) - + html_node = nodes.raw('', code_results, format='html') + output.append(html_node) + + code_results_latex = r""" + \begin{sphinxadmonition}{note}{Note:} + The HTML output of this code snippet can only been seen on the HTML + version of the documentation + \end{sphinxadmonition} + """ + latex_node = nodes.raw('', code_results_latex, format='latex') + output.append(latex_node) return output diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 6a2f8adad5c8e..1794de018189b 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -236,9 +236,9 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) parallel = _VisualBlock('parallel', estimators, names, - dash_wrapped=False) - serial = _VisualBlock('serial', (parallel, final_estimator), - ('', ''), dash_wrapped=False) + dash_wrapped=False) + serial = _VisualBlock('serial', (parallel, final_estimator), ('', ''), + dash_wrapped=False) return _VisualBlock('serial', [serial], [str(self)]) From 169964d436cfdfe7c549a314546f299cf280c177 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 20 Mar 2020 15:57:02 -0400 Subject: [PATCH 43/81] WIP --- doc/modules/compose.rst | 2 +- doc/sphinxext/display_est_repr_html.py | 6 +++++- .../compose/plot_column_transformer_mixed_types.py | 11 +++++++---- sklearn/_display_estimator.py | 
4 +--- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 4f82d0bd8da8a..81e219ee1a7d9 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -540,7 +540,7 @@ Visualizing Composite Estimators By default, estimators are displayed with a HTML representation when shown in a jupyter notebook. This can be useful to diagnose or visualize a Pipeline with -many estimators. An example of the HTML output can been seen below. +many estimators. An example of the HTML output can be seen below. .. display_estimator_repr_html:: diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index d93b783d5a1d4..b856d1045d654 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -5,7 +5,6 @@ from docutils.parsers.rst import Directive from docutils import nodes from io import StringIO -from sphinx import addnodes class DisplayEstimatorRepr(Directive): @@ -55,3 +54,8 @@ def run(self): def setup(app): app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr) + + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 037ff3fbc147a..b1c1efd96dab0 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -59,10 +59,6 @@ # * ``embarked``: categories encoded as strings ``{'C', 'S', 'Q'}``; # * ``sex``: categories encoded as strings ``{'female', 'male'}``; # * ``pclass``: ordinal integers ``{1, 2, 3}``. -# -# We create the preprocessing pipelines for both numeric and categorical data. -# A HTML visualization of the created pipeline can be found in -# :ref:`visualizing_composite_estimators`. 
numeric_features = ['age', 'fare'] numeric_transformer = Pipeline(steps=[ @@ -89,6 +85,13 @@ clf.fit(X_train, y_train) print("model score: %.3f" % clf.score(X_test, y_test)) +############################################################################### +# HTML representation of ``Pipeline`` +############################################################################### +# When the ``Pipeline`` is printed out in a jupyter notebook an HTML +# representation of the estimator is displayed as follows: +clf + ############################################################################### # Use ``ColumnTransformer`` by selecting column by data types ############################################################################### diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 258487d521f45..c0f1c59e0ee49 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -271,9 +271,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): text-align: center; } div.sk-container { - display: flex; - flex-direction: column; - align-items: flex-start; + display: inline-block; position: relative; } """.replace(' ', '').replace('\n', '') # noqa From 1e1bd1b44052a78601acc63eda5c1e93373643d2 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 20 Mar 2020 15:57:42 -0400 Subject: [PATCH 44/81] REV Less diffs --- examples/compose/plot_column_transformer_mixed_types.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index b1c1efd96dab0..920ef37bfb333 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -59,6 +59,8 @@ # * ``embarked``: categories encoded as strings ``{'C', 'S', 'Q'}``; # * ``sex``: categories encoded as strings ``{'female', 'male'}``; # * ``pclass``: ordinal integers ``{1, 2, 3}``. 
+# +# We create the preprocessing pipelines for both numeric and categorical data. numeric_features = ['age', 'fare'] numeric_transformer = Pipeline(steps=[ From ce0fc2c3c415d44127b935046567a39c34b6fe41 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sat, 21 Mar 2020 21:18:59 -0400 Subject: [PATCH 45/81] WIP --- sklearn/_display_estimator.py | 82 +++++++++++++++---------- sklearn/base.py | 4 -- sklearn/compose/_column_transformer.py | 3 +- sklearn/ensemble/_stacking.py | 6 +- sklearn/ensemble/_voting.py | 2 +- sklearn/pipeline.py | 5 +- sklearn/tests/test_display_estimator.py | 1 + 7 files changed, 58 insertions(+), 45 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index c0f1c59e0ee49..6a5caffcdea30 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,5 +1,6 @@ from sklearn._config import config_context from contextlib import closing +from contextlib import suppress from io import StringIO import uuid @@ -32,37 +33,22 @@ class _VisualBlock: dash_wrapped : bool, default=True If true, wrapped HTML element will be wrapped with a dashed border. """ - def __init__(self, kind, estimators, names, name_details=None, + def __init__(self, kind, estimators, *, names=None, name_details=None, dash_wrapped=True): self.kind = kind self.estimators = estimators - self.names = names self.dash_wrapped = dash_wrapped - if self.kind == 'parallel' and name_details is None: - name_details = (None, ) * len(names) + if self.kind in ('parallel', 'serial'): + if names is None: + names = (None, ) * len(estimators) + if name_details is None: + name_details = (None, ) * len(estimators) + self.names = names self.name_details = name_details -def _get_visual_block(estimator): - """Generate information about how to display an estimator. 
- """ - if isinstance(estimator, _VisualBlock): - return estimator - elif isinstance(estimator, str): - return _VisualBlock('single', estimator, estimator, estimator) - elif estimator is None: - return _VisualBlock('single', estimator, 'None', 'None') - # looks like a meta estimator - elif (hasattr(estimator, 'estimator') and - hasattr(getattr(estimator, 'estimator'), 'get_params')): - wrapped_estimator = getattr(estimator, 'estimator') - wrapped_name = wrapped_estimator.__class__.__name__ - return _VisualBlock('serial', [wrapped_estimator], [wrapped_name]) - return estimator._sk_repr_html() - - def _write_label_html(out, name, name_details, outer_class="sk-label-container", inner_class="sk-label", @@ -85,16 +71,36 @@ def _write_label_html(out, name, name_details, out.write('
') # outer_class inner_class -def _write_named_label_html(out, estimator, name): - """Write label with details based on name""" - if not name or isinstance(estimator, _VisualBlock): - return - with config_context(print_changed_only=True): - name_details = str(estimator) - _write_label_html(out, name, name_details) +def _get_visual_block(estimator): + """Generate information about how to display an estimator. + """ + with suppress(AttributeError): + return estimator._sk_repr_html() + + if isinstance(estimator, _VisualBlock): + return estimator + elif isinstance(estimator, str): + return _VisualBlock('single', estimator, + names=estimator, name_details=estimator) + elif estimator is None: + return _VisualBlock('single', estimator, + names='None', name_details='None') + + # check if estimator looks like a meta estimator wraps estimators + if hasattr(estimator, 'get_params'): + estimators = [] + for key, value in estimator.get_params().items(): + if '__' not in key and hasattr(value, 'get_params'): + estimators.append(value) + if len(estimators): + return _VisualBlock('parallel', estimators, names=None) + + return _VisualBlock('single', estimator, + names=estimator.__class__.__name__, + name_details=str(estimator)) -def _write_estimator_html(out, estimator, name, first_call=False): +def _write_estimator_html(out, estimator, estimator_label, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ with config_context(print_changed_only=not first_call): @@ -105,7 +111,10 @@ def _write_estimator_html(out, estimator, name, first_call=False): dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" out.write(f'
') - _write_named_label_html(out, estimator, name) + # write label of current if name is given + if estimator_label: + with config_context(print_changed_only=True): + _write_label_html(out, estimator_label, str(estimator)) out.write('
') est_infos = zip(est_block.estimators, est_block.names) @@ -118,14 +127,19 @@ def _write_estimator_html(out, estimator, name, first_call=False): dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" out.write(f'
') - _write_named_label_html(out, estimator, name) - out.write('
') + if estimator_label: + with config_context(print_changed_only=True): + _write_label_html(out, estimator_label, str(estimator)) + out.write('
') est_infos = zip(est_block.estimators, est_block.names, est_block.name_details) + for est, name, name_details in est_infos: out.write('
') - _write_label_html(out, name, name_details) + # name is associated with the parallel element + if name: + _write_label_html(out, name, name_details) out.write('
') _write_estimator_html(out, est, '') out.write('
') # sk-parallel-item sk-serial diff --git a/sklearn/base.py b/sklearn/base.py index fb328076e4f8f..22bce1cd9d6f6 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -416,10 +416,6 @@ def _validate_data(self, X, y=None, reset=True, **check_params): def _repr_html_(self): return _estimator_repr_html(self) - def _sk_repr_html(self): - return _VisualBlock('single', self, self.__class__.__name__, - str(self)) - class ClassifierMixin: """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index ac24c14262538..a501eaa145475 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -633,7 +633,8 @@ def _hstack(self, Xs): def _sk_repr_html(self): names, transformers, name_details = zip(*self.transformers) - return _VisualBlock('parallel', transformers, names, name_details) + return _VisualBlock('parallel', transformers, + names=names, name_details=name_details) def _check_X(X): diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 1794de018189b..09eca6f507e16 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -235,11 +235,11 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) - parallel = _VisualBlock('parallel', estimators, names, + parallel = _VisualBlock('parallel', estimators, names=names, dash_wrapped=False) - serial = _VisualBlock('serial', (parallel, final_estimator), ('', ''), + serial = _VisualBlock('serial', (parallel, final_estimator), dash_wrapped=False) - return _VisualBlock('serial', [serial], [str(self)]) + return _VisualBlock('serial', [serial]) class StackingClassifier(ClassifierMixin, _BaseStacking): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 8665dea15884e..551d89737f12c 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -106,7 
+106,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - return _VisualBlock('parallel', estimators, names) + return _VisualBlock('parallel', estimators, names=names) class VotingClassifier(ClassifierMixin, _BaseVoting): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 37a7fc9c406a4..666b703f6c04f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -643,7 +643,8 @@ def _get_name(name, est): # Is an estimator return f'{name}: {est.__class__.__name__}' names = [_get_name(name, est) for name, est in self.steps] - return _VisualBlock('serial', estimators, names, dash_wrapped=False) + return _VisualBlock('serial', estimators, + names=names, dash_wrapped=False) def _name_estimators(estimators): @@ -1024,7 +1025,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - return _VisualBlock('parallel', transformers, names) + return _VisualBlock('parallel', transformers, names=names) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index aa36e469f277c..c5dec08964d47 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -182,6 +182,7 @@ def test_stacking_classsifer(final_estimator): html_output = _estimator_repr_html(clf) assert "('mlp', MLPClassifier(alpha=0.001)" in html_output + assert "('tree', DecisionTreeClassifier()" in html_output if final_estimator is None: assert "LogisticRegression()" in html_output else: From 93da06040fc670f508416ab18e728feaa73e4ab2 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sun, 22 Mar 2020 14:59:16 -0400 Subject: [PATCH 46/81] CLN Address comments --- sklearn/_display_estimator.py | 56 +++++++++++++------------ sklearn/pipeline.py | 5 ++- sklearn/tests/test_display_estimator.py | 41 +++++++++++++----- 3 files changed, 64 insertions(+), 38 deletions(-) diff --git 
a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 6a5caffcdea30..1bef4f61de410 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,4 +1,5 @@ from sklearn._config import config_context +from sklearn._config import get_config from contextlib import closing from contextlib import suppress from io import StringIO @@ -14,24 +15,23 @@ class _VisualBlock: kind of HTML block estimators : list of estimators or `_VisualBlock`s or a single estimator - If kind is in ('parallel', 'serial'), then `estimators` is a list of + If kind != 'single', then `estimators` is a list of estimators. If kind == 'single', then `estimators` is a single estimator. names : list of str - If kind in ('parallel', 'serial'), then `names` corresponds to - estimators - If kind is 'single', then `names` is a single string corresponding to + If kind != 'single', then `names` corresponds to estimators. + If kind == 'single', then `names` is a single string corresponding to the single estimator. name_details : list of str, str, or None, default=None - If kind == 'parallel', then `name_details` corresponds to `names`. + If kind != 'single', then `name_details` corresponds to `names`. If kind == 'single', then `name_details` is a single string corresponding to the single estimator. - `name_details` is not used when kind == 'single'. dash_wrapped : bool, default=True If true, wrapped HTML element will be wrapped with a dashed border. + Only active when kind != 'single'. 
""" def __init__(self, kind, estimators, *, names=None, name_details=None, dash_wrapped=True): @@ -48,6 +48,9 @@ def __init__(self, kind, estimators, *, names=None, name_details=None, self.names = names self.name_details = name_details + def _sk_repr_html(self): + return self + def _write_label_html(out, name, name_details, outer_class="sk-label-container", @@ -77,9 +80,7 @@ def _get_visual_block(estimator): with suppress(AttributeError): return estimator._sk_repr_html() - if isinstance(estimator, _VisualBlock): - return estimator - elif isinstance(estimator, str): + if isinstance(estimator, str): return _VisualBlock('single', estimator, names=estimator, name_details=estimator) elif estimator is None: @@ -90,6 +91,7 @@ def _get_visual_block(estimator): if hasattr(estimator, 'get_params'): estimators = [] for key, value in estimator.get_params().items(): + # Only look at the estimators in the first layer if '__' not in key and hasattr(value, 'get_params'): estimators.append(value) if len(estimators): @@ -100,26 +102,30 @@ def _get_visual_block(estimator): name_details=str(estimator)) -def _write_estimator_html(out, estimator, estimator_label, first_call=False): +def _write_estimator_html(out, estimator, estimator_label, + estimator_label_details, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ - with config_context(print_changed_only=not first_call): + if first_call: est_block = _get_visual_block(estimator) + else: + # deeper calls will only show the changes + with config_context(print_changed_only=True): + est_block = _get_visual_block(estimator) if est_block.kind == 'serial': dashed_wrapped = first_call or est_block.dash_wrapped dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" out.write(f'
') - # write label of current if name is given if estimator_label: - with config_context(print_changed_only=True): - _write_label_html(out, estimator_label, str(estimator)) + _write_label_html(out, estimator_label, estimator_label_details) out.write('
') - est_infos = zip(est_block.estimators, est_block.names) - for est, name in est_infos: - _write_estimator_html(out, est, name) + est_infos = zip(est_block.estimators, est_block.names, + est_block.name_details) + for est, name, name_details in est_infos: + _write_estimator_html(out, est, name, name_details) out.write('
') # sk-serial sk-item elif est_block.kind == 'parallel': @@ -128,21 +134,17 @@ def _write_estimator_html(out, estimator, estimator_label, first_call=False): out.write(f'
') if estimator_label: - with config_context(print_changed_only=True): - _write_label_html(out, estimator_label, str(estimator)) + _write_label_html(out, estimator_label, estimator_label_details) out.write('
') est_infos = zip(est_block.estimators, est_block.names, est_block.name_details) - for est, name, name_details in est_infos: out.write('
') - # name is associated with the parallel element - if name: - _write_label_html(out, name, name_details) - out.write('
') - _write_estimator_html(out, est, '') - out.write('
') # sk-parallel-item sk-serial + # wrap element in a serial visualblock + serial_block = _VisualBlock('serial', [est], dash_wrapped=False) + _write_estimator_html(out, serial_block, name, name_details) + out.write('
') # sk-parallel-item out.write('
') # sk-parallel sk-item elif est_block.kind == 'single': @@ -311,7 +313,7 @@ def _estimator_repr_html(estimator): f'' f'
') _write_estimator_html(out, estimator, estimator.__class__.__name__, - first_call=True) + str(estimator), first_call=True) out.write('
') html_output = out.getvalue() diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 666b703f6c04f..f018f3c36ac9f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -643,8 +643,11 @@ def _get_name(name, est): # Is an estimator return f'{name}: {est.__class__.__name__}' names = [_get_name(name, est) for name, est in self.steps] + name_details = [str(est) for est in estimators] return _VisualBlock('serial', estimators, - names=names, dash_wrapped=False) + names=names, + name_details=name_details, + dash_wrapped=False) def _name_estimators(estimators): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index c5dec08964d47..b040c40332fa5 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -3,6 +3,7 @@ import pytest +from sklearn import config_context from sklearn.linear_model import LogisticRegression from sklearn.neural_network import MLPClassifier from sklearn.impute import SimpleImputer @@ -13,6 +14,8 @@ from sklearn.compose import ColumnTransformer from sklearn.ensemble import VotingClassifier from sklearn.feature_selection import SelectPercentile +from sklearn.cluster import Birch +from sklearn.cluster import AgglomerativeClustering from sklearn.preprocessing import OneHotEncoder from sklearn.svm import LinearSVC from sklearn.svm import LinearSVR @@ -72,7 +75,7 @@ def test_get_visual_block_pipeline(): 'do_nothing: passthrough', 'do_nothing_more: passthrough', 'classifier: LogisticRegression'] - assert est_html_info.name_details is None + assert est_html_info.name_details == [str(est) for _, est in pipe.steps] def test_get_visual_block_feature_union(): @@ -148,11 +151,8 @@ def test_display_estimator_pipeline(): expected_strings = [ 'passthrough', - 'div class=\"sk-toggleable__content\">
SimpleImputer'
-      '(strategy=\'median\')',
-      '
SimpleImputer(missing_values=\'empty\', strategy=\'constant\')'
-      '
', - '(\'one-hot\', OneHotEncoder', + 'div class=\"sk-toggleable__content\">
SimpleImputer(strategy',
+      '(\'one-hot\',',
       'preprocessor: ColumnTransformer',
       '
[\'a\', \'b\', \'c\', \'d\', \'e\']
', '
LogisticRegression(random_state=1)
', @@ -164,6 +164,8 @@ def test_display_estimator_pipeline(): for expected_string in expected_strings: assert expected_string in html_output + assert str(pipe) in html_output + def test_display_estimator_ovo_classifier(): ovo = OneVsOneClassifier(LinearSVC()) @@ -181,8 +183,7 @@ def test_stacking_classsifer(final_estimator): html_output = _estimator_repr_html(clf) - assert "('mlp', MLPClassifier(alpha=0.001)" in html_output - assert "('tree', DecisionTreeClassifier()" in html_output + assert str(clf) in html_output if final_estimator is None: assert "LogisticRegression()
" in html_output else: @@ -196,9 +197,29 @@ def test_stacking_regressor(final_estimator): html_output = _estimator_repr_html(reg) - assert "('svr', LinearSVR()" in html_output - print(html_output) + assert str(reg.estimators[0][0]) in html_output + assert "LinearSVR" in html_output if final_estimator is None: assert "RidgeCV" in html_output else: assert final_estimator.__class__.__name__ in html_output + + +def test_estimator_birch(): + # birch uses another estimator + birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3)) + html_output = _estimator_repr_html(birch) + + # inner estimator only prints the changes + assert '
AgglomerativeClustering(n_clusters=3)
' in html_output + assert '
Birch(' in html_output
+
+
+@pytest.mark.parametrize('print_changed_only', [True, False])
+def test_one_estimator_print_change_only(print_changed_only):
+    pca = PCA(n_components=10)
+
+    with config_context(print_changed_only=print_changed_only):
+        pca_repr = str(pca)
+        html_output = _estimator_repr_html(pca)
+        assert pca_repr in html_output

From 0a30cedfe03921ead76605c034cf2def5aeef12a Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 22 Mar 2020 15:03:54 -0400
Subject: [PATCH 47/81] STY Flake8

---
 sklearn/_display_estimator.py | 1 -
 sklearn/base.py               | 1 -
 2 files changed, 2 deletions(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index 1bef4f61de410..3abd7724975c4 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -1,5 +1,4 @@
 from sklearn._config import config_context
-from sklearn._config import get_config
 from contextlib import closing
 from contextlib import suppress
 from io import StringIO
diff --git a/sklearn/base.py b/sklearn/base.py
index 22bce1cd9d6f6..08b0fc820705d 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -21,7 +21,6 @@
 from .utils.validation import check_X_y
 from .utils.validation import check_array
 from ._display_estimator import _estimator_repr_html
-from ._display_estimator import _VisualBlock
 
 _DEFAULT_TAGS = {
     'non_deterministic': False,

From f74053752f6bbd6ac325c4625f67ffbee9390d4d Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Thu, 26 Mar 2020 19:41:00 -0400
Subject: [PATCH 48/81] CLN More refactoring

---
 sklearn/_display_estimator.py | 36 +++++++++++++----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index 3abd7724975c4..cedcd38d870f6 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -112,7 +112,7 @@ def _write_estimator_html(out, estimator, estimator_label,
         with config_context(print_changed_only=True):
             est_block = _get_visual_block(estimator)
 
-    if est_block.kind == 'serial':
+    if est_block.kind in ('serial', 'parallel'):
         dashed_wrapped = first_call or est_block.dash_wrapped
         dash_cls = " sk-dashed-wrapped" if dashed_wrapped else ""
         out.write(f'
') @@ -120,32 +120,22 @@ def _write_estimator_html(out, estimator, estimator_label, if estimator_label: _write_label_html(out, estimator_label, estimator_label_details) - out.write('
') + kind = est_block.kind + out.write(f'
') est_infos = zip(est_block.estimators, est_block.names, est_block.name_details) - for est, name, name_details in est_infos: - _write_estimator_html(out, est, name, name_details) - out.write('
') # sk-serial sk-item - - elif est_block.kind == 'parallel': - dashed_wrapped = first_call or est_block.dash_wrapped - dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" - out.write(f'
') - if estimator_label: - _write_label_html(out, estimator_label, estimator_label_details) - - out.write('
') - est_infos = zip(est_block.estimators, est_block.names, - est_block.name_details) for est, name, name_details in est_infos: - out.write('
') - # wrap element in a serial visualblock - serial_block = _VisualBlock('serial', [est], dash_wrapped=False) - _write_estimator_html(out, serial_block, name, name_details) - out.write('
') # sk-parallel-item - out.write('
') # sk-parallel sk-item - + if kind == 'serial': + _write_estimator_html(out, est, name, name_details) + else: # parallel + out.write('
') + # wrap element in a serial visualblock + serial_block = _VisualBlock('serial', [est], dash_wrapped=False) + _write_estimator_html(out, serial_block, name, name_details) + out.write('
') # sk-parallel-item + + out.write('
') elif est_block.kind == 'single': _write_label_html(out, est_block.names, est_block.name_details, outer_class="sk-item", inner_class="sk-estimator", From f656d8bb274f5c7afb6bce69e63e836dfaac2905 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Thu, 26 Mar 2020 21:54:16 -0400 Subject: [PATCH 49/81] CLN Outputs repr in latex --- doc/sphinxext/display_est_repr_html.py | 36 ++++++++++++++++---------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index b856d1045d654..4868a10e2fb3b 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -2,22 +2,22 @@ Primary used to display the html output of `_repr_html_` of estimators """ import sys -from docutils.parsers.rst import Directive +from sphinx.util.docutils import SphinxDirective from docutils import nodes from io import StringIO -class DisplayEstimatorRepr(Directive): +class DisplayEstimatorRepr(SphinxDirective): "Execute Python code and includes stdout as HTML" has_content = True required_arguments = 0 optional_arguments = 0 - def execute(self, code): + def execute(self, code, format_str): code_parts = code.split('\n') final_output = code_parts[-1] - code_parts[-1] = f'print({final_output}._repr_html_())' + code_parts[-1] = format_str.format(final_output) code = '\n'.join(code_parts) orig_stdout, orig_stderr = sys.stdout, sys.stderr @@ -32,23 +32,31 @@ def execute(self, code): def run(self): output = [] code = "\n".join(self.content) - code_results = self.execute(code) + html_repr = self.execute(code, format_str='print({}._repr_html_())') input_code = nodes.literal_block(code, code) input_code['language'] = 'python' output.append(input_code) - html_node = nodes.raw('', code_results, format='html') + html_node = nodes.raw('', html_repr, format='html') output.append(html_node) - code_results_latex = r""" - \begin{sphinxadmonition}{note}{Note:} - The HTML output of this code snippet 
can only been seen on the HTML - version of the documentation - \end{sphinxadmonition} - """ - latex_node = nodes.raw('', code_results_latex, format='latex') - output.append(latex_node) + if self.env.app.builder.name == 'latex': + code_results_latex = r""" + \begin{sphinxadmonition}{note}{Note:} + The HTML output of this code snippet can only been seen on the HTML + version of the documentation. The following is a string + representation. + \end{sphinxadmonition} + """ + latex_node = nodes.raw('', code_results_latex, format='latex') + output.append(latex_node) + + str_repr = self.execute(code, format_str='print(repr({}))') + str_repr_node = nodes.literal_block(str_repr, str_repr) + str_repr_node['language'] = 'python' + output.append(str_repr_node) + return output From 856ce5d62cd6bcc4e46c211ccc6ae2c7237e9cda Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 27 Mar 2020 11:50:22 -0400 Subject: [PATCH 50/81] CLN Adds more tests --- sklearn/tests/test_display_estimator.py | 98 ++++++++++++++++++------- 1 file changed, 70 insertions(+), 28 deletions(-) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index b040c40332fa5..9182a1e219044 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -1,5 +1,6 @@ from contextlib import closing from io import StringIO +from itertools import chain import pytest @@ -23,6 +24,8 @@ from sklearn.multiclass import OneVsOneClassifier from sklearn.ensemble import StackingClassifier from sklearn.ensemble import StackingRegressor +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RationalQuadratic from sklearn._display_estimator import _write_label_html from sklearn._display_estimator import _get_visual_block from sklearn._display_estimator import _estimator_repr_html @@ -30,6 +33,7 @@ @pytest.mark.parametrize("checked", [True, False]) def test_write_label_html(checked): + # Test 
checking logic and labeling name = "LogisticRegression" tool_tip = "hello-world" @@ -45,6 +49,7 @@ def test_write_label_html(checked): @pytest.mark.parametrize('est', ['passthrough', 'drop', None]) def test_get_visual_block_single_str_none(est): + # Test estimators that are represnted by strings est_html_info = _get_visual_block(est) assert est_html_info.kind == 'single' assert est_html_info.estimators == est @@ -125,7 +130,7 @@ def test_display_estimator_pipeline(): cat_trans = Pipeline(steps=[ ('imputer', SimpleImputer(strategy='constant', missing_values='empty')), - ('one-hot', OneHotEncoder()) + ('one-hot', OneHotEncoder(drop='first')) ]) preprocess = ColumnTransformer([ @@ -149,29 +154,38 @@ def test_display_estimator_pipeline(): ]) html_output = _estimator_repr_html(pipe) - expected_strings = [ - 'passthrough', - 'div class=\"sk-toggleable__content\">
SimpleImputer(strategy',
-      '(\'one-hot\',',
-      'preprocessor: ColumnTransformer',
-      '
[\'a\', \'b\', \'c\', \'d\', \'e\']
', - '
LogisticRegression(random_state=1)
', - '
SelectPercentile()
', - '>TruncatedSVD', - '
TruncatedSVD(n_components=3)',
-    ]
-
-    for expected_string in expected_strings:
-        assert expected_string in html_output
-
+    # top level estimators show estimator with changes
     assert str(pipe) in html_output
+    for _, est in pipe.steps:
+        assert (f"
" + f"
{str(est)}") in html_output
 
+    # all other estimators are shown with only their changes
+    with config_context(print_changed_only=True):
+        assert str(num_trans['pass']) in html_output
+        assert 'passthrough' in html_output
+        assert str(num_trans['imputer']) in html_output
 
-def test_display_estimator_ovo_classifier():
-    ovo = OneVsOneClassifier(LinearSVC())
-    html_output = _estimator_repr_html(ovo)
-    assert "pre>OneVsOneClassifier(estimator=LinearSVC" in html_output
-    assert "LinearSVC" in html_output
+        for _, _, cols in preprocess.transformers:
+            assert f"
{cols}
" in html_output + + # feature union + for name, _ in feat_u.transformer_list: + assert f"" in html_output + + pca = feat_u.transformer_list[0][1] + assert f"
{str(pca)}
" in html_output + + tsvd = feat_u.transformer_list[1][1] + first = tsvd['first'] + select = tsvd['select'] + assert f"
{str(first)}
" in html_output + assert f"
{str(select)}
" in html_output + + # voting classifer + for name, est in clf.estimators: + assert f"" in html_output + assert f"
{str(est)}
" in html_output @pytest.mark.parametrize("final_estimator", [None, LinearSVC()]) @@ -189,12 +203,10 @@ def test_stacking_classsifer(final_estimator): else: assert final_estimator.__class__.__name__ in html_output - @pytest.mark.parametrize("final_estimator", [None, LinearSVR()]) def test_stacking_regressor(final_estimator): reg = StackingRegressor( estimators=[('svr', LinearSVR())], final_estimator=final_estimator) - html_output = _estimator_repr_html(reg) assert str(reg.estimators[0][0]) in html_output @@ -205,14 +217,44 @@ def test_stacking_regressor(final_estimator): assert final_estimator.__class__.__name__ in html_output -def test_estimator_birch(): - # birch uses another estimator +def test_birch_duck_typing_meta(): + # Test duck typing meta estimators with Birch birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3)) html_output = _estimator_repr_html(birch) - # inner estimator only prints the changes - assert '
AgglomerativeClustering(n_clusters=3)
' in html_output - assert '
Birch(' in html_output
+    # inner estimator shows only the changes
+    with config_context(print_changed_only=True):
+        assert f"
{str(birch.n_clusters)}" in html_output
+        assert "AgglomerativeClustering" in html_output
+
+    # outer estimator contains all changes
+    assert f"
{str(birch)}" in html_output
+
+
+def test_ovo_classifier_duck_typing_meta():
+    # Test duck typing metaestimators with OVO
+    ovo = OneVsOneClassifier(LinearSVC(penalty='l1'))
+    html_output = _estimator_repr_html(ovo)
+
+    # inner estimator shows only the changes
+    with config_context(print_changed_only=True):
+        assert f"
{str(ovo.estimator)}" in html_output
+        assert "LinearSVC" in html_output
+
+    # outer estimator
+    assert f"
{str(ovo)}" in html_output
+
+
+def test_duck_typing_nested_estimator():
+    # Test duck typing metaestimators with GP
+    kernel = 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1)
+    gp = GaussianProcessRegressor(kernel=kernel)
+    html_output = _estimator_repr_html(gp)
+
+    with config_context(print_changed_only=True):
+        assert f"
{str(gp.kernel)}" in html_output
+
+    assert f"
{str(gp)}" in html_output
 
 
 @pytest.mark.parametrize('print_changed_only', [True, False])

From b5c26b0ea05b315111fe6f16cc28b6e2ea226c1d Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Fri, 27 Mar 2020 11:56:38 -0400
Subject: [PATCH 51/81] STY Update

---
 sklearn/_display_estimator.py           | 3 ++-
 sklearn/tests/test_display_estimator.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index cedcd38d870f6..a930f00916951 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -131,7 +131,8 @@ def _write_estimator_html(out, estimator, estimator_label,
             else:  # parallel
                 out.write('
') # wrap element in a serial visualblock - serial_block = _VisualBlock('serial', [est], dash_wrapped=False) + serial_block = _VisualBlock('serial', [est], + dash_wrapped=False) _write_estimator_html(out, serial_block, name, name_details) out.write('
') # sk-parallel-item diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 9182a1e219044..da4cb929fc1c1 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -1,6 +1,5 @@ from contextlib import closing from io import StringIO -from itertools import chain import pytest @@ -203,6 +202,7 @@ def test_stacking_classsifer(final_estimator): else: assert final_estimator.__class__.__name__ in html_output + @pytest.mark.parametrize("final_estimator", [None, LinearSVR()]) def test_stacking_regressor(final_estimator): reg = StackingRegressor( From f56060c73a4f2f5940f5da459f2e161a6c69bd17 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 30 Mar 2020 19:46:14 -0400 Subject: [PATCH 52/81] TST Fix --- sklearn/tests/test_display_estimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index da4cb929fc1c1..bfd030dcbb604 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -247,12 +247,12 @@ def test_ovo_classifier_duck_typing_meta(): def test_duck_typing_nested_estimator(): # Test duck typing metaestimators with GP - kernel = 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1) + kernel = RationalQuadratic(length_scale=1.0, alpha=0.1) gp = GaussianProcessRegressor(kernel=kernel) html_output = _estimator_repr_html(gp) with config_context(print_changed_only=True): - assert f"
{str(gp.kernel)}" in html_output
+        assert f"
{str(kernel)}" in html_output
 
     assert f"
{str(gp)}" in html_output
 

From 52a640a529d304e7bd3f19a24f83530e4728eaa0 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 13 Apr 2020 14:19:30 -0400
Subject: [PATCH 53/81] CLN Move to utils

---
 sklearn/base.py                                     |  2 +-
 sklearn/compose/_column_transformer.py              |  2 +-
 sklearn/ensemble/_stacking.py                       |  2 +-
 sklearn/ensemble/_voting.py                         |  2 +-
 sklearn/pipeline.py                                 |  2 +-
 sklearn/utils/__init__.py                           |  3 ++-
 sklearn/{ => utils}/_display_estimator.py           | 11 ++++++-----
 sklearn/{ => utils}/tests/test_display_estimator.py |  6 +++---
 8 files changed, 16 insertions(+), 14 deletions(-)
 rename sklearn/{ => utils}/_display_estimator.py (97%)
 rename sklearn/{ => utils}/tests/test_display_estimator.py (98%)

diff --git a/sklearn/base.py b/sklearn/base.py
index 08b0fc820705d..f4bf765afc5e4 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -20,7 +20,7 @@
 from .utils import _IS_32BIT
 from .utils.validation import check_X_y
 from .utils.validation import check_array
-from ._display_estimator import _estimator_repr_html
+from .utils._display_estimator import _estimator_repr_html
 
 _DEFAULT_TAGS = {
     'non_deterministic': False,
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index a501eaa145475..e1504cd91ff8f 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -15,7 +15,7 @@
 from joblib import Parallel, delayed
 
 from ..base import clone, TransformerMixin
-from .._display_estimator import _VisualBlock
+from ..utils._display_estimator import _VisualBlock
 from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
 from ..preprocessing import FunctionTransformer
 from ..utils import Bunch
diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
index 479dd4cc55fe8..520a52634e1c4 100644
--- a/sklearn/ensemble/_stacking.py
+++ b/sklearn/ensemble/_stacking.py
@@ -13,7 +13,7 @@
 from ..base import clone
 from ..base import ClassifierMixin, RegressorMixin, TransformerMixin
 from ..base import is_classifier, is_regressor
-from .._display_estimator import _VisualBlock
+from ..utils._display_estimator import _VisualBlock
 
 from ._base import _fit_single_estimator
 from ._base import _BaseHeterogeneousEnsemble
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index 23c8e177bd3a0..6a7f465c5f77d 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -32,7 +32,7 @@
 from ..utils.validation import column_or_1d
 from ..utils.validation import _deprecate_positional_args
 from ..exceptions import NotFittedError
-from .._display_estimator import _VisualBlock
+from ..utils._display_estimator import _VisualBlock
 
 
 class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 0bc0087ace115..beb99a61fe44d 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -18,7 +18,7 @@
 from joblib import Parallel, delayed
 
 from .base import clone, TransformerMixin
-from ._display_estimator import _VisualBlock
+from .utils._display_estimator import _VisualBlock
 from .utils.metaestimators import if_delegate_has_method
 from .utils import Bunch, _print_elapsed_time
 from .utils.validation import check_memory
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index aac6e292a198a..559fb39a5406d 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -30,6 +30,7 @@
                          check_random_state, column_or_1d, check_array,
                          check_consistent_length, check_X_y, indexable,
                          check_symmetric, check_scalar)
+from ._display_estimator import estimator_repr_html
 from .. import get_config
 
 
@@ -51,7 +52,7 @@
            "check_symmetric", "indices_to_mask", "deprecated",
            "parallel_backend", "register_parallel_backend",
            "resample", "shuffle", "check_matplotlib_support", "all_estimators",
-           "DataConversionWarning"
+           "DataConversionWarning", "estimator_repr_html"
            ]
 
 IS_PYPY = platform.python_implementation() == 'PyPy'
diff --git a/sklearn/_display_estimator.py b/sklearn/utils/_display_estimator.py
similarity index 97%
rename from sklearn/_display_estimator.py
rename to sklearn/utils/_display_estimator.py
index a930f00916951..44a2251566f4d 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -283,8 +283,10 @@ def _write_estimator_html(out, estimator, estimator_label,
 """.replace('  ', '').replace('\n', '')  # noqa
 
 
-def _estimator_repr_html(estimator):
-    """Build a HTML representation of an estimator
+def estimator_repr_html(estimator):
+    """Build a HTML representation of an estimator.
+
+    Read more in the :ref:`User Guide `.
 
     Parameters
     ----------
@@ -293,9 +295,8 @@ def _estimator_repr_html(estimator):
 
     Returns
     -------
-    html: str or iPython HTML object
-        HTML representation of estimator. When called in jupyter notebook or
-        lab, a iPython HTML object is returned.
+    html: str
+        HTML representation of estimator.
     """
     with closing(StringIO()) as out:
         out.write(f''
diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py
similarity index 98%
rename from sklearn/tests/test_display_estimator.py
rename to sklearn/utils/tests/test_display_estimator.py
index bfd030dcbb604..b4305cde23537 100644
--- a/sklearn/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_display_estimator.py
@@ -25,9 +25,9 @@
 from sklearn.ensemble import StackingRegressor
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels import RationalQuadratic
-from sklearn._display_estimator import _write_label_html
-from sklearn._display_estimator import _get_visual_block
-from sklearn._display_estimator import _estimator_repr_html
+from sklearn.utils._display_estimator import _write_label_html
+from sklearn.utils._display_estimator import _get_visual_block
+from sklearn.utils._display_estimator import _estimator_repr_html
 
 
 @pytest.mark.parametrize("checked", [True, False])

From 66ebce904c6581bfd148de9c8c28aa9a4e453376 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 13 Apr 2020 14:44:56 -0400
Subject: [PATCH 54/81] DOC Adds html representation in another example

---
 examples/compose/plot_column_transformer.py | 15 ++++++++++++++-
 sklearn/base.py                             |  9 +++++++++
 sklearn/utils/__init__.py                   |  1 -
 sklearn/utils/_display_estimator.py         |  2 +-
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py
index 0cfc9f5de0054..aff1876cfdf3e 100644
--- a/examples/compose/plot_column_transformer.py
+++ b/examples/compose/plot_column_transformer.py
@@ -115,7 +115,20 @@ def transform(self, posts):
     ('svc', LinearSVC(dual=False)),
 ], verbose=True)
 
-# limit the list of categories to make running this example faster.
+###############################################################################
+# HTML representation of ``Pipeline``
+###############################################################################
+# When the ``Pipeline`` is printed out in a jupyter notebook an HTML
+# representation of the estimator is displayed as follows:
+pipeline
+
+###############################################################################
+# Classification Report
+###############################################################################
+# Finally, the pipeline is trained and a classification report is generated
+# on a testing subset. We limit the list of categories to make running this
+# example faster.
+
 categories = ['alt.atheism', 'talk.religion.misc']
 X_train, y_train = fetch_20newsgroups(random_state=1,
                                       subset='train',
diff --git a/sklearn/base.py b/sklearn/base.py
index f4bf765afc5e4..35b6402494740 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -413,6 +413,15 @@ def _validate_data(self, X, y=None, reset=True, **check_params):
         return out
 
     def _repr_html_(self):
+        """Build a HTML representation of an estimator.
+
+        Read more in the :ref:`User Guide `.
+
+        Returns
+        -------
+        html: str
+            HTML representation of estimator.
+        """
         return _estimator_repr_html(self)
 
 
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 559fb39a5406d..7ff6df15f8fd8 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -30,7 +30,6 @@
                          check_random_state, column_or_1d, check_array,
                          check_consistent_length, check_X_y, indexable,
                          check_symmetric, check_scalar)
-from ._display_estimator import estimator_repr_html
 from .. import get_config
 
 
diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index 44a2251566f4d..b61558a9dc90a 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -283,7 +283,7 @@ def _write_estimator_html(out, estimator, estimator_label,
 """.replace('  ', '').replace('\n', '')  # noqa
 
 
-def estimator_repr_html(estimator):
+def _estimator_repr_html(estimator):
     """Build a HTML representation of an estimator.
 
     Read more in the :ref:`User Guide `.

From 24029d3dd3b93da7a494ba8c75f069c9f05d7949 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Tue, 14 Apr 2020 13:19:51 -0400
Subject: [PATCH 55/81] DOC Adds reference to _repr_html_

---
 doc/modules/compose.rst                | 6 ++++++
 doc/sphinxext/display_est_repr_html.py | 7 ++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 39b3c4abcc177..44c542ecc0652 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -564,6 +564,12 @@ many estimators. An example of the HTML output can be seen below.
                           ('classifier', LogisticRegression())])
    clf
 
+As an alternative, the HTML can be written to a file using the `_repr_html_`
+method::
+
+        with open('my_estimator.html', 'w') as f:
+            f.write(clf._repr_html_())
+
 .. topic:: Examples:
 
  * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py`
diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py
index 4868a10e2fb3b..710c334d99e24 100644
--- a/doc/sphinxext/display_est_repr_html.py
+++ b/doc/sphinxext/display_est_repr_html.py
@@ -38,6 +38,7 @@ def run(self):
         input_code['language'] = 'python'
         output.append(input_code)
 
+        html_repr = f"

{html_repr}

" html_node = nodes.raw('', html_repr, format='html') output.append(html_node) @@ -62,8 +63,4 @@ def run(self): def setup(app): app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr) - - return { - 'parallel_read_safe': True, - 'parallel_write_safe': True, - } + return {'parallel_read_safe': True, 'parallel_write_safe': True} From adc977be2c54594110395eacde786d0a8ff4b8b7 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 14 Apr 2020 13:44:05 -0400 Subject: [PATCH 56/81] FIx bug --- sklearn/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 7ff6df15f8fd8..aac6e292a198a 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -51,7 +51,7 @@ "check_symmetric", "indices_to_mask", "deprecated", "parallel_backend", "register_parallel_backend", "resample", "shuffle", "check_matplotlib_support", "all_estimators", - "DataConversionWarning", "estimator_repr_html" + "DataConversionWarning" ] IS_PYPY = platform.python_implementation() == 'PyPy' From 9df7573099ffb38a8ae7d81e4a66f25fa81c5b00 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sun, 19 Apr 2020 11:04:31 -0400 Subject: [PATCH 57/81] CLN Rename secret protocol --- doc/modules/compose.rst | 4 ++-- sklearn/compose/_column_transformer.py | 2 +- sklearn/ensemble/_stacking.py | 10 +++++----- sklearn/ensemble/_voting.py | 2 +- sklearn/pipeline.py | 4 ++-- sklearn/utils/_display_estimator.py | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 44c542ecc0652..db4f05801038d 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -567,8 +567,8 @@ many estimators. An example of the HTML output can be seen below. 
As an alternative, the HTML can be written to a file using the `_repr_html_` method:: - with open('my_estimator.html', 'w') as f: - f.write(clf._repr_html_()) + with open('my_estimator.html', 'w') as f: + f.write(clf._repr_html_()) .. topic:: Examples: diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index e1504cd91ff8f..93fcba1d3c0b8 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -631,7 +631,7 @@ def _hstack(self, Xs): Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs] return np.hstack(Xs) - def _sk_repr_html(self): + def _sk_visual_block_(self): names, transformers, name_details = zip(*self.transformers) return _VisualBlock('parallel', transformers, names=names, name_details=name_details) diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 520a52634e1c4..ac2e79638096e 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -234,7 +234,7 @@ def predict(self, X, **predict_params): self.transform(X), **predict_params ) - def _sk_repr_html(self, final_estimator): + def _sk_visual_block_(self, final_estimator): names, estimators = zip(*self.estimators) parallel = _VisualBlock('parallel', estimators, names=names, dash_wrapped=False) @@ -505,12 +505,12 @@ def transform(self, X): """ return self._transform(X) - def _sk_repr_html(self): + def _sk_visual_block_(self): if self.final_estimator is None: final_estimator = LogisticRegression() else: final_estimator = self.final_estimator - return super()._sk_repr_html(final_estimator) + return super()._sk_visual_block_(final_estimator) class StackingRegressor(RegressorMixin, _BaseStacking): @@ -682,9 +682,9 @@ def transform(self, X): """ return self._transform(X) - def _sk_repr_html(self): + def _sk_visual_block_(self): if self.final_estimator is None: final_estimator = RidgeCV() else: final_estimator = self.final_estimator - return 
super()._sk_repr_html(final_estimator) + return super()._sk_visual_block_(final_estimator) diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 6a7f465c5f77d..31f7228d34091 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -105,7 +105,7 @@ def n_features_in_(self): return self.estimators_[0].n_features_in_ - def _sk_repr_html(self): + def _sk_visual_block_(self): names, estimators = zip(*self.estimators) return _VisualBlock('parallel', estimators, names=names) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index beb99a61fe44d..2ee07edb7f079 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -620,7 +620,7 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ - def _sk_repr_html(self): + def _sk_visual_block_(self): _, estimators = zip(*self.steps) def _get_name(name, est): @@ -1012,7 +1012,7 @@ def n_features_in_(self): # X is passed to all transformers so we just delegate to the first one return self.transformer_list[0][1].n_features_in_ - def _sk_repr_html(self): + def _sk_visual_block_(self): names, transformers = zip(*self.transformer_list) return _VisualBlock('parallel', transformers, names=names) diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py index b61558a9dc90a..3ab58ab44c5aa 100644 --- a/sklearn/utils/_display_estimator.py +++ b/sklearn/utils/_display_estimator.py @@ -47,7 +47,7 @@ def __init__(self, kind, estimators, *, names=None, name_details=None, self.names = names self.name_details = name_details - def _sk_repr_html(self): + def _sk_visual_block_(self): return self @@ -77,7 +77,7 @@ def _get_visual_block(estimator): """Generate information about how to display an estimator. 
""" with suppress(AttributeError): - return estimator._sk_repr_html() + return estimator._sk_visual_block_() if isinstance(estimator, str): return _VisualBlock('single', estimator, From ef025749c0eb925a0209e8a064c683fcde733c51 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 22 Apr 2020 19:56:13 -0400 Subject: [PATCH 58/81] CLN Address comments --- doc/sphinxext/display_est_repr_html.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index 710c334d99e24..4dc87e95a6bf2 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -1,14 +1,17 @@ """ -Primary used to display the html output of `_repr_html_` of estimators +Primarily used to display the html output of `_repr_html_` of estimators """ -import sys from sphinx.util.docutils import SphinxDirective +from contextlib import redirect_stderr, redirect_stdout from docutils import nodes from io import StringIO class DisplayEstimatorRepr(SphinxDirective): - "Execute Python code and includes stdout as HTML" + """Execute Python and runs `_repr_html_` on the last element on the code + block. The last element in the code block should be an estimator with + support for `_repr_html_`. 
+ """ has_content = True required_arguments = 0 @@ -19,13 +22,10 @@ def execute(self, code, format_str): final_output = code_parts[-1] code_parts[-1] = format_str.format(final_output) code = '\n'.join(code_parts) - orig_stdout, orig_stderr = sys.stdout, sys.stderr output, err = StringIO(), StringIO() - - sys.stdout, sys.stderr = output, err - exec(code) - sys.stdout, sys.stderr = orig_stdout, orig_stderr + with redirect_stdout(output), redirect_stderr(err): + exec(code) return f"{output.getvalue()}{err.getvalue()}" @@ -34,7 +34,7 @@ def run(self): code = "\n".join(self.content) html_repr = self.execute(code, format_str='print({}._repr_html_())') - input_code = nodes.literal_block(code, code) + input_code = nodes.literal_block(rawsource=code, text=code) input_code['language'] = 'python' output.append(input_code) @@ -54,7 +54,8 @@ def run(self): output.append(latex_node) str_repr = self.execute(code, format_str='print(repr({}))') - str_repr_node = nodes.literal_block(str_repr, str_repr) + str_repr_node = nodes.literal_block(rawsource=str_repr, + text=str_repr) str_repr_node['language'] = 'python' output.append(str_repr_node) From c1b451c4bcab68f6d91d9002d2bd2a27543ac150 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sun, 26 Apr 2020 21:43:11 -0400 Subject: [PATCH 59/81] CLN Address comments --- doc/modules/classes.rst | 1 + doc/modules/compose.rst | 10 ++- doc/sphinxext/display_est_repr_html.py | 10 ++- examples/compose/plot_column_transformer.py | 7 -- .../plot_column_transformer_mixed_types.py | 2 + sklearn/_config.py | 17 ++++- sklearn/base.py | 15 ++-- sklearn/tests/test_config.py | 8 +- sklearn/utils/__init__.py | 3 +- sklearn/utils/_display_estimator.py | 9 +-- sklearn/utils/tests/test_display_estimator.py | 74 +++++++++---------- 11 files changed, 83 insertions(+), 73 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 3d9924638b69b..f9904ad56b100 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1569,6 
+1569,7 @@ Plotting utils.deprecated utils.estimator_checks.check_estimator utils.estimator_checks.parametrize_with_checks + utils.estimator_repr_html utils.extmath.safe_sparse_dot utils.extmath.randomized_range_finder utils.extmath.randomized_svd diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index db4f05801038d..78f7dc2f63c29 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -539,6 +539,7 @@ many estimators. An example of the HTML output can be seen below. .. display_estimator_repr_html:: + from sklearn import config_context from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline from sklearn.impute import SimpleImputer @@ -552,7 +553,7 @@ many estimators. An example of the HTML output can be seen below. categorical_features = ['embarked', 'sex', 'pclass'] categorical_transformer = Pipeline(steps=[ - ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), + ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), ('onehot', OneHotEncoder(handle_unknown='ignore'))]) preprocessor = ColumnTransformer( @@ -562,13 +563,14 @@ many estimators. An example of the HTML output can be seen below. clf = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', LogisticRegression())]) - clf + with config_context(repr_html=True): + clf As an alternative, the HTML can be written to a file using the `_repr_html_` method:: - with open('my_estimator.html', 'w') as f: - f.write(clf._repr_html_()) + with config_context(repr_html=True), open('my_estimator.html', 'w') as f: + f.write(clf._repr_html_()) .. 
topic:: Examples: diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index 4dc87e95a6bf2..676b87c284b6d 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -20,7 +20,9 @@ class DisplayEstimatorRepr(SphinxDirective): def execute(self, code, format_str): code_parts = code.split('\n') final_output = code_parts[-1] - code_parts[-1] = format_str.format(final_output) + final_est = final_output.lstrip(' ') + n_whitespace = len(final_output) - len(final_est) + code_parts[-1] = " " * n_whitespace + format_str.format(final_est) code = '\n'.join(code_parts) output, err = StringIO(), StringIO() @@ -32,14 +34,14 @@ def execute(self, code, format_str): def run(self): output = [] code = "\n".join(self.content) - html_repr = self.execute(code, format_str='print({}._repr_html_())') + repr_html = self.execute(code, format_str='print({}._repr_html_())') input_code = nodes.literal_block(rawsource=code, text=code) input_code['language'] = 'python' output.append(input_code) - html_repr = f"

{html_repr}

" - html_node = nodes.raw('', html_repr, format='html') + repr_html = f"

{repr_html}

" + html_node = nodes.raw('', repr_html, format='html') output.append(html_node) if self.env.app.builder.name == 'latex': diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py index aff1876cfdf3e..09877114124ac 100644 --- a/examples/compose/plot_column_transformer.py +++ b/examples/compose/plot_column_transformer.py @@ -115,13 +115,6 @@ def transform(self, posts): ('svc', LinearSVC(dual=False)), ], verbose=True) -############################################################################### -# HTML representation of ``Pipeline`` -############################################################################### -# When the ``Pipeline`` is printed out in a jupyter notebook an HTML -# representation of the estimator is displayed as follows: -pipeline - ############################################################################### # Classification Report ############################################################################### diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 920ef37bfb333..6c32d394aaa78 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -27,6 +27,7 @@ import numpy as np +from sklearn import set_config from sklearn.compose import ColumnTransformer from sklearn.datasets import fetch_openml from sklearn.pipeline import Pipeline @@ -92,6 +93,7 @@ ############################################################################### # When the ``Pipeline`` is printed out in a jupyter notebook an HTML # representation of the estimator is displayed as follows: +set_config(repr_html=True) clf ############################################################################### diff --git a/sklearn/_config.py b/sklearn/_config.py index c7f3934ee1cb3..4b81bddc0e517 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -7,6 +7,7 @@ 'assume_finite': 
bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)), 'working_memory': int(os.environ.get('SKLEARN_WORKING_MEMORY', 1024)), 'print_changed_only': False, + 'repr_html': False, } @@ -27,7 +28,7 @@ def get_config(): def set_config(assume_finite=None, working_memory=None, - print_changed_only=None): + print_changed_only=None, repr_html=None): """Set global scikit-learn configuration .. versionadded:: 0.19 @@ -59,6 +60,12 @@ def set_config(assume_finite=None, working_memory=None, .. versionadded:: 0.21 + repr_html : bool, optional + If True, estimators will be displayed with HTML in a jupyter lab or + notebook context. + + .. versionadded:: 0.23 + See Also -------- config_context: Context manager for global scikit-learn configuration @@ -70,6 +77,8 @@ def set_config(assume_finite=None, working_memory=None, _global_config['working_memory'] = working_memory if print_changed_only is not None: _global_config['print_changed_only'] = print_changed_only + if repr_html is not None: + _global_config['repr_html'] = repr_html @contextmanager @@ -97,6 +106,12 @@ def config_context(**new_config): behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with all the non-changed parameters. + repr_html : bool, optional + If True, estimators will be displayed with HTML in a jupyter lab or + notebook context. + + .. versionadded:: 0.23 + Notes ----- All settings, not just those presently modified, will be returned to diff --git a/sklearn/base.py b/sklearn/base.py index 4db7ff4ae8715..fd2d5b9bfe110 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -17,10 +17,11 @@ import numpy as np from . 
import __version__ +from ._config import get_config from .utils import _IS_32BIT from .utils.validation import check_X_y from .utils.validation import check_array -from .utils._display_estimator import _estimator_repr_html +from .utils._display_estimator import estimator_repr_html from .utils.validation import _deprecate_positional_args _DEFAULT_TAGS = { @@ -437,16 +438,20 @@ def _validate_data(self, X, y=None, reset=True, return out def _repr_html_(self): - """Build a HTML representation of an estimator. + """HTML or string representation of an estimator depending on + global configuration flag `repr_html`. Read more in the :ref:`User Guide `. Returns ------- - html: str - HTML representation of estimator. + repr: str + HTML or string representation of estimator. """ - return _estimator_repr_html(self) + repr_html = get_config()["repr_html"] + if repr_html: + return estimator_repr_htmlself) + return repr(self) class ClassifierMixin: diff --git a/sklearn/tests/test_config.py b/sklearn/tests/test_config.py index dfa944110ad7a..983dbb0277b8b 100644 --- a/sklearn/tests/test_config.py +++ b/sklearn/tests/test_config.py @@ -4,7 +4,7 @@ def test_config_context(): assert get_config() == {'assume_finite': False, 'working_memory': 1024, - 'print_changed_only': False} + 'print_changed_only': False, 'repr_html': False} # Not using as a context manager affects nothing config_context(assume_finite=True) @@ -12,7 +12,8 @@ def test_config_context(): with config_context(assume_finite=True): assert get_config() == {'assume_finite': True, 'working_memory': 1024, - 'print_changed_only': False} + 'print_changed_only': False, + 'repr_html': False} assert get_config()['assume_finite'] is False with config_context(assume_finite=True): @@ -37,7 +38,8 @@ def test_config_context(): assert get_config()['assume_finite'] is True assert get_config() == {'assume_finite': False, 'working_memory': 1024, - 'print_changed_only': False} + 'print_changed_only': False, + 'repr_html': False} # No 
positional arguments assert_raises(TypeError, config_context, True) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index aac6e292a198a..acffddf7ccf56 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -25,6 +25,7 @@ from ..exceptions import DataConversionWarning from .deprecation import deprecated from .fixes import np_version +from ._display_estimator import estimator_repr_html from .validation import (as_float_array, assert_all_finite, check_random_state, column_or_1d, check_array, @@ -51,7 +52,7 @@ "check_symmetric", "indices_to_mask", "deprecated", "parallel_backend", "register_parallel_backend", "resample", "shuffle", "check_matplotlib_support", "all_estimators", - "DataConversionWarning" + "DataConversionWarning", "estimator_repr_html" ] IS_PYPY = platform.python_implementation() == 'PyPy' diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py index 3ab58ab44c5aa..ab5c9745ca929 100644 --- a/sklearn/utils/_display_estimator.py +++ b/sklearn/utils/_display_estimator.py @@ -105,12 +105,7 @@ def _write_estimator_html(out, estimator, estimator_label, estimator_label_details, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ - if first_call: - est_block = _get_visual_block(estimator) - else: - # deeper calls will only show the changes - with config_context(print_changed_only=True): - est_block = _get_visual_block(estimator) + est_block = _get_visual_block(estimator) if est_block.kind in ('serial', 'parallel'): dashed_wrapped = first_call or est_block.dash_wrapped @@ -283,7 +278,7 @@ def _write_estimator_html(out, estimator, estimator_label, """.replace(' ', '').replace('\n', '') # noqa -def _estimator_repr_html(estimator): +def estimator_repr_html(estimator): """Build a HTML representation of an estimator. Read more in the :ref:`User Guide `. 
diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py index b4305cde23537..152062f5ec168 100644 --- a/sklearn/utils/tests/test_display_estimator.py +++ b/sklearn/utils/tests/test_display_estimator.py @@ -27,7 +27,7 @@ from sklearn.gaussian_process.kernels import RationalQuadratic from sklearn.utils._display_estimator import _write_label_html from sklearn.utils._display_estimator import _get_visual_block -from sklearn.utils._display_estimator import _estimator_repr_html +from sklearn.utils._display_estimator import estimator_repr_html @pytest.mark.parametrize("checked", [True, False]) @@ -151,7 +151,7 @@ def test_display_estimator_pipeline(): pipe = Pipeline([ ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) ]) - html_output = _estimator_repr_html(pipe) + html_output = estimator_repr_html(pipe) # top level estimators show estimator with changes assert str(pipe) in html_output @@ -159,32 +159,30 @@ def test_display_estimator_pipeline(): assert (f"
" f"
{str(est)}") in html_output
 
-    # all other estimators are shown with only its changes
-    with config_context(print_changed_only=True):
-        assert str(num_trans['pass']) in html_output
-        assert 'passthrough' in html_output
-        assert str(num_trans['imputer']) in html_output
+    assert str(num_trans['pass']) in html_output
+    assert 'passthrough' in html_output
+    assert str(num_trans['imputer']) in html_output
 
-        for _, _, cols in preprocess.transformers:
-            assert f"
{cols}
" in html_output + for _, _, cols in preprocess.transformers: + assert f"
{cols}
" in html_output - # feature union - for name, _ in feat_u.transformer_list: - assert f"" in html_output + # feature union + for name, _ in feat_u.transformer_list: + assert f"" in html_output - pca = feat_u.transformer_list[0][1] - assert f"
{str(pca)}
" in html_output + pca = feat_u.transformer_list[0][1] + assert f"
{str(pca)}
" in html_output - tsvd = feat_u.transformer_list[1][1] - first = tsvd['first'] - select = tsvd['select'] - assert f"
{str(first)}
" in html_output - assert f"
{str(select)}
" in html_output + tsvd = feat_u.transformer_list[1][1] + first = tsvd['first'] + select = tsvd['select'] + assert f"
{str(first)}
" in html_output + assert f"
{str(select)}
" in html_output - # voting classifer - for name, est in clf.estimators: - assert f"" in html_output - assert f"
{str(est)}
" in html_output + # voting classifer + for name, est in clf.estimators: + assert f"" in html_output + assert f"
{str(est)}
" in html_output @pytest.mark.parametrize("final_estimator", [None, LinearSVC()]) @@ -194,11 +192,11 @@ def test_stacking_classsifer(final_estimator): clf = StackingClassifier( estimators=estimators, final_estimator=final_estimator) - html_output = _estimator_repr_html(clf) + html_output = estimator_repr_html(clf) assert str(clf) in html_output if final_estimator is None: - assert "LogisticRegression()
" in html_output + assert "LogisticRegression(" in html_output else: assert final_estimator.__class__.__name__ in html_output @@ -207,7 +205,7 @@ def test_stacking_classsifer(final_estimator): def test_stacking_regressor(final_estimator): reg = StackingRegressor( estimators=[('svr', LinearSVR())], final_estimator=final_estimator) - html_output = _estimator_repr_html(reg) + html_output = estimator_repr_html(reg) assert str(reg.estimators[0][0]) in html_output assert "LinearSVR" in html_output @@ -220,12 +218,10 @@ def test_stacking_regressor(final_estimator): def test_birch_duck_typing_meta(): # Test duck typing meta estimators with Birch birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3)) - html_output = _estimator_repr_html(birch) + html_output = estimator_repr_html(birch) - # inner estimator shows only the changes - with config_context(print_changed_only=True): - assert f"
{str(birch.n_clusters)}" in html_output
-        assert "AgglomerativeClustering" in html_output
+    assert f"
{str(birch.n_clusters)}" in html_output
+    assert "AgglomerativeClustering" in html_output
 
     # outer estimator contains all changes
     assert f"
{str(birch)}" in html_output
@@ -234,12 +230,10 @@ def test_birch_duck_typing_meta():
 def test_ovo_classifier_duck_typing_meta():
     # Test duck typing metaestimators with OVO
     ovo = OneVsOneClassifier(LinearSVC(penalty='l1'))
-    html_output = _estimator_repr_html(ovo)
+    html_output = estimator_repr_html(ovo)
 
-    # inner estimator shows only the changes
-    with config_context(print_changed_only=True):
-        assert f"
{str(ovo.estimator)}" in html_output
-        assert "LinearSVC" in html_output
+    assert f"
{str(ovo.estimator)}" in html_output
+    assert "LinearSVC" in html_output
 
     # outter estimator
     assert f"
{str(ovo)}" in html_output
@@ -249,11 +243,9 @@ def test_duck_typing_nested_estimator():
     # Test duck typing metaestimators with GP
     kernel = RationalQuadratic(length_scale=1.0, alpha=0.1)
     gp = GaussianProcessRegressor(kernel=kernel)
-    html_output = _estimator_repr_html(gp)
-
-    with config_context(print_changed_only=True):
-        assert f"
{str(kernel)}" in html_output
+    html_output = estimator_repr_html(gp)
 
+    assert f"
{str(kernel)}" in html_output
     assert f"
{str(gp)}" in html_output
 
 
@@ -263,5 +255,5 @@ def test_one_estimator_print_change_only(print_changed_only):
 
     with config_context(print_changed_only=print_changed_only):
         pca_repr = str(pca)
-        html_output = _estimator_repr_html(pca)
+        html_output = estimator_repr_html(pca)
         assert pca_repr in html_output

From 17f05e848798b2b5387a77e4be693fc374a5a509 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 26 Apr 2020 22:39:18 -0400
Subject: [PATCH 60/81] BUG Fix

---
 sklearn/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index fd2d5b9bfe110..19e8310f583eb 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -450,7 +450,7 @@ def _repr_html_(self):
         """
         repr_html = get_config()["repr_html"]
         if repr_html:
-            return estimator_repr_htmlself)
+            return estimator_repr_html(self)
         return repr(self)
 
 

From 47d72ba85123c6cf5a7c459fde6367382f35d175 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 26 Apr 2020 22:40:15 -0400
Subject: [PATCH 61/81] DOC Use function

---
 doc/modules/compose.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 78f7dc2f63c29..71d371963b646 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -569,8 +569,9 @@ many estimators. An example of the HTML output can be seen below.
 As an alternative, the HTML can be written to a file using the `_repr_html_`
 method::
 
-   with config_context(repr_html=True), open('my_estimator.html', 'w') as f:
-       f.write(clf._repr_html_())
+   from sklearn.utils import estimator_repr_html
+   with open('my_estimator.html', 'w') as f:
+       f.write(estimator_repr_html(clf))
 
 .. topic:: Examples:
 

From bae645bd786d697c19c77ea876d35bc52c76838d Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 26 Apr 2020 22:40:55 -0400
Subject: [PATCH 62/81] REV Less diffs

---
 examples/compose/plot_column_transformer.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py
index 09877114124ac..0cfc9f5de0054 100644
--- a/examples/compose/plot_column_transformer.py
+++ b/examples/compose/plot_column_transformer.py
@@ -115,13 +115,7 @@ def transform(self, posts):
     ('svc', LinearSVC(dual=False)),
 ], verbose=True)
 
-###############################################################################
-# Classification Report
-###############################################################################
-# Finally, the pipeline is trained and a classification report is generated
-# on a testing subset. We limit the list of categories to make running this
-# example faster.
-
+# limit the list of categories to make running this example faster.
 categories = ['alt.atheism', 'talk.religion.misc']
 X_train, y_train = fetch_20newsgroups(random_state=1,
                                       subset='train',

From c616802df4a6865f3862ea97550382ea4c35ad8f Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 08:07:55 -0400
Subject: [PATCH 63/81] REV Remove

---
 sklearn/utils/_display_estimator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index ab5c9745ca929..5f4438d8f6ac4 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -1,4 +1,3 @@
-from sklearn._config import config_context
 from contextlib import closing
 from contextlib import suppress
 from io import StringIO

From 1fe69faaa436ca10cf402535b159062122785750 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 08:38:11 -0400
Subject: [PATCH 64/81] BUG Fix

---
 sklearn/base.py                               | 2 +-
 sklearn/utils/tests/test_display_estimator.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 19e8310f583eb..8b415cc3d5ed4 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -451,7 +451,7 @@ def _repr_html_(self):
         repr_html = get_config()["repr_html"]
         if repr_html:
             return estimator_repr_html(self)
-        return repr(self)
+        return f"{repr(self)}"
 
 
 class ClassifierMixin:
diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py
index 152062f5ec168..c4593ce3cc730 100644
--- a/sklearn/utils/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_display_estimator.py
@@ -64,6 +64,10 @@ def test_get_visual_block_single_estimator():
     assert est_html_info.names == est.__class__.__name__
     assert est_html_info.name_details == str(est)
 
+    # wraps with code block
+    expected_str = f"{repr(est)}"
+    assert est._repr_html_() == expected_str
+
 
 def test_get_visual_block_pipeline():
     pipe = Pipeline([

From 8d23d5b3e23cf625739a620679a74f031bdb9929 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 09:28:17 -0400
Subject: [PATCH 65/81] REV Inner estimators do not show changes

---
 sklearn/utils/_display_estimator.py           |  7 ++-
 sklearn/utils/tests/test_display_estimator.py | 52 +++++++++++--------
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index 5f4438d8f6ac4..01c417c170d92 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -1,3 +1,4 @@
+from sklearn import config_context
 from contextlib import closing
 from contextlib import suppress
 from io import StringIO
@@ -104,7 +105,11 @@ def _write_estimator_html(out, estimator, estimator_label,
                           estimator_label_details, first_call=False):
     """Write estimator to html in serial, parallel, or by itself (single).
     """
-    est_block = _get_visual_block(estimator)
+    if first_call:
+        est_block = _get_visual_block(estimator)
+    else:
+        with config_context(print_changed_only=True):
+            est_block = _get_visual_block(estimator)
 
     if est_block.kind in ('serial', 'parallel'):
         dashed_wrapped = first_call or est_block.dash_wrapped
diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py
index c4593ce3cc730..5eb6ad732d2ce 100644
--- a/sklearn/utils/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_display_estimator.py
@@ -163,30 +163,32 @@ def test_display_estimator_pipeline():
         assert (f"
" f"
{str(est)}") in html_output
 
-    assert str(num_trans['pass']) in html_output
-    assert 'passthrough' in html_output
-    assert str(num_trans['imputer']) in html_output
+    # nested estimators are always rendered with print_changed_only=True
+    with config_context(print_changed_only=True):
+        assert str(num_trans['pass']) in html_output
+        assert 'passthrough' in html_output
+        assert str(num_trans['imputer']) in html_output
 
-    for _, _, cols in preprocess.transformers:
-        assert f"
{cols}
" in html_output + for _, _, cols in preprocess.transformers: + assert f"
{cols}
" in html_output - # feature union - for name, _ in feat_u.transformer_list: - assert f"" in html_output + # feature union + for name, _ in feat_u.transformer_list: + assert f"" in html_output - pca = feat_u.transformer_list[0][1] - assert f"
{str(pca)}
" in html_output + pca = feat_u.transformer_list[0][1] + assert f"
{str(pca)}
" in html_output - tsvd = feat_u.transformer_list[1][1] - first = tsvd['first'] - select = tsvd['select'] - assert f"
{str(first)}
" in html_output - assert f"
{str(select)}
" in html_output + tsvd = feat_u.transformer_list[1][1] + first = tsvd['first'] + select = tsvd['select'] + assert f"
{str(first)}
" in html_output + assert f"
{str(select)}
" in html_output - # voting classifer - for name, est in clf.estimators: - assert f"" in html_output - assert f"
{str(est)}
" in html_output + # voting classifer + for name, est in clf.estimators: + assert f"" in html_output + assert f"
{str(est)}
" in html_output @pytest.mark.parametrize("final_estimator", [None, LinearSVC()]) @@ -224,8 +226,10 @@ def test_birch_duck_typing_meta(): birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3)) html_output = estimator_repr_html(birch) - assert f"
{str(birch.n_clusters)}" in html_output
-    assert "AgglomerativeClustering" in html_output
+    # inner estimators are always rendered with print_changed_only=True
+    with config_context(print_changed_only=True):
+        assert f"
{str(birch.n_clusters)}" in html_output
+        assert "AgglomerativeClustering" in html_output
 
     # outer estimator contains all changes
     assert f"
{str(birch)}" in html_output
@@ -236,8 +240,10 @@ def test_ovo_classifier_duck_typing_meta():
     ovo = OneVsOneClassifier(LinearSVC(penalty='l1'))
     html_output = estimator_repr_html(ovo)
 
-    assert f"
{str(ovo.estimator)}" in html_output
-    assert "LinearSVC" in html_output
+    # inner estimators are always rendered with print_changed_only=True
+    with config_context(print_changed_only=True):
+        assert f"
{str(ovo.estimator)}" in html_output
+        assert "LinearSVC" in html_output
 
     # outer estimator
     assert f"
{str(ovo)}" in html_output

From 3d41cafccea2cc9376ef06cbc9580328ebf263ea Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 11:35:18 -0400
Subject: [PATCH 66/81] ENH Uses _repr_mimebundle_

---
 sklearn/base.py                               | 20 +++++--------------
 sklearn/utils/_display_estimator.py           |  3 +--
 sklearn/utils/tests/test_display_estimator.py |  4 ----
 3 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 8b415cc3d5ed4..0e2fff59007ef 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -437,21 +437,11 @@ def _validate_data(self, X, y=None, reset=True,
 
         return out
 
-    def _repr_html_(self):
-        """HTML or string representation of an estimator depending on
-        global configuration flag `repr_html`.
-
-        Read more in the :ref:`User Guide `.
-
-        Returns
-        -------
-        repr: str
-            HTML or string representation of estimator.
-        """
-        repr_html = get_config()["repr_html"]
-        if repr_html:
-            return estimator_repr_html(self)
-        return f"{repr(self)}"
+    def _repr_mimebundle_(self, **kwargs):
+        output = {"text/plain": repr(self)}
+        if get_config()["repr_html"]:
+            output["text/html"] = estimator_repr_html(self)
+        return output
 
 
 class ClassifierMixin:
diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index 01c417c170d92..914ac0fe6fefb 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -299,8 +299,7 @@ def estimator_repr_html(estimator):
     """
     with closing(StringIO()) as out:
         out.write(f''
-                  f'sklearn-viz'
-                  f''
+                  f''
                   f'
') _write_estimator_html(out, estimator, estimator.__class__.__name__, str(estimator), first_call=True) diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py index 5eb6ad732d2ce..97869aa8ca2b4 100644 --- a/sklearn/utils/tests/test_display_estimator.py +++ b/sklearn/utils/tests/test_display_estimator.py @@ -64,10 +64,6 @@ def test_get_visual_block_single_estimator(): assert est_html_info.names == est.__class__.__name__ assert est_html_info.name_details == str(est) - # wraps with code block - expected_str = f"{repr(est)}" - assert est._repr_html_() == expected_str - def test_get_visual_block_pipeline(): pipe = Pipeline([ From 1cc87b6a9a3c5d0886d8c84762229b94082cafd7 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 27 Apr 2020 11:47:31 -0400 Subject: [PATCH 67/81] CLN Updates file names --- sklearn/base.py | 2 +- sklearn/compose/_column_transformer.py | 2 +- sklearn/ensemble/_stacking.py | 2 +- sklearn/ensemble/_voting.py | 2 +- sklearn/pipeline.py | 2 +- sklearn/tests/test_base.py | 14 ++++++++++++++ sklearn/utils/__init__.py | 2 +- ...isplay_estimator.py => _estimator_html_repr.py} | 9 ++++++--- ...ay_estimator.py => test_estimator_html_repr.py} | 8 ++++---- 9 files changed, 30 insertions(+), 13 deletions(-) rename sklearn/utils/{_display_estimator.py => _estimator_html_repr.py} (97%) rename sklearn/utils/tests/{test_display_estimator.py => test_estimator_html_repr.py} (97%) diff --git a/sklearn/base.py b/sklearn/base.py index 0e2fff59007ef..ae74368851226 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -21,7 +21,7 @@ from .utils import _IS_32BIT from .utils.validation import check_X_y from .utils.validation import check_array -from .utils._display_estimator import estimator_repr_html +from .utils._estimator_html_repr import estimator_repr_html from .utils.validation import _deprecate_positional_args _DEFAULT_TAGS = { diff --git a/sklearn/compose/_column_transformer.py 
b/sklearn/compose/_column_transformer.py index 261128fcfd3bf..f148633021a97 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,7 +15,7 @@ from joblib import Parallel, delayed from ..base import clone, TransformerMixin -from ..utils._display_estimator import _VisualBlock +from ..utils._estimator_html_repr import _VisualBlock from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index ac2e79638096e..91431f805aa0f 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,7 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor -from ..utils._display_estimator import _VisualBlock +from ..utils._estimator_html_repr import _VisualBlock from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index b866962ed53b2..5105619e17d17 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -32,7 +32,7 @@ from ..utils.validation import column_or_1d from ..utils.validation import _deprecate_positional_args from ..exceptions import NotFittedError -from ..utils._display_estimator import _VisualBlock +from ..utils._estimator_html_repr import _VisualBlock class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 22471a76b3b67..6f02cb565e15c 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,7 +18,7 @@ from joblib import Parallel, delayed from .base import clone, TransformerMixin -from .utils._display_estimator import _VisualBlock +from .utils._estimator_html_repr import _VisualBlock from .utils.metaestimators import if_delegate_has_method from 
.utils import Bunch, _print_elapsed_time from .utils.validation import check_memory diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 95f7b01f27058..f12c07d49090e 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -23,6 +23,7 @@ from sklearn.base import TransformerMixin from sklearn.utils._mocking import MockDataFrame +from sklearn import config_context import pickle @@ -511,3 +512,16 @@ def fit(self, X, y=None): params = est.get_params() assert params['param'] is None + + +def test_repr_mimebundle_(): + # Checks the repr_html configuration flag controls the json output + tree = DecisionTreeClassifier() + output = tree._repr_mimebundle_() + assert "text/plain" in output + assert "text/html" not in output + + with config_context(repr_html=True): + output = tree._repr_mimebundle_() + assert "text/plain" in output + assert "text/html" in output diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index acffddf7ccf56..1c36c69919ce3 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -25,7 +25,7 @@ from ..exceptions import DataConversionWarning from .deprecation import deprecated from .fixes import np_version -from ._display_estimator import estimator_repr_html +from ._estimator_html_repr import estimator_repr_html from .validation import (as_float_array, assert_all_finite, check_random_state, column_or_1d, check_array, diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_estimator_html_repr.py similarity index 97% rename from sklearn/utils/_display_estimator.py rename to sklearn/utils/_estimator_html_repr.py index 914ac0fe6fefb..84b071f61a955 100644 --- a/sklearn/utils/_display_estimator.py +++ b/sklearn/utils/_estimator_html_repr.py @@ -1,8 +1,10 @@ -from sklearn import config_context from contextlib import closing from contextlib import suppress from io import StringIO import uuid +import html + +from sklearn import config_context class _VisualBlock: @@ -98,7 +100,7 @@ 
def _get_visual_block(estimator): return _VisualBlock('single', estimator, names=estimator.__class__.__name__, - name_details=str(estimator)) + name_details=html.escape(str(estimator), quote=False)) def _write_estimator_html(out, estimator, estimator_label, @@ -302,7 +304,8 @@ def estimator_repr_html(estimator): f'' f'
') _write_estimator_html(out, estimator, estimator.__class__.__name__, - str(estimator), first_call=True) + html.escape(str(estimator), quote=False), + first_call=True) out.write('
') html_output = out.getvalue() diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_estimator_html_repr.py similarity index 97% rename from sklearn/utils/tests/test_display_estimator.py rename to sklearn/utils/tests/test_estimator_html_repr.py index 97869aa8ca2b4..af6e6bcb44c8c 100644 --- a/sklearn/utils/tests/test_display_estimator.py +++ b/sklearn/utils/tests/test_estimator_html_repr.py @@ -25,9 +25,9 @@ from sklearn.ensemble import StackingRegressor from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import RationalQuadratic -from sklearn.utils._display_estimator import _write_label_html -from sklearn.utils._display_estimator import _get_visual_block -from sklearn.utils._display_estimator import estimator_repr_html +from sklearn.utils._estimator_html_repr import _write_label_html +from sklearn.utils._estimator_html_repr import _get_visual_block +from sklearn.utils._estimator_html_repr import estimator_repr_html @pytest.mark.parametrize("checked", [True, False]) @@ -120,7 +120,7 @@ def test_get_visual_block_column_transformer(): assert est_html_info.name_details == (['num1', 'num2'], [0, 3]) -def test_display_estimator_pipeline(): +def test_estimator_html_repr_pipeline(): num_trans = Pipeline(steps=[ ('pass', 'passthrough'), ('imputer', SimpleImputer(strategy='median')) From 3af9151de16436b40f0a2f8766ef73d9a5ef7ba4 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 27 Apr 2020 12:24:43 -0400 Subject: [PATCH 68/81] DOC Remove sphinx extension --- doc/conf.py | 1 - doc/modules/compose.rst | 39 +++------------ doc/sphinxext/display_est_repr_html.py | 69 -------------------------- 3 files changed, 6 insertions(+), 103 deletions(-) delete mode 100644 doc/sphinxext/display_est_repr_html.py diff --git a/doc/conf.py b/doc/conf.py index 22f9061c8a9c3..a824ab21b33e5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -39,7 +39,6 @@ 'sphinx.ext.imgconverter', 'sphinx_gallery.gen_gallery', 
'sphinx_issues', - 'display_est_repr_html' ] # this is needed for some reason... diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 71d371963b646..805dba4334f36 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -535,41 +535,14 @@ Visualizing Composite Estimators By default, estimators are displayed with a HTML representation when shown in a jupyter notebook. This can be useful to diagnose or visualize a Pipeline with -many estimators. An example of the HTML output can be seen below. - -.. display_estimator_repr_html:: - - from sklearn import config_context - from sklearn.compose import ColumnTransformer - from sklearn.pipeline import Pipeline - from sklearn.impute import SimpleImputer - from sklearn.preprocessing import StandardScaler, OneHotEncoder - from sklearn.linear_model import LogisticRegression - - numeric_features = ['age', 'fare'] - numeric_transformer = Pipeline(steps=[ - ('imputer', SimpleImputer(strategy='median')), - ('scaler', StandardScaler())]) - - categorical_features = ['embarked', 'sex', 'pclass'] - categorical_transformer = Pipeline(steps=[ - ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), - ('onehot', OneHotEncoder(handle_unknown='ignore'))]) - - preprocessor = ColumnTransformer( - transformers=[ - ('num', numeric_transformer, numeric_features), - ('cat', categorical_transformer, categorical_features)]) - - clf = Pipeline(steps=[('preprocessor', preprocessor), - ('classifier', LogisticRegression())]) - with config_context(repr_html=True): - clf - -As an alternative, the HTML can be written to a file using the `_repr_html_` -method:: +many estimators. An example of the HTML output is shown in the +**HTML representation of Pipeline** section of +:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`. 
+As an alternative, the HTML can be written to a file using +:func:`~sklearn.utils.estimator_repr_html`:: from sklearn.utils import estimator_repr_html + with open('my_estimator.html', 'w') as f: f.write(estimator_repr_html(clf)) diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py deleted file mode 100644 index 676b87c284b6d..0000000000000 --- a/doc/sphinxext/display_est_repr_html.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Primarily used to display the html output of `_repr_html_` of estimators -""" -from sphinx.util.docutils import SphinxDirective -from contextlib import redirect_stderr, redirect_stdout -from docutils import nodes -from io import StringIO - - -class DisplayEstimatorRepr(SphinxDirective): - """Execute Python and runs `_repr_html_` on the last element on the code - block. The last element in the code block should be an estimator with - support for `_repr_html_`. - """ - - has_content = True - required_arguments = 0 - optional_arguments = 0 - - def execute(self, code, format_str): - code_parts = code.split('\n') - final_output = code_parts[-1] - final_est = final_output.lstrip(' ') - n_whitespace = len(final_output) - len(final_est) - code_parts[-1] = " " * n_whitespace + format_str.format(final_est) - code = '\n'.join(code_parts) - - output, err = StringIO(), StringIO() - with redirect_stdout(output), redirect_stderr(err): - exec(code) - - return f"{output.getvalue()}{err.getvalue()}" - - def run(self): - output = [] - code = "\n".join(self.content) - repr_html = self.execute(code, format_str='print({}._repr_html_())') - - input_code = nodes.literal_block(rawsource=code, text=code) - input_code['language'] = 'python' - output.append(input_code) - - repr_html = f"

{repr_html}

" - html_node = nodes.raw('', repr_html, format='html') - output.append(html_node) - - if self.env.app.builder.name == 'latex': - code_results_latex = r""" - \begin{sphinxadmonition}{note}{Note:} - The HTML output of this code snippet can only been seen on the HTML - version of the documentation. The following is a string - representation. - \end{sphinxadmonition} - """ - latex_node = nodes.raw('', code_results_latex, format='latex') - output.append(latex_node) - - str_repr = self.execute(code, format_str='print(repr({}))') - str_repr_node = nodes.literal_block(rawsource=str_repr, - text=str_repr) - str_repr_node['language'] = 'python' - output.append(str_repr_node) - - return output - - -def setup(app): - app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr) - return {'parallel_read_safe': True, 'parallel_write_safe': True} From 0b4a64dc4dc122f809250978c79cd1647dde6e75 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 27 Apr 2020 16:06:51 -0400 Subject: [PATCH 69/81] CLN Uses None --- doc/modules/classes.rst | 2 +- doc/modules/compose.rst | 6 ++--- sklearn/base.py | 11 +++++----- sklearn/compose/_column_transformer.py | 2 +- sklearn/ensemble/_stacking.py | 2 +- sklearn/ensemble/_voting.py | 2 +- sklearn/pipeline.py | 2 +- sklearn/tests/test_base.py | 12 +++++----- sklearn/utils/__init__.py | 4 ++-- ...tml_repr.py => _display_estimator_html.py} | 13 +++++------ ...repr.py => test_display_estimator_html.py} | 22 +++++++++---------- 11 files changed, 39 insertions(+), 39 deletions(-) rename sklearn/utils/{_estimator_html_repr.py => _display_estimator_html.py} (95%) rename sklearn/utils/tests/{test_estimator_html_repr.py => test_display_estimator_html.py} (93%) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index f9904ad56b100..be12641ab4a7b 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1569,7 +1569,7 @@ Plotting utils.deprecated utils.estimator_checks.check_estimator 
utils.estimator_checks.parametrize_with_checks - utils.estimator_repr_html + utils.display_estimator_html utils.extmath.safe_sparse_dot utils.extmath.randomized_range_finder utils.extmath.randomized_svd diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 805dba4334f36..3d9a08fe395aa 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -539,12 +539,12 @@ many estimators. An example of the HTML output is shown in the **HTML representation of Pipeline** section of :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`. As an alternative, the HTML can be written to a file using -:func:`~sklearn.utils.estimator_repr_html`:: +:func:`~sklearn.utils.display_estimator_html`:: - from sklearn.utils import estimator_repr_html + from sklearn.utils import display_estimator_html with open('my_estimator.html', 'w') as f: - f.write(estimator_repr_html(clf)) + f.write(display_estimator_html(clf)) .. topic:: Examples: diff --git a/sklearn/base.py b/sklearn/base.py index ae74368851226..a4074e1d42d3b 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -13,6 +13,7 @@ import platform import inspect import re +import html import numpy as np @@ -21,7 +22,7 @@ from .utils import _IS_32BIT from .utils.validation import check_X_y from .utils.validation import check_array -from .utils._estimator_html_repr import estimator_repr_html +from .utils._display_estimator_html import display_estimator_html from .utils.validation import _deprecate_positional_args _DEFAULT_TAGS = { @@ -437,11 +438,11 @@ def _validate_data(self, X, y=None, reset=True, return out - def _repr_mimebundle_(self, **kwargs): - output = {"text/plain": repr(self)} + def _repr_html_(self): + """HTML representation of estimator.""" if get_config()["repr_html"]: - output["text/html"] = estimator_repr_html(self) - return output + return display_estimator_html(self) + return None class ClassifierMixin: diff --git a/sklearn/compose/_column_transformer.py 
b/sklearn/compose/_column_transformer.py index f148633021a97..f354672c42e9b 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,7 +15,7 @@ from joblib import Parallel, delayed from ..base import clone, TransformerMixin -from ..utils._estimator_html_repr import _VisualBlock +from ..utils._display_estimator_html import _VisualBlock from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 91431f805aa0f..870757f5b9497 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,7 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor -from ..utils._estimator_html_repr import _VisualBlock +from ..utils._display_estimator_html import _VisualBlock from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 5105619e17d17..c85a6c86d2dfd 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -32,7 +32,7 @@ from ..utils.validation import column_or_1d from ..utils.validation import _deprecate_positional_args from ..exceptions import NotFittedError -from ..utils._estimator_html_repr import _VisualBlock +from ..utils._display_estimator_html import _VisualBlock class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 6f02cb565e15c..b289607c54be9 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,7 +18,7 @@ from joblib import Parallel, delayed from .base import clone, TransformerMixin -from .utils._estimator_html_repr import _VisualBlock +from .utils._display_estimator_html import _VisualBlock from .utils.metaestimators import 
if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index f12c07d49090e..96c82c7b0ef3f 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -1,5 +1,6 @@ # Author: Gael Varoquaux # License: BSD 3 clause +import html import numpy as np import scipy.sparse as sp @@ -517,11 +518,10 @@ def fit(self, X, y=None): def test_repr_mimebundle_(): # Checks the repr_html configuration flag controls the json output tree = DecisionTreeClassifier() - output = tree._repr_mimebundle_() - assert "text/plain" in output - assert "text/html" not in output + output = tree._repr_html_() + assert output == f"{html.escape(str(tree))}" with config_context(repr_html=True): - output = tree._repr_mimebundle_() - assert "text/plain" in output - assert "text/html" in output + output = tree._repr_html_() + # html output + assert "' + out.write(f'' f'
') _write_estimator_html(out, estimator, estimator.__class__.__name__, - html.escape(str(estimator), quote=False), - first_call=True) - out.write('
') + str(estimator), first_call=True) + out.write('
') html_output = out.getvalue() return html_output diff --git a/sklearn/utils/tests/test_estimator_html_repr.py b/sklearn/utils/tests/test_display_estimator_html.py similarity index 93% rename from sklearn/utils/tests/test_estimator_html_repr.py rename to sklearn/utils/tests/test_display_estimator_html.py index af6e6bcb44c8c..77bad73d753e0 100644 --- a/sklearn/utils/tests/test_estimator_html_repr.py +++ b/sklearn/utils/tests/test_display_estimator_html.py @@ -25,9 +25,9 @@ from sklearn.ensemble import StackingRegressor from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import RationalQuadratic -from sklearn.utils._estimator_html_repr import _write_label_html -from sklearn.utils._estimator_html_repr import _get_visual_block -from sklearn.utils._estimator_html_repr import estimator_repr_html +from sklearn.utils._display_estimator_html import _write_label_html +from sklearn.utils._display_estimator_html import _get_visual_block +from sklearn.utils._display_estimator_html import display_estimator_html @pytest.mark.parametrize("checked", [True, False]) @@ -120,7 +120,7 @@ def test_get_visual_block_column_transformer(): assert est_html_info.name_details == (['num1', 'num2'], [0, 3]) -def test_estimator_html_repr_pipeline(): +def test_display_estimator_html_pipeline(): num_trans = Pipeline(steps=[ ('pass', 'passthrough'), ('imputer', SimpleImputer(strategy='median')) @@ -151,7 +151,7 @@ def test_estimator_html_repr_pipeline(): pipe = Pipeline([ ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) ]) - html_output = estimator_repr_html(pipe) + html_output = display_estimator_html(pipe) # top level estimators show estimator with changes assert str(pipe) in html_output @@ -194,7 +194,7 @@ def test_stacking_classsifer(final_estimator): clf = StackingClassifier( estimators=estimators, final_estimator=final_estimator) - html_output = estimator_repr_html(clf) + html_output = display_estimator_html(clf) assert 
str(clf) in html_output if final_estimator is None: @@ -207,7 +207,7 @@ def test_stacking_classsifer(final_estimator): def test_stacking_regressor(final_estimator): reg = StackingRegressor( estimators=[('svr', LinearSVR())], final_estimator=final_estimator) - html_output = estimator_repr_html(reg) + html_output = display_estimator_html(reg) assert str(reg.estimators[0][0]) in html_output assert "LinearSVR" in html_output @@ -220,7 +220,7 @@ def test_stacking_regressor(final_estimator): def test_birch_duck_typing_meta(): # Test duck typing meta estimators with Birch birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3)) - html_output = estimator_repr_html(birch) + html_output = display_estimator_html(birch) # inner estimators do not show changes with config_context(print_changed_only=True): @@ -234,7 +234,7 @@ def test_birch_duck_typing_meta(): def test_ovo_classifier_duck_typing_meta(): # Test duck typing metaestimators with OVO ovo = OneVsOneClassifier(LinearSVC(penalty='l1')) - html_output = estimator_repr_html(ovo) + html_output = display_estimator_html(ovo) # inner estimators do not show changes with config_context(print_changed_only=True): @@ -249,7 +249,7 @@ def test_duck_typing_nested_estimator(): # Test duck typing metaestimators with GP kernel = RationalQuadratic(length_scale=1.0, alpha=0.1) gp = GaussianProcessRegressor(kernel=kernel) - html_output = estimator_repr_html(gp) + html_output = display_estimator_html(gp) assert f"
{str(kernel)}" in html_output
     assert f"
{str(gp)}" in html_output
@@ -261,5 +261,5 @@ def test_one_estimator_print_change_only(print_changed_only):
 
     with config_context(print_changed_only=print_changed_only):
         pca_repr = str(pca)
-        html_output = estimator_repr_html(pca)
+        html_output = display_estimator_html(pca)
         assert pca_repr in html_output

From 689d3f29c42a608a00d504f1c3a442d6d9691abe Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 16:37:02 -0400
Subject: [PATCH 70/81] ENH Uses _repr_mimebundle_

---
 doc/conf.py                                   |  1 +
 doc/modules/compose.rst                       | 38 ++++++++--
 doc/sphinxext/display_est_repr_html.py        | 70 +++++++++++++++++++
 .../plot_column_transformer_mixed_types.py    |  8 ---
 sklearn/base.py                               |  9 ++-
 sklearn/tests/test_base.py                    | 12 ++--
 6 files changed, 114 insertions(+), 24 deletions(-)
 create mode 100644 doc/sphinxext/display_est_repr_html.py

diff --git a/doc/conf.py b/doc/conf.py
index a824ab21b33e5..f081f16ef3f47 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -39,6 +39,7 @@
     'sphinx.ext.imgconverter',
     'sphinx_gallery.gen_gallery',
     'sphinx_issues',
+    'display_est_repr_html',
 ]
 
 # this is needed for some reason...
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 3d9a08fe395aa..665842fd40cd7 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -535,11 +535,39 @@ Visualizing Composite Estimators
 
 By default, estimators are displayed with a HTML representation when shown in a
 jupyter notebook. This can be useful to diagnose or visualize a Pipeline with
-many estimators. An example of the HTML output is shown in the
-**HTML representation of Pipeline** section of 
-:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`.
-As an alternative, the HTML can be written to a file using
-:func:`~sklearn.utils.display_estimator_html`::
+many estimators. An example of the HTML output can be seen below.
+
+.. display_estimator_repr_html::
+
+    from sklearn import config_context
+    from sklearn.compose import ColumnTransformer
+    from sklearn.pipeline import Pipeline
+    from sklearn.impute import SimpleImputer
+    from sklearn.preprocessing import StandardScaler, OneHotEncoder
+    from sklearn.linear_model import LogisticRegression
+
+    numeric_features = ['age', 'fare']
+    numeric_transformer = Pipeline(steps=[
+        ('imputer', SimpleImputer(strategy='median')),
+        ('scaler', StandardScaler())])
+
+    categorical_features = ['embarked', 'sex', 'pclass']
+    categorical_transformer = Pipeline(steps=[
+     ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
+        ('onehot', OneHotEncoder(handle_unknown='ignore'))])
+
+    preprocessor = ColumnTransformer(
+        transformers=[
+            ('num', numeric_transformer, numeric_features),
+            ('cat', categorical_transformer, categorical_features)])
+
+    clf = Pipeline(steps=[('preprocessor', preprocessor),
+                           ('classifier', LogisticRegression())])
+    with config_context(repr_html=True):
+        clf
+
+As an alternative, the HTML can be written to a file using
+:func:`~sklearn.utils.display_estimator_html`::
 
    from sklearn.utils import display_estimator_html
 
diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py
new file mode 100644
index 0000000000000..6ef8801758b74
--- /dev/null
+++ b/doc/sphinxext/display_est_repr_html.py
@@ -0,0 +1,70 @@
+"""
+Primarily used to display the html output of `_repr_mimebundle_` of estimators
+"""
+from sphinx.util.docutils import SphinxDirective
+from contextlib import redirect_stderr, redirect_stdout
+from docutils import nodes
+from io import StringIO
+
+
+class DisplayEstimatorRepr(SphinxDirective):
+    """Execute Python and call `_repr_mimebundle_` on the last element of the
+    code block. The last element in the code block should be an estimator
+    with support for `_repr_mimebundle_`.
+    """
+
+    has_content = True
+    required_arguments = 0
+    optional_arguments = 0
+
+    def execute(self, code, format_str):
+        """Run `code`, wrapping its last line with `format_str`; return the
+        captured stdout followed by the captured stderr as one string."""
+        *body, last = code.split('\n')
+        expr = last.lstrip(' ')
+        # Keep the indentation so the last line may sit inside a block.
+        last = " " * (len(last) - len(expr)) + format_str.format(expr)
+        output, err = StringIO(), StringIO()
+        with redirect_stdout(output), redirect_stderr(err):
+            # One fresh namespace: with the implicit separate globals/locals,
+            # functions defined in the snippet cannot see its own names.
+            exec('\n'.join([*body, last]), {})
+        return f"{output.getvalue()}{err.getvalue()}"
+
+    def run(self):
+        output = []
+        code = "\n".join(self.content)
+        repr_html = self.execute(
+            code, format_str='print({}._repr_mimebundle_()["text/html"])')
+
+        input_code = nodes.literal_block(rawsource=code, text=code)
+        input_code['language'] = 'python'
+        output.append(input_code)
+
+        repr_html = f"

{repr_html}

" + html_node = nodes.raw('', repr_html, format='html') + output.append(html_node) + + if self.env.app.builder.name == 'latex': + code_results_latex = r""" + \begin{sphinxadmonition}{note}{Note:} + The HTML output of this code snippet can only been seen on the HTML + version of the documentation. The following is a string + representation. + \end{sphinxadmonition} + """ + latex_node = nodes.raw('', code_results_latex, format='latex') + output.append(latex_node) + + str_repr = self.execute(code, format_str='print(repr({}))') + str_repr_node = nodes.literal_block(rawsource=str_repr, + text=str_repr) + str_repr_node['language'] = 'python' + output.append(str_repr_node) + + return output + + +def setup(app): + app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr) + return {'parallel_read_safe': True, 'parallel_write_safe': True} diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 6c32d394aaa78..1d4023437917f 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -88,14 +88,6 @@ clf.fit(X_train, y_train) print("model score: %.3f" % clf.score(X_test, y_test)) -############################################################################### -# HTML representation of ``Pipeline`` -############################################################################### -# When the ``Pipeline`` is printed out in a jupyter notebook an HTML -# representation of the estimator is displayed as follows: -set_config(repr_html=True) -clf - ############################################################################### # Use ``ColumnTransformer`` by selecting column by data types ############################################################################### diff --git a/sklearn/base.py b/sklearn/base.py index a4074e1d42d3b..b843a3b06eec0 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -13,7 +13,6 @@ import 
platform import inspect import re -import html import numpy as np @@ -438,11 +437,11 @@ def _validate_data(self, X, y=None, reset=True, return out - def _repr_html_(self): - """HTML representation of estimator.""" + def _repr_mimebundle_(self, **kwargs): + output = {"text/plain": repr(self)} if get_config()["repr_html"]: - return display_estimator_html(self) - return None + output["text/html"] = display_estimator_html(self) + return output class ClassifierMixin: diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 96c82c7b0ef3f..f12c07d49090e 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -1,6 +1,5 @@ # Author: Gael Varoquaux # License: BSD 3 clause -import html import numpy as np import scipy.sparse as sp @@ -518,10 +517,11 @@ def fit(self, X, y=None): def test_repr_mimebundle_(): # Checks the repr_html configuration flag controls the json output tree = DecisionTreeClassifier() - output = tree._repr_html_() - assert output == f"{html.escape(str(tree))}" + output = tree._repr_mimebundle_() + assert "text/plain" in output + assert "text/html" not in output with config_context(repr_html=True): - output = tree._repr_html_() - # html output - assert "