sklearn-viz

' '

' @@ -158,7 +175,7 @@ def _write_estimator_html(out, estimator, name): } .sk-dashed-wrapped { border: 1px dashed gray; - padding: 0.25em; + padding: 0 0.25em 0.25em 0.25em; } .sk-label { text-align: center; @@ -166,10 +183,8 @@ def _write_estimator_html(out, estimator, name): font-weight: bold; background: white; display: inline-block; - border: 1px dotted rgb(171, 171, 171); - border-radius: 0.25em; - padding: 0.2em 0.5em; - margin: 0.1em; + text-decoration: underline; + margin: 0 0.5em; } .sk-label-container { text-align: center; @@ -213,6 +228,7 @@ def _write_estimator_html(out, estimator, name): .sk-top-container { display: flex; color: black; + padding-bottom: 1em; } """ diff --git a/sklearn/inspection/tests/test_display_estimator.py b/sklearn/inspection/tests/test_display_estimator.py index d02f2eb45aed1..63f9489dc69ef 100644 --- a/sklearn/inspection/tests/test_display_estimator.py +++ b/sklearn/inspection/tests/test_display_estimator.py @@ -14,6 +14,8 @@ from sklearn.ensemble import VotingClassifier from sklearn.feature_selection import SelectPercentile from sklearn.preprocessing import OneHotEncoder +from sklearn.svm import LinearSVC +from sklearn.multiclass import OneVsOneClassifier from sklearn.inspection._display_estimator import _write_label_html from sklearn.inspection._display_estimator import _estimator_tool_tip from sklearn.inspection._display_estimator import _type_of_html_estimator @@ -122,201 +124,201 @@ def test_type_of_html_estimator_column_transformer(): expected_display_estimator = """ - -

- preprocessor + +

+ preprocessor +

- num +

+ num +

- passthrough +

+ passthrough +

- SimpleImputer +

+ SimpleImputer +

cat

cat +

- SimpleImputer +

+ SimpleImputer +

- OneHotEncoder +

+ OneHotEncoder +

- feat_u -

- pca -

- PCA -

+ feat_u

- tsvd +

+ pca +

- TruncatedSVD + PCA

- SelectPercentile +

+ tsvd +

+ TruncatedSVD +

+ SelectPercentile +

- classifier -

- lr -

+ classifier

- LogisticRegression + lr

- mlp +

+ LogisticRegression +

- MLPClassifier + mlp +

+ MLPClassifier +

@@ -325,9 +327,7 @@ def test_type_of_html_estimator_column_transformer():

- - + """.format(style=_STYLE).replace('\n', '').replace(' ', '') @@ -365,3 +365,36 @@ def test_display_estimator(): html_output = display_estimator(pipe) assert expected_display_estimator == html_output.replace(' ', '') + + +expected_display_estimator_ovo = """ + +

+ OneVsOneClassifier +

+ LinearSVC +

+ +""".format(style=_STYLE).replace('\n', '').replace(' ', '') + + +def test_display_estimator_ovo_classifier(): + ovo = OneVsOneClassifier(LinearSVC()) + html_output = display_estimator(ovo) + assert expected_display_estimator_ovo == html_output.replace(' ', '') From 50ed9f339f11ba060eae88276e1c47312a4ce715 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Thu, 25 Jul 2019 15:19:39 -0400 Subject: [PATCH 10/81] STY Removes underline --- sklearn/inspection/_display_estimator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index f3c2e2c6c3561..70ae509673c38 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -183,7 +183,6 @@ def _write_estimator_html(out, estimator, name): font-weight: bold; background: white; display: inline-block; - text-decoration: underline; margin: 0 0.5em; } .sk-label-container { From 1598cade87ff6ec4b0191274a0adf4814091a3b1 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Wed, 31 Jul 2019 11:32:18 -0400 Subject: [PATCH 11/81] ENH Updates style --- sklearn/inspection/_display_estimator.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index 70ae509673c38..6d55cf09e38ae 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -67,11 +67,11 @@ def _type_of_html_estimator(estimator): inner_estimator = estimator.estimator return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) - elif hasattr(estimator, "base_estimator"): - name = estimator.__class__.__name__ - name_tip = _estimator_tool_tip(estimator) - inner_estimator = estimator.base_estimator - return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) + # elif hasattr(estimator, "base_estimator"): + # name = estimator.__class__.__name__ + # name_tip = 
_estimator_tool_tip(estimator) + # inner_estimator = estimator.base_estimator + # return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) elif isinstance(estimator, BaseEstimator): name = estimator.__class__.__name__ @@ -158,6 +158,7 @@ def _write_estimator_html(out, estimator, name): .sk-parallel { display: flex; align-items: stretch; + justify-content: center; } .sk-parallel-item { display: flex; @@ -173,17 +174,21 @@ def _write_estimator_html(out, estimator, name): align-self: flex-start; width: 50%; } +.sk-parallel-item:only-child::after { + width: 0; +} .sk-dashed-wrapped { border: 1px dashed gray; padding: 0 0.25em 0.25em 0.25em; } .sk-label { - text-align: center; + min-width: 70%; font-family: monospace; font-weight: bold; background: white; display: inline-block; margin: 0 0.5em; + line-height: 1.4em; } .sk-label-container { text-align: center; @@ -227,7 +232,7 @@ def _write_estimator_html(out, estimator, name): .sk-top-container { display: flex; color: black; - padding-bottom: 1em; + padding-bottom: 2em; } """ From 464f6d86f7c95630563da1ea4054e9fc4f56dc5a Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Thu, 1 Aug 2019 14:42:53 -0400 Subject: [PATCH 12/81] ENH Update style --- sklearn/inspection/_display_estimator.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index 6d55cf09e38ae..25c2be2039454 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -67,12 +67,6 @@ def _type_of_html_estimator(estimator): inner_estimator = estimator.estimator return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) - # elif hasattr(estimator, "base_estimator"): - # name = estimator.__class__.__name__ - # name_tip = _estimator_tool_tip(estimator) - # inner_estimator = estimator.base_estimator - # return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) - elif isinstance(estimator, BaseEstimator): 
name = estimator.__class__.__name__ tool_tip = _estimator_tool_tip(estimator) @@ -182,17 +176,14 @@ def _write_estimator_html(out, estimator, name): padding: 0 0.25em 0.25em 0.25em; } .sk-label { - min-width: 70%; font-family: monospace; font-weight: bold; background: white; display: inline-block; margin: 0 0.5em; - line-height: 1.4em; } .sk-label-container { text-align: center; - border: #f0f8ff solid red; } .sk-serial-item { margin-bottom: 0.25em; From f4d882c0f837f287c32e091cae5731e451106e9c Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Mon, 5 Aug 2019 15:10:39 -0400 Subject: [PATCH 13/81] STY Update styling --- sklearn/inspection/_display_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/inspection/_display_estimator.py index 25c2be2039454..1c533db0f6440 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/inspection/_display_estimator.py @@ -205,7 +205,7 @@ def _write_estimator_html(out, estimator, name): opacity: 0; font-weight: 400; position: absolute; - top: 100%; + top: 0; left: 0; padding: 0.5em; overflow: hidden; From c4cfe63b4d1d1aadc9c277858f1bb52173561b9e Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 11 Mar 2020 10:38:02 -0400 Subject: [PATCH 14/81] CLN Address comments --- doc/sphinxext/display_html.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinxext/display_html.py b/doc/sphinxext/display_html.py index 31bcaf83280d0..ed175a84aee5b 100644 --- a/doc/sphinxext/display_html.py +++ b/doc/sphinxext/display_html.py @@ -9,6 +9,7 @@ class ExecuteHTML(Directive): + "Execute Python code and includes stdout as HTML" has_content = True required_arguments = 0 From 92be3e9bdd519025ce9dae13ce144c02ba54d9ea Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Thu, 12 Mar 2020 21:48:14 -0400 Subject: [PATCH 15/81] ENH Makes display_estimator privatte --- doc/conf.py | 1 - .../{inspection => }/_display_estimator.py | 167 +++++--- sklearn/base.py | 4 + 
sklearn/inspection/__init__.py | 2 - .../tests/test_display_estimator.py | 400 ------------------ sklearn/tests/test_display_estimator.py | 176 ++++++++ 6 files changed, 288 insertions(+), 462 deletions(-) rename sklearn/{inspection => }/_display_estimator.py (62%) delete mode 100644 sklearn/inspection/tests/test_display_estimator.py create mode 100644 sklearn/tests/test_display_estimator.py diff --git a/doc/conf.py b/doc/conf.py index ee8bdb3768eb5..778dad7554a70 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -39,7 +39,6 @@ 'sphinx.ext.imgconverter', 'sphinx_gallery.gen_gallery', 'sphinx_issues', - 'display_html' ] # this is needed for some reason... diff --git a/sklearn/inspection/_display_estimator.py b/sklearn/_display_estimator.py similarity index 62% rename from sklearn/inspection/_display_estimator.py rename to sklearn/_display_estimator.py index 1c533db0f6440..06755b8f9c019 100644 --- a/sklearn/inspection/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,76 +1,93 @@ from collections import namedtuple from contextlib import closing from io import StringIO +import uuid -from .._config import config_context -from ..base import BaseEstimator -from ..pipeline import Pipeline -from ..pipeline import FeatureUnion -from ..compose import ColumnTransformer -from ..ensemble import VotingClassifier, VotingRegressor - -def _estimator_tool_tip(estimator): +def _estimator_details(estimator): """Replace newlines to allow for css content: attr(...) to properly - display tooltips. + display estimator details. """ return str(estimator).replace('\n', ' ') +def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class): + out.write( + f'

' + f'

') + + if tool_tip is not None: + est_id = uuid.uuid4() + out.write(f'' + f'' + f'{name}' + f'

{tool_tip}'
+                  f'

') + else: + out.write(f'{name}') + out.write('

') # outer_class inner_class + + def _write_label_html(out, name, tool_tip): """Write label to html""" - out.write('

' - '

' - '{}

'.format(tool_tip, name)) + _write_dropdown_html(out, name, tool_tip, "sk-label-container", "sk-label") _EstHTMLInfo = namedtuple('_EstHTMLInfo', - 'type, estimators, names, name_tips') + 'type, estimators, names, name_details') def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. """ + from sklearn.base import BaseEstimator + from sklearn.pipeline import Pipeline + from sklearn.pipeline import FeatureUnion + from sklearn.compose import ColumnTransformer + from sklearn.ensemble import VotingClassifier, VotingRegressor + if isinstance(estimator, str): - return _EstHTMLInfo('single', estimator, estimator, estimator) + return _EstHTMLInfo('single', [estimator], [estimator], [estimator]) elif estimator is None: - return _EstHTMLInfo('single', estimator, 'None', 'None') + return _EstHTMLInfo('single', [estimator], ['None'], ['None']) elif isinstance(estimator, Pipeline): estimators = [step[1] for step in estimator.steps] names = [step[0] for step in estimator.steps] - name_tips = [_estimator_tool_tip(est) for est in estimators] - return _EstHTMLInfo('serial', estimators, names, name_tips) + name_details = [_estimator_details(est) for est in estimators] + return _EstHTMLInfo('serial', estimators, names, name_details) elif isinstance(estimator, ColumnTransformer): estimators = [trans[1] for trans in estimator.transformers] names = [trans[0] for trans in estimator.transformers] - name_tips = [trans[2] for trans in estimator.transformers] - return _EstHTMLInfo('parallel', estimators, names, name_tips) + name_details = [trans[2] for trans in estimator.transformers] + return _EstHTMLInfo('parallel', estimators, names, name_details) elif isinstance(estimator, FeatureUnion): estimators = [trans[1] for trans in estimator.transformer_list] names = [trans[0] for trans in estimator.transformer_list] - name_tips = [_estimator_tool_tip(est) for est in estimators] - return _EstHTMLInfo('parallel', estimators, names, name_tips) + 
name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) elif isinstance(estimator, (VotingClassifier, VotingRegressor)): estimators = [est[1] for est in estimator.estimators] names = [est[0] for est in estimator.estimators] - name_tips = [_estimator_tool_tip(est) for est in estimators] - return _EstHTMLInfo('parallel', estimators, names, name_tips) + name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) elif hasattr(estimator, "estimator"): - name = estimator.__class__.__name__ - name_tip = _estimator_tool_tip(estimator) + names = [estimator.__class__.__name__] + name_details = [_estimator_details(estimator)] inner_estimator = estimator.estimator - return _EstHTMLInfo('single-meta', inner_estimator, name, name_tip) + return _EstHTMLInfo('single-meta', [inner_estimator], names, + name_details) elif isinstance(estimator, BaseEstimator): - name = estimator.__class__.__name__ - tool_tip = _estimator_tool_tip(estimator) - return _EstHTMLInfo('single', estimator, name, tool_tip) + names = [estimator.__class__.__name__] + tool_tips = [_estimator_details(estimator)] + return _EstHTMLInfo('single', [estimator], names, tool_tips) else: raise ValueError("Invalid estimator") @@ -84,7 +101,7 @@ def _write_estimator_html(out, estimator, name): if est_html_info.type == 'serial': out.write('

') est_infos = zip(est_html_info.estimators, est_html_info.names, - est_html_info.name_tips) + est_html_info.name_details) for est, name, tool_tip in est_infos: _write_estimator_html(out, est, name) out.write('

') # sk-serial @@ -92,12 +109,12 @@ def _write_estimator_html(out, estimator, name): elif est_html_info.type == 'parallel': out.write('

') if name: - tool_tip = _estimator_tool_tip(estimator) + tool_tip = _estimator_details(estimator) _write_label_html(out, name, tool_tip) out.write('

') est_infos = zip(est_html_info.estimators, est_html_info.names, - est_html_info.name_tips) + est_html_info.name_details) for est, name, tool_tip in est_infos: out.write('

') _write_label_html(out, name, tool_tip) @@ -105,20 +122,58 @@ def _write_estimator_html(out, estimator, name): _write_estimator_html(out, est, name) out.write('

') # sk-parallel-item sk-serial out.write('

') # sk-parallel sk-serial-item + elif est_html_info.type == 'single-meta': out.write('

') - _write_label_html(out, est_html_info.names, est_html_info.name_tips) - _write_estimator_html(out, est_html_info.estimators, + _write_label_html(out, est_html_info.names[0], + est_html_info.name_details[0]) + _write_estimator_html(out, est_html_info.estimators[0], est_html_info.estimators.__class__.__name__) out.write('

') # sk-serial-item # sk-serial + elif est_html_info.type == 'single': - out.write('

' - '

' - '{}

'.format(est_html_info.name_tips, - est_html_info.names)) + _write_dropdown_html(out, est_html_info.names[0], + est_html_info.name_details[0], + "sk-serial-item", "sk-estimator") _STYLE = """ +.sk-toggleable { + background-color: white; +} +.sk-toggleable__label { + cursor: pointer; + display: block; + width: 100%; + margin-bottom: 0; +} +.sk-toggleable__content { + max-height: 0; + max-width: 0; + overflow: hidden; + text-align: left; + background-color: #f0f8ff; +} +div.sk-toggleable__content pre { + margin: 0.5em; + border-radius: 0.25em; +} +.sk-toggleable__control:checked~.sk-toggleable__content { + max-height: 200px; + max-width: 100%; + overflow: auto; +} +.sk-hidden--visually { + border: 0; + clip: rect(1px 1px 1px 1px); + clip: rect(1px, 1px, 1px, 1px); + height: 1px; + margin: -1px; + overflow: hidden; + padding: 0; + position: absolute; + width: 1px; +} .sk-estimator { font-family: monospace; background-color: #f0f8ff; @@ -149,11 +204,15 @@ def _write_estimator_html(out, estimator, name): float: left; background: white; } +.sk-serial-item { + z-index: 1; +} .sk-parallel { display: flex; align-items: stretch; justify-content: center; } + .sk-parallel-item { display: flex; flex-direction: column; @@ -175,19 +234,19 @@ def _write_estimator_html(out, estimator, name): border: 1px dashed gray; padding: 0 0.25em 0.25em 0.25em; } -.sk-label { +.sk-label label { font-family: monospace; font-weight: bold; background: white; display: inline-block; margin: 0 0.5em; + line-height: 1.4em; + width: 97%; } .sk-label-container { text-align: center; -} -.sk-serial-item { - margin-bottom: 0.25em; - background: white; + border: #f0f8ff solid red; + z-index: 1; } .sk-container { display: flex; @@ -220,12 +279,7 @@ def _write_estimator_html(out, estimator, name): opacity: 1; z-index: 2; } -.sk-top-container { - display: flex; - color: black; - padding-bottom: 2em; -} -""" +""".replace('\n', '').replace(' ', '') def display_estimator(estimator, print_changed_only=True): @@ 
-246,15 +300,18 @@ def display_estimator(estimator, print_changed_only=True): HTML representation of estimator. When called in jupyter notebook or lab, a iPython HTML object is returned. """ + from sklearn._config import config_context + from sklearn.pipeline import Pipeline with config_context(print_changed_only=print_changed_only), \ closing(StringIO()) as out: + # This forces estimators to always be serial at the first layer if not isinstance(estimator, Pipeline): estimator = Pipeline([(estimator.__class__.__name__, estimator)]) out.write('') out.write('

') @@ -263,12 +320,4 @@ def display_estimator(estimator, print_changed_only=True): out.write('') html_output = out.getvalue() - # wrap in iPython HTML if in a notebook context - try: - cls_name = get_ipython().__class__.__name__ - if cls_name != 'ZMQInteractiveShell': - return html_output - from IPython.display import HTML - return HTML(html_output) - except (ImportError, NameError): - return html_output + return html_output diff --git a/sklearn/base.py b/sklearn/base.py index 70dec8c030418..73b1c95bb3f08 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -20,6 +20,7 @@ from .utils import _IS_32BIT from .utils.validation import check_X_y from .utils.validation import check_array +from ._display_estimator import display_estimator _DEFAULT_TAGS = { 'non_deterministic': False, @@ -411,6 +412,9 @@ def _validate_data(self, X, y=None, reset=True, **check_params): return out + def _repr_html_(self): + return display_estimator(self) + class ClassifierMixin: """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index ca64f3147d8f2..bfa28f2b3a4f8 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -13,7 +13,6 @@ from .partial_dependence import partial_dependence from ._permutation_importance import permutation_importance # noqa -from ._display_estimator import display_estimator # noqa from ._plot.partial_dependence import plot_partial_dependence # noqa from ._plot.partial_dependence import PartialDependenceDisplay # noqa @@ -24,5 +23,4 @@ 'plot_partial_dependence', 'permutation_importance', 'PartialDependenceDisplay', - 'display_estimator', ] diff --git a/sklearn/inspection/tests/test_display_estimator.py b/sklearn/inspection/tests/test_display_estimator.py deleted file mode 100644 index 63f9489dc69ef..0000000000000 --- a/sklearn/inspection/tests/test_display_estimator.py +++ /dev/null @@ -1,400 +0,0 @@ -from contextlib import closing -from io import StringIO - 
-import pytest - -from sklearn.linear_model import LogisticRegression -from sklearn.neural_network import MLPClassifier -from sklearn.impute import SimpleImputer -from sklearn.decomposition import PCA -from sklearn.decomposition import TruncatedSVD -from sklearn.pipeline import Pipeline -from sklearn.pipeline import FeatureUnion -from sklearn.compose import ColumnTransformer -from sklearn.ensemble import VotingClassifier -from sklearn.feature_selection import SelectPercentile -from sklearn.preprocessing import OneHotEncoder -from sklearn.svm import LinearSVC -from sklearn.multiclass import OneVsOneClassifier -from sklearn.inspection._display_estimator import _write_label_html -from sklearn.inspection._display_estimator import _estimator_tool_tip -from sklearn.inspection._display_estimator import _type_of_html_estimator -from sklearn.inspection._display_estimator import display_estimator -from sklearn.inspection._display_estimator import _STYLE - - -@pytest.mark.parametrize('est, expected', [ - ('None', 'None'), - ('passthrough', 'passthrough'), - ('hello\nworld', 'hello world') -]) -def test_estimator_tool_tip(est, expected): - assert expected == _estimator_tool_tip(est) - - -def test_write_label_html(): - name = "LogisticRegression" - tool_tip = "hello-world" - - expected = ('

' - 'LogisticRegression

') - - with closing(StringIO()) as out: - _write_label_html(out, name, tool_tip) - html_label = out.getvalue() - assert html_label == expected - - -def test_type_of_html_estimator_error(): - with pytest.raises(ValueError, match="Invalid estimator"): - _type_of_html_estimator(100) - - -@pytest.mark.parametrize('est', ['passthrough', 'drop', None]) -def test_type_of_html_estimator_single_str_none(est): - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' - assert est_html_info.estimators == est - assert est_html_info.names == str(est) - assert est_html_info.name_tips == str(est) - - -def test_type_of_html_estimator_single_estimator(): - est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' - assert est_html_info.estimators == est - assert est_html_info.names == est.__class__.__name__ - assert est_html_info.name_tips == _estimator_tool_tip(est) - - -def test_type_of_html_estimator_pipeline(): - pipe = Pipeline([ - ('imputer', SimpleImputer()), - ('classifier', LogisticRegression()) - ]) - est_html_info = _type_of_html_estimator(pipe) - assert est_html_info.type == 'serial' - assert est_html_info.estimators == [step[1] for step in pipe.steps] - assert est_html_info.names == ['imputer', 'classifier'] - assert est_html_info.name_tips == [_estimator_tool_tip(step[1]) - for step in pipe.steps] - - -def test_type_of_html_estimator_feature_union(): - f_union = FeatureUnion([ - ('pca', PCA()), ('svd', TruncatedSVD()) - ]) - est_html_info = _type_of_html_estimator(f_union) - assert est_html_info.type == 'parallel' - assert est_html_info.names == ['pca', 'svd'] - assert est_html_info.estimators == [trans[1] - for trans in f_union.transformer_list] - assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) - for trans in f_union.transformer_list] - - -def test_type_of_html_estimator_voting(): - clf = VotingClassifier([ - ('log_reg', LogisticRegression()), - ('mlp', 
MLPClassifier()) - ]) - est_html_info = _type_of_html_estimator(clf) - assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in clf.estimators] - assert est_html_info.names == ['log_reg', 'mlp'] - assert est_html_info.name_tips == [_estimator_tool_tip(trans[1]) - for trans in clf.estimators] - - -def test_type_of_html_estimator_column_transformer(): - ct = ColumnTransformer([ - ('pca', PCA(), ['num1', 'num2']), - ('svd', TruncatedSVD, [0, 3]) - ]) - est_html_info = _type_of_html_estimator(ct) - assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in ct.transformers] - assert est_html_info.names == ['pca', 'svd'] - assert est_html_info.name_tips == [['num1', 'num2'], [0, 3]] - - -expected_display_estimator = """ -

- preprocessor -

- num -

- passthrough -

- SimpleImputer -

cat -

- SimpleImputer -

- OneHotEncoder -

- feat_u -

- pca -

- PCA -

- tsvd -

- TruncatedSVD -

- SelectPercentile -

- classifier -

- lr -

- LogisticRegression -

- mlp -

- MLPClassifier -

- -""".format(style=_STYLE).replace('\n', '').replace(' ', '') - - -def test_display_estimator(): - num_trans = Pipeline(steps=[ - ('pass', 'passthrough'), - ('imputer', SimpleImputer(strategy='median')) - ]) - - cat_trans = Pipeline(steps=[ - ('imputer', SimpleImputer(strategy='constant', - missing_values='empty')), - ('one-hot', OneHotEncoder()) - ]) - - preprocess = ColumnTransformer([ - ('num', num_trans, ['a', 'b', 'c', 'd', 'e']), - ('cat', cat_trans, [0, 1, 2, 3]) - ]) - - feat_u = FeatureUnion([ - ('pca', PCA(n_components=1)), - ('tsvd', Pipeline([('first', TruncatedSVD(n_components=3)), - ('select', SelectPercentile())])) - ]) - - clf = VotingClassifier([ - ('lr', LogisticRegression(solver='lbfgs', random_state=1)), - ('mlp', MLPClassifier(alpha=0.001)) - ]) - - pipe = Pipeline([ - ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) - ]) - html_output = display_estimator(pipe) - - assert expected_display_estimator == html_output.replace(' ', '') - - -expected_display_estimator_ovo = """ - -

- OneVsOneClassifier -

- LinearSVC -

- -""".format(style=_STYLE).replace('\n', '').replace(' ', '') - - -def test_display_estimator_ovo_classifier(): - ovo = OneVsOneClassifier(LinearSVC()) - html_output = display_estimator(ovo) - assert expected_display_estimator_ovo == html_output.replace(' ', '') diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py new file mode 100644 index 0000000000000..2f5a4cffe5711 --- /dev/null +++ b/sklearn/tests/test_display_estimator.py @@ -0,0 +1,176 @@ +from contextlib import closing +from io import StringIO + +import pytest + +from sklearn.linear_model import LogisticRegression +from sklearn.neural_network import MLPClassifier +from sklearn.impute import SimpleImputer +from sklearn.decomposition import PCA +from sklearn.decomposition import TruncatedSVD +from sklearn.pipeline import Pipeline +from sklearn.pipeline import FeatureUnion +from sklearn.compose import ColumnTransformer +from sklearn.ensemble import VotingClassifier +from sklearn.feature_selection import SelectPercentile +from sklearn.preprocessing import OneHotEncoder +from sklearn.svm import LinearSVC +from sklearn.multiclass import OneVsOneClassifier +from sklearn._display_estimator import _write_label_html +from sklearn._display_estimator import _estimator_details +from sklearn._display_estimator import _type_of_html_estimator +from sklearn._display_estimator import display_estimator + + +@pytest.mark.parametrize('est, expected', [ + ('None', 'None'), + ('passthrough', 'passthrough'), + ('hello\nworld', 'hello world') +]) +def test_estimator_tool_tip(est, expected): + assert expected == _estimator_details(est) + + +def test_write_label_html(): + name = "LogisticRegression" + tool_tip = "hello-world" + + with closing(StringIO()) as out: + _write_label_html(out, name, tool_tip) + html_label = out.getvalue() + assert 'LogisticRegression' in html_label + assert html_label.startswith('

') + assert '

hello-world

' in html_label + + +def test_type_of_html_estimator_error(): + with pytest.raises(ValueError, match="Invalid estimator"): + _type_of_html_estimator(100) + + +@pytest.mark.parametrize('est', ['passthrough', 'drop', None]) +def test_type_of_html_estimator_single_str_none(est): + est_html_info = _type_of_html_estimator(est) + assert est_html_info.type == 'single' + assert est_html_info.estimators[0] == est + assert est_html_info.names[0] == str(est) + assert est_html_info.name_details[0] == str(est) + + +def test_type_of_html_estimator_single_estimator(): + est = LogisticRegression(C=10.0) + est_html_info = _type_of_html_estimator(est) + assert est_html_info.type == 'single' + assert est_html_info.estimators[0] == est + assert est_html_info.names[0] == est.__class__.__name__ + assert est_html_info.name_details[0] == _estimator_details(est) + + +def test_type_of_html_estimator_pipeline(): + pipe = Pipeline([ + ('imputer', SimpleImputer()), + ('classifier', LogisticRegression()) + ]) + est_html_info = _type_of_html_estimator(pipe) + assert est_html_info.type == 'serial' + assert est_html_info.estimators == [step[1] for step in pipe.steps] + assert est_html_info.names == ['imputer', 'classifier'] + assert est_html_info.name_details == [_estimator_details(step[1]) + for step in pipe.steps] + + +def test_type_of_html_estimator_feature_union(): + f_union = FeatureUnion([ + ('pca', PCA()), ('svd', TruncatedSVD()) + ]) + est_html_info = _type_of_html_estimator(f_union) + assert est_html_info.type == 'parallel' + assert est_html_info.names == ['pca', 'svd'] + assert est_html_info.estimators == [trans[1] + for trans in f_union.transformer_list] + assert est_html_info.name_details == [None, None] + + +def test_type_of_html_estimator_voting(): + clf = VotingClassifier([ + ('log_reg', LogisticRegression()), + ('mlp', MLPClassifier()) + ]) + est_html_info = _type_of_html_estimator(clf) + assert est_html_info.type == 'parallel' + assert est_html_info.estimators == [trans[1] + for 
trans in clf.estimators] + assert est_html_info.names == ['log_reg', 'mlp'] + assert est_html_info.name_details == [None, None] + + +def test_type_of_html_estimator_column_transformer(): + ct = ColumnTransformer([ + ('pca', PCA(), ['num1', 'num2']), + ('svd', TruncatedSVD, [0, 3]) + ]) + est_html_info = _type_of_html_estimator(ct) + assert est_html_info.type == 'parallel' + assert est_html_info.estimators == [trans[1] + for trans in ct.transformers] + assert est_html_info.names == ['pca', 'svd'] + assert est_html_info.name_details == [['num1', 'num2'], [0, 3]] + + +def test_display_estimator_pipeline(): + num_trans = Pipeline(steps=[ + ('pass', 'passthrough'), + ('imputer', SimpleImputer(strategy='median')) + ]) + + cat_trans = Pipeline(steps=[ + ('imputer', SimpleImputer(strategy='constant', + missing_values='empty')), + ('one-hot', OneHotEncoder()) + ]) + + preprocess = ColumnTransformer([ + ('num', num_trans, ['a', 'b', 'c', 'd', 'e']), + ('cat', cat_trans, [0, 1, 2, 3]) + ]) + + feat_u = FeatureUnion([ + ('pca', PCA(n_components=1)), + ('tsvd', Pipeline([('first', TruncatedSVD(n_components=3)), + ('select', SelectPercentile())])) + ]) + + clf = VotingClassifier([ + ('lr', LogisticRegression(solver='lbfgs', random_state=1)), + ('mlp', MLPClassifier(alpha=0.001)) + ]) + + pipe = Pipeline([ + ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf) + ]) + html_output = display_estimator(pipe) + + expected_strings = [ + 'passthrough', + 'div class=\"sk-toggleable__content\">

SimpleImputer'
+      '(strategy=\'median\')',
+      'SimpleImputer(missing_values=\'empty\', strategy=\'constant\')'
+      '',
+      '(\'one-hot\', OneHotEncoder())',
+      'preprocessor',
+      '[\'a\', \'b\', \'c\', \'d\', \'e\']',
+      'LogisticRegression(random_state=1)',
+      'SelectPercentile()',
+      '>TruncatedSVD',
+      'TruncatedSVD(n_components=3)',
+    ]
+
+    for expected_string in expected_strings:
+        assert expected_string in html_output
+
+
+def test_display_estimator_ovo_classifier():
+    ovo = OneVsOneClassifier(LinearSVC())
+    html_output = display_estimator(ovo)
+    assert "OneVsOneClassifier(estimator=LinearSVC())" in html_output
+    assert "LinearSVC" in html_output

From 1b471702c2bd98af82427f18c37b2be42f9f8dde Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Thu, 12 Mar 2020 23:21:12 -0400
Subject: [PATCH 16/81] ENH Major visual changes

---
 doc/conf.py                                   |  1 +
 doc/modules/compose.rst                       |  7 +-
 ...splay_html.py => display_est_repr_html.py} | 16 ++--
 sklearn/_display_estimator.py                 | 82 ++++++++++---------
 sklearn/base.py                               |  4 +-
 sklearn/tests/test_display_estimator.py       | 11 +--
 6 files changed, 62 insertions(+), 59 deletions(-)
 rename doc/sphinxext/{display_html.py => display_est_repr_html.py} (66%)

diff --git a/doc/conf.py b/doc/conf.py
index 778dad7554a70..ef36d077435ed 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -39,6 +39,7 @@
     'sphinx.ext.imgconverter',
     'sphinx_gallery.gen_gallery',
     'sphinx_issues',
+    'display_est_repr_html'
 ]
 
 # this is needed for some reason...
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 8cfaff2c0c263..fd76eb4b90176 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -538,21 +538,20 @@ above example would be::
 Visualizing Composite Estimators
 ================================
 
-:func:`sklearn.inspection.display_estimator` outputs a html representation of
+By default, a Jupyter notebook outputs an HTML representation of
 composite estimators. This can be useful to diagnose or visualize a Pipeline
 with may estimators. For example, the estimator defined in 
 The composite estimator defined in 
 :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`
 can be visualized as:
 
-.. display_html::
+.. display_estimator_repr_html::
 
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
    from sklearn.preprocessing import StandardScaler, OneHotEncoder
    from sklearn.linear_model import LogisticRegression
-   from sklearn.inspection import display_estimator
 
    numeric_features = ['age', 'fare']
    numeric_transformer = Pipeline(steps=[
@@ -571,7 +570,7 @@ can be visualized as:
 
    clf = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', LogisticRegression())])
-   print(display_estimator(clf))
+   clf
 
 .. topic:: Examples:
 
diff --git a/doc/sphinxext/display_html.py b/doc/sphinxext/display_est_repr_html.py
similarity index 66%
rename from doc/sphinxext/display_html.py
rename to doc/sphinxext/display_est_repr_html.py
index ed175a84aee5b..dc72e71390c6f 100644
--- a/doc/sphinxext/display_html.py
+++ b/doc/sphinxext/display_est_repr_html.py
@@ -1,6 +1,5 @@
 """
-Primary used to display the html output  `sklearn.inspection.display_estimator`
-in sphinx.
+Primarily used to display the html output of `_repr_html_` of estimators
 """
 import sys
 from docutils.parsers.rst import Directive
@@ -8,15 +7,18 @@
 from io import StringIO
 
 
-class ExecuteHTML(Directive):
+class DisplayReprEstimator(Directive):
     "Execute Python code and includes stdout as HTML"
 
     has_content = True
     required_arguments = 0
     optional_arguments = 0
 
-    @classmethod
-    def execute(cls, code):
+    def execute(self, code):
+        code_parts = code.split('\n')
+        final_output = code_parts[-1]
+        code_parts[-1] = f'print({final_output}._repr_html_())'
+        code = '\n'.join(code_parts)
         orig_stdout, orig_stderr = sys.stdout, sys.stderr
 
         output, err = StringIO(), StringIO()
@@ -25,7 +27,7 @@ def execute(cls, code):
         exec(code)
         sys.stdout, sys.stderr = orig_stdout, orig_stderr
 
-        return "".join(['',
+        return "".join(['',
                         output.getvalue(), err.getvalue(), ""])
 
     def run(self):
@@ -42,4 +44,4 @@ def run(self):
 
 
 def setup(app):
-    app.add_directive('display_html', ExecuteHTML)
+    app.add_directive('display_estimator_repr_html', DisplayReprEstimator)
diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index 06755b8f9c019..6d70660d0ef17 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -41,7 +41,7 @@ def _write_label_html(out, name, tool_tip):
 def _type_of_html_estimator(estimator):
     """Generate information about how to display an estimator.
     """
-    from sklearn.base import BaseEstimator
+    # import here to avoid circular import from base.py
     from sklearn.pipeline import Pipeline
     from sklearn.pipeline import FeatureUnion
     from sklearn.compose import ColumnTransformer
@@ -83,14 +83,10 @@ def _type_of_html_estimator(estimator):
         inner_estimator = estimator.estimator
         return _EstHTMLInfo('single-meta', [inner_estimator], names,
                             name_details)
-
-    elif isinstance(estimator, BaseEstimator):
-        names = [estimator.__class__.__name__]
-        tool_tips = [_estimator_details(estimator)]
-        return _EstHTMLInfo('single', [estimator], names, tool_tips)
-
-    else:
-        raise ValueError("Invalid estimator")
+    # Base estimator
+    names = [estimator.__class__.__name__]
+    tool_tips = [_estimator_details(estimator)]
+    return _EstHTMLInfo('single', [estimator], names, tool_tips)
 
 
 def _write_estimator_html(out, estimator, name):
@@ -138,16 +134,19 @@ def _write_estimator_html(out, estimator, name):
 
 
 _STYLE = """
-.sk-toggleable {
+div.sk-toggleable {
   background-color: white;
 }
-.sk-toggleable__label {
+label.sk-toggleable__label {
   cursor: pointer;
   display: block;
   width: 100%;
   margin-bottom: 0;
+  padding: 0.2em 0.3em;
+  box-sizing: border-box;
+  text-align: center;
 }
-.sk-toggleable__content {
+div.sk-toggleable__content {
   max-height: 0;
   max-width: 0;
   overflow: hidden;
@@ -155,15 +154,15 @@ def _write_estimator_html(out, estimator, name):
   background-color: #f0f8ff;
 }
 div.sk-toggleable__content pre {
-  margin: 0.5em;
+  margin: 0.2em;
   border-radius: 0.25em;
 }
-.sk-toggleable__control:checked~.sk-toggleable__content {
+input.sk-toggleable__control:checked~div.sk-toggleable__content {
   max-height: 200px;
   max-width: 100%;
   overflow: auto;
 }
-.sk-hidden--visually {
+input.sk-hidden--visually {
   border: 0;
   clip: rect(1px 1px 1px 1px);
   clip: rect(1px, 1px, 1px, 1px);
@@ -174,81 +173,86 @@ def _write_estimator_html(out, estimator, name):
   position: absolute;
   width: 1px;
 }
-.sk-estimator {
+div.sk-estimator {
   font-family: monospace;
   background-color: #f0f8ff;
-  padding: 0.5em;
   margin: 0.25em 0.25em;
   border: 1px dotted black;
   border-radius: 0.25em;
-  text-align: center;
+  box-sizing: border-box;
+}
+div.sk-estimator:hover {
+  background-color: #a3d4ff;
 }
-.sk-parallel-item::after {
+div.sk-parallel-item::after {
   content: "";
   width: 100%;
   border-bottom: 1px solid gray;
   flex-grow: 1;
 }
-.sk-serial::before {
+div.sk-label:hover label.sk-toggleable__label {
+  color: #0087fe;
+  background-color: rgb(246, 246, 246);
+  border-radius: 0.25em;
+}
+div.sk-serial::before {
   content: "";
   position: absolute;
   border-left: 1px solid gray;
+  box-sizing: border-box;
   top: 2em;
   bottom: 0;
   left: 50%;
 }
-.sk-serial {
+div.sk-serial {
   display: flex;
   flex-direction: column;
   align-items: center;
   float: left;
   background: white;
 }
-.sk-serial-item {
+div.sk-serial-item {
   z-index: 1;
 }
-.sk-parallel {
+div.sk-parallel {
   display: flex;
   align-items: stretch;
   justify-content: center;
 }
-
-.sk-parallel-item {
+div.sk-parallel-item {
   display: flex;
   flex-direction: column;
   position: relative;
   background: white;
 }
-.sk-parallel-item:first-child::after {
+div.sk-parallel-item:first-child::after {
   align-self: flex-end;
   width: 50%;
 }
-.sk-parallel-item:last-child::after {
+div.sk-parallel-item:last-child::after {
   align-self: flex-start;
   width: 50%;
 }
-.sk-parallel-item:only-child::after {
+div.sk-parallel-item:only-child::after {
   width: 0;
 }
-.sk-dashed-wrapped {
+div.sk-dashed-wrapped {
   border: 1px dashed gray;
-  padding: 0 0.25em 0.25em 0.25em;
+  padding: 0 0.3em 0.3em 0.3em;
+  box-sizing: border-box;
 }
-.sk-label label {
+div.sk-label label {
   font-family: monospace;
   font-weight: bold;
   background: white;
   display: inline-block;
-  margin: 0 0.5em;
   line-height: 1.4em;
-  width: 97%;
 }
-.sk-label-container {
+div.sk-label-container {
   text-align: center;
-  border: #f0f8ff solid red;
   z-index: 1;
 }
-.sk-container {
+div.sk-container {
   display: flex;
   flex-direction: column;
   align-items: flex-start;
@@ -270,6 +274,7 @@ def _write_estimator_html(out, estimator, name):
   overflow: hidden;
   background-color: #f0f8ff;
   border: 1px solid gray;
+  box-sizing: border-box;
   white-space: pre;
   content: attr(sk-data-tooltip);
   text-align: left;
@@ -279,10 +284,10 @@ def _write_estimator_html(out, estimator, name):
   opacity: 1;
   z-index: 2;
 }
-""".replace('\n', '').replace('  ', '')
+""".replace('  ', '').replace('\n', '')
 
 
-def display_estimator(estimator, print_changed_only=True):
+def _estimator_repr_html(estimator, print_changed_only=True):
     """Build a HTML representation of an estimator
 
     Parameters
@@ -300,6 +305,7 @@ def display_estimator(estimator, print_changed_only=True):
         HTML representation of estimator. When called in jupyter notebook or
         lab, a iPython HTML object is returned.
     """
+    # import here to avoid circular import from base.py
     from sklearn._config import config_context
     from sklearn.pipeline import Pipeline
 
diff --git a/sklearn/base.py b/sklearn/base.py
index 73b1c95bb3f08..08b0fc820705d 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -20,7 +20,7 @@
 from .utils import _IS_32BIT
 from .utils.validation import check_X_y
 from .utils.validation import check_array
-from ._display_estimator import display_estimator
+from ._display_estimator import _estimator_repr_html
 
 _DEFAULT_TAGS = {
     'non_deterministic': False,
@@ -413,7 +413,7 @@ def _validate_data(self, X, y=None, reset=True, **check_params):
         return out
 
     def _repr_html_(self):
-        return display_estimator(self)
+        return _estimator_repr_html(self)
 
 
 class ClassifierMixin:
diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py
index 2f5a4cffe5711..74a7b4b540a9c 100644
--- a/sklearn/tests/test_display_estimator.py
+++ b/sklearn/tests/test_display_estimator.py
@@ -19,7 +19,7 @@
 from sklearn._display_estimator import _write_label_html
 from sklearn._display_estimator import _estimator_details
 from sklearn._display_estimator import _type_of_html_estimator
-from sklearn._display_estimator import display_estimator
+from sklearn._display_estimator import _estimator_repr_html
 
 
 @pytest.mark.parametrize('est, expected', [
@@ -43,11 +43,6 @@ def test_write_label_html():
         assert 'hello-world' in html_label
 
 
-def test_type_of_html_estimator_error():
-    with pytest.raises(ValueError, match="Invalid estimator"):
-        _type_of_html_estimator(100)
-
-
 @pytest.mark.parametrize('est', ['passthrough', 'drop', None])
 def test_type_of_html_estimator_single_str_none(est):
     est_html_info = _type_of_html_estimator(est)
@@ -148,7 +143,7 @@ def test_display_estimator_pipeline():
     pipe = Pipeline([
         ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf)
     ])
-    html_output = display_estimator(pipe)
+    html_output = _estimator_repr_html(pipe)
 
     expected_strings = [
       'passthrough',
@@ -171,6 +166,6 @@ def test_display_estimator_pipeline():
 
 def test_display_estimator_ovo_classifier():
     ovo = OneVsOneClassifier(LinearSVC())
-    html_output = display_estimator(ovo)
+    html_output = _estimator_repr_html(ovo)
     assert "OneVsOneClassifier(estimator=LinearSVC())" in html_output
     assert "LinearSVC" in html_output

From 733bade7aaf6ea4d27d77d98a6e3bb9c65c41b92 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Thu, 12 Mar 2020 23:31:27 -0400
Subject: [PATCH 17/81] ENH Update viz

---
 sklearn/_display_estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index 6d70660d0ef17..dc3711ba00bc8 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -182,7 +182,7 @@ def _write_estimator_html(out, estimator, name):
   box-sizing: border-box;
 }
 div.sk-estimator:hover {
-  background-color: #a3d4ff;
+  background-color: #c1e2ff;
 }
 div.sk-parallel-item::after {
   content: "";

From ae98ae9892f83a0aab49bede07e437b9e9cc1c6d Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Fri, 13 Mar 2020 11:39:22 -0400
Subject: [PATCH 18/81] STY Update

---
 sklearn/_display_estimator.py           | 1 +
 sklearn/tests/test_display_estimator.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index dc3711ba00bc8..25fa76b2c4971 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -156,6 +156,7 @@ def _write_estimator_html(out, estimator, name):
 div.sk-toggleable__content pre {
   margin: 0.2em;
   border-radius: 0.25em;
+  background-color: #f0f8ff;
 }
 input.sk-toggleable__control:checked~div.sk-toggleable__content {
   max-height: 200px;
diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py
index 74a7b4b540a9c..63aaf74f3eb56 100644
--- a/sklearn/tests/test_display_estimator.py
+++ b/sklearn/tests/test_display_estimator.py
@@ -167,5 +167,5 @@ def test_display_estimator_pipeline():
 def test_display_estimator_ovo_classifier():
     ovo = OneVsOneClassifier(LinearSVC())
     html_output = _estimator_repr_html(ovo)
-    assert "OneVsOneClassifier(estimator=LinearSVC())" in html_output
+    assert "pre>OneVsOneClassifier(estimator=LinearSVC())" in html_output
     assert "LinearSVC" in html_output

From 7b1de5fe87be4aa2eaaec8a0f6bc68686e3ad89e Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Fri, 13 Mar 2020 14:29:32 -0400
Subject: [PATCH 19/81] STY Update

---
 sklearn/_display_estimator.py           | 84 ++++++++++++-------------
 sklearn/tests/test_display_estimator.py | 11 ++--
 2 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index 25fa76b2c4971..a0df605d12387 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -4,22 +4,26 @@
 import uuid
 
 
-def _estimator_details(estimator):
+def _estimator_details(estimator, print_changed_only=True):
     """Replace newlines to allow for css content: attr(...) to properly
     display estimator details.
     """
-    return str(estimator).replace('\n', '
')
+    from sklearn._config import config_context
+    with config_context(print_changed_only=print_changed_only):
+        return str(estimator).replace('\n', '
')
 
 
-def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class):
+def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class,
+                         checked=False):
     out.write(
         f''
         f'')
 
     if tool_tip is not None:
+        checked_str = 'checked' if checked else ''
         est_id = uuid.uuid4()
         out.write(f''
+                  f'id="{est_id}" type="checkbox" {checked_str}>'
                   f''
                   f'{name}'
                   f'{tool_tip}'
@@ -29,16 +33,17 @@ def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class):
     out.write('')  # outer_class inner_class
 
 
-def _write_label_html(out, name, tool_tip):
+def _write_label_html(out, name, tool_tip, checked=False):
     """Write label to html"""
-    _write_dropdown_html(out, name, tool_tip, "sk-label-container", "sk-label")
+    _write_dropdown_html(out, name, tool_tip, "sk-label-container", "sk-label",
+                         checked=checked)
 
 
 _EstHTMLInfo = namedtuple('_EstHTMLInfo',
                           'type, estimators, names, name_details')
 
 
-def _type_of_html_estimator(estimator):
+def _type_of_html_estimator(estimator, first_call=False):
     """Generate information about how to display an estimator.
     """
     # import here to avoid circular import from base.py
@@ -48,7 +53,8 @@ def _type_of_html_estimator(estimator):
     from sklearn.ensemble import VotingClassifier, VotingRegressor
 
     if isinstance(estimator, str):
-        return _EstHTMLInfo('single', [estimator], [estimator], [estimator])
+        return _EstHTMLInfo('single', [estimator], [estimator],
+                            [estimator])
 
     elif estimator is None:
         return _EstHTMLInfo('single', [estimator], ['None'], ['None'])
@@ -83,16 +89,20 @@ def _type_of_html_estimator(estimator):
         inner_estimator = estimator.estimator
         return _EstHTMLInfo('single-meta', [inner_estimator], names,
                             name_details)
-    # Base estimator
+
+    # Base estimator, if this is the first call, then all parameters are
+    # printed
     names = [estimator.__class__.__name__]
-    tool_tips = [_estimator_details(estimator)]
+    tool_tips = [_estimator_details(estimator,
+                                    print_changed_only=not first_call)]
     return _EstHTMLInfo('single', [estimator], names, tool_tips)
 
 
-def _write_estimator_html(out, estimator, name):
+def _write_estimator_html(out, estimator, name, first_call=False):
     """Write estimator to html in serial, parallel, or by itself (single).
     """
-    est_html_info = _type_of_html_estimator(estimator)
+    est_html_info = _type_of_html_estimator(estimator,
+                                            first_call=first_call)
 
     if est_html_info.type == 'serial':
         out.write('')
@@ -130,7 +140,8 @@ def _write_estimator_html(out, estimator, name):
     elif est_html_info.type == 'single':
         _write_dropdown_html(out, est_html_info.names[0],
                              est_html_info.name_details[0],
-                             "sk-serial-item", "sk-estimator")
+                             "sk-serial-item", "sk-estimator",
+                             checked=first_call)
 
 
 _STYLE = """
@@ -163,6 +174,12 @@ def _write_estimator_html(out, estimator, name):
   max-width: 100%;
   overflow: auto;
 }
+div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {
+  background-color: #d4ebff;
+}
+div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {
+  background-color: #d4ebff;
+}
 input.sk-hidden--visually {
   border: 0;
   clip: rect(1px 1px 1px 1px);
@@ -183,7 +200,7 @@ def _write_estimator_html(out, estimator, name):
   box-sizing: border-box;
 }
 div.sk-estimator:hover {
-  background-color: #c1e2ff;
+  background-color: #d4ebff;
 }
 div.sk-parallel-item::after {
   content: "";
@@ -192,9 +209,7 @@ def _write_estimator_html(out, estimator, name):
   flex-grow: 1;
 }
 div.sk-label:hover label.sk-toggleable__label {
-  color: #0087fe;
-  background-color: rgb(246, 246, 246);
-  border-radius: 0.25em;
+  background-color: #d4ebff;
 }
 div.sk-serial::before {
   content: "";
@@ -212,6 +227,7 @@ def _write_estimator_html(out, estimator, name):
   float: left;
   background: white;
 }
+
 div.sk-serial-item {
   z-index: 1;
 }
@@ -239,7 +255,7 @@ def _write_estimator_html(out, estimator, name):
 }
 div.sk-dashed-wrapped {
   border: 1px dashed gray;
-  padding: 0 0.3em 0.3em 0.3em;
+  margin: 0 0.3em 0.3em 0.3em;
   box-sizing: border-box;
 }
 div.sk-label label {
@@ -285,10 +301,10 @@ def _write_estimator_html(out, estimator, name):
   opacity: 1;
   z-index: 2;
 }
-""".replace('  ', '').replace('\n', '')
+""".replace('  ', '').replace('\n', '')  # noqa
 
 
-def _estimator_repr_html(estimator, print_changed_only=True):
+def _estimator_repr_html(estimator):
     """Build a HTML representation of an estimator
 
     Parameters
@@ -296,35 +312,19 @@ def _estimator_repr_html(estimator, print_changed_only=True):
     estimator : estimator object
         The estimator to visualize.
 
-    print_changed_only : bool, optional (default=True)
-        If True, only the parameters that were set to non-default
-        values will be printed when printing an estimator.
-
     Returns
     -------
     html: str or iPython HTML object
         HTML representation of estimator. When called in jupyter notebook or
         lab, a iPython HTML object is returned.
     """
-    # import here to avoid circular import from base.py
-    from sklearn._config import config_context
-    from sklearn.pipeline import Pipeline
-
-    with config_context(print_changed_only=print_changed_only), \
-            closing(StringIO()) as out:
-
-        # This forces estimators to always be serial at the first layer
-        if not isinstance(estimator, Pipeline):
-            estimator = Pipeline([(estimator.__class__.__name__, estimator)])
-
-        out.write('')
+    with closing(StringIO()) as out:
 
-        out.write('')
-        _write_estimator_html(out, estimator, '')
-        out.write('')  # sk-top-container # sk-container
-        out.write('')
+        out.write(f''
+                  f'')
+        _write_estimator_html(out, estimator, estimator.__class__.__name__,
+                              first_call=True)
+        out.write('')
 
         html_output = out.getvalue()
         return html_output
diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py
index 63aaf74f3eb56..6739a8c9533e3 100644
--- a/sklearn/tests/test_display_estimator.py
+++ b/sklearn/tests/test_display_estimator.py
@@ -31,16 +31,19 @@ def test_estimator_tool_tip(est, expected):
     assert expected == _estimator_details(est)
 
 
-def test_write_label_html():
+@pytest.mark.parametrize("checked", [True, False])
+def test_write_label_html(checked):
     name = "LogisticRegression"
     tool_tip = "hello-world"
 
     with closing(StringIO()) as out:
-        _write_label_html(out, name, tool_tip)
+        _write_label_html(out, name, tool_tip, checked=checked)
         html_label = out.getvalue()
         assert 'LogisticRegression' in html_label
         assert html_label.startswith('')
         assert 'hello-world' in html_label
+        if checked:
+            assert 'checked>' in html_label
 
 
 @pytest.mark.parametrize('est', ['passthrough', 'drop', None])
@@ -151,7 +154,7 @@ def test_display_estimator_pipeline():
       '(strategy=\'median\')',
       'SimpleImputer(missing_values=\'empty\', strategy=\'constant\')'
       '',
-      '(\'one-hot\', OneHotEncoder())',
+      '(\'one-hot\', OneHotEncoder',
       'preprocessor',
       '[\'a\', \'b\', \'c\', \'d\', \'e\']',
       'LogisticRegression(random_state=1)',
@@ -167,5 +170,5 @@ def test_display_estimator_pipeline():
 def test_display_estimator_ovo_classifier():
     ovo = OneVsOneClassifier(LinearSVC())
     html_output = _estimator_repr_html(ovo)
-    assert "pre>OneVsOneClassifier(estimator=LinearSVC())

" in html_output + assert "pre>OneVsOneClassifier(estimator=LinearSVC" in html_output assert "LinearSVC" in html_output From 741bc138238a3c82ed7e7b5d463fd0428b538f82 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 15:06:54 -0400 Subject: [PATCH 20/81] CLN Cleaner code --- sklearn/_display_estimator.py | 88 +++++++++++++------------ sklearn/tests/test_display_estimator.py | 11 +++- 2 files changed, 53 insertions(+), 46 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index a0df605d12387..1600cd54e26d9 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -8,9 +8,7 @@ def _estimator_details(estimator, print_changed_only=True): """Replace newlines to allow for css content: attr(...) to properly display estimator details. """ - from sklearn._config import config_context - with config_context(print_changed_only=print_changed_only): - return str(estimator).replace('\n', ' ') + return str(estimator).replace('\n', ' ') def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class, @@ -51,56 +49,59 @@ def _type_of_html_estimator(estimator, first_call=False): from sklearn.pipeline import FeatureUnion from sklearn.compose import ColumnTransformer from sklearn.ensemble import VotingClassifier, VotingRegressor + from sklearn._config import config_context - if isinstance(estimator, str): - return _EstHTMLInfo('single', [estimator], [estimator], - [estimator]) - - elif estimator is None: - return _EstHTMLInfo('single', [estimator], ['None'], ['None']) - - elif isinstance(estimator, Pipeline): - estimators = [step[1] for step in estimator.steps] - names = [step[0] for step in estimator.steps] - name_details = [_estimator_details(est) for est in estimators] - return _EstHTMLInfo('serial', estimators, names, name_details) - - elif isinstance(estimator, ColumnTransformer): - estimators = [trans[1] for trans in estimator.transformers] - names = [trans[0] for trans in estimator.transformers] 
- name_details = [trans[2] for trans in estimator.transformers] - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, FeatureUnion): - estimators = [trans[1] for trans in estimator.transformer_list] - names = [trans[0] for trans in estimator.transformer_list] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, (VotingClassifier, VotingRegressor)): - estimators = [est[1] for est in estimator.estimators] - names = [est[0] for est in estimator.estimators] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif hasattr(estimator, "estimator"): - names = [estimator.__class__.__name__] - name_details = [_estimator_details(estimator)] - inner_estimator = estimator.estimator - return _EstHTMLInfo('single-meta', [inner_estimator], names, - name_details) + with config_context(print_changed_only=True): + if isinstance(estimator, str): + return _EstHTMLInfo('single', [estimator], [estimator], + [estimator]) + + elif estimator is None: + return _EstHTMLInfo('single', [estimator], ['None'], ['None']) + + elif isinstance(estimator, Pipeline): + estimators = [step[1] for step in estimator.steps] + names = [step[0] for step in estimator.steps] + name_details = [_estimator_details(est) for est in estimators] + return _EstHTMLInfo('serial', estimators, names, name_details) + + elif isinstance(estimator, ColumnTransformer): + estimators = [trans[1] for trans in estimator.transformers] + names = [trans[0] for trans in estimator.transformers] + name_details = [trans[2] for trans in estimator.transformers] + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif isinstance(estimator, FeatureUnion): + estimators = [trans[1] for trans in estimator.transformer_list] + names = [trans[0] for trans in estimator.transformer_list] + name_details = [None] * len(names) + return 
_EstHTMLInfo('parallel', estimators, names, name_details) + + elif isinstance(estimator, (VotingClassifier, VotingRegressor)): + estimators = [est[1] for est in estimator.estimators] + names = [est[0] for est in estimator.estimators] + name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif hasattr(estimator, "estimator"): + estimators = [estimator.estimator] + names = [estimator.__class__.__name__] + name_details = [_estimator_details(estimator)] + return _EstHTMLInfo('single-meta', estimators, names, + name_details) # Base estimator, if this is the first call, then all parameters are # printed names = [estimator.__class__.__name__] - tool_tips = [_estimator_details(estimator, - print_changed_only=not first_call)] + with config_context(print_changed_only=not first_call): + tool_tips = [_estimator_details(estimator)] return _EstHTMLInfo('single', [estimator], names, tool_tips) def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ + from sklearn._config import config_context est_html_info = _type_of_html_estimator(estimator, first_call=first_call) @@ -115,7 +116,8 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'parallel': out.write('

') if name: - tool_tip = _estimator_details(estimator) + with config_context(print_changed_only=True): + tool_tip = _estimator_details(estimator) _write_label_html(out, name, tool_tip) out.write('

') diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 6739a8c9533e3..acaa3ba9c9948 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -3,6 +3,7 @@ import pytest +from sklearn import config_context from sklearn.linear_model import LogisticRegression from sklearn.neural_network import MLPClassifier from sklearn.impute import SimpleImputer @@ -56,8 +57,9 @@ def test_type_of_html_estimator_single_str_none(est): def test_type_of_html_estimator_single_estimator(): + # single estimator prints all the details est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est) + est_html_info = _type_of_html_estimator(est, first_call=True) assert est_html_info.type == 'single' assert est_html_info.estimators[0] == est assert est_html_info.names[0] == est.__class__.__name__ @@ -65,6 +67,7 @@ def test_type_of_html_estimator_single_estimator(): def test_type_of_html_estimator_pipeline(): + # multiple estimators in a pipeline prints only the changes pipe = Pipeline([ ('imputer', SimpleImputer()), ('classifier', LogisticRegression()) @@ -73,8 +76,10 @@ def test_type_of_html_estimator_pipeline(): assert est_html_info.type == 'serial' assert est_html_info.estimators == [step[1] for step in pipe.steps] assert est_html_info.names == ['imputer', 'classifier'] - assert est_html_info.name_details == [_estimator_details(step[1]) - for step in pipe.steps] + + with config_context(print_changed_only=True): + assert est_html_info.name_details == [_estimator_details(step[1]) + for step in pipe.steps] def test_type_of_html_estimator_feature_union(): From b0dd3f2d75191e07313a680cd239fba4fe433b4f Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 15:32:03 -0400 Subject: [PATCH 21/81] CLN Improves logic --- sklearn/_display_estimator.py | 38 ++++++++++++------------- sklearn/tests/test_display_estimator.py | 6 +--- 2 files changed, 20 insertions(+), 24 deletions(-) 
diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 1600cd54e26d9..cec6663d3ec9f 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -11,30 +11,30 @@ def _estimator_details(estimator, print_changed_only=True): return str(estimator).replace('\n', ' ') -def _write_dropdown_html(out, name, tool_tip, outer_class, inner_class, +def _write_dropdown_html(out, name, name_details, outer_class, inner_class, checked=False): out.write( f'

' f'

') - if tool_tip is not None: + if name_details is not None: checked_str = 'checked' if checked else '' est_id = uuid.uuid4() out.write(f'' f'' f'{name}' - f'

{tool_tip}'
+                  f'{name_details}'
                   f'')
     else:
         out.write(f'{name}')
     out.write('

') # outer_class inner_class -def _write_label_html(out, name, tool_tip, checked=False): +def _write_label_html(out, name, name_details, checked=False): """Write label to html""" - _write_dropdown_html(out, name, tool_tip, "sk-label-container", "sk-label", - checked=checked) + _write_dropdown_html(out, name, name_details, "sk-label-container", + "sk-label", checked=checked) _EstHTMLInfo = namedtuple('_EstHTMLInfo', @@ -62,7 +62,7 @@ def _type_of_html_estimator(estimator, first_call=False): elif isinstance(estimator, Pipeline): estimators = [step[1] for step in estimator.steps] names = [step[0] for step in estimator.steps] - name_details = [_estimator_details(est) for est in estimators] + name_details = [None] * len(names) return _EstHTMLInfo('serial', estimators, names, name_details) elif isinstance(estimator, ColumnTransformer): @@ -83,7 +83,8 @@ def _type_of_html_estimator(estimator, first_call=False): name_details = [None] * len(names) return _EstHTMLInfo('parallel', estimators, names, name_details) - elif hasattr(estimator, "estimator"): + elif (hasattr(estimator, "estimator") and + hasattr(estimator.estimator, 'get_params')): estimators = [estimator.estimator] names = [estimator.__class__.__name__] name_details = [_estimator_details(estimator)] @@ -94,8 +95,8 @@ def _type_of_html_estimator(estimator, first_call=False): # printed names = [estimator.__class__.__name__] with config_context(print_changed_only=not first_call): - tool_tips = [_estimator_details(estimator)] - return _EstHTMLInfo('single', [estimator], names, tool_tips) + name_details = [_estimator_details(estimator)] + return _EstHTMLInfo('single', [estimator], names, name_details) def _write_estimator_html(out, estimator, name, first_call=False): @@ -107,9 +108,8 @@ def _write_estimator_html(out, estimator, name, first_call=False): if est_html_info.type == 'serial': out.write('

') - est_infos = zip(est_html_info.estimators, est_html_info.names, - est_html_info.name_details) - for est, name, tool_tip in est_infos: + est_infos = zip(est_html_info.estimators, est_html_info.names) + for est, name in est_infos: _write_estimator_html(out, est, name) out.write('

') # sk-serial @@ -117,17 +117,17 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('

') if name: with config_context(print_changed_only=True): - tool_tip = _estimator_details(estimator) - _write_label_html(out, name, tool_tip) + name_details = _estimator_details(estimator) + _write_label_html(out, name, name_details) out.write('

') est_infos = zip(est_html_info.estimators, est_html_info.names, est_html_info.name_details) - for est, name, tool_tip in est_infos: + for est, name, name_details in est_infos: out.write('

') - _write_label_html(out, name, tool_tip) + _write_label_html(out, name, name_details) out.write('

') - _write_estimator_html(out, est, name) + _write_estimator_html(out, est, '') out.write('

') # sk-parallel-item sk-serial out.write('

') # sk-parallel sk-serial-item @@ -137,7 +137,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): est_html_info.name_details[0]) _write_estimator_html(out, est_html_info.estimators[0], est_html_info.estimators.__class__.__name__) - out.write('

') # sk-serial-item # sk-serial + out.write('

') # sk-serial-item elif est_html_info.type == 'single': _write_dropdown_html(out, est_html_info.names[0], diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index acaa3ba9c9948..b6ac4067b3a40 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -67,7 +67,6 @@ def test_type_of_html_estimator_single_estimator(): def test_type_of_html_estimator_pipeline(): - # multiple estimators in a pipeline prints only the changes pipe = Pipeline([ ('imputer', SimpleImputer()), ('classifier', LogisticRegression()) @@ -76,10 +75,7 @@ def test_type_of_html_estimator_pipeline(): assert est_html_info.type == 'serial' assert est_html_info.estimators == [step[1] for step in pipe.steps] assert est_html_info.names == ['imputer', 'classifier'] - - with config_context(print_changed_only=True): - assert est_html_info.name_details == [_estimator_details(step[1]) - for step in pipe.steps] + assert est_html_info.name_details == [None, None] def test_type_of_html_estimator_feature_union(): From 1b14ce2a94fee8afe95ce8d926343e76c075bf16 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 17:34:27 -0400 Subject: [PATCH 22/81] CLN More polish --- sklearn/_display_estimator.py | 161 ++++++++++-------------- sklearn/tests/test_display_estimator.py | 22 ++-- 2 files changed, 79 insertions(+), 104 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index cec6663d3ec9f..8fa01769bd373 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -8,11 +8,16 @@ def _estimator_details(estimator, print_changed_only=True): """Replace newlines to allow for css content: attr(...) to properly display estimator details. 
""" - return str(estimator).replace('\n', ' ') + from sklearn._config import config_context + with config_context(print_changed_only=print_changed_only): + return str(estimator).replace('\n', ' ') -def _write_dropdown_html(out, name, name_details, outer_class, inner_class, - checked=False): +def _write_label_html(out, name, name_details, + outer_class="sk-label-container", + inner_class="sk-label", + checked=False): + """Write labeled html with or without a dropdown with named details""" out.write( f'

' f'

') @@ -31,17 +36,13 @@ def _write_dropdown_html(out, name, name_details, outer_class, inner_class, out.write('

') # outer_class inner_class -def _write_label_html(out, name, name_details, checked=False): - """Write label to html""" - _write_dropdown_html(out, name, name_details, "sk-label-container", - "sk-label", checked=checked) - - +# if type == 'single' then estimators, names, and name_details represent +# repsent the single _EstHTMLInfo = namedtuple('_EstHTMLInfo', 'type, estimators, names, name_details') -def _type_of_html_estimator(estimator, first_call=False): +def _type_of_html_estimator(estimator, print_changed_only=True): """Generate information about how to display an estimator. """ # import here to avoid circular import from base.py @@ -49,62 +50,56 @@ def _type_of_html_estimator(estimator, first_call=False): from sklearn.pipeline import FeatureUnion from sklearn.compose import ColumnTransformer from sklearn.ensemble import VotingClassifier, VotingRegressor - from sklearn._config import config_context - with config_context(print_changed_only=True): - if isinstance(estimator, str): - return _EstHTMLInfo('single', [estimator], [estimator], - [estimator]) - - elif estimator is None: - return _EstHTMLInfo('single', [estimator], ['None'], ['None']) - - elif isinstance(estimator, Pipeline): - estimators = [step[1] for step in estimator.steps] - names = [step[0] for step in estimator.steps] - name_details = [None] * len(names) - return _EstHTMLInfo('serial', estimators, names, name_details) - - elif isinstance(estimator, ColumnTransformer): - estimators = [trans[1] for trans in estimator.transformers] - names = [trans[0] for trans in estimator.transformers] - name_details = [trans[2] for trans in estimator.transformers] - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, FeatureUnion): - estimators = [trans[1] for trans in estimator.transformer_list] - names = [trans[0] for trans in estimator.transformer_list] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif 
isinstance(estimator, (VotingClassifier, VotingRegressor)): - estimators = [est[1] for est in estimator.estimators] - names = [est[0] for est in estimator.estimators] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif (hasattr(estimator, "estimator") and - hasattr(estimator.estimator, 'get_params')): - estimators = [estimator.estimator] - names = [estimator.__class__.__name__] - name_details = [_estimator_details(estimator)] - return _EstHTMLInfo('single-meta', estimators, names, - name_details) + if isinstance(estimator, str): + return _EstHTMLInfo('single', estimator, estimator, estimator) + + elif estimator is None: + return _EstHTMLInfo('single', estimator, 'None', 'None') + + elif isinstance(estimator, Pipeline): + estimators = [step[1] for step in estimator.steps] + names = [step[0] for step in estimator.steps] + name_details = [None] * len(names) + return _EstHTMLInfo('serial', estimators, names, name_details) + + elif isinstance(estimator, ColumnTransformer): + estimators = [trans[1] for trans in estimator.transformers] + names = [trans[0] for trans in estimator.transformers] + name_details = [trans[2] for trans in estimator.transformers] + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif isinstance(estimator, FeatureUnion): + estimators = [trans[1] for trans in estimator.transformer_list] + names = [trans[0] for trans in estimator.transformer_list] + name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif isinstance(estimator, (VotingClassifier, VotingRegressor)): + estimators = [est[1] for est in estimator.estimators] + names = [est[0] for est in estimator.estimators] + name_details = [None] * len(names) + return _EstHTMLInfo('parallel', estimators, names, name_details) + + elif (hasattr(estimator, "estimator") and + hasattr(estimator.estimator, 'get_params')): + inner_estimator = estimator.estimator + inner_name = 
inner_estimator.__class__.__name__ + return _EstHTMLInfo('single-meta', inner_estimator, inner_name, None) # Base estimator, if this is the first call, then all parameters are # printed - names = [estimator.__class__.__name__] - with config_context(print_changed_only=not first_call): - name_details = [_estimator_details(estimator)] - return _EstHTMLInfo('single', [estimator], names, name_details) + name = estimator.__class__.__name__ + name_detail = _estimator_details(estimator, + print_changed_only=print_changed_only) + return _EstHTMLInfo('single', estimator, name, name_detail) def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ - from sklearn._config import config_context est_html_info = _type_of_html_estimator(estimator, - first_call=first_call) + print_changed_only=not first_call) if est_html_info.type == 'serial': out.write('

') @@ -116,8 +111,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'parallel': out.write('

') if name: - with config_context(print_changed_only=True): - name_details = _estimator_details(estimator) + name_details = _estimator_details(estimator) _write_label_html(out, name, name_details) out.write('

') @@ -133,17 +127,20 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'single-meta': out.write('

') - _write_label_html(out, est_html_info.names[0], - est_html_info.name_details[0]) - _write_estimator_html(out, est_html_info.estimators[0], - est_html_info.estimators.__class__.__name__) - out.write('

') # sk-serial-item + if name: + name_details = _estimator_details(estimator) + _write_label_html(out, name, name_details) + out.write('

') + _write_estimator_html(out, est_html_info.estimators, + est_html_info.names) + out.write('

') elif est_html_info.type == 'single': - _write_dropdown_html(out, est_html_info.names[0], - est_html_info.name_details[0], - "sk-serial-item", "sk-estimator", - checked=first_call) + _write_label_html(out, est_html_info.names, + est_html_info.name_details, + outer_class="sk-serial-item", + inner_class="sk-estimator", + checked=first_call) _STYLE = """ @@ -278,31 +275,6 @@ def _write_estimator_html(out, estimator, name, first_call=False): position: relative; float: left; } -[sk-data-tooltip] { - position: relative; - cursor: pointer; -} -[sk-data-tooltip]:before { - visibility: hidden; - opacity: 0; - font-weight: 400; - position: absolute; - top: 0; - left: 0; - padding: 0.5em; - overflow: hidden; - background-color: #f0f8ff; - border: 1px solid gray; - box-sizing: border-box; - white-space: pre; - content: attr(sk-data-tooltip); - text-align: left; -} -[sk-data-tooltip]:hover:before { - visibility: visible; - opacity: 1; - z-index: 2; -} """.replace(' ', '').replace('\n', '') # noqa @@ -321,8 +293,9 @@ def _estimator_repr_html(estimator): lab, a iPython HTML object is returned. """ with closing(StringIO()) as out: - - out.write(f'' + out.write(f'' + f'sklearn-viz' + f'' f'

') _write_estimator_html(out, estimator, estimator.__class__.__name__, first_call=True) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index b6ac4067b3a40..08ad2f6107255 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -3,7 +3,6 @@ import pytest -from sklearn import config_context from sklearn.linear_model import LogisticRegression from sklearn.neural_network import MLPClassifier from sklearn.impute import SimpleImputer @@ -21,6 +20,7 @@ from sklearn._display_estimator import _estimator_details from sklearn._display_estimator import _type_of_html_estimator from sklearn._display_estimator import _estimator_repr_html +from sklearn._config import config_context @pytest.mark.parametrize('est, expected', [ @@ -51,19 +51,21 @@ def test_write_label_html(checked): def test_type_of_html_estimator_single_str_none(est): est_html_info = _type_of_html_estimator(est) assert est_html_info.type == 'single' - assert est_html_info.estimators[0] == est - assert est_html_info.names[0] == str(est) - assert est_html_info.name_details[0] == str(est) + assert est_html_info.estimators == est + assert est_html_info.names == str(est) + assert est_html_info.name_details == str(est) -def test_type_of_html_estimator_single_estimator(): - # single estimator prints all the details +@pytest.mark.parametrize('print_changed_only', [True, False]) +def test_type_of_html_estimator_single_estimator(print_changed_only): est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est, first_call=True) + est_html_info = _type_of_html_estimator( + est, print_changed_only=print_changed_only) assert est_html_info.type == 'single' - assert est_html_info.estimators[0] == est - assert est_html_info.names[0] == est.__class__.__name__ - assert est_html_info.name_details[0] == _estimator_details(est) + assert est_html_info.estimators == est + assert est_html_info.names == est.__class__.__name__ + 
assert (est_html_info.name_details == + _estimator_details(est, print_changed_only=print_changed_only)) def test_type_of_html_estimator_pipeline(): From e03362f6a3e369a69added5f9e1a28e8bcbdacdc Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 17:44:17 -0400 Subject: [PATCH 23/81] CLN More polish --- sklearn/_display_estimator.py | 17 ++++++++++++----- sklearn/tests/test_display_estimator.py | 1 - 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 8fa01769bd373..8e65091a1b080 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -36,10 +36,16 @@ def _write_label_html(out, name, name_details, out.write('

') # outer_class inner_class -# if type == 'single' then estimators, names, and name_details represent -# repsent the single _EstHTMLInfo = namedtuple('_EstHTMLInfo', 'type, estimators, names, name_details') +# In this section, the parameters mean estimators, names, and name_details +# if type == 'single', then the parameters are single items representing the +# single estimator +# if type == 'parallel', then the paramters are list representing the +# parallel estimators +# if type == 'serial', then the parameters are list representing the serial +# estimators +# if type == 'single-meta', then parameters represent the wrapped estimator def _type_of_html_estimator(estimator, print_changed_only=True): @@ -83,9 +89,10 @@ def _type_of_html_estimator(estimator, print_changed_only=True): elif (hasattr(estimator, "estimator") and hasattr(estimator.estimator, 'get_params')): - inner_estimator = estimator.estimator - inner_name = inner_estimator.__class__.__name__ - return _EstHTMLInfo('single-meta', inner_estimator, inner_name, None) + wrapped_estimator = estimator.estimator + wrapped_name = wrapped_estimator.__class__.__name__ + return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, + None) # Base estimator, if this is the first call, then all parameters are # printed diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 08ad2f6107255..922f55fb0fd5e 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -20,7 +20,6 @@ from sklearn._display_estimator import _estimator_details from sklearn._display_estimator import _type_of_html_estimator from sklearn._display_estimator import _estimator_repr_html -from sklearn._config import config_context @pytest.mark.parametrize('est, expected', [ From 975c8233ce92509c01287f8f0c791b0db99de5b7 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 18:38:22 -0400 Subject: [PATCH 24/81] STY Minor adjustment --- 
sklearn/_display_estimator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 8e65091a1b080..eae05415783a8 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -261,8 +261,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): } div.sk-dashed-wrapped { border: 1px dashed gray; - margin: 0 0.3em 0.3em 0.3em; + margin: 0 0.2em 0.2em 0.2em; box-sizing: border-box; + padding-bottom: 0.2em; } div.sk-label label { font-family: monospace; From ecb3ae63968812d7153fbe1806acb9034b55aafb Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 20:31:41 -0400 Subject: [PATCH 25/81] ENH Adds a _sk_rep_html method --- sklearn/_display_estimator.py | 72 ++++++++----------------- sklearn/base.py | 6 +++ sklearn/compose/_column_transformer.py | 5 ++ sklearn/ensemble/_voting.py | 5 ++ sklearn/pipeline.py | 10 ++++ sklearn/tests/test_display_estimator.py | 39 +++++++------- 6 files changed, 65 insertions(+), 72 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index eae05415783a8..e1d98a7090af3 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,16 +1,15 @@ +from sklearn._config import config_context from collections import namedtuple from contextlib import closing from io import StringIO import uuid -def _estimator_details(estimator, print_changed_only=True): +def _estimator_details(estimator): """Replace newlines to allow for css content: attr(...) to properly display estimator details. 
""" - from sklearn._config import config_context - with config_context(print_changed_only=print_changed_only): - return str(estimator).replace('\n', ' ') + return str(estimator).replace('\n', ' ') def _write_label_html(out, name, name_details, @@ -48,65 +47,29 @@ def _write_label_html(out, name, name_details, # if type == 'single-meta', then parameters represent the wrapped estimator -def _type_of_html_estimator(estimator, print_changed_only=True): +def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. """ - # import here to avoid circular import from base.py - from sklearn.pipeline import Pipeline - from sklearn.pipeline import FeatureUnion - from sklearn.compose import ColumnTransformer - from sklearn.ensemble import VotingClassifier, VotingRegressor - if isinstance(estimator, str): return _EstHTMLInfo('single', estimator, estimator, estimator) - elif estimator is None: return _EstHTMLInfo('single', estimator, 'None', 'None') - elif isinstance(estimator, Pipeline): - estimators = [step[1] for step in estimator.steps] - names = [step[0] for step in estimator.steps] - name_details = [None] * len(names) - return _EstHTMLInfo('serial', estimators, names, name_details) - - elif isinstance(estimator, ColumnTransformer): - estimators = [trans[1] for trans in estimator.transformers] - names = [trans[0] for trans in estimator.transformers] - name_details = [trans[2] for trans in estimator.transformers] - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, FeatureUnion): - estimators = [trans[1] for trans in estimator.transformer_list] - names = [trans[0] for trans in estimator.transformer_list] - name_details = [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif isinstance(estimator, (VotingClassifier, VotingRegressor)): - estimators = [est[1] for est in estimator.estimators] - names = [est[0] for est in estimator.estimators] - name_details 
= [None] * len(names) - return _EstHTMLInfo('parallel', estimators, names, name_details) - - elif (hasattr(estimator, "estimator") and - hasattr(estimator.estimator, 'get_params')): - wrapped_estimator = estimator.estimator + # looks like a meta estimator + if (hasattr(estimator, 'estimator') and + hasattr(getattr(estimator, 'estimator'), 'get_params')): + wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, None) - - # Base estimator, if this is the first call, then all parameters are - # printed - name = estimator.__class__.__name__ - name_detail = _estimator_details(estimator, - print_changed_only=print_changed_only) - return _EstHTMLInfo('single', estimator, name, name_detail) + return estimator._sk_repr_html() def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ - est_html_info = _type_of_html_estimator(estimator, - print_changed_only=not first_call) + with config_context(print_changed_only=not first_call): + est_html_info = _type_of_html_estimator(estimator) if est_html_info.type == 'serial': out.write('

') @@ -118,12 +81,18 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'parallel': out.write('

') if name: - name_details = _estimator_details(estimator) + with config_context(print_changed_only=True): + name_details = _estimator_details(estimator) _write_label_html(out, name, name_details) out.write('

') + if est_html_info.name_details is None: + name_details = (None,) * len(est_html_info.estimators) + else: + name_details = est_html_info.name_details + est_infos = zip(est_html_info.estimators, est_html_info.names, - est_html_info.name_details) + name_details) for est, name, name_details in est_infos: out.write('

') _write_label_html(out, name, name_details) @@ -135,7 +104,8 @@ def _write_estimator_html(out, estimator, name, first_call=False): elif est_html_info.type == 'single-meta': out.write('

') _write_estimator_html(out, est_html_info.estimators, diff --git a/sklearn/base.py b/sklearn/base.py index 08b0fc820705d..0e695ee712640 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -415,6 +415,12 @@ def _validate_data(self, X, y=None, reset=True, **check_params): def _repr_html_(self): return _estimator_repr_html(self) + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + return _EstHTMLInfo('single', self, + self.__class__.__name__, + str(self).replace('\n', ' ')) + class ClassifierMixin: """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index e94757bca6993..aa7b0db3d1b7a 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -628,6 +628,11 @@ def _hstack(self, Xs): Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs] return np.hstack(Xs) + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + names, transformers, name_details = zip(*self.transformers) + return _EstHTMLInfo('parallel', transformers, names, name_details) + def _check_X(X): """Use check_array only on lists and other non-array-likes / sparse""" diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 0da6dc86c30fa..7bc36f10164a8 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -103,6 +103,11 @@ def n_features_in_(self): return self.estimators_[0].n_features_in_ + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + names, estimators = zip(*self.estimators) + return _EstHTMLInfo('parallel', estimators, names, None) + class VotingClassifier(ClassifierMixin, _BaseVoting): """Soft Voting/Majority Rule classifier for unfitted estimators. 
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 64d2de70df531..1c7b407dadc3f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -633,6 +633,11 @@ def n_features_in_(self): # delegate to first step (which will call _check_is_fitted) return self.steps[0][1].n_features_in_ + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + names, estimators = zip(*self.steps) + return _EstHTMLInfo('serial', estimators, names, None) + def _name_estimators(estimators): """Generate names for estimators.""" @@ -1010,6 +1015,11 @@ def n_features_in_(self): # X is passed to all transformers so we just delegate to the first one return self.transformer_list[0][1].n_features_in_ + def _sk_repr_html(self): + from sklearn._display_estimator import _EstHTMLInfo + names, transformers = zip(*self.transformer_list) + return _EstHTMLInfo('parallel', transformers, names, None) + def make_union(*transformers, **kwargs): """ diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 922f55fb0fd5e..0cfacd00fb98f 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -55,16 +55,13 @@ def test_type_of_html_estimator_single_str_none(est): assert est_html_info.name_details == str(est) -@pytest.mark.parametrize('print_changed_only', [True, False]) -def test_type_of_html_estimator_single_estimator(print_changed_only): +def test_type_of_html_estimator_single_estimator(): est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator( - est, print_changed_only=print_changed_only) + est_html_info = _type_of_html_estimator(est) assert est_html_info.type == 'single' assert est_html_info.estimators == est assert est_html_info.names == est.__class__.__name__ - assert (est_html_info.name_details == - _estimator_details(est, print_changed_only=print_changed_only)) + assert (est_html_info.name_details == _estimator_details(est)) def 
test_type_of_html_estimator_pipeline(): @@ -74,9 +71,9 @@ def test_type_of_html_estimator_pipeline(): ]) est_html_info = _type_of_html_estimator(pipe) assert est_html_info.type == 'serial' - assert est_html_info.estimators == [step[1] for step in pipe.steps] - assert est_html_info.names == ['imputer', 'classifier'] - assert est_html_info.name_details == [None, None] + assert est_html_info.estimators == tuple(step[1] for step in pipe.steps) + assert est_html_info.names == ('imputer', 'classifier') + assert est_html_info.name_details is None def test_type_of_html_estimator_feature_union(): @@ -85,10 +82,10 @@ def test_type_of_html_estimator_feature_union(): ]) est_html_info = _type_of_html_estimator(f_union) assert est_html_info.type == 'parallel' - assert est_html_info.names == ['pca', 'svd'] - assert est_html_info.estimators == [trans[1] - for trans in f_union.transformer_list] - assert est_html_info.name_details == [None, None] + assert est_html_info.names == ('pca', 'svd') + assert est_html_info.estimators == tuple( + trans[1] for trans in f_union.transformer_list) + assert est_html_info.name_details is None def test_type_of_html_estimator_voting(): @@ -98,10 +95,10 @@ def test_type_of_html_estimator_voting(): ]) est_html_info = _type_of_html_estimator(clf) assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in clf.estimators] - assert est_html_info.names == ['log_reg', 'mlp'] - assert est_html_info.name_details == [None, None] + assert est_html_info.estimators == tuple(trans[1] + for trans in clf.estimators) + assert est_html_info.names == ('log_reg', 'mlp') + assert est_html_info.name_details is None def test_type_of_html_estimator_column_transformer(): @@ -111,10 +108,10 @@ def test_type_of_html_estimator_column_transformer(): ]) est_html_info = _type_of_html_estimator(ct) assert est_html_info.type == 'parallel' - assert est_html_info.estimators == [trans[1] - for trans in ct.transformers] - assert 
est_html_info.names == ['pca', 'svd'] - assert est_html_info.name_details == [['num1', 'num2'], [0, 3]] + assert est_html_info.estimators == tuple( + trans[1] for trans in ct.transformers) + assert est_html_info.names == ('pca', 'svd') + assert est_html_info.name_details == (['num1', 'num2'], [0, 3]) def test_display_estimator_pipeline(): From 3451ab009df95b632c7f383a6782d7bc41d2fc12 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 21:20:42 -0400 Subject: [PATCH 26/81] CLN Less diffs --- examples/compose/plot_column_transformer_mixed_types.py | 2 +- sklearn/inspection/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index b1aebb3252505..037ff3fbc147a 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -136,7 +136,7 @@ ############################################################################### # Using the prediction pipeline in a grid search -# ---------------------------------------------- +############################################################################### # Grid search can also be performed on the different preprocessing steps # defined in the ``ColumnTransformer`` object, together with the classifier's # hyperparameters as part of the ``Pipeline``. 
diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index bfa28f2b3a4f8..5940ac22a2ef2 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -22,5 +22,5 @@ 'partial_dependence', 'plot_partial_dependence', 'permutation_importance', - 'PartialDependenceDisplay', + 'PartialDependenceDisplay' ] From 407cfff7271ab7d2af6cd0663cf38bd262b1c5a5 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 21:23:11 -0400 Subject: [PATCH 27/81] CLN Imports higher --- sklearn/base.py | 5 ++--- sklearn/compose/_column_transformer.py | 2 +- sklearn/ensemble/_voting.py | 2 +- sklearn/pipeline.py | 3 +-- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 0e695ee712640..70f6cb7914dca 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -21,6 +21,7 @@ from .utils.validation import check_X_y from .utils.validation import check_array from ._display_estimator import _estimator_repr_html +from ._display_estimator import _EstHTMLInfo _DEFAULT_TAGS = { 'non_deterministic': False, @@ -416,9 +417,7 @@ def _repr_html_(self): return _estimator_repr_html(self) def _sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo - return _EstHTMLInfo('single', self, - self.__class__.__name__, + return _EstHTMLInfo('single', self, self.__class__.__name__, str(self).replace('\n', ' ')) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index aa7b0db3d1b7a..12d8ad18b2d35 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,6 +15,7 @@ from joblib import Parallel, delayed from ..base import clone, TransformerMixin +from .._display_estimator import _EstHTMLInfo from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch @@ -629,7 +630,6 @@ def _hstack(self, Xs): return np.hstack(Xs) def 
_sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo names, transformers, name_details = zip(*self.transformers) return _EstHTMLInfo('parallel', transformers, names, name_details) diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 7bc36f10164a8..cfaedfb18edc2 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -31,6 +31,7 @@ from ..utils.multiclass import check_classification_targets from ..utils.validation import column_or_1d from ..exceptions import NotFittedError +from .._display_estimator import _EstHTMLInfo class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -104,7 +105,6 @@ def n_features_in_(self): return self.estimators_[0].n_features_in_ def _sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo names, estimators = zip(*self.estimators) return _EstHTMLInfo('parallel', estimators, names, None) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 1c7b407dadc3f..3be08e32ad31c 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,6 +18,7 @@ from joblib import Parallel, delayed from .base import clone, TransformerMixin +from ._display_estimator import _EstHTMLInfo from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -634,7 +635,6 @@ def n_features_in_(self): return self.steps[0][1].n_features_in_ def _sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo names, estimators = zip(*self.steps) return _EstHTMLInfo('serial', estimators, names, None) @@ -1016,7 +1016,6 @@ def n_features_in_(self): return self.transformer_list[0][1].n_features_in_ def _sk_repr_html(self): - from sklearn._display_estimator import _EstHTMLInfo names, transformers = zip(*self.transformer_list) return _EstHTMLInfo('parallel', transformers, names, None) From 80d9b10dbf20a2968debe7535b24b165a9c4d776 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: 
Fri, 13 Mar 2020 21:36:25 -0400 Subject: [PATCH 28/81] ENH Better support for dark themes --- sklearn/_display_estimator.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index e1d98a7090af3..f4dae57d2f6ad 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -121,6 +121,10 @@ def _write_estimator_html(out, estimator, name, first_call=False): _STYLE = """ +div.sk-top-container { + color: black; + background-color: white; +} div.sk-toggleable { background-color: white; } @@ -142,6 +146,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): } div.sk-toggleable__content pre { margin: 0.2em; + color: black; border-radius: 0.25em; background-color: #f0f8ff; } @@ -201,7 +206,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): flex-direction: column; align-items: center; float: left; - background: white; + background-color: white; } div.sk-serial-item { @@ -211,12 +216,13 @@ def _write_estimator_html(out, estimator, name, first_call=False): display: flex; align-items: stretch; justify-content: center; + background-color: white; } div.sk-parallel-item { display: flex; flex-direction: column; position: relative; - background: white; + background-color: white; } div.sk-parallel-item:first-child::after { align-self: flex-end; @@ -234,11 +240,12 @@ def _write_estimator_html(out, estimator, name, first_call=False): margin: 0 0.2em 0.2em 0.2em; box-sizing: border-box; padding-bottom: 0.2em; + background-color: white; } div.sk-label label { font-family: monospace; font-weight: bold; - background: white; + background-color: white; display: inline-block; line-height: 1.4em; } From 791374beb9043c463ddb9c3f260f0adf1bec941d Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:16:21 -0400 Subject: [PATCH 29/81] DOC Includes note about html --- doc/modules/compose.rst | 14 ++++++++------ 1 file changed, 8 
insertions(+), 6 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index fd76eb4b90176..aeb8a553737f9 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -538,12 +538,14 @@ above example would be:: Visualizing Composite Estimators ================================ -In by default a jupyter notebook outputs a html representation of -composite estimators. This can be useful to diagnose or visualize a Pipeline -with may estimators. For example, the estimator defined in -The composite estimator defined in -:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py` -can be visualized as: +By default, estimators are displayed with a HTML representation when shown in a +jupyter notebook. This can be useful to diagnose or visualize a Pipeline with +many estimators. An example of the HTML output can been seen below. + +.. note:: + + The HTML output of this code snippet can only been seen on the HTML version + of the docs. .. display_estimator_repr_html:: From d297bc70190a58b0cff7b19af5fefd864dc9f040 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:33:01 -0400 Subject: [PATCH 30/81] STY Update --- sklearn/_display_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index f4dae57d2f6ad..a94a022331184 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -247,7 +247,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): font-weight: bold; background-color: white; display: inline-block; - line-height: 1.4em; + line-height: 1em; } div.sk-label-container { text-align: center; From 9fde84ab44bb44175851f7c32dacacc7062a4d98 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:35:38 -0400 Subject: [PATCH 31/81] STY Update --- sklearn/_display_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/_display_estimator.py 
b/sklearn/_display_estimator.py index a94a022331184..03aecf9106ae9 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -247,7 +247,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): font-weight: bold; background-color: white; display: inline-block; - line-height: 1em; + line-height: 1.2em; } div.sk-label-container { text-align: center; From f254e1d824a88830a6ff21a397ed889d05e67d6c Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:41:55 -0400 Subject: [PATCH 32/81] CLN --- sklearn/_display_estimator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 03aecf9106ae9..a82eb6408b104 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -54,9 +54,8 @@ def _type_of_html_estimator(estimator): return _EstHTMLInfo('single', estimator, estimator, estimator) elif estimator is None: return _EstHTMLInfo('single', estimator, 'None', 'None') - # looks like a meta estimator - if (hasattr(estimator, 'estimator') and + elif (hasattr(estimator, 'estimator') and hasattr(getattr(estimator, 'estimator'), 'get_params')): wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ From 48aebee175a5c470249a2b6a8ae368333a99296c Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 22:45:50 -0400 Subject: [PATCH 33/81] CLN Moves code around --- sklearn/_display_estimator.py | 58 +++++++++++++++++------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index a82eb6408b104..db08ea66dba0d 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -5,6 +5,35 @@ import uuid +_EstHTMLInfo = namedtuple('_EstHTMLInfo', + 'type, estimators, names, name_details') +# In this section, the parameters mean estimators, names, and name_details +# if type == 
'single', then the parameters are single items representing the +# single estimator +# if type == 'parallel', then the paramters are list representing the +# parallel estimators +# if type == 'serial', then the parameters are list representing the serial +# estimators +# if type == 'single-meta', then parameters represent the wrapped estimator + + +def _type_of_html_estimator(estimator): + """Generate information about how to display an estimator. + """ + if isinstance(estimator, str): + return _EstHTMLInfo('single', estimator, estimator, estimator) + elif estimator is None: + return _EstHTMLInfo('single', estimator, 'None', 'None') + # looks like a meta estimator + elif (hasattr(estimator, 'estimator') and + hasattr(getattr(estimator, 'estimator'), 'get_params')): + wrapped_estimator = getattr(estimator, 'estimator') + wrapped_name = wrapped_estimator.__class__.__name__ + return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, + None) + return estimator._sk_repr_html() + + def _estimator_details(estimator): """Replace newlines to allow for css content: attr(...) to properly display estimator details. @@ -35,35 +64,6 @@ def _write_label_html(out, name, name_details, out.write('

') # outer_class inner_class -_EstHTMLInfo = namedtuple('_EstHTMLInfo', - 'type, estimators, names, name_details') -# In this section, the parameters mean estimators, names, and name_details -# if type == 'single', then the parameters are single items representing the -# single estimator -# if type == 'parallel', then the paramters are list representing the -# parallel estimators -# if type == 'serial', then the parameters are list representing the serial -# estimators -# if type == 'single-meta', then parameters represent the wrapped estimator - - -def _type_of_html_estimator(estimator): - """Generate information about how to display an estimator. - """ - if isinstance(estimator, str): - return _EstHTMLInfo('single', estimator, estimator, estimator) - elif estimator is None: - return _EstHTMLInfo('single', estimator, 'None', 'None') - # looks like a meta estimator - elif (hasattr(estimator, 'estimator') and - hasattr(getattr(estimator, 'estimator'), 'get_params')): - wrapped_estimator = getattr(estimator, 'estimator') - wrapped_name = wrapped_estimator.__class__.__name__ - return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, - None) - return estimator._sk_repr_html() - - def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). 
""" From d44c38ea2689ac2cc7a55231b6f31ec7a0837aec Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 13 Mar 2020 23:32:58 -0400 Subject: [PATCH 34/81] ENH Adds stacking viz --- sklearn/_display_estimator.py | 15 +++----- sklearn/base.py | 2 +- sklearn/ensemble/_stacking.py | 22 ++++++++++++ sklearn/tests/test_display_estimator.py | 47 +++++++++++++++++++------ 4 files changed, 64 insertions(+), 22 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index db08ea66dba0d..5e1b09f5fb294 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -20,7 +20,9 @@ def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. """ - if isinstance(estimator, str): + if isinstance(estimator, _EstHTMLInfo): + return estimator + elif isinstance(estimator, str): return _EstHTMLInfo('single', estimator, estimator, estimator) elif estimator is None: return _EstHTMLInfo('single', estimator, 'None', 'None') @@ -34,13 +36,6 @@ def _type_of_html_estimator(estimator): return estimator._sk_repr_html() -def _estimator_details(estimator): - """Replace newlines to allow for css content: attr(...) to properly - display estimator details. - """ - return str(estimator).replace('\n', ' ') - - def _write_label_html(out, name, name_details, outer_class="sk-label-container", inner_class="sk-label", @@ -81,7 +76,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('

') if name: with config_context(print_changed_only=True): - name_details = _estimator_details(estimator) + name_details = str(estimator) _write_label_html(out, name, name_details) out.write('

') @@ -104,7 +99,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('

') if name: with config_context(print_changed_only=True): - name_details = _estimator_details(estimator) + name_details = str(estimator) _write_label_html(out, name, name_details) out.write('

') _write_estimator_html(out, est_html_info.estimators, diff --git a/sklearn/base.py b/sklearn/base.py index 70f6cb7914dca..77832cfc3e0ab 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -418,7 +418,7 @@ def _repr_html_(self): def _sk_repr_html(self): return _EstHTMLInfo('single', self, self.__class__.__name__, - str(self).replace('\n', ' ')) + str(self)) class ClassifierMixin: diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index ba817613523f6..18976e4b6a7b8 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,6 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor +from .._display_estimator import _EstHTMLInfo from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble @@ -232,6 +233,13 @@ def predict(self, X, **predict_params): self.transform(X), **predict_params ) + def _sk_repr_html(self, final_estimator): + names, estimators = zip(*self.estimators) + parallel = _EstHTMLInfo('parallel', estimators, names, None) + serial = _EstHTMLInfo('serial', (parallel, final_estimator), + ('', ''), None) + return _EstHTMLInfo('single-meta', serial, str(self), None) + class StackingClassifier(ClassifierMixin, _BaseStacking): """Stack of estimators with a final classifier. @@ -494,6 +502,13 @@ def transform(self, X): """ return self._transform(X) + def _sk_repr_html(self): + if self.final_estimator is None: + final_estimator = LogisticRegression() + else: + final_estimator = self.final_estimator + return super()._sk_repr_html(final_estimator) + class StackingRegressor(RegressorMixin, _BaseStacking): """Stack of estimators with a final regressor. @@ -662,3 +677,10 @@ def transform(self, X): Prediction outputs for each estimator. 
""" return self._transform(X) + + def _sk_repr_html(self): + if self.final_estimator is None: + final_estimator = RidgeCV() + else: + final_estimator = self.final_estimator + return super()._sk_repr_html(final_estimator) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 0cfacd00fb98f..a9013afddeccb 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -15,22 +15,16 @@ from sklearn.feature_selection import SelectPercentile from sklearn.preprocessing import OneHotEncoder from sklearn.svm import LinearSVC +from sklearn.svm import LinearSVR +from sklearn.tree import DecisionTreeClassifier from sklearn.multiclass import OneVsOneClassifier +from sklearn.ensemble import StackingClassifier +from sklearn.ensemble import StackingRegressor from sklearn._display_estimator import _write_label_html -from sklearn._display_estimator import _estimator_details from sklearn._display_estimator import _type_of_html_estimator from sklearn._display_estimator import _estimator_repr_html -@pytest.mark.parametrize('est, expected', [ - ('None', 'None'), - ('passthrough', 'passthrough'), - ('hello\nworld', 'hello world') -]) -def test_estimator_tool_tip(est, expected): - assert expected == _estimator_details(est) - - @pytest.mark.parametrize("checked", [True, False]) def test_write_label_html(checked): name = "LogisticRegression" @@ -61,7 +55,7 @@ def test_type_of_html_estimator_single_estimator(): assert est_html_info.type == 'single' assert est_html_info.estimators == est assert est_html_info.names == est.__class__.__name__ - assert (est_html_info.name_details == _estimator_details(est)) + assert est_html_info.name_details == str(est) def test_type_of_html_estimator_pipeline(): @@ -171,3 +165,34 @@ def test_display_estimator_ovo_classifier(): html_output = _estimator_repr_html(ovo) assert "pre>OneVsOneClassifier(estimator=LinearSVC" in html_output assert "LinearSVC" in html_output + + 
+@pytest.mark.parametrize("final_estimator", [None, LinearSVC()]) +def test_stacking_classsifer(final_estimator): + estimators = [('mlp', MLPClassifier(alpha=0.001)), + ('tree', DecisionTreeClassifier())] + clf = StackingClassifier( + estimators=estimators, final_estimator=final_estimator) + + html_output = _estimator_repr_html(clf) + + assert "('mlp', MLPClassifier(alpha=0.001)" in html_output + if final_estimator is None: + assert "LogisticRegression()" in html_output + else: + assert final_estimator.__class__.__name__ in html_output + + +@pytest.mark.parametrize("final_estimator", [None, LinearSVR()]) +def test_stacking_regressor(final_estimator): + reg = StackingRegressor( + estimators=[('svr', LinearSVR())], final_estimator=final_estimator) + + html_output = _estimator_repr_html(reg) + + assert "('svr', LinearSVR()" in html_output + print(html_output) + if final_estimator is None: + assert "RidgeCV" in html_output + else: + assert final_estimator.__class__.__name__ in html_output From 3811190df4782fe499425ac4026e8b273fc948bd Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 17 Mar 2020 16:53:58 -0400 Subject: [PATCH 35/81] ENH Better viz --- sklearn/_display_estimator.py | 122 +++++++++++++----------- sklearn/base.py | 6 +- sklearn/compose/_column_transformer.py | 4 +- sklearn/ensemble/_stacking.py | 11 ++- sklearn/ensemble/_voting.py | 4 +- sklearn/pipeline.py | 7 +- sklearn/tests/test_display_estimator.py | 2 +- 7 files changed, 86 insertions(+), 70 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 5e1b09f5fb294..90fed34b944fb 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,38 +1,45 @@ from sklearn._config import config_context -from collections import namedtuple from contextlib import closing from io import StringIO import uuid -_EstHTMLInfo = namedtuple('_EstHTMLInfo', - 'type, estimators, names, name_details') -# In this section, the parameters mean estimators, names, 
and name_details -# if type == 'single', then the parameters are single items representing the -# single estimator -# if type == 'parallel', then the paramters are list representing the -# parallel estimators -# if type == 'serial', then the parameters are list representing the serial -# estimators -# if type == 'single-meta', then parameters represent the wrapped estimator +class _EstHTMLBlock: + """HTML Representation of Estimator + + If type == 'single', then the parameters are single items representing the + single estimator + if type == 'parallel', then the paramters are list representing the + parallel estimators + if type == 'serial', then the parameters are list representing the serial + estimators + if type == 'single-meta', then parameters represent the wrapped estimator + """ + def __init__(self, type, estimators, names, name_details, + dash_wrapped=True): + self.type = type + self.estimators = estimators + self.names = names + self.name_details = name_details + self.dash_wrapped = dash_wrapped def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. 
""" - if isinstance(estimator, _EstHTMLInfo): + if isinstance(estimator, _EstHTMLBlock): return estimator elif isinstance(estimator, str): - return _EstHTMLInfo('single', estimator, estimator, estimator) + return _EstHTMLBlock('single', estimator, estimator, estimator) elif estimator is None: - return _EstHTMLInfo('single', estimator, 'None', 'None') + return _EstHTMLBlock('single', estimator, 'None', 'None') # looks like a meta estimator elif (hasattr(estimator, 'estimator') and hasattr(getattr(estimator, 'estimator'), 'get_params')): wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ - return _EstHTMLInfo('single-meta', wrapped_estimator, wrapped_name, - None) + return _EstHTMLBlock('single-meta', wrapped_estimator, wrapped_name, + None) return estimator._sk_repr_html() @@ -41,9 +48,8 @@ def _write_label_html(out, name, name_details, inner_class="sk-label", checked=False): """Write labeled html with or without a dropdown with named details""" - out.write( - f'

' - f'

') + out.write(f'

' + f'

') if name_details is not None: checked_str = 'checked' if checked else '' @@ -59,58 +65,67 @@ def _write_label_html(out, name, name_details, out.write('

') # outer_class inner_class +def _write_named_label_html(out, estimator, name): + if not name or isinstance(estimator, _EstHTMLBlock): + return + with config_context(print_changed_only=True): + name_details = str(estimator) + _write_label_html(out, name, name_details) + + +def _write_sk_item(out, dash_wrapped=True): + dash_cls = " sk-dashed-wrapped" if dash_wrapped else "" + out.write(f'

') + + def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ with config_context(print_changed_only=not first_call): - est_html_info = _type_of_html_estimator(estimator) + est_block = _type_of_html_estimator(estimator) + + if est_block.type == 'serial': + _write_sk_item(out, dash_wrapped=first_call or est_block.dash_wrapped) + _write_named_label_html(out, estimator, name) - if est_html_info.type == 'serial': out.write('

') - est_infos = zip(est_html_info.estimators, est_html_info.names) + est_infos = zip(est_block.estimators, est_block.names) for est, name in est_infos: + if name and not isinstance(est, _EstHTMLBlock): + name = f"{name}: {est.__class__.__name__}" _write_estimator_html(out, est, name) - out.write('

') # sk-serial + out.write('

') # sk-serial sk-item - elif est_html_info.type == 'parallel': - out.write('

') - if name: - with config_context(print_changed_only=True): - name_details = str(estimator) - _write_label_html(out, name, name_details) + elif est_block.type == 'parallel': + _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) + _write_named_label_html(out, estimator, name) out.write('

') - if est_html_info.name_details is None: - name_details = (None,) * len(est_html_info.estimators) + if est_block.name_details is None: + name_details = (None,) * len(est_block.estimators) else: - name_details = est_html_info.name_details + name_details = est_block.name_details - est_infos = zip(est_html_info.estimators, est_html_info.names, - name_details) + est_infos = zip(est_block.estimators, est_block.names, name_details) for est, name, name_details in est_infos: out.write('

') _write_label_html(out, name, name_details) out.write('

') _write_estimator_html(out, est, '') out.write('

') # sk-parallel-item sk-serial - out.write('

') # sk-parallel sk-serial-item + out.write('

') # sk-parallel sk-item - elif est_html_info.type == 'single-meta': - out.write('

') - if name: - with config_context(print_changed_only=True): - name_details = str(estimator) - _write_label_html(out, name, name_details) + elif est_block.type == 'single-meta': + _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) + _write_named_label_html(out, estimator, name) out.write('

') - _write_estimator_html(out, est_html_info.estimators, - est_html_info.names) + _write_estimator_html(out, est_block.estimators, est_block.names) + # sk-parallel sk-parallel-item sk-item out.write('

') - elif est_html_info.type == 'single': - _write_label_html(out, est_html_info.names, - est_html_info.name_details, - outer_class="sk-serial-item", - inner_class="sk-estimator", + elif est_block.type == 'single': + _write_label_html(out, est_block.names, est_block.name_details, + outer_class="sk-item", inner_class="sk-estimator", checked=first_call) @@ -199,11 +214,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): display: flex; flex-direction: column; align-items: center; - float: left; background-color: white; } - -div.sk-serial-item { +div.sk-item { z-index: 1; } div.sk-parallel { @@ -231,9 +244,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): } div.sk-dashed-wrapped { border: 1px dashed gray; - margin: 0 0.2em 0.2em 0.2em; + margin: 0.2em; box-sizing: border-box; - padding-bottom: 0.2em; + padding-bottom: 0.1em; background-color: white; } div.sk-label label { @@ -244,8 +257,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): line-height: 1.2em; } div.sk-label-container { + position: relative; + z-index: 2; text-align: center; - z-index: 1; } div.sk-container { display: flex; diff --git a/sklearn/base.py b/sklearn/base.py index 77832cfc3e0ab..c0aeed9f620fe 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -21,7 +21,7 @@ from .utils.validation import check_X_y from .utils.validation import check_array from ._display_estimator import _estimator_repr_html -from ._display_estimator import _EstHTMLInfo +from ._display_estimator import _EstHTMLBlock _DEFAULT_TAGS = { 'non_deterministic': False, @@ -417,8 +417,8 @@ def _repr_html_(self): return _estimator_repr_html(self) def _sk_repr_html(self): - return _EstHTMLInfo('single', self, self.__class__.__name__, - str(self)) + return _EstHTMLBlock('single', self, self.__class__.__name__, + str(self)) class ClassifierMixin: diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index dbba05ff21078..c9462bc6cd58e 
100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,7 +15,7 @@ from joblib import Parallel, delayed from ..base import clone, TransformerMixin -from .._display_estimator import _EstHTMLInfo +from .._display_estimator import _EstHTMLBlock from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch @@ -633,7 +633,7 @@ def _hstack(self, Xs): def _sk_repr_html(self): names, transformers, name_details = zip(*self.transformers) - return _EstHTMLInfo('parallel', transformers, names, name_details) + return _EstHTMLBlock('parallel', transformers, names, name_details) def _check_X(X): diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 18976e4b6a7b8..67da335566780 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,7 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor -from .._display_estimator import _EstHTMLInfo +from .._display_estimator import _EstHTMLBlock from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble @@ -235,10 +235,11 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) - parallel = _EstHTMLInfo('parallel', estimators, names, None) - serial = _EstHTMLInfo('serial', (parallel, final_estimator), - ('', ''), None) - return _EstHTMLInfo('single-meta', serial, str(self), None) + parallel = _EstHTMLBlock('parallel', estimators, names, None, + dash_wrapped=False) + serial = _EstHTMLBlock('serial', (parallel, final_estimator), + ('', ''), None, dash_wrapped=False) + return _EstHTMLBlock('single-meta', serial, str(self), None) class StackingClassifier(ClassifierMixin, _BaseStacking): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 
cfaedfb18edc2..f87bdd6c4151a 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -31,7 +31,7 @@ from ..utils.multiclass import check_classification_targets from ..utils.validation import column_or_1d from ..exceptions import NotFittedError -from .._display_estimator import _EstHTMLInfo +from .._display_estimator import _EstHTMLBlock class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -106,7 +106,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - return _EstHTMLInfo('parallel', estimators, names, None) + return _EstHTMLBlock('parallel', estimators, names, None) class VotingClassifier(ClassifierMixin, _BaseVoting): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 3be08e32ad31c..b299d36f93887 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,7 +18,7 @@ from joblib import Parallel, delayed from .base import clone, TransformerMixin -from ._display_estimator import _EstHTMLInfo +from ._display_estimator import _EstHTMLBlock from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -636,7 +636,8 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.steps) - return _EstHTMLInfo('serial', estimators, names, None) + return _EstHTMLBlock('serial', estimators, names, None, + dash_wrapped=False) def _name_estimators(estimators): @@ -1017,7 +1018,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - return _EstHTMLInfo('parallel', transformers, names, None) + return _EstHTMLBlock('parallel', transformers, names, None) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index a9013afddeccb..7c35da0a6268e 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ 
-148,7 +148,7 @@ def test_display_estimator_pipeline(): '

SimpleImputer(missing_values=\'empty\', strategy=\'constant\')'
       '

', '(\'one-hot\', OneHotEncoder', - 'preprocessor', + 'preprocessor: ColumnTransformer', '

[\'a\', \'b\', \'c\', \'d\', \'e\']

', '

LogisticRegression(random_state=1)

', '

SelectPercentile()

', From 9df8a4b970b2acf1aef6173c37753ab49d4ad1b1 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 17 Mar 2020 18:56:23 -0400 Subject: [PATCH 36/81] CLN Improves code quality --- sklearn/_display_estimator.py | 64 ++++++++++++++----------- sklearn/ensemble/_stacking.py | 6 +-- sklearn/ensemble/_voting.py | 5 +- sklearn/pipeline.py | 17 +++++-- sklearn/tests/test_display_estimator.py | 12 +++-- 5 files changed, 66 insertions(+), 38 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 90fed34b944fb..78dfe66b879e0 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -7,22 +7,44 @@ class _EstHTMLBlock: """HTML Representation of Estimator - If type == 'single', then the parameters are single items representing the - single estimator - if type == 'parallel', then the paramters are list representing the - parallel estimators - if type == 'serial', then the parameters are list representing the serial - estimators - if type == 'single-meta', then parameters represent the wrapped estimator + Parameters + ---------- + type : {'serial', 'parallel', 'single'} + Type of HTML block + + estimators : list of estimators or _EstHTMLBlock + or a single estimator + If type is in ('parallel', 'serial'), then `estimators` is a list of + estimators. + If type == 'single', then `estimators` is a single estimator. + + names : list of str + If type in ('parallel', 'serial'), then `names` corresponds to + estimators + If type is 'single', then `names` is a single string corresponding to + the single estimator. + + name_details : list of str, str, or None, default=None + If type == 'parallel', then `name_details` corresponds to `names`. + If type == 'single', then `name_details` is a single string + corresponding to the single estimator. + `name_details` is not used when type == 'single'. + + dash_wrapped : bool, default=True + If true, wrapped HTML element will be wrapped with a dashed boarder. 
""" - def __init__(self, type, estimators, names, name_details, + def __init__(self, type, estimators, names, name_details=None, dash_wrapped=True): self.type = type self.estimators = estimators self.names = names - self.name_details = name_details self.dash_wrapped = dash_wrapped + if self.type == 'parallel' and name_details is None: + name_details = (None, ) * len(names) + + self.name_details = name_details + def _type_of_html_estimator(estimator): """Generate information about how to display an estimator. @@ -38,8 +60,7 @@ def _type_of_html_estimator(estimator): hasattr(getattr(estimator, 'estimator'), 'get_params')): wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ - return _EstHTMLBlock('single-meta', wrapped_estimator, wrapped_name, - None) + return _EstHTMLBlock('serial', [wrapped_estimator], [wrapped_name]) return estimator._sk_repr_html() @@ -66,6 +87,7 @@ def _write_label_html(out, name, name_details, def _write_named_label_html(out, estimator, name): + """Write label with details based on name""" if not name or isinstance(estimator, _EstHTMLBlock): return with config_context(print_changed_only=True): @@ -74,6 +96,7 @@ def _write_named_label_html(out, estimator, name): def _write_sk_item(out, dash_wrapped=True): + """Write sk-item with or without sk-dashed-wrapped""" dash_cls = " sk-dashed-wrapped" if dash_wrapped else "" out.write(f'

') @@ -91,8 +114,6 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('

') est_infos = zip(est_block.estimators, est_block.names) for est, name in est_infos: - if name and not isinstance(est, _EstHTMLBlock): - name = f"{name}: {est.__class__.__name__}" _write_estimator_html(out, est, name) out.write('

') # sk-serial sk-item @@ -101,12 +122,8 @@ def _write_estimator_html(out, estimator, name, first_call=False): _write_named_label_html(out, estimator, name) out.write('

') - if est_block.name_details is None: - name_details = (None,) * len(est_block.estimators) - else: - name_details = est_block.name_details - - est_infos = zip(est_block.estimators, est_block.names, name_details) + est_infos = zip(est_block.estimators, est_block.names, + est_block.name_details) for est, name, name_details in est_infos: out.write('

') _write_label_html(out, name, name_details) @@ -115,14 +132,6 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('

') # sk-parallel-item sk-serial out.write('

') # sk-parallel sk-item - elif est_block.type == 'single-meta': - _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) - _write_named_label_html(out, estimator, name) - out.write('

') - _write_estimator_html(out, est_block.estimators, est_block.names) - # sk-parallel sk-parallel-item sk-item - out.write('

') - elif est_block.type == 'single': _write_label_html(out, est_block.names, est_block.name_details, outer_class="sk-item", inner_class="sk-estimator", @@ -248,6 +257,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): box-sizing: border-box; padding-bottom: 0.1em; background-color: white; + position: relative; } div.sk-label label { font-family: monospace; diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 67da335566780..750935bff8b8c 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -235,11 +235,11 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) - parallel = _EstHTMLBlock('parallel', estimators, names, None, + parallel = _EstHTMLBlock('parallel', estimators, names, dash_wrapped=False) serial = _EstHTMLBlock('serial', (parallel, final_estimator), - ('', ''), None, dash_wrapped=False) - return _EstHTMLBlock('single-meta', serial, str(self), None) + ('', ''), dash_wrapped=False) + return _EstHTMLBlock('serial', [serial], [str(self)]) class StackingClassifier(ClassifierMixin, _BaseStacking): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index f87bdd6c4151a..7391096e17e32 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -32,6 +32,7 @@ from ..utils.validation import column_or_1d from ..exceptions import NotFittedError from .._display_estimator import _EstHTMLBlock +from .._config import config_context class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -106,7 +107,9 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - return _EstHTMLBlock('parallel', estimators, names, None) + with config_context(print_changed_only=True): + name_details = [str(trans) for trans in estimators] + return _EstHTMLBlock('parallel', estimators, names, name_details) class VotingClassifier(ClassifierMixin, _BaseVoting): 
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index b299d36f93887..7bf00b4131657 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -19,6 +19,7 @@ from .base import clone, TransformerMixin from ._display_estimator import _EstHTMLBlock +from ._config import config_context from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -635,9 +636,15 @@ def n_features_in_(self): return self.steps[0][1].n_features_in_ def _sk_repr_html(self): - names, estimators = zip(*self.steps) - return _EstHTMLBlock('serial', estimators, names, None, - dash_wrapped=False) + _, estimators = zip(*self.steps) + + def _get_name(name, est): + if est is None or est == 'passthrough': + return f'{name}: passthrough' + # Is an estimator + return f'{name}: {est.__class__.__name__}' + names = [_get_name(name, est) for name, est in self.steps] + return _EstHTMLBlock('serial', estimators, names, dash_wrapped=False) def _name_estimators(estimators): @@ -1018,7 +1025,9 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - return _EstHTMLBlock('parallel', transformers, names, None) + with config_context(print_changed_only=True): + name_details = [str(trans) for trans in transformers] + return _EstHTMLBlock('parallel', transformers, names, name_details) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 7c35da0a6268e..720ffb860d8f4 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -61,12 +61,17 @@ def test_type_of_html_estimator_single_estimator(): def test_type_of_html_estimator_pipeline(): pipe = Pipeline([ ('imputer', SimpleImputer()), + ('do_nothing', 'passthrough'), + ('do_nothing_more', None), ('classifier', LogisticRegression()) ]) est_html_info = _type_of_html_estimator(pipe) assert 
est_html_info.type == 'serial' assert est_html_info.estimators == tuple(step[1] for step in pipe.steps) - assert est_html_info.names == ('imputer', 'classifier') + assert est_html_info.names == ['imputer: SimpleImputer', + 'do_nothing: passthrough', + 'do_nothing_more: passthrough', + 'classifier: LogisticRegression'] assert est_html_info.name_details is None @@ -79,7 +84,7 @@ def test_type_of_html_estimator_feature_union(): assert est_html_info.names == ('pca', 'svd') assert est_html_info.estimators == tuple( trans[1] for trans in f_union.transformer_list) - assert est_html_info.name_details is None + assert est_html_info.name_details == ['PCA()', 'TruncatedSVD()'] def test_type_of_html_estimator_voting(): @@ -92,7 +97,8 @@ def test_type_of_html_estimator_voting(): assert est_html_info.estimators == tuple(trans[1] for trans in clf.estimators) assert est_html_info.names == ('log_reg', 'mlp') - assert est_html_info.name_details is None + assert est_html_info.name_details == ['LogisticRegression()', + 'MLPClassifier()'] def test_type_of_html_estimator_column_transformer(): From 50ee0b4e635214750e74033cd942b3309f35babb Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 17 Mar 2020 20:10:12 -0400 Subject: [PATCH 37/81] STY Update --- sklearn/ensemble/_voting.py | 5 +---- sklearn/pipeline.py | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 7391096e17e32..5ce24ccae41ea 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -32,7 +32,6 @@ from ..utils.validation import column_or_1d from ..exceptions import NotFittedError from .._display_estimator import _EstHTMLBlock -from .._config import config_context class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -107,9 +106,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - with config_context(print_changed_only=True): - name_details = [str(trans) for 
trans in estimators] - return _EstHTMLBlock('parallel', estimators, names, name_details) + return _EstHTMLBlock('parallel', estimators, names) class VotingClassifier(ClassifierMixin, _BaseVoting): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 7bf00b4131657..7ad7dc87b05af 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -19,7 +19,6 @@ from .base import clone, TransformerMixin from ._display_estimator import _EstHTMLBlock -from ._config import config_context from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -1025,9 +1024,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - with config_context(print_changed_only=True): - name_details = [str(trans) for trans in transformers] - return _EstHTMLBlock('parallel', transformers, names, name_details) + return _EstHTMLBlock('parallel', transformers, names) def make_union(*transformers, **kwargs): From 212ba2112f82b261262be41dbd6c341f0d520099 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 17 Mar 2020 21:55:40 -0400 Subject: [PATCH 38/81] TST Fix --- sklearn/tests/test_display_estimator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 720ffb860d8f4..047ffd75d16d5 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -84,7 +84,7 @@ def test_type_of_html_estimator_feature_union(): assert est_html_info.names == ('pca', 'svd') assert est_html_info.estimators == tuple( trans[1] for trans in f_union.transformer_list) - assert est_html_info.name_details == ['PCA()', 'TruncatedSVD()'] + assert est_html_info.name_details == (None, None) def test_type_of_html_estimator_voting(): @@ -97,8 +97,7 @@ def test_type_of_html_estimator_voting(): assert est_html_info.estimators == tuple(trans[1] for 
trans in clf.estimators) assert est_html_info.names == ('log_reg', 'mlp') - assert est_html_info.name_details == ['LogisticRegression()', - 'MLPClassifier()'] + assert est_html_info.name_details == (None, None) def test_type_of_html_estimator_column_transformer(): From 2a81f160c4e176e6f3fd0c370aca2b12db315046 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 18 Mar 2020 13:38:37 -0400 Subject: [PATCH 39/81] WIP --- doc/modules/compose.rst | 2 +- doc/sphinxext/display_est_repr_html.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index aeb8a553737f9..366da54bd7d67 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -545,7 +545,7 @@ many estimators. An example of the HTML output can been seen below. .. note:: The HTML output of this code snippet can only been seen on the HTML version - of the docs. + of the documentation. .. display_estimator_repr_html:: diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index dc72e71390c6f..9a1cc5a7ec571 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -5,9 +5,10 @@ from docutils.parsers.rst import Directive from docutils import nodes from io import StringIO +from sphinx import addnodes -class DisplayReprEstimator(Directive): +class DisplayEstimatorRepr(Directive): "Execute Python code and includes stdout as HTML" has_content = True @@ -38,10 +39,20 @@ def run(self): input_code = nodes.literal_block(code, code) input_code['language'] = 'python' output.append(input_code) - code_results = nodes.raw('', code_results, format='html') - output.append(code_results) + + onlynode_html = addnodes.only(expr='html') + onlynode_html += nodes.raw('', code_results, format='html') + output.append(onlynode_html) + + onlynode_latex = addnodes.only(expr='latex') + onlynode_latex += nodes.raw('', code_results, format='html') + onlynode_latex += 
nodes.note('The HTML output of this code snippet ' + 'can only been seen on the HTML version ' + 'of the docs.') + output.append(onlynode_latex) + return output def setup(app): - app.add_directive('display_estimator_repr_html', DisplayReprEstimator) + app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr) From 6c11293b2ddf430d3962056828200860c854e593 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 18 Mar 2020 14:14:35 -0400 Subject: [PATCH 40/81] WIP --- sklearn/_display_estimator.py | 52 ++++++++++++------------- sklearn/base.py | 4 +- sklearn/compose/_column_transformer.py | 4 +- sklearn/ensemble/_stacking.py | 8 ++-- sklearn/ensemble/_voting.py | 4 +- sklearn/pipeline.py | 6 +-- sklearn/tests/test_display_estimator.py | 38 +++++++++--------- 7 files changed, 57 insertions(+), 59 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 78dfe66b879e0..d2b0db0792d92 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -4,63 +4,62 @@ import uuid -class _EstHTMLBlock: +class _VisualBlock: """HTML Representation of Estimator Parameters ---------- - type : {'serial', 'parallel', 'single'} - Type of HTML block + kind : {'serial', 'parallel', 'single'} + kind of HTML block - estimators : list of estimators or _EstHTMLBlock - or a single estimator - If type is in ('parallel', 'serial'), then `estimators` is a list of + estimators : list of estimators or `_VisualBlock`s or a single estimator + If kind is in ('parallel', 'serial'), then `estimators` is a list of estimators. - If type == 'single', then `estimators` is a single estimator. + If kind == 'single', then `estimators` is a single estimator. 
names : list of str - If type in ('parallel', 'serial'), then `names` corresponds to + If kind in ('parallel', 'serial'), then `names` corresponds to estimators - If type is 'single', then `names` is a single string corresponding to + If kind is 'single', then `names` is a single string corresponding to the single estimator. name_details : list of str, str, or None, default=None - If type == 'parallel', then `name_details` corresponds to `names`. - If type == 'single', then `name_details` is a single string + If kind == 'parallel', then `name_details` corresponds to `names`. + If kind == 'single', then `name_details` is a single string corresponding to the single estimator. - `name_details` is not used when type == 'single'. + `name_details` is not used when kind == 'single'. dash_wrapped : bool, default=True - If true, wrapped HTML element will be wrapped with a dashed boarder. + If true, wrapped HTML element will be wrapped with a dashed border. """ - def __init__(self, type, estimators, names, name_details=None, + def __init__(self, kind, estimators, names, name_details=None, dash_wrapped=True): - self.type = type + self.kind = kind self.estimators = estimators self.names = names self.dash_wrapped = dash_wrapped - if self.type == 'parallel' and name_details is None: + if self.kind == 'parallel' and name_details is None: name_details = (None, ) * len(names) self.name_details = name_details -def _type_of_html_estimator(estimator): +def _get_visual_block(estimator): """Generate information about how to display an estimator. 
""" - if isinstance(estimator, _EstHTMLBlock): + if isinstance(estimator, _VisualBlock): return estimator elif isinstance(estimator, str): - return _EstHTMLBlock('single', estimator, estimator, estimator) + return _VisualBlock('single', estimator, estimator, estimator) elif estimator is None: - return _EstHTMLBlock('single', estimator, 'None', 'None') + return _VisualBlock('single', estimator, 'None', 'None') # looks like a meta estimator elif (hasattr(estimator, 'estimator') and hasattr(getattr(estimator, 'estimator'), 'get_params')): wrapped_estimator = getattr(estimator, 'estimator') wrapped_name = wrapped_estimator.__class__.__name__ - return _EstHTMLBlock('serial', [wrapped_estimator], [wrapped_name]) + return _VisualBlock('serial', [wrapped_estimator], [wrapped_name]) return estimator._sk_repr_html() @@ -88,7 +87,7 @@ def _write_label_html(out, name, name_details, def _write_named_label_html(out, estimator, name): """Write label with details based on name""" - if not name or isinstance(estimator, _EstHTMLBlock): + if not name or isinstance(estimator, _VisualBlock): return with config_context(print_changed_only=True): name_details = str(estimator) @@ -105,9 +104,9 @@ def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ with config_context(print_changed_only=not first_call): - est_block = _type_of_html_estimator(estimator) + est_block = _get_visual_block(estimator) - if est_block.type == 'serial': + if est_block.kind == 'serial': _write_sk_item(out, dash_wrapped=first_call or est_block.dash_wrapped) _write_named_label_html(out, estimator, name) @@ -117,7 +116,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): _write_estimator_html(out, est, name) out.write('

') # sk-serial sk-item - elif est_block.type == 'parallel': + elif est_block.kind == 'parallel': _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) _write_named_label_html(out, estimator, name) out.write('

') @@ -132,7 +131,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('

') # sk-parallel-item sk-serial out.write('

') # sk-parallel sk-item - elif est_block.type == 'single': + elif est_block.kind == 'single': _write_label_html(out, est_block.names, est_block.name_details, outer_class="sk-item", inner_class="sk-estimator", checked=first_call) @@ -276,7 +275,6 @@ def _write_estimator_html(out, estimator, name, first_call=False): flex-direction: column; align-items: flex-start; position: relative; - float: left; } """.replace(' ', '').replace('\n', '') # noqa diff --git a/sklearn/base.py b/sklearn/base.py index c0aeed9f620fe..fb328076e4f8f 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -21,7 +21,7 @@ from .utils.validation import check_X_y from .utils.validation import check_array from ._display_estimator import _estimator_repr_html -from ._display_estimator import _EstHTMLBlock +from ._display_estimator import _VisualBlock _DEFAULT_TAGS = { 'non_deterministic': False, @@ -417,7 +417,7 @@ def _repr_html_(self): return _estimator_repr_html(self) def _sk_repr_html(self): - return _EstHTMLBlock('single', self, self.__class__.__name__, + return _VisualBlock('single', self, self.__class__.__name__, str(self)) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index c9462bc6cd58e..ac24c14262538 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -15,7 +15,7 @@ from joblib import Parallel, delayed from ..base import clone, TransformerMixin -from .._display_estimator import _EstHTMLBlock +from .._display_estimator import _VisualBlock from ..pipeline import _fit_transform_one, _transform_one, _name_estimators from ..preprocessing import FunctionTransformer from ..utils import Bunch @@ -633,7 +633,7 @@ def _hstack(self, Xs): def _sk_repr_html(self): names, transformers, name_details = zip(*self.transformers) - return _EstHTMLBlock('parallel', transformers, names, name_details) + return _VisualBlock('parallel', transformers, names, name_details) def _check_X(X): diff --git 
a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 750935bff8b8c..6a2f8adad5c8e 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -13,7 +13,7 @@ from ..base import clone from ..base import ClassifierMixin, RegressorMixin, TransformerMixin from ..base import is_classifier, is_regressor -from .._display_estimator import _EstHTMLBlock +from .._display_estimator import _VisualBlock from ._base import _fit_single_estimator from ._base import _BaseHeterogeneousEnsemble @@ -235,11 +235,11 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) - parallel = _EstHTMLBlock('parallel', estimators, names, + parallel = _VisualBlock('parallel', estimators, names, dash_wrapped=False) - serial = _EstHTMLBlock('serial', (parallel, final_estimator), + serial = _VisualBlock('serial', (parallel, final_estimator), ('', ''), dash_wrapped=False) - return _EstHTMLBlock('serial', [serial], [str(self)]) + return _VisualBlock('serial', [serial], [str(self)]) class StackingClassifier(ClassifierMixin, _BaseStacking): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 5ce24ccae41ea..8665dea15884e 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -31,7 +31,7 @@ from ..utils.multiclass import check_classification_targets from ..utils.validation import column_or_1d from ..exceptions import NotFittedError -from .._display_estimator import _EstHTMLBlock +from .._display_estimator import _VisualBlock class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): @@ -106,7 +106,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - return _EstHTMLBlock('parallel', estimators, names) + return _VisualBlock('parallel', estimators, names) class VotingClassifier(ClassifierMixin, _BaseVoting): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 7ad7dc87b05af..37a7fc9c406a4 100644 
--- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -18,7 +18,7 @@ from joblib import Parallel, delayed from .base import clone, TransformerMixin -from ._display_estimator import _EstHTMLBlock +from ._display_estimator import _VisualBlock from .utils.metaestimators import if_delegate_has_method from .utils import Bunch, _print_elapsed_time from .utils.validation import check_memory @@ -643,7 +643,7 @@ def _get_name(name, est): # Is an estimator return f'{name}: {est.__class__.__name__}' names = [_get_name(name, est) for name, est in self.steps] - return _EstHTMLBlock('serial', estimators, names, dash_wrapped=False) + return _VisualBlock('serial', estimators, names, dash_wrapped=False) def _name_estimators(estimators): @@ -1024,7 +1024,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - return _EstHTMLBlock('parallel', transformers, names) + return _VisualBlock('parallel', transformers, names) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index 047ffd75d16d5..aa36e469f277c 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -21,7 +21,7 @@ from sklearn.ensemble import StackingClassifier from sklearn.ensemble import StackingRegressor from sklearn._display_estimator import _write_label_html -from sklearn._display_estimator import _type_of_html_estimator +from sklearn._display_estimator import _get_visual_block from sklearn._display_estimator import _estimator_repr_html @@ -41,32 +41,32 @@ def test_write_label_html(checked): @pytest.mark.parametrize('est', ['passthrough', 'drop', None]) -def test_type_of_html_estimator_single_str_none(est): - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' +def test_get_visual_block_single_str_none(est): + est_html_info = _get_visual_block(est) + assert est_html_info.kind == 'single' assert 
est_html_info.estimators == est assert est_html_info.names == str(est) assert est_html_info.name_details == str(est) -def test_type_of_html_estimator_single_estimator(): +def test_get_visual_block_single_estimator(): est = LogisticRegression(C=10.0) - est_html_info = _type_of_html_estimator(est) - assert est_html_info.type == 'single' + est_html_info = _get_visual_block(est) + assert est_html_info.kind == 'single' assert est_html_info.estimators == est assert est_html_info.names == est.__class__.__name__ assert est_html_info.name_details == str(est) -def test_type_of_html_estimator_pipeline(): +def test_get_visual_block_pipeline(): pipe = Pipeline([ ('imputer', SimpleImputer()), ('do_nothing', 'passthrough'), ('do_nothing_more', None), ('classifier', LogisticRegression()) ]) - est_html_info = _type_of_html_estimator(pipe) - assert est_html_info.type == 'serial' + est_html_info = _get_visual_block(pipe) + assert est_html_info.kind == 'serial' assert est_html_info.estimators == tuple(step[1] for step in pipe.steps) assert est_html_info.names == ['imputer: SimpleImputer', 'do_nothing: passthrough', @@ -75,38 +75,38 @@ def test_type_of_html_estimator_pipeline(): assert est_html_info.name_details is None -def test_type_of_html_estimator_feature_union(): +def test_get_visual_block_feature_union(): f_union = FeatureUnion([ ('pca', PCA()), ('svd', TruncatedSVD()) ]) - est_html_info = _type_of_html_estimator(f_union) - assert est_html_info.type == 'parallel' + est_html_info = _get_visual_block(f_union) + assert est_html_info.kind == 'parallel' assert est_html_info.names == ('pca', 'svd') assert est_html_info.estimators == tuple( trans[1] for trans in f_union.transformer_list) assert est_html_info.name_details == (None, None) -def test_type_of_html_estimator_voting(): +def test_get_visual_block_voting(): clf = VotingClassifier([ ('log_reg', LogisticRegression()), ('mlp', MLPClassifier()) ]) - est_html_info = _type_of_html_estimator(clf) - assert est_html_info.type == 
'parallel' + est_html_info = _get_visual_block(clf) + assert est_html_info.kind == 'parallel' assert est_html_info.estimators == tuple(trans[1] for trans in clf.estimators) assert est_html_info.names == ('log_reg', 'mlp') assert est_html_info.name_details == (None, None) -def test_type_of_html_estimator_column_transformer(): +def test_get_visual_block_column_transformer(): ct = ColumnTransformer([ ('pca', PCA(), ['num1', 'num2']), ('svd', TruncatedSVD, [0, 3]) ]) - est_html_info = _type_of_html_estimator(ct) - assert est_html_info.type == 'parallel' + est_html_info = _get_visual_block(ct) + assert est_html_info.kind == 'parallel' assert est_html_info.estimators == tuple( trans[1] for trans in ct.transformers) assert est_html_info.names == ('pca', 'svd') From da83a68bd9ac4ad95a9a11fd087b6a43ed9f5c1d Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 18 Mar 2020 14:16:29 -0400 Subject: [PATCH 41/81] CLN Address comments --- sklearn/_display_estimator.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index d2b0db0792d92..258487d521f45 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -94,12 +94,6 @@ def _write_named_label_html(out, estimator, name): _write_label_html(out, name, name_details) -def _write_sk_item(out, dash_wrapped=True): - """Write sk-item with or without sk-dashed-wrapped""" - dash_cls = " sk-dashed-wrapped" if dash_wrapped else "" - out.write(f'

') - - def _write_estimator_html(out, estimator, name, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ @@ -107,7 +101,10 @@ def _write_estimator_html(out, estimator, name, first_call=False): est_block = _get_visual_block(estimator) if est_block.kind == 'serial': - _write_sk_item(out, dash_wrapped=first_call or est_block.dash_wrapped) + dashed_wrapped = first_call or est_block.dash_wrapped + dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" + out.write(f'

') + _write_named_label_html(out, estimator, name) out.write('

') @@ -117,7 +114,10 @@ def _write_estimator_html(out, estimator, name, first_call=False): out.write('

') # sk-serial sk-item elif est_block.kind == 'parallel': - _write_sk_item(out, dash_wrapped=est_block.dash_wrapped) + dashed_wrapped = first_call or est_block.dash_wrapped + dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" + out.write(f'

') + _write_named_label_html(out, estimator, name) out.write('

') From 55a20e7ee3cad3e66949d16396ee31f76c2d3061 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 20 Mar 2020 14:06:54 -0400 Subject: [PATCH 42/81] ENH Update sphinx extension --- doc/modules/compose.rst | 5 ----- doc/sphinxext/display_est_repr_html.py | 25 ++++++++++++------------- sklearn/ensemble/_stacking.py | 6 +++--- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 366da54bd7d67..4f82d0bd8da8a 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -542,11 +542,6 @@ By default, estimators are displayed with a HTML representation when shown in a jupyter notebook. This can be useful to diagnose or visualize a Pipeline with many estimators. An example of the HTML output can been seen below. -.. note:: - - The HTML output of this code snippet can only been seen on the HTML version - of the documentation. - .. display_estimator_repr_html:: from sklearn.compose import ColumnTransformer diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index 9a1cc5a7ec571..d93b783d5a1d4 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -28,8 +28,7 @@ def execute(self, code): exec(code) sys.stdout, sys.stderr = orig_stdout, orig_stderr - return "".join(['

', - output.getvalue(), err.getvalue(), "

"]) + return f"{output.getvalue()}{err.getvalue()}" def run(self): output = [] @@ -40,17 +39,17 @@ def run(self): input_code['language'] = 'python' output.append(input_code) - onlynode_html = addnodes.only(expr='html') - onlynode_html += nodes.raw('', code_results, format='html') - output.append(onlynode_html) - - onlynode_latex = addnodes.only(expr='latex') - onlynode_latex += nodes.raw('', code_results, format='html') - onlynode_latex += nodes.note('The HTML output of this code snippet ' - 'can only been seen on the HTML version ' - 'of the docs.') - output.append(onlynode_latex) - + html_node = nodes.raw('', code_results, format='html') + output.append(html_node) + + code_results_latex = r""" + \begin{sphinxadmonition}{note}{Note:} + The HTML output of this code snippet can only been seen on the HTML + version of the documentation + \end{sphinxadmonition} + """ + latex_node = nodes.raw('', code_results_latex, format='latex') + output.append(latex_node) return output diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 6a2f8adad5c8e..1794de018189b 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -236,9 +236,9 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) parallel = _VisualBlock('parallel', estimators, names, - dash_wrapped=False) - serial = _VisualBlock('serial', (parallel, final_estimator), - ('', ''), dash_wrapped=False) + dash_wrapped=False) + serial = _VisualBlock('serial', (parallel, final_estimator), ('', ''), + dash_wrapped=False) return _VisualBlock('serial', [serial], [str(self)]) From 169964d436cfdfe7c549a314546f299cf280c177 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 20 Mar 2020 15:57:02 -0400 Subject: [PATCH 43/81] WIP --- doc/modules/compose.rst | 2 +- doc/sphinxext/display_est_repr_html.py | 6 +++++- .../compose/plot_column_transformer_mixed_types.py | 11 +++++++---- sklearn/_display_estimator.py | 
4 +--- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 4f82d0bd8da8a..81e219ee1a7d9 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -540,7 +540,7 @@ Visualizing Composite Estimators By default, estimators are displayed with a HTML representation when shown in a jupyter notebook. This can be useful to diagnose or visualize a Pipeline with -many estimators. An example of the HTML output can been seen below. +many estimators. An example of the HTML output can be seen below. .. display_estimator_repr_html:: diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index d93b783d5a1d4..b856d1045d654 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -5,7 +5,6 @@ from docutils.parsers.rst import Directive from docutils import nodes from io import StringIO -from sphinx import addnodes class DisplayEstimatorRepr(Directive): @@ -55,3 +54,8 @@ def run(self): def setup(app): app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr) + + return { + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 037ff3fbc147a..b1c1efd96dab0 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -59,10 +59,6 @@ # * ``embarked``: categories encoded as strings ``{'C', 'S', 'Q'}``; # * ``sex``: categories encoded as strings ``{'female', 'male'}``; # * ``pclass``: ordinal integers ``{1, 2, 3}``. -# -# We create the preprocessing pipelines for both numeric and categorical data. -# A HTML visualization of the created pipeline can be found in -# :ref:`visualizing_composite_estimators`. 
numeric_features = ['age', 'fare'] numeric_transformer = Pipeline(steps=[ @@ -89,6 +85,13 @@ clf.fit(X_train, y_train) print("model score: %.3f" % clf.score(X_test, y_test)) +############################################################################### +# HTML representation of ``Pipeline`` +############################################################################### +# When the ``Pipeline`` is printed out in a jupyter notebook an HTML +# representation of the estimator is displayed as follows: +clf + ############################################################################### # Use ``ColumnTransformer`` by selecting column by data types ############################################################################### diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 258487d521f45..c0f1c59e0ee49 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -271,9 +271,7 @@ def _write_estimator_html(out, estimator, name, first_call=False): text-align: center; } div.sk-container { - display: flex; - flex-direction: column; - align-items: flex-start; + display: inline-block; position: relative; } """.replace(' ', '').replace('\n', '') # noqa From 1e1bd1b44052a78601acc63eda5c1e93373643d2 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 20 Mar 2020 15:57:42 -0400 Subject: [PATCH 44/81] REV Less diffs --- examples/compose/plot_column_transformer_mixed_types.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index b1c1efd96dab0..920ef37bfb333 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -59,6 +59,8 @@ # * ``embarked``: categories encoded as strings ``{'C', 'S', 'Q'}``; # * ``sex``: categories encoded as strings ``{'female', 'male'}``; # * ``pclass``: ordinal integers ``{1, 2, 3}``. 
+# +# We create the preprocessing pipelines for both numeric and categorical data. numeric_features = ['age', 'fare'] numeric_transformer = Pipeline(steps=[ From ce0fc2c3c415d44127b935046567a39c34b6fe41 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sat, 21 Mar 2020 21:18:59 -0400 Subject: [PATCH 45/81] WIP --- sklearn/_display_estimator.py | 82 +++++++++++++++---------- sklearn/base.py | 4 -- sklearn/compose/_column_transformer.py | 3 +- sklearn/ensemble/_stacking.py | 6 +- sklearn/ensemble/_voting.py | 2 +- sklearn/pipeline.py | 5 +- sklearn/tests/test_display_estimator.py | 1 + 7 files changed, 58 insertions(+), 45 deletions(-) diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index c0f1c59e0ee49..6a5caffcdea30 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,5 +1,6 @@ from sklearn._config import config_context from contextlib import closing +from contextlib import suppress from io import StringIO import uuid @@ -32,37 +33,22 @@ class _VisualBlock: dash_wrapped : bool, default=True If true, wrapped HTML element will be wrapped with a dashed border. """ - def __init__(self, kind, estimators, names, name_details=None, + def __init__(self, kind, estimators, *, names=None, name_details=None, dash_wrapped=True): self.kind = kind self.estimators = estimators - self.names = names self.dash_wrapped = dash_wrapped - if self.kind == 'parallel' and name_details is None: - name_details = (None, ) * len(names) + if self.kind in ('parallel', 'serial'): + if names is None: + names = (None, ) * len(estimators) + if name_details is None: + name_details = (None, ) * len(estimators) + self.names = names self.name_details = name_details -def _get_visual_block(estimator): - """Generate information about how to display an estimator. 
- """ - if isinstance(estimator, _VisualBlock): - return estimator - elif isinstance(estimator, str): - return _VisualBlock('single', estimator, estimator, estimator) - elif estimator is None: - return _VisualBlock('single', estimator, 'None', 'None') - # looks like a meta estimator - elif (hasattr(estimator, 'estimator') and - hasattr(getattr(estimator, 'estimator'), 'get_params')): - wrapped_estimator = getattr(estimator, 'estimator') - wrapped_name = wrapped_estimator.__class__.__name__ - return _VisualBlock('serial', [wrapped_estimator], [wrapped_name]) - return estimator._sk_repr_html() - - def _write_label_html(out, name, name_details, outer_class="sk-label-container", inner_class="sk-label", @@ -85,16 +71,36 @@ def _write_label_html(out, name, name_details, out.write('

') # outer_class inner_class -def _write_named_label_html(out, estimator, name): - """Write label with details based on name""" - if not name or isinstance(estimator, _VisualBlock): - return - with config_context(print_changed_only=True): - name_details = str(estimator) - _write_label_html(out, name, name_details) +def _get_visual_block(estimator): + """Generate information about how to display an estimator. + """ + with suppress(AttributeError): + return estimator._sk_repr_html() + + if isinstance(estimator, _VisualBlock): + return estimator + elif isinstance(estimator, str): + return _VisualBlock('single', estimator, + names=estimator, name_details=estimator) + elif estimator is None: + return _VisualBlock('single', estimator, + names='None', name_details='None') + + # check if estimator looks like a meta estimator wraps estimators + if hasattr(estimator, 'get_params'): + estimators = [] + for key, value in estimator.get_params().items(): + if '__' not in key and hasattr(value, 'get_params'): + estimators.append(value) + if len(estimators): + return _VisualBlock('parallel', estimators, names=None) + + return _VisualBlock('single', estimator, + names=estimator.__class__.__name__, + name_details=str(estimator)) -def _write_estimator_html(out, estimator, name, first_call=False): +def _write_estimator_html(out, estimator, estimator_label, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ with config_context(print_changed_only=not first_call): @@ -105,7 +111,10 @@ def _write_estimator_html(out, estimator, name, first_call=False): dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" out.write(f'

') - _write_named_label_html(out, estimator, name) + # write label of current if name is given + if estimator_label: + with config_context(print_changed_only=True): + _write_label_html(out, estimator_label, str(estimator)) out.write('

') est_infos = zip(est_block.estimators, est_block.names) @@ -118,14 +127,19 @@ def _write_estimator_html(out, estimator, name, first_call=False): dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" out.write(f'

') - _write_named_label_html(out, estimator, name) - out.write('

') + if estimator_label: + with config_context(print_changed_only=True): + _write_label_html(out, estimator_label, str(estimator)) + out.write('

') est_infos = zip(est_block.estimators, est_block.names, est_block.name_details) + for est, name, name_details in est_infos: out.write('

') - _write_label_html(out, name, name_details) + # name is associated with the parallel element + if name: + _write_label_html(out, name, name_details) out.write('

') _write_estimator_html(out, est, '') out.write('

') # sk-parallel-item sk-serial diff --git a/sklearn/base.py b/sklearn/base.py index fb328076e4f8f..22bce1cd9d6f6 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -416,10 +416,6 @@ def _validate_data(self, X, y=None, reset=True, **check_params): def _repr_html_(self): return _estimator_repr_html(self) - def _sk_repr_html(self): - return _VisualBlock('single', self, self.__class__.__name__, - str(self)) - class ClassifierMixin: """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index ac24c14262538..a501eaa145475 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -633,7 +633,8 @@ def _hstack(self, Xs): def _sk_repr_html(self): names, transformers, name_details = zip(*self.transformers) - return _VisualBlock('parallel', transformers, names, name_details) + return _VisualBlock('parallel', transformers, + names=names, name_details=name_details) def _check_X(X): diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 1794de018189b..09eca6f507e16 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -235,11 +235,11 @@ def predict(self, X, **predict_params): def _sk_repr_html(self, final_estimator): names, estimators = zip(*self.estimators) - parallel = _VisualBlock('parallel', estimators, names, + parallel = _VisualBlock('parallel', estimators, names=names, dash_wrapped=False) - serial = _VisualBlock('serial', (parallel, final_estimator), ('', ''), + serial = _VisualBlock('serial', (parallel, final_estimator), dash_wrapped=False) - return _VisualBlock('serial', [serial], [str(self)]) + return _VisualBlock('serial', [serial]) class StackingClassifier(ClassifierMixin, _BaseStacking): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index 8665dea15884e..551d89737f12c 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -106,7 
+106,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, estimators = zip(*self.estimators) - return _VisualBlock('parallel', estimators, names) + return _VisualBlock('parallel', estimators, names=names) class VotingClassifier(ClassifierMixin, _BaseVoting): diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 37a7fc9c406a4..666b703f6c04f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -643,7 +643,8 @@ def _get_name(name, est): # Is an estimator return f'{name}: {est.__class__.__name__}' names = [_get_name(name, est) for name, est in self.steps] - return _VisualBlock('serial', estimators, names, dash_wrapped=False) + return _VisualBlock('serial', estimators, + names=names, dash_wrapped=False) def _name_estimators(estimators): @@ -1024,7 +1025,7 @@ def n_features_in_(self): def _sk_repr_html(self): names, transformers = zip(*self.transformer_list) - return _VisualBlock('parallel', transformers, names) + return _VisualBlock('parallel', transformers, names=names) def make_union(*transformers, **kwargs): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index aa36e469f277c..c5dec08964d47 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -182,6 +182,7 @@ def test_stacking_classsifer(final_estimator): html_output = _estimator_repr_html(clf) assert "('mlp', MLPClassifier(alpha=0.001)" in html_output + assert "('tree', DecisionTreeClassifier()" in html_output if final_estimator is None: assert "LogisticRegression()" in html_output else: From 93da06040fc670f508416ab18e728feaa73e4ab2 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Sun, 22 Mar 2020 14:59:16 -0400 Subject: [PATCH 46/81] CLN Address comments --- sklearn/_display_estimator.py | 56 +++++++++++++------------ sklearn/pipeline.py | 5 ++- sklearn/tests/test_display_estimator.py | 41 +++++++++++++----- 3 files changed, 64 insertions(+), 38 deletions(-) diff --git 
a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py index 6a5caffcdea30..1bef4f61de410 100644 --- a/sklearn/_display_estimator.py +++ b/sklearn/_display_estimator.py @@ -1,4 +1,5 @@ from sklearn._config import config_context +from sklearn._config import get_config from contextlib import closing from contextlib import suppress from io import StringIO @@ -14,24 +15,23 @@ class _VisualBlock: kind of HTML block estimators : list of estimators or `_VisualBlock`s or a single estimator - If kind is in ('parallel', 'serial'), then `estimators` is a list of + If kind != 'single', then `estimators` is a list of estimators. If kind == 'single', then `estimators` is a single estimator. names : list of str - If kind in ('parallel', 'serial'), then `names` corresponds to - estimators - If kind is 'single', then `names` is a single string corresponding to + If kind != 'single', then `names` corresponds to estimators. + If kind == 'single', then `names` is a single string corresponding to the single estimator. name_details : list of str, str, or None, default=None - If kind == 'parallel', then `name_details` corresponds to `names`. + If kind != 'single', then `name_details` corresponds to `names`. If kind == 'single', then `name_details` is a single string corresponding to the single estimator. - `name_details` is not used when kind == 'single'. dash_wrapped : bool, default=True If true, wrapped HTML element will be wrapped with a dashed border. + Only active when kind != 'single'. 
""" def __init__(self, kind, estimators, *, names=None, name_details=None, dash_wrapped=True): @@ -48,6 +48,9 @@ def __init__(self, kind, estimators, *, names=None, name_details=None, self.names = names self.name_details = name_details + def _sk_repr_html(self): + return self + def _write_label_html(out, name, name_details, outer_class="sk-label-container", @@ -77,9 +80,7 @@ def _get_visual_block(estimator): with suppress(AttributeError): return estimator._sk_repr_html() - if isinstance(estimator, _VisualBlock): - return estimator - elif isinstance(estimator, str): + if isinstance(estimator, str): return _VisualBlock('single', estimator, names=estimator, name_details=estimator) elif estimator is None: @@ -90,6 +91,7 @@ def _get_visual_block(estimator): if hasattr(estimator, 'get_params'): estimators = [] for key, value in estimator.get_params().items(): + # Only look at the estimators in the first layer if '__' not in key and hasattr(value, 'get_params'): estimators.append(value) if len(estimators): @@ -100,26 +102,30 @@ def _get_visual_block(estimator): name_details=str(estimator)) -def _write_estimator_html(out, estimator, estimator_label, first_call=False): +def _write_estimator_html(out, estimator, estimator_label, + estimator_label_details, first_call=False): """Write estimator to html in serial, parallel, or by itself (single). """ - with config_context(print_changed_only=not first_call): + if first_call: est_block = _get_visual_block(estimator) + else: + # deeper calls will only show the changes + with config_context(print_changed_only=True): + est_block = _get_visual_block(estimator) if est_block.kind == 'serial': dashed_wrapped = first_call or est_block.dash_wrapped dash_cls = " sk-dashed-wrapped" if dashed_wrapped else "" out.write(f'

') - # write label of current if name is given if estimator_label: - with config_context(print_changed_only=True): - _write_label_html(out, estimator_label, str(estimator)) + _write_label_html(out, estimator_label, estimator_label_details) out.write('

') - est_infos = zip(est_block.estimators, est_block.names) - for est, name in est_infos: - _write_estimator_html(out, est, name) + est_infos = zip(est_block.estimators, est_block.names, + est_block.name_details) + for est, name, name_details in est_infos: + _write_estimator_html(out, est, name, name_details) out.write('

') # sk-serial sk-item elif est_block.kind == 'parallel': @@ -128,21 +134,17 @@ def _write_estimator_html(out, estimator, estimator_label, first_call=False): out.write(f'

') if estimator_label: - with config_context(print_changed_only=True): - _write_label_html(out, estimator_label, str(estimator)) + _write_label_html(out, estimator_label, estimator_label_details) out.write('

') est_infos = zip(est_block.estimators, est_block.names, est_block.name_details) - for est, name, name_details in est_infos: out.write('

') - # name is associated with the parallel element - if name: - _write_label_html(out, name, name_details) - out.write('

') - _write_estimator_html(out, est, '') - out.write('

') # sk-parallel-item sk-serial + # wrap element in a serial visualblock + serial_block = _VisualBlock('serial', [est], dash_wrapped=False) + _write_estimator_html(out, serial_block, name, name_details) + out.write('

') # sk-parallel-item out.write('

') # sk-parallel sk-item elif est_block.kind == 'single': @@ -311,7 +313,7 @@ def _estimator_repr_html(estimator): f'' f'

') _write_estimator_html(out, estimator, estimator.__class__.__name__, - first_call=True) + str(estimator), first_call=True) out.write('

') html_output = out.getvalue() diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 666b703f6c04f..f018f3c36ac9f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -643,8 +643,11 @@ def _get_name(name, est): # Is an estimator return f'{name}: {est.__class__.__name__}' names = [_get_name(name, est) for name, est in self.steps] + name_details = [str(est) for est in estimators] return _VisualBlock('serial', estimators, - names=names, dash_wrapped=False) + names=names, + name_details=name_details, + dash_wrapped=False) def _name_estimators(estimators): diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index c5dec08964d47..b040c40332fa5 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -3,6 +3,7 @@ import pytest +from sklearn import config_context from sklearn.linear_model import LogisticRegression from sklearn.neural_network import MLPClassifier from sklearn.impute import SimpleImputer @@ -13,6 +14,8 @@ from sklearn.compose import ColumnTransformer from sklearn.ensemble import VotingClassifier from sklearn.feature_selection import SelectPercentile +from sklearn.cluster import Birch +from sklearn.cluster import AgglomerativeClustering from sklearn.preprocessing import OneHotEncoder from sklearn.svm import LinearSVC from sklearn.svm import LinearSVR @@ -72,7 +75,7 @@ def test_get_visual_block_pipeline(): 'do_nothing: passthrough', 'do_nothing_more: passthrough', 'classifier: LogisticRegression'] - assert est_html_info.name_details is None + assert est_html_info.name_details == [str(est) for _, est in pipe.steps] def test_get_visual_block_feature_union(): @@ -148,11 +151,8 @@ def test_display_estimator_pipeline(): expected_strings = [ 'passthrough', - 'div class=\"sk-toggleable__content\">

SimpleImputer'
-      '(strategy=\'median\')',
-      'SimpleImputer(missing_values=\'empty\', strategy=\'constant\')'
-      '',
-      '(\'one-hot\', OneHotEncoder',
+      'div class=\"sk-toggleable__content\">SimpleImputer(strategy',
+      '(\'one-hot\',',
       'preprocessor: ColumnTransformer',
       '[\'a\', \'b\', \'c\', \'d\', \'e\']',
       'LogisticRegression(random_state=1)',
@@ -164,6 +164,8 @@ def test_display_estimator_pipeline():
     for expected_string in expected_strings:
         assert expected_string in html_output
 
+    assert str(pipe) in html_output
+
 
 def test_display_estimator_ovo_classifier():
     ovo = OneVsOneClassifier(LinearSVC())
@@ -181,8 +183,7 @@ def test_stacking_classsifer(final_estimator):
 
     html_output = _estimator_repr_html(clf)
 
-    assert "('mlp', MLPClassifier(alpha=0.001)" in html_output
-    assert "('tree', DecisionTreeClassifier()" in html_output
+    assert str(clf) in html_output
     if final_estimator is None:
         assert "LogisticRegression()" in html_output
     else:
@@ -196,9 +197,29 @@ def test_stacking_regressor(final_estimator):
 
     html_output = _estimator_repr_html(reg)
 
-    assert "('svr', LinearSVR()" in html_output
-    print(html_output)
+    assert str(reg.estimators[0][0]) in html_output
+    assert "LinearSVR" in html_output
     if final_estimator is None:
         assert "RidgeCV" in html_output
     else:
         assert final_estimator.__class__.__name__ in html_output
+
+
+def test_estimator_birch():
+    # birch uses another estimator
+    birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3))
+    html_output = _estimator_repr_html(birch)
+
+    # inner estimator only prints the changes
+    assert 'AgglomerativeClustering(n_clusters=3)' in html_output
+    assert 'Birch(' in html_output
+
+
+@pytest.mark.parametrize('print_changed_only', [True, False])
+def test_one_estimator_print_change_only(print_changed_only):
+    pca = PCA(n_components=10)
+
+    with config_context(print_changed_only=print_changed_only):
+        pca_repr = str(pca)
+        html_output = _estimator_repr_html(pca)
+        assert pca_repr in html_output

From 0a30cedfe03921ead76605c034cf2def5aeef12a Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 22 Mar 2020 15:03:54 -0400
Subject: [PATCH 47/81] STY Flake8

---
 sklearn/_display_estimator.py | 1 -
 sklearn/base.py               | 1 -
 2 files changed, 2 deletions(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index 1bef4f61de410..3abd7724975c4 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -1,5 +1,4 @@
 from sklearn._config import config_context
-from sklearn._config import get_config
 from contextlib import closing
 from contextlib import suppress
 from io import StringIO
diff --git a/sklearn/base.py b/sklearn/base.py
index 22bce1cd9d6f6..08b0fc820705d 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -21,7 +21,6 @@
 from .utils.validation import check_X_y
 from .utils.validation import check_array
 from ._display_estimator import _estimator_repr_html
-from ._display_estimator import _VisualBlock
 
 _DEFAULT_TAGS = {
     'non_deterministic': False,

From f74053752f6bbd6ac325c4625f67ffbee9390d4d Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Thu, 26 Mar 2020 19:41:00 -0400
Subject: [PATCH 48/81] CLN More refactoring

---
 sklearn/_display_estimator.py | 36 +++++++++++++----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index 3abd7724975c4..cedcd38d870f6 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -112,7 +112,7 @@ def _write_estimator_html(out, estimator, estimator_label,
         with config_context(print_changed_only=True):
             est_block = _get_visual_block(estimator)
 
-    if est_block.kind == 'serial':
+    if est_block.kind in ('serial', 'parallel'):
         dashed_wrapped = first_call or est_block.dash_wrapped
         dash_cls = " sk-dashed-wrapped" if dashed_wrapped else ""
         out.write(f'')
@@ -120,32 +120,22 @@ def _write_estimator_html(out, estimator, estimator_label,
         if estimator_label:
             _write_label_html(out, estimator_label, estimator_label_details)
 
-        out.write('')
+        kind = est_block.kind
+        out.write(f'')
         est_infos = zip(est_block.estimators, est_block.names,
                         est_block.name_details)
-        for est, name, name_details in est_infos:
-            _write_estimator_html(out, est, name, name_details)
-        out.write('')  # sk-serial sk-item
-
-    elif est_block.kind == 'parallel':
-        dashed_wrapped = first_call or est_block.dash_wrapped
-        dash_cls = " sk-dashed-wrapped" if dashed_wrapped else ""
-        out.write(f'')
 
-        if estimator_label:
-            _write_label_html(out, estimator_label, estimator_label_details)
-
-        out.write('')
-        est_infos = zip(est_block.estimators, est_block.names,
-                        est_block.name_details)
         for est, name, name_details in est_infos:
-            out.write('')
-            # wrap element in a serial visualblock
-            serial_block = _VisualBlock('serial', [est], dash_wrapped=False)
-            _write_estimator_html(out, serial_block, name, name_details)
-            out.write('')  # sk-parallel-item
-        out.write('')  # sk-parallel sk-item
-
+            if kind == 'serial':
+                _write_estimator_html(out, est, name, name_details)
+            else:  # parallel
+                out.write('')
+                # wrap element in a serial visualblock
+                serial_block = _VisualBlock('serial', [est], dash_wrapped=False)
+                _write_estimator_html(out, serial_block, name, name_details)
+                out.write('')  # sk-parallel-item
+
+        out.write('

') elif est_block.kind == 'single': _write_label_html(out, est_block.names, est_block.name_details, outer_class="sk-item", inner_class="sk-estimator", From f656d8bb274f5c7afb6bce69e63e836dfaac2905 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Thu, 26 Mar 2020 21:54:16 -0400 Subject: [PATCH 49/81] CLN Outputs repr in latex --- doc/sphinxext/display_est_repr_html.py | 36 ++++++++++++++++---------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py index b856d1045d654..4868a10e2fb3b 100644 --- a/doc/sphinxext/display_est_repr_html.py +++ b/doc/sphinxext/display_est_repr_html.py @@ -2,22 +2,22 @@ Primary used to display the html output of `_repr_html_` of estimators """ import sys -from docutils.parsers.rst import Directive +from sphinx.util.docutils import SphinxDirective from docutils import nodes from io import StringIO -class DisplayEstimatorRepr(Directive): +class DisplayEstimatorRepr(SphinxDirective): "Execute Python code and includes stdout as HTML" has_content = True required_arguments = 0 optional_arguments = 0 - def execute(self, code): + def execute(self, code, format_str): code_parts = code.split('\n') final_output = code_parts[-1] - code_parts[-1] = f'print({final_output}._repr_html_())' + code_parts[-1] = format_str.format(final_output) code = '\n'.join(code_parts) orig_stdout, orig_stderr = sys.stdout, sys.stderr @@ -32,23 +32,31 @@ def execute(self, code): def run(self): output = [] code = "\n".join(self.content) - code_results = self.execute(code) + html_repr = self.execute(code, format_str='print({}._repr_html_())') input_code = nodes.literal_block(code, code) input_code['language'] = 'python' output.append(input_code) - html_node = nodes.raw('', code_results, format='html') + html_node = nodes.raw('', html_repr, format='html') output.append(html_node) - code_results_latex = r""" - \begin{sphinxadmonition}{note}{Note:} - The HTML output of this code snippet 
can only been seen on the HTML - version of the documentation - \end{sphinxadmonition} - """ - latex_node = nodes.raw('', code_results_latex, format='latex') - output.append(latex_node) + if self.env.app.builder.name == 'latex': + code_results_latex = r""" + \begin{sphinxadmonition}{note}{Note:} + The HTML output of this code snippet can only been seen on the HTML + version of the documentation. The following is a string + representation. + \end{sphinxadmonition} + """ + latex_node = nodes.raw('', code_results_latex, format='latex') + output.append(latex_node) + + str_repr = self.execute(code, format_str='print(repr({}))') + str_repr_node = nodes.literal_block(str_repr, str_repr) + str_repr_node['language'] = 'python' + output.append(str_repr_node) + return output From 856ce5d62cd6bcc4e46c211ccc6ae2c7237e9cda Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 27 Mar 2020 11:50:22 -0400 Subject: [PATCH 50/81] CLN Adds more tests --- sklearn/tests/test_display_estimator.py | 98 ++++++++++++++++++------- 1 file changed, 70 insertions(+), 28 deletions(-) diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py index b040c40332fa5..9182a1e219044 100644 --- a/sklearn/tests/test_display_estimator.py +++ b/sklearn/tests/test_display_estimator.py @@ -1,5 +1,6 @@ from contextlib import closing from io import StringIO +from itertools import chain import pytest @@ -23,6 +24,8 @@ from sklearn.multiclass import OneVsOneClassifier from sklearn.ensemble import StackingClassifier from sklearn.ensemble import StackingRegressor +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RationalQuadratic from sklearn._display_estimator import _write_label_html from sklearn._display_estimator import _get_visual_block from sklearn._display_estimator import _estimator_repr_html @@ -30,6 +33,7 @@ @pytest.mark.parametrize("checked", [True, False]) def test_write_label_html(checked): + # Test 
checking logic and labeling name = "LogisticRegression" tool_tip = "hello-world" @@ -45,6 +49,7 @@ def test_write_label_html(checked): @pytest.mark.parametrize('est', ['passthrough', 'drop', None]) def test_get_visual_block_single_str_none(est): + # Test estimators that are represnted by strings est_html_info = _get_visual_block(est) assert est_html_info.kind == 'single' assert est_html_info.estimators == est @@ -125,7 +130,7 @@ def test_display_estimator_pipeline(): cat_trans = Pipeline(steps=[ ('imputer', SimpleImputer(strategy='constant', missing_values='empty')), - ('one-hot', OneHotEncoder()) + ('one-hot', OneHotEncoder(drop='first')) ]) preprocess = ColumnTransformer([ @@ -149,29 +154,38 @@ def test_display_estimator_pipeline(): ]) html_output = _estimator_repr_html(pipe) - expected_strings = [ - 'passthrough', - 'div class=\"sk-toggleable__content\">

SimpleImputer(strategy',
-      '(\'one-hot\',',
-      'preprocessor: ColumnTransformer',
-      '[\'a\', \'b\', \'c\', \'d\', \'e\']',
-      'LogisticRegression(random_state=1)',
-      'SelectPercentile()',
-      '>TruncatedSVD',
-      'TruncatedSVD(n_components=3)',
-    ]
-
-    for expected_string in expected_strings:
-        assert expected_string in html_output
-
+    # top level estimators show estimator with changes
     assert str(pipe) in html_output
+    for _, est in pipe.steps:
+        assert (f""
+                f"{str(est)}") in html_output
 
+    # all other estimators are shown with only its changes
+    with config_context(print_changed_only=True):
+        assert str(num_trans['pass']) in html_output
+        assert 'passthrough' in html_output
+        assert str(num_trans['imputer']) in html_output
 
-def test_display_estimator_ovo_classifier():
-    ovo = OneVsOneClassifier(LinearSVC())
-    html_output = _estimator_repr_html(ovo)
-    assert "pre>OneVsOneClassifier(estimator=LinearSVC" in html_output
-    assert "LinearSVC" in html_output
+        for _, _, cols in preprocess.transformers:
+            assert f"{cols}" in html_output
+
+        # feature union
+        for name, _ in feat_u.transformer_list:
+            assert f"{name}" in html_output
+
+        pca = feat_u.transformer_list[0][1]
+        assert f"{str(pca)}" in html_output
+
+        tsvd = feat_u.transformer_list[1][1]
+        first = tsvd['first']
+        select = tsvd['select']
+        assert f"{str(first)}" in html_output
+        assert f"{str(select)}" in html_output
+
+        # voting classifier
+        for name, est in clf.estimators:
+            assert f"{name}" in html_output
+            assert f"{str(est)}" in html_output
 
 
 @pytest.mark.parametrize("final_estimator", [None, LinearSVC()])
@@ -189,12 +203,10 @@ def test_stacking_classsifer(final_estimator):
     else:
         assert final_estimator.__class__.__name__ in html_output
 
-
 @pytest.mark.parametrize("final_estimator", [None, LinearSVR()])
 def test_stacking_regressor(final_estimator):
     reg = StackingRegressor(
         estimators=[('svr', LinearSVR())], final_estimator=final_estimator)
-
     html_output = _estimator_repr_html(reg)
 
     assert str(reg.estimators[0][0]) in html_output
@@ -205,14 +217,44 @@ def test_stacking_regressor(final_estimator):
         assert final_estimator.__class__.__name__ in html_output
 
 
-def test_estimator_birch():
-    # birch uses another estimator
+def test_birch_duck_typing_meta():
+    # Test duck typing meta estimators with Birch
     birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3))
     html_output = _estimator_repr_html(birch)
 
-    # inner estimator only prints the changes
-    assert 'AgglomerativeClustering(n_clusters=3)' in html_output
-    assert 'Birch(' in html_output
+    # inner estimator shows only the changes
+    with config_context(print_changed_only=True):
+        assert f"{str(birch.n_clusters)}" in html_output
+        assert "AgglomerativeClustering" in html_output
+
+    # outer estimator contains all changes
+    assert f"{str(birch)}" in html_output
+
+
+def test_ovo_classifier_duck_typing_meta():
+    # Test duck typing metaestimators with OVO
+    ovo = OneVsOneClassifier(LinearSVC(penalty='l1'))
+    html_output = _estimator_repr_html(ovo)
+
+    # inner estimator shows only the changes
+    with config_context(print_changed_only=True):
+        assert f"{str(ovo.estimator)}" in html_output
+        assert "LinearSVC" in html_output
+
+    # outer estimator
+    assert f"{str(ovo)}" in html_output
+
+
+def test_duck_typing_nested_estimator():
+    # Test duck typing metaestimators with GP
+    kernel = 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1)
+    gp = GaussianProcessRegressor(kernel=kernel)
+    html_output = _estimator_repr_html(gp)
+
+    with config_context(print_changed_only=True):
+        assert f"{str(gp.kernel)}" in html_output
+
+    assert f"{str(gp)}" in html_output
 
 
 @pytest.mark.parametrize('print_changed_only', [True, False])

From b5c26b0ea05b315111fe6f16cc28b6e2ea226c1d Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Fri, 27 Mar 2020 11:56:38 -0400
Subject: [PATCH 51/81] STY Update

---
 sklearn/_display_estimator.py           | 3 ++-
 sklearn/tests/test_display_estimator.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sklearn/_display_estimator.py b/sklearn/_display_estimator.py
index cedcd38d870f6..a930f00916951 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/_display_estimator.py
@@ -131,7 +131,8 @@ def _write_estimator_html(out, estimator, estimator_label,
             else:  # parallel
                 out.write('')
                 # wrap element in a serial visualblock
-                serial_block = _VisualBlock('serial', [est], dash_wrapped=False)
+                serial_block = _VisualBlock('serial', [est],
+                                            dash_wrapped=False)
                 _write_estimator_html(out, serial_block, name, name_details)
                 out.write('')  # sk-parallel-item
 
diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py
index 9182a1e219044..da4cb929fc1c1 100644
--- a/sklearn/tests/test_display_estimator.py
+++ b/sklearn/tests/test_display_estimator.py
@@ -1,6 +1,5 @@
 from contextlib import closing
 from io import StringIO
-from itertools import chain
 
 import pytest
 
@@ -203,6 +202,7 @@ def test_stacking_classsifer(final_estimator):
     else:
         assert final_estimator.__class__.__name__ in html_output
 
+
 @pytest.mark.parametrize("final_estimator", [None, LinearSVR()])
 def test_stacking_regressor(final_estimator):
     reg = StackingRegressor(

From f56060c73a4f2f5940f5da459f2e161a6c69bd17 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 30 Mar 2020 19:46:14 -0400
Subject: [PATCH 52/81] TST Fix

---
 sklearn/tests/test_display_estimator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/tests/test_display_estimator.py
index da4cb929fc1c1..bfd030dcbb604 100644
--- a/sklearn/tests/test_display_estimator.py
+++ b/sklearn/tests/test_display_estimator.py
@@ -247,12 +247,12 @@ def test_ovo_classifier_duck_typing_meta():
 
 def test_duck_typing_nested_estimator():
     # Test duck typing metaestimators with GP
-    kernel = 1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1)
+    kernel = RationalQuadratic(length_scale=1.0, alpha=0.1)
     gp = GaussianProcessRegressor(kernel=kernel)
     html_output = _estimator_repr_html(gp)
 
     with config_context(print_changed_only=True):
-        assert f"{str(gp.kernel)}" in html_output
+        assert f"{str(kernel)}" in html_output
 
     assert f"{str(gp)}" in html_output
 

From 52a640a529d304e7bd3f19a24f83530e4728eaa0 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 13 Apr 2020 14:19:30 -0400
Subject: [PATCH 53/81] CLN Move to utils

---
 sklearn/base.py                                     |  2 +-
 sklearn/compose/_column_transformer.py              |  2 +-
 sklearn/ensemble/_stacking.py                       |  2 +-
 sklearn/ensemble/_voting.py                         |  2 +-
 sklearn/pipeline.py                                 |  2 +-
 sklearn/utils/__init__.py                           |  3 ++-
 sklearn/{ => utils}/_display_estimator.py           | 11 ++++++-----
 sklearn/{ => utils}/tests/test_display_estimator.py |  6 +++---
 8 files changed, 16 insertions(+), 14 deletions(-)
 rename sklearn/{ => utils}/_display_estimator.py (97%)
 rename sklearn/{ => utils}/tests/test_display_estimator.py (98%)

diff --git a/sklearn/base.py b/sklearn/base.py
index 08b0fc820705d..f4bf765afc5e4 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -20,7 +20,7 @@
 from .utils import _IS_32BIT
 from .utils.validation import check_X_y
 from .utils.validation import check_array
-from ._display_estimator import _estimator_repr_html
+from .utils._display_estimator import _estimator_repr_html
 
 _DEFAULT_TAGS = {
     'non_deterministic': False,
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index a501eaa145475..e1504cd91ff8f 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -15,7 +15,7 @@
 from joblib import Parallel, delayed
 
 from ..base import clone, TransformerMixin
-from .._display_estimator import _VisualBlock
+from ..utils._display_estimator import _VisualBlock
 from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
 from ..preprocessing import FunctionTransformer
 from ..utils import Bunch
diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
index 479dd4cc55fe8..520a52634e1c4 100644
--- a/sklearn/ensemble/_stacking.py
+++ b/sklearn/ensemble/_stacking.py
@@ -13,7 +13,7 @@
 from ..base import clone
 from ..base import ClassifierMixin, RegressorMixin, TransformerMixin
 from ..base import is_classifier, is_regressor
-from .._display_estimator import _VisualBlock
+from ..utils._display_estimator import _VisualBlock
 
 from ._base import _fit_single_estimator
 from ._base import _BaseHeterogeneousEnsemble
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index 23c8e177bd3a0..6a7f465c5f77d 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -32,7 +32,7 @@
 from ..utils.validation import column_or_1d
 from ..utils.validation import _deprecate_positional_args
 from ..exceptions import NotFittedError
-from .._display_estimator import _VisualBlock
+from ..utils._display_estimator import _VisualBlock
 
 
 class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 0bc0087ace115..beb99a61fe44d 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -18,7 +18,7 @@
 from joblib import Parallel, delayed
 
 from .base import clone, TransformerMixin
-from ._display_estimator import _VisualBlock
+from .utils._display_estimator import _VisualBlock
 from .utils.metaestimators import if_delegate_has_method
 from .utils import Bunch, _print_elapsed_time
 from .utils.validation import check_memory
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index aac6e292a198a..559fb39a5406d 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -30,6 +30,7 @@
                          check_random_state, column_or_1d, check_array,
                          check_consistent_length, check_X_y, indexable,
                          check_symmetric, check_scalar)
+from ._display_estimator import estimator_repr_html
 from .. import get_config
 
 
@@ -51,7 +52,7 @@
            "check_symmetric", "indices_to_mask", "deprecated",
            "parallel_backend", "register_parallel_backend",
            "resample", "shuffle", "check_matplotlib_support", "all_estimators",
-           "DataConversionWarning"
+           "DataConversionWarning", "estimator_repr_html"
            ]
 
 IS_PYPY = platform.python_implementation() == 'PyPy'
diff --git a/sklearn/_display_estimator.py b/sklearn/utils/_display_estimator.py
similarity index 97%
rename from sklearn/_display_estimator.py
rename to sklearn/utils/_display_estimator.py
index a930f00916951..44a2251566f4d 100644
--- a/sklearn/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -283,8 +283,10 @@ def _write_estimator_html(out, estimator, estimator_label,
 """.replace('  ', '').replace('\n', '')  # noqa
 
 
-def _estimator_repr_html(estimator):
-    """Build a HTML representation of an estimator
+def estimator_repr_html(estimator):
+    """Build a HTML representation of an estimator.
+
+    Read more in the :ref:`User Guide `.
 
     Parameters
     ----------
@@ -293,9 +295,8 @@ def _estimator_repr_html(estimator):
 
     Returns
     -------
-    html: str or iPython HTML object
-        HTML representation of estimator. When called in jupyter notebook or
-        lab, a iPython HTML object is returned.
+    html: str
+        HTML representation of estimator.
     """
     with closing(StringIO()) as out:
         out.write(f''
diff --git a/sklearn/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py
similarity index 98%
rename from sklearn/tests/test_display_estimator.py
rename to sklearn/utils/tests/test_display_estimator.py
index bfd030dcbb604..b4305cde23537 100644
--- a/sklearn/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_display_estimator.py
@@ -25,9 +25,9 @@
 from sklearn.ensemble import StackingRegressor
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels import RationalQuadratic
-from sklearn._display_estimator import _write_label_html
-from sklearn._display_estimator import _get_visual_block
-from sklearn._display_estimator import _estimator_repr_html
+from sklearn.utils._display_estimator import _write_label_html
+from sklearn.utils._display_estimator import _get_visual_block
+from sklearn.utils._display_estimator import _estimator_repr_html
 
 
 @pytest.mark.parametrize("checked", [True, False])

From 66ebce904c6581bfd148de9c8c28aa9a4e453376 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 13 Apr 2020 14:44:56 -0400
Subject: [PATCH 54/81] DOC Adds html representation in another example

---
 examples/compose/plot_column_transformer.py | 15 ++++++++++++++-
 sklearn/base.py                             |  9 +++++++++
 sklearn/utils/__init__.py                   |  1 -
 sklearn/utils/_display_estimator.py         |  2 +-
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py
index 0cfc9f5de0054..aff1876cfdf3e 100644
--- a/examples/compose/plot_column_transformer.py
+++ b/examples/compose/plot_column_transformer.py
@@ -115,7 +115,20 @@ def transform(self, posts):
     ('svc', LinearSVC(dual=False)),
 ], verbose=True)
 
-# limit the list of categories to make running this example faster.
+###############################################################################
+# HTML representation of ``Pipeline``
+###############################################################################
+# When the ``Pipeline`` is printed out in a jupyter notebook an HTML
+# representation of the estimator is displayed as follows:
+pipeline
+
+###############################################################################
+# Classification Report
+###############################################################################
+# Finally, the pipeline is trained and a classification report is generated
+# on a testing subset. We limit the list of categories to make running this
+# example faster.
+
 categories = ['alt.atheism', 'talk.religion.misc']
 X_train, y_train = fetch_20newsgroups(random_state=1,
                                       subset='train',
diff --git a/sklearn/base.py b/sklearn/base.py
index f4bf765afc5e4..35b6402494740 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -413,6 +413,15 @@ def _validate_data(self, X, y=None, reset=True, **check_params):
         return out
 
     def _repr_html_(self):
+        """Build a HTML representation of an estimator.
+
+        Read more in the :ref:`User Guide `.
+
+        Returns
+        -------
+        html: str
+            HTML representation of estimator.
+        """
         return _estimator_repr_html(self)
 
 
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 559fb39a5406d..7ff6df15f8fd8 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -30,7 +30,6 @@
                          check_random_state, column_or_1d, check_array,
                          check_consistent_length, check_X_y, indexable,
                          check_symmetric, check_scalar)
-from ._display_estimator import estimator_repr_html
 from .. import get_config
 
 
diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index 44a2251566f4d..b61558a9dc90a 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -283,7 +283,7 @@ def _write_estimator_html(out, estimator, estimator_label,
 """.replace('  ', '').replace('\n', '')  # noqa
 
 
-def estimator_repr_html(estimator):
+def _estimator_repr_html(estimator):
     """Build a HTML representation of an estimator.
 
     Read more in the :ref:`User Guide `.

From 24029d3dd3b93da7a494ba8c75f069c9f05d7949 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Tue, 14 Apr 2020 13:19:51 -0400
Subject: [PATCH 55/81] DOC Adds reference to _repr_html_

---
 doc/modules/compose.rst                | 6 ++++++
 doc/sphinxext/display_est_repr_html.py | 7 ++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 39b3c4abcc177..44c542ecc0652 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -564,6 +564,12 @@ many estimators. An example of the HTML output can be seen below.
                           ('classifier', LogisticRegression())])
    clf
 
+As an alternative, the HTML can be written to a file using the `_repr_html_`
+method::
+
+        with open('my_estimator.html', 'w') as f:
+            f.write(clf._repr_html_())
+
 .. topic:: Examples:
 
  * :ref:`sphx_glr_auto_examples_compose_plot_column_transformer.py`
diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py
index 4868a10e2fb3b..710c334d99e24 100644
--- a/doc/sphinxext/display_est_repr_html.py
+++ b/doc/sphinxext/display_est_repr_html.py
@@ -38,6 +38,7 @@ def run(self):
         input_code['language'] = 'python'
         output.append(input_code)
 
+        html_repr = f"{html_repr}"
         html_node = nodes.raw('', html_repr, format='html')
         output.append(html_node)
 
@@ -62,8 +63,4 @@ def run(self):
 
 def setup(app):
     app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr)
-
-    return {
-        'parallel_read_safe': True,
-        'parallel_write_safe': True,
-    }
+    return {'parallel_read_safe': True, 'parallel_write_safe': True}

From adc977be2c54594110395eacde786d0a8ff4b8b7 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Tue, 14 Apr 2020 13:44:05 -0400
Subject: [PATCH 56/81] FIx bug

---
 sklearn/utils/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 7ff6df15f8fd8..aac6e292a198a 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -51,7 +51,7 @@
            "check_symmetric", "indices_to_mask", "deprecated",
            "parallel_backend", "register_parallel_backend",
            "resample", "shuffle", "check_matplotlib_support", "all_estimators",
-           "DataConversionWarning", "estimator_repr_html"
+           "DataConversionWarning"
            ]
 
 IS_PYPY = platform.python_implementation() == 'PyPy'

From 9df7573099ffb38a8ae7d81e4a66f25fa81c5b00 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 19 Apr 2020 11:04:31 -0400
Subject: [PATCH 57/81] CLN Rename secret protocol

---
 doc/modules/compose.rst                |  4 ++--
 sklearn/compose/_column_transformer.py |  2 +-
 sklearn/ensemble/_stacking.py          | 10 +++++-----
 sklearn/ensemble/_voting.py            |  2 +-
 sklearn/pipeline.py                    |  4 ++--
 sklearn/utils/_display_estimator.py    |  4 ++--
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 44c542ecc0652..db4f05801038d 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -567,8 +567,8 @@ many estimators. An example of the HTML output can be seen below.
 As an alternative, the HTML can be written to a file using the `_repr_html_`
 method::
 
-        with open('my_estimator.html', 'w') as f:
-            f.write(clf._repr_html_())
+    with open('my_estimator.html', 'w') as f:
+        f.write(clf._repr_html_())
 
 .. topic:: Examples:
 
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index e1504cd91ff8f..93fcba1d3c0b8 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -631,7 +631,7 @@ def _hstack(self, Xs):
             Xs = [f.toarray() if sparse.issparse(f) else f for f in Xs]
             return np.hstack(Xs)
 
-    def _sk_repr_html(self):
+    def _sk_visual_block_(self):
         names, transformers, name_details = zip(*self.transformers)
         return _VisualBlock('parallel', transformers,
                             names=names, name_details=name_details)
diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
index 520a52634e1c4..ac2e79638096e 100644
--- a/sklearn/ensemble/_stacking.py
+++ b/sklearn/ensemble/_stacking.py
@@ -234,7 +234,7 @@ def predict(self, X, **predict_params):
             self.transform(X), **predict_params
         )
 
-    def _sk_repr_html(self, final_estimator):
+    def _sk_visual_block_(self, final_estimator):
         names, estimators = zip(*self.estimators)
         parallel = _VisualBlock('parallel', estimators, names=names,
                                 dash_wrapped=False)
@@ -505,12 +505,12 @@ def transform(self, X):
         """
         return self._transform(X)
 
-    def _sk_repr_html(self):
+    def _sk_visual_block_(self):
         if self.final_estimator is None:
             final_estimator = LogisticRegression()
         else:
             final_estimator = self.final_estimator
-        return super()._sk_repr_html(final_estimator)
+        return super()._sk_visual_block_(final_estimator)
 
 
 class StackingRegressor(RegressorMixin, _BaseStacking):
@@ -682,9 +682,9 @@ def transform(self, X):
         """
         return self._transform(X)
 
-    def _sk_repr_html(self):
+    def _sk_visual_block_(self):
         if self.final_estimator is None:
             final_estimator = RidgeCV()
         else:
             final_estimator = self.final_estimator
-        return super()._sk_repr_html(final_estimator)
+        return super()._sk_visual_block_(final_estimator)
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index 6a7f465c5f77d..31f7228d34091 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -105,7 +105,7 @@ def n_features_in_(self):
 
         return self.estimators_[0].n_features_in_
 
-    def _sk_repr_html(self):
+    def _sk_visual_block_(self):
         names, estimators = zip(*self.estimators)
         return _VisualBlock('parallel', estimators, names=names)
 
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index beb99a61fe44d..2ee07edb7f079 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -620,7 +620,7 @@ def n_features_in_(self):
         # delegate to first step (which will call _check_is_fitted)
         return self.steps[0][1].n_features_in_
 
-    def _sk_repr_html(self):
+    def _sk_visual_block_(self):
         _, estimators = zip(*self.steps)
 
         def _get_name(name, est):
@@ -1012,7 +1012,7 @@ def n_features_in_(self):
         # X is passed to all transformers so we just delegate to the first one
         return self.transformer_list[0][1].n_features_in_
 
-    def _sk_repr_html(self):
+    def _sk_visual_block_(self):
         names, transformers = zip(*self.transformer_list)
         return _VisualBlock('parallel', transformers, names=names)
 
diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index b61558a9dc90a..3ab58ab44c5aa 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -47,7 +47,7 @@ def __init__(self, kind, estimators, *, names=None, name_details=None,
         self.names = names
         self.name_details = name_details
 
-    def _sk_repr_html(self):
+    def _sk_visual_block_(self):
         return self
 
 
@@ -77,7 +77,7 @@ def _get_visual_block(estimator):
     """Generate information about how to display an estimator.
     """
     with suppress(AttributeError):
-        return estimator._sk_repr_html()
+        return estimator._sk_visual_block_()
 
     if isinstance(estimator, str):
         return _VisualBlock('single', estimator,

From ef025749c0eb925a0209e8a064c683fcde733c51 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Wed, 22 Apr 2020 19:56:13 -0400
Subject: [PATCH 58/81] CLN Address comments

---
 doc/sphinxext/display_est_repr_html.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py
index 710c334d99e24..4dc87e95a6bf2 100644
--- a/doc/sphinxext/display_est_repr_html.py
+++ b/doc/sphinxext/display_est_repr_html.py
@@ -1,14 +1,17 @@
 """
-Primary used to display the html output of `_repr_html_` of estimators
+Primarily used to display the html output of `_repr_html_` of estimators
 """
-import sys
 from sphinx.util.docutils import SphinxDirective
+from contextlib import redirect_stderr, redirect_stdout
 from docutils import nodes
 from io import StringIO
 
 
 class DisplayEstimatorRepr(SphinxDirective):
-    "Execute Python code and includes stdout as HTML"
+    """Execute Python code and run `_repr_html_` on the last element of the
+    code block. The last element in the code block should be an estimator
+    with support for `_repr_html_`.
+    """
 
     has_content = True
     required_arguments = 0
@@ -19,13 +22,10 @@ def execute(self, code, format_str):
         final_output = code_parts[-1]
         code_parts[-1] = format_str.format(final_output)
         code = '\n'.join(code_parts)
-        orig_stdout, orig_stderr = sys.stdout, sys.stderr
 
         output, err = StringIO(), StringIO()
-
-        sys.stdout, sys.stderr = output, err
-        exec(code)
-        sys.stdout, sys.stderr = orig_stdout, orig_stderr
+        with redirect_stdout(output), redirect_stderr(err):
+            exec(code)
 
         return f"{output.getvalue()}{err.getvalue()}"
 
@@ -34,7 +34,7 @@ def run(self):
         code = "\n".join(self.content)
         html_repr = self.execute(code, format_str='print({}._repr_html_())')
 
-        input_code = nodes.literal_block(code, code)
+        input_code = nodes.literal_block(rawsource=code, text=code)
         input_code['language'] = 'python'
         output.append(input_code)
 
@@ -54,7 +54,8 @@ def run(self):
             output.append(latex_node)
 
             str_repr = self.execute(code, format_str='print(repr({}))')
-            str_repr_node = nodes.literal_block(str_repr, str_repr)
+            str_repr_node = nodes.literal_block(rawsource=str_repr,
+                                                text=str_repr)
             str_repr_node['language'] = 'python'
             output.append(str_repr_node)
 

From c1b451c4bcab68f6d91d9002d2bd2a27543ac150 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 26 Apr 2020 21:43:11 -0400
Subject: [PATCH 59/81] CLN Address comments

---
 doc/modules/classes.rst                       |  1 +
 doc/modules/compose.rst                       | 10 ++-
 doc/sphinxext/display_est_repr_html.py        | 10 ++-
 examples/compose/plot_column_transformer.py   |  7 --
 .../plot_column_transformer_mixed_types.py    |  2 +
 sklearn/_config.py                            | 17 ++++-
 sklearn/base.py                               | 15 ++--
 sklearn/tests/test_config.py                  |  8 +-
 sklearn/utils/__init__.py                     |  3 +-
 sklearn/utils/_display_estimator.py           |  9 +--
 sklearn/utils/tests/test_display_estimator.py | 74 +++++++++----------
 11 files changed, 83 insertions(+), 73 deletions(-)

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 3d9924638b69b..f9904ad56b100 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -1569,6 +1569,7 @@ Plotting
    utils.deprecated
    utils.estimator_checks.check_estimator
    utils.estimator_checks.parametrize_with_checks
+   utils.estimator_repr_html
    utils.extmath.safe_sparse_dot
    utils.extmath.randomized_range_finder
    utils.extmath.randomized_svd
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index db4f05801038d..78f7dc2f63c29 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -539,6 +539,7 @@ many estimators. An example of the HTML output can be seen below.
 
 .. display_estimator_repr_html::
 
+   from sklearn import config_context
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.impute import SimpleImputer
@@ -552,7 +553,7 @@ many estimators. An example of the HTML output can be seen below.
 
    categorical_features = ['embarked', 'sex', 'pclass']
    categorical_transformer = Pipeline(steps=[
-       ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
+    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))])
 
    preprocessor = ColumnTransformer(
@@ -562,13 +563,14 @@ many estimators. An example of the HTML output can be seen below.
 
    clf = Pipeline(steps=[('preprocessor', preprocessor),
                           ('classifier', LogisticRegression())])
-   clf
+   with config_context(repr_html=True):
+       clf
 
 As an alternative, the HTML can be written to a file using the `_repr_html_`
 method::
 
-    with open('my_estimator.html', 'w') as f:
-        f.write(clf._repr_html_())
+   with config_context(repr_html=True), open('my_estimator.html', 'w') as f:
+       f.write(clf._repr_html_())
 
 .. topic:: Examples:
 
diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py
index 4dc87e95a6bf2..676b87c284b6d 100644
--- a/doc/sphinxext/display_est_repr_html.py
+++ b/doc/sphinxext/display_est_repr_html.py
@@ -20,7 +20,9 @@ class DisplayEstimatorRepr(SphinxDirective):
     def execute(self, code, format_str):
         code_parts = code.split('\n')
         final_output = code_parts[-1]
-        code_parts[-1] = format_str.format(final_output)
+        final_est = final_output.lstrip(' ')
+        n_whitespace = len(final_output) - len(final_est)
+        code_parts[-1] = " " * n_whitespace + format_str.format(final_est)
         code = '\n'.join(code_parts)
 
         output, err = StringIO(), StringIO()
@@ -32,14 +34,14 @@ def execute(self, code, format_str):
     def run(self):
         output = []
         code = "\n".join(self.content)
-        html_repr = self.execute(code, format_str='print({}._repr_html_())')
+        repr_html = self.execute(code, format_str='print({}._repr_html_())')
 
         input_code = nodes.literal_block(rawsource=code, text=code)
         input_code['language'] = 'python'
         output.append(input_code)
 
-        html_repr = f"{html_repr}"
-        html_node = nodes.raw('', html_repr, format='html')
+        repr_html = f"{repr_html}"
+        html_node = nodes.raw('', repr_html, format='html')
         output.append(html_node)
 
         if self.env.app.builder.name == 'latex':
diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py
index aff1876cfdf3e..09877114124ac 100644
--- a/examples/compose/plot_column_transformer.py
+++ b/examples/compose/plot_column_transformer.py
@@ -115,13 +115,6 @@ def transform(self, posts):
     ('svc', LinearSVC(dual=False)),
 ], verbose=True)
 
-###############################################################################
-# HTML representation of ``Pipeline``
-###############################################################################
-# When the ``Pipeline`` is printed out in a jupyter notebook an HTML
-# representation of the estimator is displayed as follows:
-pipeline
-
 ###############################################################################
 # Classification Report
 ###############################################################################
diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 920ef37bfb333..6c32d394aaa78 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -27,6 +27,7 @@
 
 import numpy as np
 
+from sklearn import set_config
 from sklearn.compose import ColumnTransformer
 from sklearn.datasets import fetch_openml
 from sklearn.pipeline import Pipeline
@@ -92,6 +93,7 @@
 ###############################################################################
 # When the ``Pipeline`` is printed out in a jupyter notebook an HTML
 # representation of the estimator is displayed as follows:
+set_config(repr_html=True)
 clf
 
 ###############################################################################
diff --git a/sklearn/_config.py b/sklearn/_config.py
index c7f3934ee1cb3..4b81bddc0e517 100644
--- a/sklearn/_config.py
+++ b/sklearn/_config.py
@@ -7,6 +7,7 @@
     'assume_finite': bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)),
     'working_memory': int(os.environ.get('SKLEARN_WORKING_MEMORY', 1024)),
     'print_changed_only': False,
+    'repr_html': False,
 }
 
 
@@ -27,7 +28,7 @@ def get_config():
 
 
 def set_config(assume_finite=None, working_memory=None,
-               print_changed_only=None):
+               print_changed_only=None, repr_html=None):
     """Set global scikit-learn configuration
 
     .. versionadded:: 0.19
@@ -59,6 +60,12 @@ def set_config(assume_finite=None, working_memory=None,
 
         .. versionadded:: 0.21
 
+    repr_html : bool, optional
+        If True, estimators will be displayed with HTML in a jupyter lab or
+        notebook context.
+
+        .. versionadded:: 0.23
+
     See Also
     --------
     config_context: Context manager for global scikit-learn configuration
@@ -70,6 +77,8 @@ def set_config(assume_finite=None, working_memory=None,
         _global_config['working_memory'] = working_memory
     if print_changed_only is not None:
         _global_config['print_changed_only'] = print_changed_only
+    if repr_html is not None:
+        _global_config['repr_html'] = repr_html
 
 
 @contextmanager
@@ -97,6 +106,12 @@ def config_context(**new_config):
         behaviour would be to print 'SVC(C=1.0, cache_size=200, ...)' with
         all the non-changed parameters.
 
+    repr_html : bool, optional
+        If True, estimators will be displayed with HTML in a jupyter lab or
+        notebook context.
+
+        .. versionadded:: 0.23
+
     Notes
     -----
     All settings, not just those presently modified, will be returned to
diff --git a/sklearn/base.py b/sklearn/base.py
index 4db7ff4ae8715..fd2d5b9bfe110 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -17,10 +17,11 @@
 import numpy as np
 
 from . import __version__
+from ._config import get_config
 from .utils import _IS_32BIT
 from .utils.validation import check_X_y
 from .utils.validation import check_array
-from .utils._display_estimator import _estimator_repr_html
+from .utils._display_estimator import estimator_repr_html
 from .utils.validation import _deprecate_positional_args
 
 _DEFAULT_TAGS = {
@@ -437,16 +438,20 @@ def _validate_data(self, X, y=None, reset=True,
         return out
 
     def _repr_html_(self):
-        """Build a HTML representation of an estimator.
+        """HTML or string representation of an estimator depending on
+        global configuration flag `repr_html`.
 
         Read more in the :ref:`User Guide `.
 
         Returns
         -------
-        html: str
-            HTML representation of estimator.
+        repr: str
+            HTML or string representation of estimator.
         """
-        return _estimator_repr_html(self)
+        repr_html = get_config()["repr_html"]
+        if repr_html:
+            return estimator_repr_htmlself)
+        return repr(self)
 
 
 class ClassifierMixin:
diff --git a/sklearn/tests/test_config.py b/sklearn/tests/test_config.py
index dfa944110ad7a..983dbb0277b8b 100644
--- a/sklearn/tests/test_config.py
+++ b/sklearn/tests/test_config.py
@@ -4,7 +4,7 @@
 
 def test_config_context():
     assert get_config() == {'assume_finite': False, 'working_memory': 1024,
-                            'print_changed_only': False}
+                            'print_changed_only': False, 'repr_html': False}
 
     # Not using as a context manager affects nothing
     config_context(assume_finite=True)
@@ -12,7 +12,8 @@ def test_config_context():
 
     with config_context(assume_finite=True):
         assert get_config() == {'assume_finite': True, 'working_memory': 1024,
-                                'print_changed_only': False}
+                                'print_changed_only': False,
+                                'repr_html': False}
     assert get_config()['assume_finite'] is False
 
     with config_context(assume_finite=True):
@@ -37,7 +38,8 @@ def test_config_context():
         assert get_config()['assume_finite'] is True
 
     assert get_config() == {'assume_finite': False, 'working_memory': 1024,
-                            'print_changed_only': False}
+                            'print_changed_only': False,
+                            'repr_html': False}
 
     # No positional arguments
     assert_raises(TypeError, config_context, True)
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index aac6e292a198a..acffddf7ccf56 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -25,6 +25,7 @@
 from ..exceptions import DataConversionWarning
 from .deprecation import deprecated
 from .fixes import np_version
+from ._display_estimator import estimator_repr_html
 from .validation import (as_float_array,
                          assert_all_finite,
                          check_random_state, column_or_1d, check_array,
@@ -51,7 +52,7 @@
            "check_symmetric", "indices_to_mask", "deprecated",
            "parallel_backend", "register_parallel_backend",
            "resample", "shuffle", "check_matplotlib_support", "all_estimators",
-           "DataConversionWarning"
+           "DataConversionWarning", "estimator_repr_html"
            ]
 
 IS_PYPY = platform.python_implementation() == 'PyPy'
diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index 3ab58ab44c5aa..ab5c9745ca929 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -105,12 +105,7 @@ def _write_estimator_html(out, estimator, estimator_label,
                           estimator_label_details, first_call=False):
     """Write estimator to html in serial, parallel, or by itself (single).
     """
-    if first_call:
-        est_block = _get_visual_block(estimator)
-    else:
-        # deeper calls will only show the changes
-        with config_context(print_changed_only=True):
-            est_block = _get_visual_block(estimator)
+    est_block = _get_visual_block(estimator)
 
     if est_block.kind in ('serial', 'parallel'):
         dashed_wrapped = first_call or est_block.dash_wrapped
@@ -283,7 +278,7 @@ def _write_estimator_html(out, estimator, estimator_label,
 """.replace('  ', '').replace('\n', '')  # noqa
 
 
-def _estimator_repr_html(estimator):
+def estimator_repr_html(estimator):
     """Build a HTML representation of an estimator.
 
     Read more in the :ref:`User Guide `.
diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py
index b4305cde23537..152062f5ec168 100644
--- a/sklearn/utils/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_display_estimator.py
@@ -27,7 +27,7 @@
 from sklearn.gaussian_process.kernels import RationalQuadratic
 from sklearn.utils._display_estimator import _write_label_html
 from sklearn.utils._display_estimator import _get_visual_block
-from sklearn.utils._display_estimator import _estimator_repr_html
+from sklearn.utils._display_estimator import estimator_repr_html
 
 
 @pytest.mark.parametrize("checked", [True, False])
@@ -151,7 +151,7 @@ def test_display_estimator_pipeline():
     pipe = Pipeline([
         ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf)
     ])
-    html_output = _estimator_repr_html(pipe)
+    html_output = estimator_repr_html(pipe)
 
     # top level estimators show estimator with changes
     assert str(pipe) in html_output
@@ -159,32 +159,30 @@ def test_display_estimator_pipeline():
         assert (f""
                 f"{str(est)}") in html_output
 
-    # all other estimators are shown with only its changes
-    with config_context(print_changed_only=True):
-        assert str(num_trans['pass']) in html_output
-        assert 'passthrough' in html_output
-        assert str(num_trans['imputer']) in html_output
+    assert str(num_trans['pass']) in html_output
+    assert 'passthrough' in html_output
+    assert str(num_trans['imputer']) in html_output
 
-        for _, _, cols in preprocess.transformers:
-            assert f"{cols}" in html_output
+    for _, _, cols in preprocess.transformers:
+        assert f"{cols}" in html_output
 
-        # feature union
-        for name, _ in feat_u.transformer_list:
-            assert f"{name}" in html_output
+    # feature union
+    for name, _ in feat_u.transformer_list:
+        assert f"{name}" in html_output
 
-        pca = feat_u.transformer_list[0][1]
-        assert f"{str(pca)}" in html_output
+    pca = feat_u.transformer_list[0][1]
+    assert f"{str(pca)}" in html_output
 
-        tsvd = feat_u.transformer_list[1][1]
-        first = tsvd['first']
-        select = tsvd['select']
-        assert f"{str(first)}" in html_output
-        assert f"{str(select)}" in html_output
+    tsvd = feat_u.transformer_list[1][1]
+    first = tsvd['first']
+    select = tsvd['select']
+    assert f"{str(first)}" in html_output
+    assert f"{str(select)}" in html_output
 
-        # voting classifer
-        for name, est in clf.estimators:
-            assert f"{name}" in html_output
-            assert f"{str(est)}" in html_output
+    # voting classifer
+    for name, est in clf.estimators:
+        assert f"{name}" in html_output
+        assert f"{str(est)}" in html_output
 
 
 @pytest.mark.parametrize("final_estimator", [None, LinearSVC()])
@@ -194,11 +192,11 @@ def test_stacking_classsifer(final_estimator):
     clf = StackingClassifier(
         estimators=estimators, final_estimator=final_estimator)
 
-    html_output = _estimator_repr_html(clf)
+    html_output = estimator_repr_html(clf)
 
     assert str(clf) in html_output
     if final_estimator is None:
-        assert "LogisticRegression()" in html_output
+        assert "LogisticRegression(" in html_output
     else:
         assert final_estimator.__class__.__name__ in html_output
 
@@ -207,7 +205,7 @@ def test_stacking_classsifer(final_estimator):
 def test_stacking_regressor(final_estimator):
     reg = StackingRegressor(
         estimators=[('svr', LinearSVR())], final_estimator=final_estimator)
-    html_output = _estimator_repr_html(reg)
+    html_output = estimator_repr_html(reg)
 
     assert str(reg.estimators[0][0]) in html_output
     assert "LinearSVR" in html_output
@@ -220,12 +218,10 @@ def test_stacking_regressor(final_estimator):
 def test_birch_duck_typing_meta():
     # Test duck typing meta estimators with Birch
     birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3))
-    html_output = _estimator_repr_html(birch)
+    html_output = estimator_repr_html(birch)
 
-    # inner estimator shows only the changes
-    with config_context(print_changed_only=True):
-        assert f"{str(birch.n_clusters)}" in html_output
-        assert "AgglomerativeClustering" in html_output
+    assert f"{str(birch.n_clusters)}" in html_output
+    assert "AgglomerativeClustering" in html_output
 
     # outer estimator contains all changes
     assert f"{str(birch)}" in html_output
@@ -234,12 +230,10 @@ def test_birch_duck_typing_meta():
 def test_ovo_classifier_duck_typing_meta():
     # Test duck typing metaestimators with OVO
     ovo = OneVsOneClassifier(LinearSVC(penalty='l1'))
-    html_output = _estimator_repr_html(ovo)
+    html_output = estimator_repr_html(ovo)
 
-    # inner estimator shows only the changes
-    with config_context(print_changed_only=True):
-        assert f"{str(ovo.estimator)}" in html_output
-        assert "LinearSVC" in html_output
+    assert f"{str(ovo.estimator)}" in html_output
+    assert "LinearSVC" in html_output
 
     # outter estimator
     assert f"{str(ovo)}" in html_output
@@ -249,11 +243,9 @@ def test_duck_typing_nested_estimator():
     # Test duck typing metaestimators with GP
     kernel = RationalQuadratic(length_scale=1.0, alpha=0.1)
     gp = GaussianProcessRegressor(kernel=kernel)
-    html_output = _estimator_repr_html(gp)
-
-    with config_context(print_changed_only=True):
-        assert f"{str(kernel)}" in html_output
+    html_output = estimator_repr_html(gp)
 
+    assert f"{str(kernel)}" in html_output
     assert f"{str(gp)}" in html_output
 
 
@@ -263,5 +255,5 @@ def test_one_estimator_print_change_only(print_changed_only):
 
     with config_context(print_changed_only=print_changed_only):
         pca_repr = str(pca)
-        html_output = _estimator_repr_html(pca)
+        html_output = estimator_repr_html(pca)
         assert pca_repr in html_output

From 17f05e848798b2b5387a77e4be693fc374a5a509 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 26 Apr 2020 22:39:18 -0400
Subject: [PATCH 60/81] BUG Fix

---
 sklearn/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index fd2d5b9bfe110..19e8310f583eb 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -450,7 +450,7 @@ def _repr_html_(self):
         """
         repr_html = get_config()["repr_html"]
         if repr_html:
-            return estimator_repr_htmlself)
+            return estimator_repr_html(self)
         return repr(self)
 
 

From 47d72ba85123c6cf5a7c459fde6367382f35d175 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 26 Apr 2020 22:40:15 -0400
Subject: [PATCH 61/81] DOC Use function

---
 doc/modules/compose.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 78f7dc2f63c29..71d371963b646 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -569,8 +569,9 @@ many estimators. An example of the HTML output can be seen below.
 As an alternative, the HTML can be written to a file using the `_repr_html_`
 method::
 
-   with config_context(repr_html=True), open('my_estimator.html', 'w') as f:
-       f.write(clf._repr_html_())
+   from sklearn.utils import estimator_repr_html
+   with open('my_estimator.html', 'w') as f:
+       f.write(estimator_repr_html(clf))
 
 .. topic:: Examples:
 

From bae645bd786d697c19c77ea876d35bc52c76838d Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Sun, 26 Apr 2020 22:40:55 -0400
Subject: [PATCH 62/81] REV Less diffs

---
 examples/compose/plot_column_transformer.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py
index 09877114124ac..0cfc9f5de0054 100644
--- a/examples/compose/plot_column_transformer.py
+++ b/examples/compose/plot_column_transformer.py
@@ -115,13 +115,7 @@ def transform(self, posts):
     ('svc', LinearSVC(dual=False)),
 ], verbose=True)
 
-###############################################################################
-# Classification Report
-###############################################################################
-# Finally, the pipeline is trained and a classification report is generated
-# on a testing subset. We limit the list of categories to make running this
-# example faster.
-
+# limit the list of categories to make running this example faster.
 categories = ['alt.atheism', 'talk.religion.misc']
 X_train, y_train = fetch_20newsgroups(random_state=1,
                                       subset='train',

From c616802df4a6865f3862ea97550382ea4c35ad8f Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 08:07:55 -0400
Subject: [PATCH 63/81] REV Remove

---
 sklearn/utils/_display_estimator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index ab5c9745ca929..5f4438d8f6ac4 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -1,4 +1,3 @@
-from sklearn._config import config_context
 from contextlib import closing
 from contextlib import suppress
 from io import StringIO

From 1fe69faaa436ca10cf402535b159062122785750 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 08:38:11 -0400
Subject: [PATCH 64/81] BUG Fix

---
 sklearn/base.py                               | 2 +-
 sklearn/utils/tests/test_display_estimator.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 19e8310f583eb..8b415cc3d5ed4 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -451,7 +451,7 @@ def _repr_html_(self):
         repr_html = get_config()["repr_html"]
         if repr_html:
             return estimator_repr_html(self)
-        return repr(self)
+        return f"{repr(self)}"
 
 
 class ClassifierMixin:
diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py
index 152062f5ec168..c4593ce3cc730 100644
--- a/sklearn/utils/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_display_estimator.py
@@ -64,6 +64,10 @@ def test_get_visual_block_single_estimator():
     assert est_html_info.names == est.__class__.__name__
     assert est_html_info.name_details == str(est)
 
+    # wraps with code block
+    expected_str = f"{repr(est)}"
+    assert est._repr_html_() == expected_str
+
 
 def test_get_visual_block_pipeline():
     pipe = Pipeline([

From 8d23d5b3e23cf625739a620679a74f031bdb9929 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 09:28:17 -0400
Subject: [PATCH 65/81] REV Inner estimators do not show changes

---
 sklearn/utils/_display_estimator.py           |  7 ++-
 sklearn/utils/tests/test_display_estimator.py | 52 +++++++++++--------
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index 5f4438d8f6ac4..01c417c170d92 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -1,3 +1,4 @@
+from sklearn import config_context
 from contextlib import closing
 from contextlib import suppress
 from io import StringIO
@@ -104,7 +105,11 @@ def _write_estimator_html(out, estimator, estimator_label,
                           estimator_label_details, first_call=False):
     """Write estimator to html in serial, parallel, or by itself (single).
     """
-    est_block = _get_visual_block(estimator)
+    if first_call:
+        est_block = _get_visual_block(estimator)
+    else:
+        with config_context(print_changed_only=True):
+            est_block = _get_visual_block(estimator)
 
     if est_block.kind in ('serial', 'parallel'):
         dashed_wrapped = first_call or est_block.dash_wrapped
diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py
index c4593ce3cc730..5eb6ad732d2ce 100644
--- a/sklearn/utils/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_display_estimator.py
@@ -163,30 +163,32 @@ def test_display_estimator_pipeline():
         assert (f""
                 f"{str(est)}") in html_output
 
-    assert str(num_trans['pass']) in html_output
-    assert 'passthrough' in html_output
-    assert str(num_trans['imputer']) in html_output
+    # low level estimators show only their non-default parameters
+    with config_context(print_changed_only=True):
+        assert str(num_trans['pass']) in html_output
+        assert 'passthrough' in html_output
+        assert str(num_trans['imputer']) in html_output
 
-    for _, _, cols in preprocess.transformers:
-        assert f"{cols}" in html_output
+        for _, _, cols in preprocess.transformers:
+            assert f"{cols}" in html_output
 
-    # feature union
-    for name, _ in feat_u.transformer_list:
-        assert f"{name}" in html_output
+        # feature union
+        for name, _ in feat_u.transformer_list:
+            assert f"{name}" in html_output
 
-    pca = feat_u.transformer_list[0][1]
-    assert f"{str(pca)}" in html_output
+        pca = feat_u.transformer_list[0][1]
+        assert f"{str(pca)}" in html_output
 
-    tsvd = feat_u.transformer_list[1][1]
-    first = tsvd['first']
-    select = tsvd['select']
-    assert f"{str(first)}" in html_output
-    assert f"{str(select)}" in html_output
+        tsvd = feat_u.transformer_list[1][1]
+        first = tsvd['first']
+        select = tsvd['select']
+        assert f"{str(first)}" in html_output
+        assert f"{str(select)}" in html_output
 
-    # voting classifer
-    for name, est in clf.estimators:
-        assert f"{name}" in html_output
-        assert f"{str(est)}" in html_output
+        # voting classifier
+        for name, est in clf.estimators:
+            assert f"{name}" in html_output
+            assert f"{str(est)}" in html_output
 
 
 @pytest.mark.parametrize("final_estimator", [None, LinearSVC()])
@@ -224,8 +226,10 @@ def test_birch_duck_typing_meta():
     birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3))
     html_output = estimator_repr_html(birch)
 
-    assert f"{str(birch.n_clusters)}" in html_output
-    assert "AgglomerativeClustering" in html_output
+    # inner estimators show only their non-default parameters
+    with config_context(print_changed_only=True):
+        assert f"{str(birch.n_clusters)}" in html_output
+        assert "AgglomerativeClustering" in html_output
 
     # outer estimator contains all changes
     assert f"{str(birch)}" in html_output
@@ -236,8 +240,10 @@ def test_ovo_classifier_duck_typing_meta():
     ovo = OneVsOneClassifier(LinearSVC(penalty='l1'))
     html_output = estimator_repr_html(ovo)
 
-    assert f"{str(ovo.estimator)}" in html_output
-    assert "LinearSVC" in html_output
+    # inner estimators show only their non-default parameters
+    with config_context(print_changed_only=True):
+        assert f"{str(ovo.estimator)}" in html_output
+        assert "LinearSVC" in html_output
 
     # outter estimator
     assert f"{str(ovo)}" in html_output

From 3d41cafccea2cc9376ef06cbc9580328ebf263ea Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 11:35:18 -0400
Subject: [PATCH 66/81] ENH Uses _repr_mimebundle_

---
 sklearn/base.py                               | 20 +++++--------------
 sklearn/utils/_display_estimator.py           |  3 +--
 sklearn/utils/tests/test_display_estimator.py |  4 ----
 3 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 8b415cc3d5ed4..0e2fff59007ef 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -437,21 +437,11 @@ def _validate_data(self, X, y=None, reset=True,
 
         return out
 
-    def _repr_html_(self):
-        """HTML or string representation of an estimator depending on
-        global configuration flag `repr_html`.
-
-        Read more in the :ref:`User Guide `.
-
-        Returns
-        -------
-        repr: str
-            HTML or string representation of estimator.
-        """
-        repr_html = get_config()["repr_html"]
-        if repr_html:
-            return estimator_repr_html(self)
-        return f"{repr(self)}"
+    def _repr_mimebundle_(self, **kwargs):
+        output = {"text/plain": repr(self)}
+        if get_config()["repr_html"]:
+            output["text/html"] = estimator_repr_html(self)
+        return output
 
 
 class ClassifierMixin:
diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_display_estimator.py
index 01c417c170d92..914ac0fe6fefb 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_display_estimator.py
@@ -299,8 +299,7 @@ def estimator_repr_html(estimator):
     """
     with closing(StringIO()) as out:
         out.write(f''
-                  f'sklearn-viz'
-                  f''
+                  f''
                   f'')
         _write_estimator_html(out, estimator, estimator.__class__.__name__,
                               str(estimator), first_call=True)
diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_display_estimator.py
index 5eb6ad732d2ce..97869aa8ca2b4 100644
--- a/sklearn/utils/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_display_estimator.py
@@ -64,10 +64,6 @@ def test_get_visual_block_single_estimator():
     assert est_html_info.names == est.__class__.__name__
     assert est_html_info.name_details == str(est)
 
-    # wraps with code block
-    expected_str = f"{repr(est)}"
-    assert est._repr_html_() == expected_str
-
 
 def test_get_visual_block_pipeline():
     pipe = Pipeline([

From 1cc87b6a9a3c5d0886d8c84762229b94082cafd7 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 11:47:31 -0400
Subject: [PATCH 67/81] CLN Updates file names

---
 sklearn/base.py                                    |  2 +-
 sklearn/compose/_column_transformer.py             |  2 +-
 sklearn/ensemble/_stacking.py                      |  2 +-
 sklearn/ensemble/_voting.py                        |  2 +-
 sklearn/pipeline.py                                |  2 +-
 sklearn/tests/test_base.py                         | 14 ++++++++++++++
 sklearn/utils/__init__.py                          |  2 +-
 ...isplay_estimator.py => _estimator_html_repr.py} |  9 ++++++---
 ...ay_estimator.py => test_estimator_html_repr.py} |  8 ++++----
 9 files changed, 30 insertions(+), 13 deletions(-)
 rename sklearn/utils/{_display_estimator.py => _estimator_html_repr.py} (97%)
 rename sklearn/utils/tests/{test_display_estimator.py => test_estimator_html_repr.py} (97%)

diff --git a/sklearn/base.py b/sklearn/base.py
index 0e2fff59007ef..ae74368851226 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -21,7 +21,7 @@
 from .utils import _IS_32BIT
 from .utils.validation import check_X_y
 from .utils.validation import check_array
-from .utils._display_estimator import estimator_repr_html
+from .utils._estimator_html_repr import estimator_repr_html
 from .utils.validation import _deprecate_positional_args
 
 _DEFAULT_TAGS = {
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index 261128fcfd3bf..f148633021a97 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -15,7 +15,7 @@
 from joblib import Parallel, delayed
 
 from ..base import clone, TransformerMixin
-from ..utils._display_estimator import _VisualBlock
+from ..utils._estimator_html_repr import _VisualBlock
 from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
 from ..preprocessing import FunctionTransformer
 from ..utils import Bunch
diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
index ac2e79638096e..91431f805aa0f 100644
--- a/sklearn/ensemble/_stacking.py
+++ b/sklearn/ensemble/_stacking.py
@@ -13,7 +13,7 @@
 from ..base import clone
 from ..base import ClassifierMixin, RegressorMixin, TransformerMixin
 from ..base import is_classifier, is_regressor
-from ..utils._display_estimator import _VisualBlock
+from ..utils._estimator_html_repr import _VisualBlock
 
 from ._base import _fit_single_estimator
 from ._base import _BaseHeterogeneousEnsemble
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index b866962ed53b2..5105619e17d17 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -32,7 +32,7 @@
 from ..utils.validation import column_or_1d
 from ..utils.validation import _deprecate_positional_args
 from ..exceptions import NotFittedError
-from ..utils._display_estimator import _VisualBlock
+from ..utils._estimator_html_repr import _VisualBlock
 
 
 class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 22471a76b3b67..6f02cb565e15c 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -18,7 +18,7 @@
 from joblib import Parallel, delayed
 
 from .base import clone, TransformerMixin
-from .utils._display_estimator import _VisualBlock
+from .utils._estimator_html_repr import _VisualBlock
 from .utils.metaestimators import if_delegate_has_method
 from .utils import Bunch, _print_elapsed_time
 from .utils.validation import check_memory
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index 95f7b01f27058..f12c07d49090e 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -23,6 +23,7 @@
 
 from sklearn.base import TransformerMixin
 from sklearn.utils._mocking import MockDataFrame
+from sklearn import config_context
 import pickle
 
 
@@ -511,3 +512,16 @@ def fit(self, X, y=None):
         params = est.get_params()
 
     assert params['param'] is None
+
+
+def test_repr_mimebundle_():
+    # Checks the repr_html configuration flag controls the json output
+    tree = DecisionTreeClassifier()
+    output = tree._repr_mimebundle_()
+    assert "text/plain" in output
+    assert "text/html" not in output
+
+    with config_context(repr_html=True):
+        output = tree._repr_mimebundle_()
+        assert "text/plain" in output
+        assert "text/html" in output
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index acffddf7ccf56..1c36c69919ce3 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -25,7 +25,7 @@
 from ..exceptions import DataConversionWarning
 from .deprecation import deprecated
 from .fixes import np_version
-from ._display_estimator import estimator_repr_html
+from ._estimator_html_repr import estimator_repr_html
 from .validation import (as_float_array,
                          assert_all_finite,
                          check_random_state, column_or_1d, check_array,
diff --git a/sklearn/utils/_display_estimator.py b/sklearn/utils/_estimator_html_repr.py
similarity index 97%
rename from sklearn/utils/_display_estimator.py
rename to sklearn/utils/_estimator_html_repr.py
index 914ac0fe6fefb..84b071f61a955 100644
--- a/sklearn/utils/_display_estimator.py
+++ b/sklearn/utils/_estimator_html_repr.py
@@ -1,8 +1,10 @@
-from sklearn import config_context
 from contextlib import closing
 from contextlib import suppress
 from io import StringIO
 import uuid
+import html
+
+from sklearn import config_context
 
 
 class _VisualBlock:
@@ -98,7 +100,7 @@ def _get_visual_block(estimator):
 
     return _VisualBlock('single', estimator,
                         names=estimator.__class__.__name__,
-                        name_details=str(estimator))
+                        name_details=html.escape(str(estimator), quote=False))
 
 
 def _write_estimator_html(out, estimator, estimator_label,
@@ -302,7 +304,8 @@ def estimator_repr_html(estimator):
                   f''
                   f'')
         _write_estimator_html(out, estimator, estimator.__class__.__name__,
-                              str(estimator), first_call=True)
+                              html.escape(str(estimator), quote=False),
+                              first_call=True)
         out.write('')
 
         html_output = out.getvalue()
diff --git a/sklearn/utils/tests/test_display_estimator.py b/sklearn/utils/tests/test_estimator_html_repr.py
similarity index 97%
rename from sklearn/utils/tests/test_display_estimator.py
rename to sklearn/utils/tests/test_estimator_html_repr.py
index 97869aa8ca2b4..af6e6bcb44c8c 100644
--- a/sklearn/utils/tests/test_display_estimator.py
+++ b/sklearn/utils/tests/test_estimator_html_repr.py
@@ -25,9 +25,9 @@
 from sklearn.ensemble import StackingRegressor
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels import RationalQuadratic
-from sklearn.utils._display_estimator import _write_label_html
-from sklearn.utils._display_estimator import _get_visual_block
-from sklearn.utils._display_estimator import estimator_repr_html
+from sklearn.utils._estimator_html_repr import _write_label_html
+from sklearn.utils._estimator_html_repr import _get_visual_block
+from sklearn.utils._estimator_html_repr import estimator_repr_html
 
 
 @pytest.mark.parametrize("checked", [True, False])
@@ -120,7 +120,7 @@ def test_get_visual_block_column_transformer():
     assert est_html_info.name_details == (['num1', 'num2'], [0, 3])
 
 
-def test_display_estimator_pipeline():
+def test_estimator_html_repr_pipeline():
     num_trans = Pipeline(steps=[
         ('pass', 'passthrough'),
         ('imputer', SimpleImputer(strategy='median'))

From 3af9151de16436b40f0a2f8766ef73d9a5ef7ba4 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 12:24:43 -0400
Subject: [PATCH 68/81] DOC Remove sphinx extension

---
 doc/conf.py                            |  1 -
 doc/modules/compose.rst                | 39 +++------------
 doc/sphinxext/display_est_repr_html.py | 69 --------------------------
 3 files changed, 6 insertions(+), 103 deletions(-)
 delete mode 100644 doc/sphinxext/display_est_repr_html.py

diff --git a/doc/conf.py b/doc/conf.py
index 22f9061c8a9c3..a824ab21b33e5 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -39,7 +39,6 @@
     'sphinx.ext.imgconverter',
     'sphinx_gallery.gen_gallery',
     'sphinx_issues',
-    'display_est_repr_html'
 ]
 
 # this is needed for some reason...
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 71d371963b646..805dba4334f36 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -535,41 +535,14 @@ Visualizing Composite Estimators
 
 By default, estimators are displayed with a HTML representation when shown in a
 jupyter notebook. This can be useful to diagnose or visualize a Pipeline with
-many estimators. An example of the HTML output can be seen below.
-
-.. display_estimator_repr_html::
-
-   from sklearn import config_context
-   from sklearn.compose import ColumnTransformer
-   from sklearn.pipeline import Pipeline
-   from sklearn.impute import SimpleImputer
-   from sklearn.preprocessing import StandardScaler, OneHotEncoder
-   from sklearn.linear_model import LogisticRegression
-
-   numeric_features = ['age', 'fare']
-   numeric_transformer = Pipeline(steps=[
-       ('imputer', SimpleImputer(strategy='median')),
-       ('scaler', StandardScaler())])
-
-   categorical_features = ['embarked', 'sex', 'pclass']
-   categorical_transformer = Pipeline(steps=[
-    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
-       ('onehot', OneHotEncoder(handle_unknown='ignore'))])
-
-   preprocessor = ColumnTransformer(
-       transformers=[
-           ('num', numeric_transformer, numeric_features),
-           ('cat', categorical_transformer, categorical_features)])
-
-   clf = Pipeline(steps=[('preprocessor', preprocessor),
-                          ('classifier', LogisticRegression())])
-   with config_context(repr_html=True):
-       clf
-
-As an alternative, the HTML can be written to a file using the `_repr_html_`
-method::
+many estimators. An example of the HTML output is shown in the
+**HTML representation of Pipeline** section of 
+:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`.
+As an alternative, the HTML can be written to a file using
+:func:`~sklearn.utils.estimator_repr_html`::
 
    from sklearn.utils import estimator_repr_html
+
    with open('my_estimator.html', 'w') as f:
        f.write(estimator_repr_html(clf))
 
diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py
deleted file mode 100644
index 676b87c284b6d..0000000000000
--- a/doc/sphinxext/display_est_repr_html.py
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-Primarily used to display the html output of `_repr_html_` of estimators
-"""
-from sphinx.util.docutils import SphinxDirective
-from contextlib import redirect_stderr, redirect_stdout
-from docutils import nodes
-from io import StringIO
-
-
-class DisplayEstimatorRepr(SphinxDirective):
-    """Execute Python and runs `_repr_html_` on the last element on the code
-    block. The last element in the code block should be an estimator with
-    support for `_repr_html_`.
-    """
-
-    has_content = True
-    required_arguments = 0
-    optional_arguments = 0
-
-    def execute(self, code, format_str):
-        code_parts = code.split('\n')
-        final_output = code_parts[-1]
-        final_est = final_output.lstrip(' ')
-        n_whitespace = len(final_output) - len(final_est)
-        code_parts[-1] = " " * n_whitespace + format_str.format(final_est)
-        code = '\n'.join(code_parts)
-
-        output, err = StringIO(), StringIO()
-        with redirect_stdout(output), redirect_stderr(err):
-            exec(code)
-
-        return f"{output.getvalue()}{err.getvalue()}"
-
-    def run(self):
-        output = []
-        code = "\n".join(self.content)
-        repr_html = self.execute(code, format_str='print({}._repr_html_())')
-
-        input_code = nodes.literal_block(rawsource=code, text=code)
-        input_code['language'] = 'python'
-        output.append(input_code)
-
-        repr_html = f"{repr_html}"
-        html_node = nodes.raw('', repr_html, format='html')
-        output.append(html_node)
-
-        if self.env.app.builder.name == 'latex':
-            code_results_latex = r"""
-            \begin{sphinxadmonition}{note}{Note:}
-            The HTML output of this code snippet can only been seen on the HTML
-            version of the documentation. The following is a string
-            representation.
-            \end{sphinxadmonition}
-            """
-            latex_node = nodes.raw('', code_results_latex, format='latex')
-            output.append(latex_node)
-
-            str_repr = self.execute(code, format_str='print(repr({}))')
-            str_repr_node = nodes.literal_block(rawsource=str_repr,
-                                                text=str_repr)
-            str_repr_node['language'] = 'python'
-            output.append(str_repr_node)
-
-        return output
-
-
-def setup(app):
-    app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr)
-    return {'parallel_read_safe': True, 'parallel_write_safe': True}

From 0b4a64dc4dc122f809250978c79cd1647dde6e75 Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 16:06:51 -0400
Subject: [PATCH 69/81] CLN Uses None

---
 doc/modules/classes.rst                       |  2 +-
 doc/modules/compose.rst                       |  6 ++---
 sklearn/base.py                               | 11 +++++-----
 sklearn/compose/_column_transformer.py        |  2 +-
 sklearn/ensemble/_stacking.py                 |  2 +-
 sklearn/ensemble/_voting.py                   |  2 +-
 sklearn/pipeline.py                           |  2 +-
 sklearn/tests/test_base.py                    | 12 +++++-----
 sklearn/utils/__init__.py                     |  4 ++--
 ...tml_repr.py => _display_estimator_html.py} | 13 +++++------
 ...repr.py => test_display_estimator_html.py} | 22 +++++++++----------
 11 files changed, 39 insertions(+), 39 deletions(-)
 rename sklearn/utils/{_estimator_html_repr.py => _display_estimator_html.py} (95%)
 rename sklearn/utils/tests/{test_estimator_html_repr.py => test_display_estimator_html.py} (93%)

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index f9904ad56b100..be12641ab4a7b 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -1569,7 +1569,7 @@ Plotting
    utils.deprecated
    utils.estimator_checks.check_estimator
    utils.estimator_checks.parametrize_with_checks
-   utils.estimator_repr_html
+   utils.display_estimator_html
    utils.extmath.safe_sparse_dot
    utils.extmath.randomized_range_finder
    utils.extmath.randomized_svd
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 805dba4334f36..3d9a08fe395aa 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -539,12 +539,12 @@ many estimators. An example of the HTML output is shown in the
 **HTML representation of Pipeline** section of 
 :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`.
 As an alternative, the HTML can be written to a file using
-:func:`~sklearn.utils.estimator_repr_html`::
+:func:`~sklearn.utils.display_estimator_html`::
 
-   from sklearn.utils import estimator_repr_html
+   from sklearn.utils import display_estimator_html
 
    with open('my_estimator.html', 'w') as f:
-       f.write(estimator_repr_html(clf))
+       f.write(display_estimator_html(clf))
 
 .. topic:: Examples:
 
diff --git a/sklearn/base.py b/sklearn/base.py
index ae74368851226..a4074e1d42d3b 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -13,6 +13,7 @@
 import platform
 import inspect
 import re
+import html
 
 import numpy as np
 
@@ -21,7 +22,7 @@
 from .utils import _IS_32BIT
 from .utils.validation import check_X_y
 from .utils.validation import check_array
-from .utils._estimator_html_repr import estimator_repr_html
+from .utils._display_estimator_html import display_estimator_html
 from .utils.validation import _deprecate_positional_args
 
 _DEFAULT_TAGS = {
@@ -437,11 +438,11 @@ def _validate_data(self, X, y=None, reset=True,
 
         return out
 
-    def _repr_mimebundle_(self, **kwargs):
-        output = {"text/plain": repr(self)}
+    def _repr_html_(self):
+        """HTML representation of estimator."""
         if get_config()["repr_html"]:
-            output["text/html"] = estimator_repr_html(self)
-        return output
+            return display_estimator_html(self)
+        return None
 
 
 class ClassifierMixin:
diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index f148633021a97..f354672c42e9b 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -15,7 +15,7 @@
 from joblib import Parallel, delayed
 
 from ..base import clone, TransformerMixin
-from ..utils._estimator_html_repr import _VisualBlock
+from ..utils._display_estimator_html import _VisualBlock
 from ..pipeline import _fit_transform_one, _transform_one, _name_estimators
 from ..preprocessing import FunctionTransformer
 from ..utils import Bunch
diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py
index 91431f805aa0f..870757f5b9497 100644
--- a/sklearn/ensemble/_stacking.py
+++ b/sklearn/ensemble/_stacking.py
@@ -13,7 +13,7 @@
 from ..base import clone
 from ..base import ClassifierMixin, RegressorMixin, TransformerMixin
 from ..base import is_classifier, is_regressor
-from ..utils._estimator_html_repr import _VisualBlock
+from ..utils._display_estimator_html import _VisualBlock
 
 from ._base import _fit_single_estimator
 from ._base import _BaseHeterogeneousEnsemble
diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py
index 5105619e17d17..c85a6c86d2dfd 100644
--- a/sklearn/ensemble/_voting.py
+++ b/sklearn/ensemble/_voting.py
@@ -32,7 +32,7 @@
 from ..utils.validation import column_or_1d
 from ..utils.validation import _deprecate_positional_args
 from ..exceptions import NotFittedError
-from ..utils._estimator_html_repr import _VisualBlock
+from ..utils._display_estimator_html import _VisualBlock
 
 
 class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 6f02cb565e15c..b289607c54be9 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -18,7 +18,7 @@
 from joblib import Parallel, delayed
 
 from .base import clone, TransformerMixin
-from .utils._estimator_html_repr import _VisualBlock
+from .utils._display_estimator_html import _VisualBlock
 from .utils.metaestimators import if_delegate_has_method
 from .utils import Bunch, _print_elapsed_time
 from .utils.validation import check_memory
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index f12c07d49090e..96c82c7b0ef3f 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -1,5 +1,6 @@
 # Author: Gael Varoquaux
 # License: BSD 3 clause
+import html
 
 import numpy as np
 import scipy.sparse as sp
@@ -517,11 +518,10 @@ def fit(self, X, y=None):
 def test_repr_mimebundle_():
     # Checks the repr_html configuration flag controls the json output
     tree = DecisionTreeClassifier()
-    output = tree._repr_mimebundle_()
-    assert "text/plain" in output
-    assert "text/html" not in output
+    output = tree._repr_html_()
+    assert output == f"{html.escape(str(tree))}"
 
     with config_context(repr_html=True):
-        output = tree._repr_mimebundle_()
-        assert "text/plain" in output
-        assert "text/html" in output
+        output = tree._repr_html_()
+        # html output
+        assert "'
+        out.write(f''
                   f'')
         _write_estimator_html(out, estimator, estimator.__class__.__name__,
-                              html.escape(str(estimator), quote=False),
-                              first_call=True)
-        out.write('')
+                              str(estimator), first_call=True)
+        out.write('')
 
         html_output = out.getvalue()
         return html_output
diff --git a/sklearn/utils/tests/test_estimator_html_repr.py b/sklearn/utils/tests/test_display_estimator_html.py
similarity index 93%
rename from sklearn/utils/tests/test_estimator_html_repr.py
rename to sklearn/utils/tests/test_display_estimator_html.py
index af6e6bcb44c8c..77bad73d753e0 100644
--- a/sklearn/utils/tests/test_estimator_html_repr.py
+++ b/sklearn/utils/tests/test_display_estimator_html.py
@@ -25,9 +25,9 @@
 from sklearn.ensemble import StackingRegressor
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels import RationalQuadratic
-from sklearn.utils._estimator_html_repr import _write_label_html
-from sklearn.utils._estimator_html_repr import _get_visual_block
-from sklearn.utils._estimator_html_repr import estimator_repr_html
+from sklearn.utils._display_estimator_html import _write_label_html
+from sklearn.utils._display_estimator_html import _get_visual_block
+from sklearn.utils._display_estimator_html import display_estimator_html
 
 
 @pytest.mark.parametrize("checked", [True, False])
@@ -120,7 +120,7 @@ def test_get_visual_block_column_transformer():
     assert est_html_info.name_details == (['num1', 'num2'], [0, 3])
 
 
-def test_estimator_html_repr_pipeline():
+def test_display_estimator_html_pipeline():
     num_trans = Pipeline(steps=[
         ('pass', 'passthrough'),
         ('imputer', SimpleImputer(strategy='median'))
@@ -151,7 +151,7 @@ def test_estimator_html_repr_pipeline():
     pipe = Pipeline([
         ('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf)
     ])
-    html_output = estimator_repr_html(pipe)
+    html_output = display_estimator_html(pipe)
 
     # top level estimators show estimator with changes
     assert str(pipe) in html_output
@@ -194,7 +194,7 @@ def test_stacking_classsifer(final_estimator):
     clf = StackingClassifier(
         estimators=estimators, final_estimator=final_estimator)
 
-    html_output = estimator_repr_html(clf)
+    html_output = display_estimator_html(clf)
 
     assert str(clf) in html_output
     if final_estimator is None:
@@ -207,7 +207,7 @@ def test_stacking_classsifer(final_estimator):
 def test_stacking_regressor(final_estimator):
     reg = StackingRegressor(
         estimators=[('svr', LinearSVR())], final_estimator=final_estimator)
-    html_output = estimator_repr_html(reg)
+    html_output = display_estimator_html(reg)
 
     assert str(reg.estimators[0][0]) in html_output
     assert "LinearSVR" in html_output
@@ -220,7 +220,7 @@ def test_stacking_regressor(final_estimator):
 def test_birch_duck_typing_meta():
     # Test duck typing meta estimators with Birch
     birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3))
-    html_output = estimator_repr_html(birch)
+    html_output = display_estimator_html(birch)
 
     # inner estimators do not show changes
     with config_context(print_changed_only=True):
@@ -234,7 +234,7 @@ def test_birch_duck_typing_meta():
 def test_ovo_classifier_duck_typing_meta():
     # Test duck typing metaestimators with OVO
     ovo = OneVsOneClassifier(LinearSVC(penalty='l1'))
-    html_output = estimator_repr_html(ovo)
+    html_output = display_estimator_html(ovo)
 
     # inner estimators do not show changes
     with config_context(print_changed_only=True):
@@ -249,7 +249,7 @@ def test_duck_typing_nested_estimator():
     # Test duck typing metaestimators with GP
     kernel = RationalQuadratic(length_scale=1.0, alpha=0.1)
     gp = GaussianProcessRegressor(kernel=kernel)
-    html_output = estimator_repr_html(gp)
+    html_output = display_estimator_html(gp)
 
     assert f"{str(kernel)}" in html_output
     assert f"{str(gp)}" in html_output
@@ -261,5 +261,5 @@ def test_one_estimator_print_change_only(print_changed_only):
 
     with config_context(print_changed_only=print_changed_only):
         pca_repr = str(pca)
-        html_output = estimator_repr_html(pca)
+        html_output = display_estimator_html(pca)
         assert pca_repr in html_output

From 689d3f29c42a608a00d504f1c3a442d6d9691abe Mon Sep 17 00:00:00 2001
From: Thomas J Fan 
Date: Mon, 27 Apr 2020 16:37:02 -0400
Subject: [PATCH 70/81] ENH Uses _repr_mimebundle_

---
 doc/conf.py                                   |  1 +
 doc/modules/compose.rst                       | 38 ++++++++--
 doc/sphinxext/display_est_repr_html.py        | 70 +++++++++++++++++++
 .../plot_column_transformer_mixed_types.py    |  8 ---
 sklearn/base.py                               |  9 ++-
 sklearn/tests/test_base.py                    | 12 ++--
 6 files changed, 114 insertions(+), 24 deletions(-)
 create mode 100644 doc/sphinxext/display_est_repr_html.py

diff --git a/doc/conf.py b/doc/conf.py
index a824ab21b33e5..f081f16ef3f47 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -39,6 +39,7 @@
     'sphinx.ext.imgconverter',
     'sphinx_gallery.gen_gallery',
     'sphinx_issues',
+    'display_est_repr_html',
 ]
 
 # this is needed for some reason...
diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst
index 3d9a08fe395aa..665842fd40cd7 100644
--- a/doc/modules/compose.rst
+++ b/doc/modules/compose.rst
@@ -535,11 +535,39 @@ Visualizing Composite Estimators
 
 By default, estimators are displayed with a HTML representation when shown in a
 jupyter notebook. This can be useful to diagnose or visualize a Pipeline with
-many estimators. An example of the HTML output is shown in the
-**HTML representation of Pipeline** section of 
-:ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`.
-As an alternative, the HTML can be written to a file using
-:func:`~sklearn.utils.display_estimator_html`::
+many estimators. An example of the HTML output can be seen below.
+
+.. display_estimator_repr_html::
+
+    from sklearn import config_context
+    from sklearn.compose import ColumnTransformer
+    from sklearn.pipeline import Pipeline
+    from sklearn.impute import SimpleImputer
+    from sklearn.preprocessing import StandardScaler, OneHotEncoder
+    from sklearn.linear_model import LogisticRegression
+
+    numeric_features = ['age', 'fare']
+    numeric_transformer = Pipeline(steps=[
+        ('imputer', SimpleImputer(strategy='median')),
+        ('scaler', StandardScaler())])
+
+    categorical_features = ['embarked', 'sex', 'pclass']
+    categorical_transformer = Pipeline(steps=[
+     ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
+        ('onehot', OneHotEncoder(handle_unknown='ignore'))])
+
+    preprocessor = ColumnTransformer(
+        transformers=[
+            ('num', numeric_transformer, numeric_features),
+            ('cat', categorical_transformer, categorical_features)])
+
+    clf = Pipeline(steps=[('preprocessor', preprocessor),
+                           ('classifier', LogisticRegression())])
+    with config_context(repr_html=True):
+        clf
+
+As an alternative, the HTML can be written to a file using
+:func:`~sklearn.utils.display_estimator_html`::
 
    from sklearn.utils import display_estimator_html
 
diff --git a/doc/sphinxext/display_est_repr_html.py b/doc/sphinxext/display_est_repr_html.py
new file mode 100644
index 0000000000000..6ef8801758b74
--- /dev/null
+++ b/doc/sphinxext/display_est_repr_html.py
@@ -0,0 +1,70 @@
+"""
+Primarily used to display the HTML output of `_repr_mimebundle_` of estimators
+"""
+from sphinx.util.docutils import SphinxDirective
+from contextlib import redirect_stderr, redirect_stdout
+from docutils import nodes
+from io import StringIO
+
+
+class DisplayEstimatorRepr(SphinxDirective):
+    """Execute Python and display the `text/html` entry of the
+    `_repr_mimebundle_` output of the last element of the code block. The
+    last element should be an estimator that supports `_repr_mimebundle_`.
+    """
+
+    has_content = True
+    required_arguments = 0
+    optional_arguments = 0
+
+    def execute(self, code, format_str):
+        code_parts = code.split('\n')
+        final_output = code_parts[-1]
+        final_est = final_output.lstrip(' ')
+        n_whitespace = len(final_output) - len(final_est)
+        code_parts[-1] = " " * n_whitespace + format_str.format(final_est)
+        code = '\n'.join(code_parts)
+
+        output, err = StringIO(), StringIO()
+        with redirect_stdout(output), redirect_stderr(err):
+            exec(code)
+
+        return f"{output.getvalue()}{err.getvalue()}"
+
+    def run(self):
+        output = []
+        code = "\n".join(self.content)
+        repr_html = self.execute(
+            code, format_str='print({}._repr_mimebundle_()["text/html"])')
+
+        input_code = nodes.literal_block(rawsource=code, text=code)
+        input_code['language'] = 'python'
+        output.append(input_code)
+
+        repr_html = f"{repr_html}"
+        html_node = nodes.raw('', repr_html, format='html')
+        output.append(html_node)
+
+        if self.env.app.builder.name == 'latex':
+            code_results_latex = r"""
+            \begin{sphinxadmonition}{note}{Note:}
+            The HTML output of this code snippet can only been seen on the HTML
+            version of the documentation. The following is a string
+            representation.
+            \end{sphinxadmonition}
+            """
+            latex_node = nodes.raw('', code_results_latex, format='latex')
+            output.append(latex_node)
+
+            str_repr = self.execute(code, format_str='print(repr({}))')
+            str_repr_node = nodes.literal_block(rawsource=str_repr,
+                                                text=str_repr)
+            str_repr_node['language'] = 'python'
+            output.append(str_repr_node)
+
+        return output
+
+
+def setup(app):
+    app.add_directive('display_estimator_repr_html', DisplayEstimatorRepr)
+    return {'parallel_read_safe': True, 'parallel_write_safe': True}
diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py
index 6c32d394aaa78..1d4023437917f 100644
--- a/examples/compose/plot_column_transformer_mixed_types.py
+++ b/examples/compose/plot_column_transformer_mixed_types.py
@@ -88,14 +88,6 @@
 clf.fit(X_train, y_train)
 print("model score: %.3f" % clf.score(X_test, y_test))
 
-###############################################################################
-# HTML representation of ``Pipeline``
-###############################################################################
-# When the ``Pipeline`` is printed out in a jupyter notebook an HTML
-# representation of the estimator is displayed as follows:
-set_config(repr_html=True)
-clf
-
 ###############################################################################
 # Use ``ColumnTransformer`` by selecting column by data types
 ###############################################################################
diff --git a/sklearn/base.py b/sklearn/base.py
index a4074e1d42d3b..b843a3b06eec0 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -13,7 +13,6 @@
 import platform
 import inspect
 import re
-import html
 
 import numpy as np
 
@@ -438,11 +437,11 @@ def _validate_data(self, X, y=None, reset=True,
 
         return out
 
-    def _repr_html_(self):
-        """HTML representation of estimator."""
+    def _repr_mimebundle_(self, **kwargs):
+        output = {"text/plain": repr(self)}
         if get_config()["repr_html"]:
-            return display_estimator_html(self)
-        return None
+            output["text/html"] = display_estimator_html(self)
+        return output
 
 
 class ClassifierMixin:
diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py
index 96c82c7b0ef3f..f12c07d49090e 100644
--- a/sklearn/tests/test_base.py
+++ b/sklearn/tests/test_base.py
@@ -1,6 +1,5 @@
 # Author: Gael Varoquaux
 # License: BSD 3 clause
-import html
 
 import numpy as np
 import scipy.sparse as sp
@@ -518,10 +517,11 @@ def fit(self, X, y=None):
 def test_repr_mimebundle_():
     # Checks the repr_html configuration flag controls the json output
     tree = DecisionTreeClassifier()
-    output = tree._repr_html_()
-    assert output == f"{html.escape(str(tree))}"
+    output = tree._repr_mimebundle_()
+    assert "text/plain" in output
+    assert "text/html" not in output
 
     with config_context(repr_html=True):
-        output = tree._repr_html_()
-        # html output
-        assert "