diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 4c54c496490f0..2a2b007783f27 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -567,14 +567,18 @@ will use the column names to select the columns:: Visualizing Composite Estimators ================================ -Estimators can be displayed with a HTML representation when shown in a -jupyter notebook. This can be useful to diagnose or visualize a Pipeline with -many estimators. This visualization is activated by setting the -`display` option in :func:`~sklearn.set_config`:: +Estimators are displayed with an HTML representation when shown in a +jupyter notebook. This is useful to diagnose or visualize a Pipeline with +many estimators. This visualization is activated by default:: + + >>> column_trans # doctest: +SKIP + +It can be deactivated by setting the `display` option in :func:`~sklearn.set_config` +to 'text':: >>> from sklearn import set_config - >>> set_config(display='diagram') # doctest: +SKIP - >>> # displays HTML representation in a jupyter context + >>> set_config(display='text') # doctest: +SKIP + >>> # displays text representation in a jupyter context >>> column_trans # doctest: +SKIP An example of the HTML output can be seen in the diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index abadb71b0c86d..963284a64bd5b 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -889,6 +889,11 @@ Changelog removed in version 1.3. Use :func:`utils.metaestimators.available_if` instead. :pr:`22830` by :user:`Jérémie du Boisberranger <jeremiedbb>`. +- |API| Rich HTML representation of estimators is now enabled by default in Jupyter + notebooks. It can be deactivated by setting `display='text'` in + :func:`~sklearn.set_config`. + :pr:`22856` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
+ Code and Documentation Contributors ----------------------------------- diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 307f0384ab1d2..afc0b07f5d126 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -28,6 +28,7 @@ # # License: BSD 3 clause +# %% import numpy as np from sklearn.compose import ColumnTransformer @@ -40,6 +41,7 @@ np.random.seed(0) +# %% # Load data from https://www.openml.org/d/40945 X, y = fetch_openml("titanic", version=1, as_frame=True, return_X_y=True) @@ -49,7 +51,7 @@ # %% # Use ``ColumnTransformer`` by selecting column by names -############################################################################### +# # We will train our classifier with the following features: # # Numeric Features: @@ -82,6 +84,7 @@ ] ) +# %% # Append classifier to preprocessing pipeline. # Now we have a full prediction pipeline. clf = Pipeline( @@ -95,17 +98,14 @@ # %% # HTML representation of ``Pipeline`` (display diagram) -############################################################################### +# # When the ``Pipeline`` is printed out in a jupyter notebook an HTML -# representation of the estimator is displayed as follows: -from sklearn import set_config - -set_config(display="diagram") +# representation of the estimator is displayed: clf # %% # Use ``ColumnTransformer`` by selecting column by data types -############################################################################### +# # When dealing with a cleaned dataset, the preprocessing can be automatic by # using the data types of the column to decide whether to treat a column as a # numerical or categorical feature. 
@@ -150,6 +150,7 @@ clf.fit(X_train, y_train) print("model score: %.3f" % clf.score(X_test, y_test)) +clf # %% # The resulting score is not exactly the same as the one from the previous @@ -164,7 +165,7 @@ # %% # Using the prediction pipeline in a grid search -############################################################################## +# # Grid search can also be performed on the different preprocessing steps # defined in the ``ColumnTransformer`` object, together with the classifier's # hyperparameters as part of the ``Pipeline``. diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py index 53c5d9b1a233c..409396a0376b8 100644 --- a/examples/ensemble/plot_feature_transformation.py +++ b/examples/ensemble/plot_feature_transformation.py @@ -25,10 +25,6 @@ # # License: BSD 3 clause -from sklearn import set_config - -set_config(display="diagram") - # %% # First, we will create a large dataset and split it into three sets: # diff --git a/examples/ensemble/plot_stack_predictors.py b/examples/ensemble/plot_stack_predictors.py index 959adfbcf4dfc..7737e91a0fdec 100644 --- a/examples/ensemble/plot_stack_predictors.py +++ b/examples/ensemble/plot_stack_predictors.py @@ -20,10 +20,6 @@ # Maria Telenczuk # License: BSD 3 clause -from sklearn import set_config - -set_config(display="diagram") - # %% # Download the dataset ############################################################################## diff --git a/examples/feature_selection/plot_feature_selection_pipeline.py b/examples/feature_selection/plot_feature_selection_pipeline.py index 22a8bfc8dd5c3..7a9a621aad5ee 100644 --- a/examples/feature_selection/plot_feature_selection_pipeline.py +++ b/examples/feature_selection/plot_feature_selection_pipeline.py @@ -10,10 +10,6 @@ """ -from sklearn import set_config - -set_config(display="diagram") - # %% # We will start by generating a binary classification dataset. 
Subsequently, we # will divide the dataset into two subsets. diff --git a/examples/linear_model/plot_lasso_lars_ic.py b/examples/linear_model/plot_lasso_lars_ic.py index 2f5392696ecc9..31a15b2ba582c 100644 --- a/examples/linear_model/plot_lasso_lars_ic.py +++ b/examples/linear_model/plot_lasso_lars_ic.py @@ -28,11 +28,6 @@ # Guillaume Lemaitre # License: BSD 3 clause -# %% -import sklearn - -sklearn.set_config(display="diagram") - # %% # We will use the diabetes dataset. from sklearn.datasets import load_diabetes diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py index 7cc05055b22d9..bf2111e32b427 100644 --- a/examples/linear_model/plot_lasso_model_selection.py +++ b/examples/linear_model/plot_lasso_model_selection.py @@ -19,11 +19,6 @@ # Guillaume Lemaitre # License: BSD 3 clause -# %% -import sklearn - -sklearn.set_config(display="diagram") - # %% # Dataset # ------- diff --git a/examples/miscellaneous/plot_pipeline_display.py b/examples/miscellaneous/plot_pipeline_display.py index 0ba638afb0593..3e785551132b5 100755 --- a/examples/miscellaneous/plot_pipeline_display.py +++ b/examples/miscellaneous/plot_pipeline_display.py @@ -3,9 +3,9 @@ Displaying Pipelines ================================================================= -The default configuration for displaying a pipeline is `'text'` where -`set_config(display='text')`. To visualize the diagram in Jupyter Notebook, -use `set_config(display='diagram')` and then output the pipeline object. +The default configuration for displaying a pipeline in a Jupyter Notebook is +`'diagram'`, i.e. `set_config(display='diagram')`. To deactivate the HTML representation, +use `set_config(display='text')`. To see more detailed steps in the visualization of the pipeline, click on the steps in the pipeline. @@ -31,14 +31,18 @@ pipe = Pipeline(steps) # %% -# To view the text pipeline, the default is `display='text'`.
+# By default (`display='diagram'`), the pipeline is rendered as a diagram. +set_config(display="diagram") +pipe # click on the diagram below to see the details of each step + +# %% +# To view the text pipeline, change to `display='text'`. set_config(display="text") pipe # %% -# To visualize the diagram, change `display='diagram'`. +# Put back the default display set_config(display="diagram") -pipe # click on the diagram below to see the details of each step # %% # Displaying a Pipeline Chaining Multiple Preprocessing Steps & Classifier @@ -52,7 +56,6 @@ from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler, PolynomialFeatures from sklearn.linear_model import LogisticRegression -from sklearn import set_config steps = [ ("standard_scaler", StandardScaler()), @@ -60,10 +63,6 @@ ("classifier", LogisticRegression(C=2.0)), ] pipe = Pipeline(steps) - -# %% -# To visualize the diagram, change to display='diagram' -set_config(display="diagram") pipe # click on the diagram below to see the details of each step # %% @@ -77,14 +76,9 @@ from sklearn.pipeline import Pipeline from sklearn.svm import SVC from sklearn.decomposition import PCA -from sklearn import set_config steps = [("reduce_dim", PCA(n_components=4)), ("classifier", SVC(kernel="linear"))] pipe = Pipeline(steps) - -# %% -# To visualize the diagram, change to `display='diagram'`.
-set_config(display="diagram") pipe # click on the diagram below to see the details of each step # %% @@ -102,7 +96,6 @@ from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder, StandardScaler from sklearn.linear_model import LogisticRegression -from sklearn import set_config numeric_preprocessor = Pipeline( steps=[ @@ -129,10 +122,6 @@ ) pipe = make_pipeline(preprocessor, LogisticRegression(max_iter=500)) - -# %% -# To visualize the diagram, change to `display='diagram'` -set_config(display="diagram") pipe # click on the diagram below to see the details of each step # %% @@ -151,7 +140,6 @@ from sklearn.preprocessing import OneHotEncoder, StandardScaler from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import GridSearchCV -from sklearn import set_config numeric_preprocessor = Pipeline( steps=[ @@ -189,8 +177,4 @@ } grid_search = GridSearchCV(pipe, param_grid=param_grid, n_jobs=1) - -# %% -# To visualize the diagram, change to `display='diagram'`. -set_config(display="diagram") grid_search # click on the diagram below to see the details of each step diff --git a/sklearn/_config.py b/sklearn/_config.py index 6248025b05aa0..c865b879dbea3 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -8,7 +8,7 @@ "assume_finite": bool(os.environ.get("SKLEARN_ASSUME_FINITE", False)), "working_memory": int(os.environ.get("SKLEARN_WORKING_MEMORY", 1024)), "print_changed_only": True, - "display": "text", + "display": "diagram", "pairwise_dist_chunk_size": int( os.environ.get("SKLEARN_PAIRWISE_DIST_CHUNK_SIZE", 256) ), @@ -85,7 +85,7 @@ def set_config( display : {'text', 'diagram'}, default=None If 'diagram', estimators will be displayed as a diagram in a Jupyter lab or notebook context. If 'text', estimators will be displayed as - text. Default is 'text'. + text. Default is 'diagram'. .. 
versionadded:: 0.23 @@ -173,7 +173,7 @@ def config_context( If 'diagram', estimators will be displayed as a diagram in a Jupyter lab or notebook context. If 'text', estimators will be displayed as text. If None, the existing value won't change. - The default value is 'text'. + The default value is 'diagram'. .. versionadded:: 0.23 diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 2608b77622e9a..88a065fe79657 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -539,24 +539,25 @@ def test_repr_mimebundle_(): tree = DecisionTreeClassifier() output = tree._repr_mimebundle_() assert "text/plain" in output - assert "text/html" not in output + assert "text/html" in output - with config_context(display="diagram"): + with config_context(display="text"): output = tree._repr_mimebundle_() assert "text/plain" in output - assert "text/html" in output + assert "text/html" not in output def test_repr_html_wraps(): # Checks the display configuration flag controls the html output tree = DecisionTreeClassifier() - msg = "_repr_html_ is only defined when" - with pytest.raises(AttributeError, match=msg): - output = tree._repr_html_() - with config_context(display="diagram"): - output = tree._repr_html_() - assert "