Commit 4afccb9

MNT replace fetch_california_housing with make_regression in getting_started.rst and compose.rst (scikit-learn#31579)
1 parent 9bf3c41 commit 4afccb9

File tree

2 files changed: +22 −13 lines


doc/getting_started.rst

Lines changed: 7 additions & 3 deletions

@@ -167,13 +167,17 @@ a :class:`~sklearn.ensemble.RandomForestRegressor` that has been fitted with
     the best set of parameters. Read more in the :ref:`User Guide
     <grid_search>`::

-    >>> from sklearn.datasets import fetch_california_housing
+    >>> from sklearn.datasets import make_regression
     >>> from sklearn.ensemble import RandomForestRegressor
     >>> from sklearn.model_selection import RandomizedSearchCV
     >>> from sklearn.model_selection import train_test_split
     >>> from scipy.stats import randint
     ...
-    >>> X, y = fetch_california_housing(return_X_y=True)
+    >>> # create a synthetic dataset
+    >>> X, y = make_regression(n_samples=20640,
+    ...                        n_features=8,
+    ...                        noise=0.1,
+    ...                        random_state=0)
     >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
     ...
     >>> # define the parameter space that will be searched over
@@ -196,7 +200,7 @@ the best set of parameters. Read more in the :ref:`User Guide
     >>> # the search object now acts like a normal random forest estimator
     >>> # with max_depth=9 and n_estimators=4
     >>> search.score(X_test, y_test)
-    0.73...
+    0.84...

     .. note::
doc/modules/compose.rst

Lines changed: 15 additions & 10 deletions

@@ -286,12 +286,17 @@ the regressor that will be used for prediction, and the transformer that will
     be applied to the target variable::

     >>> import numpy as np
-    >>> from sklearn.datasets import fetch_california_housing
+    >>> from sklearn.datasets import make_regression
     >>> from sklearn.compose import TransformedTargetRegressor
     >>> from sklearn.preprocessing import QuantileTransformer
     >>> from sklearn.linear_model import LinearRegression
     >>> from sklearn.model_selection import train_test_split
-    >>> X, y = fetch_california_housing(return_X_y=True)
+    >>> # create a synthetic dataset
+    >>> X, y = make_regression(n_samples=20640,
+    ...                        n_features=8,
+    ...                        noise=100.0,
+    ...                        random_state=0)
+    >>> y = np.exp(1 + (y - y.min()) * (4 / (y.max() - y.min())))
     >>> X, y = X[:2000, :], y[:2000] # select a subset of data
     >>> transformer = QuantileTransformer(output_distribution='normal')
     >>> regressor = LinearRegression()
@@ -300,11 +305,11 @@ be applied to the target variable::
     >>> X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
     >>> regr.fit(X_train, y_train)
     TransformedTargetRegressor(...)
-    >>> print('R2 score: {0:.2f}'.format(regr.score(X_test, y_test)))
-    R2 score: 0.61
+    >>> print(f"R2 score: {regr.score(X_test, y_test):.2f}")
+    R2 score: 0.67
     >>> raw_target_regr = LinearRegression().fit(X_train, y_train)
-    >>> print('R2 score: {0:.2f}'.format(raw_target_regr.score(X_test, y_test)))
-    R2 score: 0.59
+    >>> print(f"R2 score: {raw_target_regr.score(X_test, y_test):.2f}")
+    R2 score: 0.64

     For simple transformations, instead of a Transformer object, a pair of
     functions can be passed, defining the transformation and its inverse mapping::
@@ -321,8 +326,8 @@ Subsequently, the object is created as::
     ...                                   inverse_func=inverse_func)
     >>> regr.fit(X_train, y_train)
     TransformedTargetRegressor(...)
-    >>> print('R2 score: {0:.2f}'.format(regr.score(X_test, y_test)))
-    R2 score: 0.51
+    >>> print(f"R2 score: {regr.score(X_test, y_test):.2f}")
+    R2 score: 0.67

     By default, the provided functions are checked at each fit to be the inverse of
     each other. However, it is possible to bypass this checking by setting
@@ -336,8 +341,8 @@ each other. However, it is possible to bypass this checking by setting
     ...                                   check_inverse=False)
     >>> regr.fit(X_train, y_train)
     TransformedTargetRegressor(...)
-    >>> print('R2 score: {0:.2f}'.format(regr.score(X_test, y_test)))
-    R2 score: -1.57
+    >>> print(f"R2 score: {regr.score(X_test, y_test):.2f}")
+    R2 score: -3.02

     .. note::