diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 741a92767e953..a1f7a699b71c9 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -110,7 +110,9 @@ ("passthrough_numeric", "passthrough", ["BonusMalus"]), ( "binned_numeric", - KBinsDiscretizer(n_bins=10, random_state=0), + KBinsDiscretizer( + n_bins=10, quantile_method="averaged_inverted_cdf", random_state=0 + ), ["VehAge", "DrivAge"], ), ("log_scaled_numeric", log_scale_transformer, ["Density"]), diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index e479e78ba37b7..3acc2b5f1472f 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -239,7 +239,9 @@ def score_estimator( [ ( "binned_numeric", - KBinsDiscretizer(n_bins=10, random_state=0), + KBinsDiscretizer( + n_bins=10, quantile_method="averaged_inverted_cdf", random_state=0 + ), ["VehAge", "DrivAge"], ), ( @@ -689,8 +691,7 @@ def lorenz_curve(y_true, y_pred, exposure): ax.set( title="Lorenz Curves", xlabel=( - "Cumulative proportion of exposure\n" - "(ordered by model from safest to riskiest)" + "Cumulative proportion of exposure\n(ordered by model from safest to riskiest)" ), ylabel="Cumulative proportion of claim amounts", ) diff --git a/examples/preprocessing/plot_discretization.py b/examples/preprocessing/plot_discretization.py index 0e64a3efd4465..833d456f5b5f6 100644 --- a/examples/preprocessing/plot_discretization.py +++ b/examples/preprocessing/plot_discretization.py @@ -44,7 +44,9 @@ X = X.reshape(-1, 1) # transform the dataset with KBinsDiscretizer -enc = KBinsDiscretizer(n_bins=10, encode="onehot") +enc = KBinsDiscretizer( + n_bins=10, encode="onehot", quantile_method="averaged_inverted_cdf" +) X_binned = enc.fit_transform(X) # predict with original dataset diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py index 1eeb9f169bf3b..9f1dccb6a0275 100644 --- a/examples/preprocessing/plot_discretization_classification.py +++ b/examples/preprocessing/plot_discretization_classification.py @@ -72,7 +72,9 @@ def get_name(estimator): ( make_pipeline( StandardScaler(), - KBinsDiscretizer(encode="onehot", random_state=0), + KBinsDiscretizer( + encode="onehot", quantile_method="averaged_inverted_cdf", random_state=0 + ), LogisticRegression(random_state=0), ), { @@ -83,7 +85,9 @@ def get_name(estimator): ( make_pipeline( StandardScaler(), - KBinsDiscretizer(encode="onehot", random_state=0), + KBinsDiscretizer( + encode="onehot", quantile_method="averaged_inverted_cdf", random_state=0 + ), LinearSVC(random_state=0), ), { diff --git a/examples/preprocessing/plot_discretization_strategies.py b/examples/preprocessing/plot_discretization_strategies.py index d2a967e884eee..6a201b642d3c3 100644 --- a/examples/preprocessing/plot_discretization_strategies.py +++ b/examples/preprocessing/plot_discretization_strategies.py @@ -76,7 +76,12 @@ i += 1 # transform the dataset with KBinsDiscretizer for strategy in strategies: - enc = KBinsDiscretizer(n_bins=4, encode="ordinal", strategy=strategy) + enc = KBinsDiscretizer( + n_bins=4, + encode="ordinal", + quantile_method="averaged_inverted_cdf", + strategy=strategy, + ) enc.fit(X) grid_encoded = enc.transform(grid) diff --git a/examples/release_highlights/plot_release_highlights_1_2_0.py b/examples/release_highlights/plot_release_highlights_1_2_0.py index 4a501e8d8c1dc..e01372650b016 100644 --- a/examples/release_highlights/plot_release_highlights_1_2_0.py +++ b/examples/release_highlights/plot_release_highlights_1_2_0.py @@ -42,7 +42,11 @@ preprocessor = ColumnTransformer( [ ("scaler", StandardScaler(), sepal_cols), - ("kbin", KBinsDiscretizer(encode="ordinal"), petal_cols), + ( + "kbin", + KBinsDiscretizer(encode="ordinal", quantile_method="averaged_inverted_cdf"), + petal_cols, + ), ], verbose_feature_names_out=False, ).set_output(transform="pandas")