Skip to content

Commit a2eb04d

Browse files
TST Adjust learning rate to compare with LightGBM 3.X (#21082)
Co-authored-by: Christian Lorentzen <lorentzen.ch@gmail.com>
1 parent 9b90126 commit a2eb04d

File tree

5 files changed

+26
-12
lines changed

5 files changed

+26
-12
lines changed

benchmarks/bench_hist_gradient_boosting.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,9 @@ def one_run(n_samples):
140140
lightgbm_score_duration = None
141141
if args.lightgbm:
142142
print("Fitting a LightGBM model...")
143-
lightgbm_est = get_equivalent_estimator(est, lib="lightgbm")
143+
lightgbm_est = get_equivalent_estimator(
144+
est, lib="lightgbm", n_classes=args.n_classes
145+
)
144146

145147
tic = time()
146148
lightgbm_est.fit(X_train, y_train, sample_weight=sample_weight_train)

benchmarks/bench_hist_gradient_boosting_threading.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ def get_estimator_and_data():
135135
for libname in ["lightgbm", "xgboost", "catboost"]:
136136
if getattr(args, libname):
137137
print(libname)
138-
est = get_equivalent_estimator(sklearn_est, lib=libname)
138+
est = get_equivalent_estimator(
139+
sklearn_est, lib=libname, n_classes=args.n_classes
140+
)
139141
pprint(est.get_params())
140142

141143

@@ -169,7 +171,9 @@ def one_run(n_threads, n_samples):
169171
lightgbm_score_duration = None
170172
if args.lightgbm:
171173
print("Fitting a LightGBM model...")
172-
lightgbm_est = get_equivalent_estimator(est, lib="lightgbm")
174+
lightgbm_est = get_equivalent_estimator(
175+
est, lib="lightgbm", n_classes=args.n_classes
176+
)
173177
lightgbm_est.set_params(num_threads=n_threads)
174178

175179
tic = time()

build_tools/azure/install.sh

+2-3
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,8 @@ elif [[ "$DISTRIB" == "conda-pip-latest" ]]; then
113113
python -m pip install --only-binary :all: scikit-image || true
114114

115115
python -m pip install pandas matplotlib pyamg
116-
# do not install dependencies for lightgbm since it requires scikit-learn
117-
# and install a version less than 3.0.0 until the issue #18316 is solved.
118-
python -m pip install "lightgbm<3.0.0" --no-deps
116+
# do not install dependencies for lightgbm since it requires scikit-learn.
117+
python -m pip install "lightgbm>=3.0.0" --no-deps
119118
elif [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then
120119
# FIXME: temporary fix to link against system libraries on linux
121120
export LDFLAGS="$LDFLAGS -Wl,--sysroot=/"

sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,12 @@ def test_same_predictions_classification(
104104

105105
rng = np.random.RandomState(seed=seed)
106106
max_iter = 1
107+
n_classes = 2
107108
max_bins = 255
108109

109110
X, y = make_classification(
110111
n_samples=n_samples,
111-
n_classes=2,
112+
n_classes=n_classes,
112113
n_features=5,
113114
n_informative=5,
114115
n_redundant=0,
@@ -174,13 +175,14 @@ def test_same_predictions_multiclass_classification(
174175
pytest.importorskip("lightgbm")
175176

176177
rng = np.random.RandomState(seed=seed)
178+
n_classes = 3
177179
max_iter = 1
178180
max_bins = 255
179181
lr = 1
180182

181183
X, y = make_classification(
182184
n_samples=n_samples,
183-
n_classes=3,
185+
n_classes=n_classes,
184186
n_features=5,
185187
n_informative=5,
186188
n_redundant=0,
@@ -204,7 +206,9 @@ def test_same_predictions_multiclass_classification(
204206
min_samples_leaf=min_samples_leaf,
205207
max_leaf_nodes=max_leaf_nodes,
206208
)
207-
est_lightgbm = get_equivalent_estimator(est_sklearn, lib="lightgbm")
209+
est_lightgbm = get_equivalent_estimator(
210+
est_sklearn, lib="lightgbm", n_classes=n_classes
211+
)
208212

209213
est_lightgbm.fit(X_train, y_train)
210214
est_sklearn.fit(X_train, y_train)
@@ -224,7 +228,8 @@ def test_same_predictions_multiclass_classification(
224228

225229
acc_lightgbm = accuracy_score(y_train, pred_lightgbm)
226230
acc_sklearn = accuracy_score(y_train, pred_sklearn)
227-
np.testing.assert_almost_equal(acc_lightgbm, acc_sklearn, decimal=2)
231+
232+
np.testing.assert_allclose(acc_lightgbm, acc_sklearn, rtol=0, atol=5e-2)
228233

229234
if max_leaf_nodes < 10 and n_samples >= 1000:
230235

sklearn/ensemble/_hist_gradient_boosting/utils.pyx

+6-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ from .common cimport G_H_DTYPE_C
1313
from .common cimport Y_DTYPE_C
1414

1515

16-
def get_equivalent_estimator(estimator, lib='lightgbm'):
16+
def get_equivalent_estimator(estimator, lib='lightgbm', n_classes=None):
1717
"""Return an unfitted estimator from another lib with matching hyperparams.
1818
1919
This utility function takes care of renaming the sklearn parameters into
@@ -70,7 +70,11 @@ def get_equivalent_estimator(estimator, lib='lightgbm'):
7070
if sklearn_params['loss'] == 'categorical_crossentropy':
7171
# LightGBM multiplies hessians by 2 in multiclass loss.
7272
lightgbm_params['min_sum_hessian_in_leaf'] *= 2
73-
lightgbm_params['learning_rate'] *= 2
73+
# LightGBM 3.0 introduced a different scaling of the hessian for the multiclass case.
74+
# It is equivalent to scaling the learning rate.
75+
# See https://github.com/microsoft/LightGBM/pull/3256.
76+
if n_classes is not None:
77+
lightgbm_params['learning_rate'] *= n_classes / (n_classes - 1)
7478

7579
# XGB
7680
xgboost_loss_mapping = {

0 commit comments

Comments
 (0)