API Rename force_all_finite into ensure_all_finite #29404

Merged: 14 commits, Jul 25, 2024
13 changes: 13 additions & 0 deletions doc/whats_new/v1.6.rst
@@ -231,6 +231,11 @@ Changelog
scoring="max_error" which is now deprecated.
:pr:`29462` by :user:`Farid "Freddie" Taba <artificialfintelligence>`.

+- |API| the `force_all_finite` parameter of functions
+  :func:`metrics.pairwise.check_pairwise_arrays` and :func:`metrics.pairwise_distances`
+  is renamed into `ensure_all_finite`. `force_all_finite` will be removed in 1.8.
+  :pr:`29404` by :user:`Jérémie du Boisberranger <jeremiedbb>`.

:mod:`sklearn.model_selection`
..............................

@@ -272,6 +277,14 @@ Changelog
traversed.
:pr:`27966` by :user:`Adam Li <adam2392>`.

+:mod:`sklearn.utils`
+....................
+
+- |API| the `force_all_finite` parameter of functions :func:`utils.check_array`,
+  :func:`utils.check_X_y` and :func:`utils.as_float_array` is renamed into
+  `ensure_all_finite`. `force_all_finite` will be removed in 1.8.
+  :pr:`29404` by :user:`Jérémie du Boisberranger <jeremiedbb>`.

.. rubric:: Code and documentation contributors

Thanks to everyone who has contributed to the maintenance and improvement of
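The entries above describe the standard scikit-learn deprecation cycle: both spellings are accepted in 1.6 and 1.7, the old one raises a FutureWarning, and 1.8 removes it. A minimal sketch of that behavior (assuming scikit-learn 1.6 and the public `sklearn.utils.check_array`):

```python
import warnings

import numpy as np
from sklearn.utils import check_array

X = np.array([[1.0, np.nan], [3.0, 4.0]])

# New spelling: validate while letting NaN through.
check_array(X, ensure_all_finite="allow-nan")

# Old spelling keeps working until 1.8 but emits a FutureWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    check_array(X, force_all_finite="allow-nan")
assert any(issubclass(w.category, FutureWarning) for w in caught)
```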
4 changes: 2 additions & 2 deletions sklearn/cluster/_hdbscan/hdbscan.py
@@ -737,7 +737,7 @@ def fit(self, X, y=None):
X = self._validate_data(
X,
accept_sparse=["csr", "lil"],
-force_all_finite=False,
+ensure_all_finite=False,
dtype=np.float64,
)
self._raw_data = X
@@ -782,7 +782,7 @@ def fit(self, X, y=None):
# Perform data validation after removing infinite values (numpy.inf)
# from the given distance matrix.
X = self._validate_data(
-X, force_all_finite=False, dtype=np.float64, force_writeable=True
+X, ensure_all_finite=False, dtype=np.float64, force_writeable=True
)
if np.isnan(X).any():
# TODO: Support np.nan in Cython implementation for precomputed
4 changes: 2 additions & 2 deletions sklearn/compose/_column_transformer.py
@@ -1137,7 +1137,7 @@ def _hstack(self, Xs, *, n_samples):
# in a sparse matrix, `check_array` is used for the
# dtype conversion if necessary.
converted_Xs = [
-check_array(X, accept_sparse=True, force_all_finite=False)
+check_array(X, accept_sparse=True, ensure_all_finite=False)
for X in Xs
]
except ValueError as e:
@@ -1325,7 +1325,7 @@ def _check_X(X):
"""Use check_array only when necessary, e.g. on lists and other non-array-likes."""
if hasattr(X, "__array__") or hasattr(X, "__dataframe__") or sparse.issparse(X):
return X
-return check_array(X, force_all_finite="allow-nan", dtype=object)
+return check_array(X, ensure_all_finite="allow-nan", dtype=object)


def _is_empty_column_selection(column):
2 changes: 1 addition & 1 deletion sklearn/compose/_target.py
@@ -262,7 +262,7 @@ def fit(self, X, y, **fit_params):
y,
input_name="y",
accept_sparse=False,
-force_all_finite=True,
+ensure_all_finite=True,
ensure_2d=False,
dtype="numeric",
allow_nd=True,
2 changes: 1 addition & 1 deletion sklearn/covariance/_empirical_covariance.py
@@ -90,7 +90,7 @@ def empirical_covariance(X, *, assume_centered=False):
[0.25, 0.25, 0.25],
[0.25, 0.25, 0.25]])
"""
-X = check_array(X, ensure_2d=False, force_all_finite=False)
+X = check_array(X, ensure_2d=False, ensure_all_finite=False)

if X.ndim == 1:
X = np.reshape(X, (1, -1))
10 changes: 5 additions & 5 deletions sklearn/ensemble/_bagging.py
@@ -391,7 +391,7 @@ def fit(self, X, y, *, sample_weight=None, **fit_params):
y,
accept_sparse=["csr", "csc"],
dtype=None,
-force_all_finite=False,
+ensure_all_finite=False,
multi_output=True,
)

@@ -941,7 +941,7 @@ def predict_proba(self, X):
X,
accept_sparse=["csr", "csc"],
dtype=None,
-force_all_finite=False,
+ensure_all_finite=False,
reset=False,
)

@@ -991,7 +991,7 @@ def predict_log_proba(self, X):
X,
accept_sparse=["csr", "csc"],
dtype=None,
-force_all_finite=False,
+ensure_all_finite=False,
reset=False,
)

@@ -1046,7 +1046,7 @@ def decision_function(self, X):
X,
accept_sparse=["csr", "csc"],
dtype=None,
-force_all_finite=False,
+ensure_all_finite=False,
reset=False,
)

@@ -1279,7 +1279,7 @@ def predict(self, X):
X,
accept_sparse=["csr", "csc"],
dtype=None,
-force_all_finite=False,
+ensure_all_finite=False,
reset=False,
)

8 changes: 4 additions & 4 deletions sklearn/ensemble/_forest.py
@@ -362,7 +362,7 @@ def fit(self, X, y, sample_weight=None):
multi_output=True,
accept_sparse="csc",
dtype=DTYPE,
-force_all_finite=False,
+ensure_all_finite=False,
)
# _compute_missing_values_in_feature_mask checks if X has missing values and
# will raise an error if the underlying tree base estimator can't handle missing
@@ -630,16 +630,16 @@ def _validate_X_predict(self, X):
Validate X whenever one tries to predict, apply, predict_proba."""
check_is_fitted(self)
if self.estimators_[0]._support_missing_values(X):
force_all_finite = "allow-nan"
ensure_all_finite = "allow-nan"
else:
-force_all_finite = True
+ensure_all_finite = True

X = self._validate_data(
X,
dtype=DTYPE,
accept_sparse="csr",
reset=False,
-force_all_finite=force_all_finite,
+ensure_all_finite=ensure_all_finite,
)
if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc):
raise ValueError("No support for np.int64 index based sparse matrices")
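`_validate_X_predict` above picks `"allow-nan"` or `True` depending on whether the fitted trees support missing values. For reference, a small sketch of the three values the parameter accepts, shown with the public `check_array` (the semantics are unchanged from `force_all_finite`):

```python
import numpy as np
from sklearn.utils import check_array

X = np.array([[1.0, np.nan]])

# "allow-nan": NaN passes, but inf is still rejected.
check_array(X, ensure_all_finite="allow-nan")

# True: any non-finite value raises.
try:
    check_array(X, ensure_all_finite=True)
except ValueError as exc:
    print(exc)  # e.g. "Input contains NaN."

# False: skip the finiteness check entirely; NaN and inf both pass.
check_array(X, ensure_all_finite=False)
```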
2 changes: 1 addition & 1 deletion sklearn/ensemble/_gb.py
@@ -773,7 +773,7 @@ def fit(self, X, y, sample_weight=None, monitor=None):
dtype=DTYPE,
order="C",
accept_sparse="csr",
-force_all_finite=False,
+ensure_all_finite=False,
)
raw_predictions = self._raw_predict(X_train)
self._resize_state()
4 changes: 2 additions & 2 deletions sklearn/ensemble/_hist_gradient_boosting/binning.py
@@ -194,7 +194,7 @@ def fit(self, X, y=None):
)
)

-X = check_array(X, dtype=[X_DTYPE], force_all_finite=False)
+X = check_array(X, dtype=[X_DTYPE], ensure_all_finite=False)
max_bins = self.n_bins - 1

rng = check_random_state(self.random_state)
@@ -275,7 +275,7 @@ def transform(self, X):
X_binned : array-like of shape (n_samples, n_features)
The binned data (fortran-aligned).
"""
-X = check_array(X, dtype=[X_DTYPE], force_all_finite=False)
+X = check_array(X, dtype=[X_DTYPE], ensure_all_finite=False)
check_is_fitted(self)
if X.shape[1] != self.n_bins_non_missing_.shape[0]:
raise ValueError(
@@ -263,7 +263,7 @@ def _preprocess_X(self, X, *, reset):
"""
# If there is a preprocessor, we let the preprocessor handle the validation.
# Otherwise, we validate the data ourselves.
-check_X_kwargs = dict(dtype=[X_DTYPE], force_all_finite=False)
+check_X_kwargs = dict(dtype=[X_DTYPE], ensure_all_finite=False)
if not reset:
if self._preprocessor is None:
return self._validate_data(X, reset=False, **check_X_kwargs)
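`_preprocess_X` validates with `ensure_all_finite=False` because histogram-based gradient boosting treats NaN as a first-class missing value, binning it instead of rejecting it. A quick sketch of that end-to-end behavior:

```python
import numpy as np
from sklearn.ensemble import HistGradientBoostingRegressor

# Tiny toy data: the point is that fit/predict accept NaN,
# which is routed to a dedicated missing-values bin.
X = np.array([[1.0], [2.0], [np.nan], [4.0]])
y = np.array([10.0, 20.0, 30.0, 40.0])

model = HistGradientBoostingRegressor(max_iter=10).fit(X, y)
print(model.predict(np.array([[np.nan]])))
```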
@@ -12,6 +12,10 @@
from sklearn.model_selection import train_test_split


+# TODO(1.8) remove the filterwarnings decorator
+@pytest.mark.filterwarnings(
+"ignore:'force_all_finite' was renamed to 'ensure_all_finite':FutureWarning"
+)
Comment on lines +16 to +18

Member: Is this coming from lightgbm?

Member Author: Yes, technically from scikit-learn as a dependency of lightgbm.

@pytest.mark.parametrize("seed", range(5))
@pytest.mark.parametrize(
"loss",
@@ -118,6 +122,10 @@ def test_same_predictions_regression(
assert np.mean(np.isclose(pred_lightgbm, pred_sklearn, rtol=1e-4)) > 1 - 0.01


+# TODO(1.8) remove the filterwarnings decorator
+@pytest.mark.filterwarnings(
+"ignore:'force_all_finite' was renamed to 'ensure_all_finite':FutureWarning"
+)
@pytest.mark.parametrize("seed", range(5))
@pytest.mark.parametrize("min_samples_leaf", (1, 20))
@pytest.mark.parametrize(
@@ -191,6 +199,10 @@ def test_same_predictions_classification(
np.testing.assert_almost_equal(acc_lightgbm, acc_sklearn, decimal=2)


+# TODO(1.8) remove the filterwarnings decorator
+@pytest.mark.filterwarnings(
+"ignore:'force_all_finite' was renamed to 'ensure_all_finite':FutureWarning"
+)
@pytest.mark.parametrize("seed", range(5))
@pytest.mark.parametrize("min_samples_leaf", (1, 20))
@pytest.mark.parametrize(
4 changes: 2 additions & 2 deletions sklearn/ensemble/_iforest.py
@@ -316,7 +316,7 @@ def fit(self, X, y=None, sample_weight=None):
Fitted estimator.
"""
X = self._validate_data(
X, accept_sparse=["csc"], dtype=tree_dtype, force_all_finite=False
X, accept_sparse=["csc"], dtype=tree_dtype, ensure_all_finite=False
)
if issparse(X):
# Pre-sort indices to avoid that each individual tree of the
@@ -522,7 +522,7 @@ def score_samples(self, X):
accept_sparse="csr",
dtype=tree_dtype,
reset=False,
-force_all_finite=False,
+ensure_all_finite=False,
)

return self._score_samples(X)
2 changes: 1 addition & 1 deletion sklearn/feature_selection/_base.py
@@ -103,7 +103,7 @@ def transform(self, X):
X,
dtype=None,
accept_sparse="csr",
-force_all_finite=not _safe_tags(self, key="allow_nan"),
+ensure_all_finite=not _safe_tags(self, key="allow_nan"),
cast_to_ndarray=not preserve_X,
reset=False,
)
4 changes: 2 additions & 2 deletions sklearn/feature_selection/_rfe.py
@@ -274,7 +274,7 @@ def _fit(self, X, y, step_score=None, **fit_params):
y,
accept_sparse="csc",
ensure_min_features=2,
-force_all_finite=False,
+ensure_all_finite=False,
multi_output=True,
)

@@ -725,7 +725,7 @@ def fit(self, X, y, groups=None):
y,
accept_sparse="csr",
ensure_min_features=2,
-force_all_finite=False,
+ensure_all_finite=False,
multi_output=True,
)

2 changes: 1 addition & 1 deletion sklearn/feature_selection/_sequential.py
@@ -214,7 +214,7 @@ def fit(self, X, y=None):
X,
accept_sparse="csc",
ensure_min_features=2,
-force_all_finite=not tags.get("allow_nan", True),
+ensure_all_finite=not tags.get("allow_nan", True),
)
n_features = X.shape[1]

2 changes: 1 addition & 1 deletion sklearn/feature_selection/_variance_threshold.py
@@ -101,7 +101,7 @@ def fit(self, X, y=None):
X,
accept_sparse=("csr", "csc"),
dtype=np.float64,
force_all_finite="allow-nan",
ensure_all_finite="allow-nan",
)

if hasattr(X, "toarray"): # sparse matrix
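The feature-selection changes combine two patterns: `fit` here passes `ensure_all_finite="allow-nan"` explicitly, while the shared `transform` in `_base.py` derives the check from the estimator's `allow_nan` tag. A sketch of the visible effect, assuming `VarianceThreshold` declares that tag and computes NaN-aware variances:

```python
import numpy as np
from sklearn.feature_selection import VarianceThreshold

X = np.array(
    [
        [1.0, np.nan, 3.0],
        [1.0, 2.0, 0.0],
        [1.0, 4.0, 1.0],
    ]
)

# fit validates with ensure_all_finite="allow-nan"; transform keeps NaN
# because the allow_nan tag disables the finiteness check there too.
selector = VarianceThreshold().fit(X)
print(selector.transform(X))  # constant first column dropped, NaN kept
```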
12 changes: 6 additions & 6 deletions sklearn/impute/_base.py
@@ -323,9 +323,9 @@ def _validate_input(self, X, in_fit):
dtype = self._fit_dtype

if is_pandas_na(self.missing_values) or is_scalar_nan(self.missing_values):
force_all_finite = "allow-nan"
ensure_all_finite = "allow-nan"
else:
-force_all_finite = True
+ensure_all_finite = True

try:
X = self._validate_data(
@@ -334,7 +334,7 @@ def _validate_input(self, X, in_fit):
accept_sparse="csc",
dtype=dtype,
force_writeable=True if not in_fit else None,
-force_all_finite=force_all_finite,
+ensure_all_finite=ensure_all_finite,
copy=self.copy,
)
except ValueError as ve:
@@ -893,15 +893,15 @@ def _get_missing_features_info(self, X):

def _validate_input(self, X, in_fit):
if not is_scalar_nan(self.missing_values):
-force_all_finite = True
+ensure_all_finite = True
else:
force_all_finite = "allow-nan"
ensure_all_finite = "allow-nan"
X = self._validate_data(
X,
reset=in_fit,
accept_sparse=("csc", "csr"),
dtype=None,
-force_all_finite=force_all_finite,
+ensure_all_finite=ensure_all_finite,
)
_check_inputs_dtype(X, self.missing_values)
if X.dtype.kind not in ("i", "u", "f", "O"):
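`_validate_input` above only relaxes validation to `"allow-nan"` when `missing_values` is NaN (or pandas NA); with any other sentinel, NaN remains a validation error. A sketch of both paths through `SimpleImputer`:

```python
import numpy as np
from sklearn.impute import SimpleImputer

# missing_values=np.nan -> validation runs with ensure_all_finite="allow-nan".
X_nan = np.array([[1.0, np.nan], [3.0, 4.0]])
print(SimpleImputer(missing_values=np.nan).fit_transform(X_nan))

# Sentinel missing value -> ensure_all_finite=True: the sentinel is imputed,
# but a stray NaN in the data would now raise at validation time.
X_sentinel = np.array([[1.0, -1.0], [3.0, 4.0]])
print(SimpleImputer(missing_values=-1.0).fit_transform(X_sentinel))
```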
8 changes: 4 additions & 4 deletions sklearn/impute/_iterative.py
@@ -614,16 +614,16 @@ def _initial_imputation(self, X, in_fit=False):
number of features.
"""
if is_scalar_nan(self.missing_values):
force_all_finite = "allow-nan"
ensure_all_finite = "allow-nan"
else:
-force_all_finite = True
+ensure_all_finite = True

X = self._validate_data(
X,
dtype=FLOAT_DTYPES,
order="F",
reset=in_fit,
-force_all_finite=force_all_finite,
+ensure_all_finite=ensure_all_finite,
)
_check_inputs_dtype(X, self.missing_values)

@@ -680,7 +680,7 @@ def _validate_limit(limit, limit_type, n_features):
limit = limit_bound if limit is None else limit
if np.isscalar(limit):
limit = np.full(n_features, limit)
-limit = check_array(limit, force_all_finite=False, copy=False, ensure_2d=False)
+limit = check_array(limit, ensure_all_finite=False, copy=False, ensure_2d=False)
if not limit.shape[0] == n_features:
raise ValueError(
f"'{limit_type}_value' should be of "