Closed
Description
See #11480
Git blame shows that we've introduced a regression in #11211.
Pinging the author @jeremiedbb and the reviewers @jnothman @glemaitre @ogrisel @jorisvandenbossche.
Below are the logs from test_bagging_regressor_with_missing_inputs:
__________________ test_bagging_regressor_with_missing_inputs __________________
def test_bagging_regressor_with_missing_inputs():
# Check that BaggingRegressor can accept X with missing/infinite data
X = np.array([
[1, 3, 5],
[2, None, 6],
[2, np.nan, 6],
[2, np.inf, 6],
[2, np.NINF, 6],
])
y_values = [
np.array([2, 3, 3, 3, 3]),
np.array([
[2, 1, 9],
[3, 6, 8],
[3, 6, 8],
[3, 6, 8],
[3, 6, 8],
])
]
for y in y_values:
regressor = DecisionTreeRegressor()
pipeline = make_pipeline(
SimpleImputer(),
SimpleImputer(missing_values=np.inf),
SimpleImputer(missing_values=np.NINF),
regressor
)
> pipeline.fit(X, y).predict(X)
X = array([[1, 3, 5],
[2, None, 6],
[2, nan, 6],
[2, inf, 6],
[2, -inf, 6]], dtype=object)
pipeline = Pipeline(memory=None,
steps=[('simpleimputer-1', SimpleImputer(copy=True,...tion_leaf=0.0,
presort=False, random_state=None, splitter='best'))])
regressor = DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
...raction_leaf=0.0,
presort=False, random_state=None, splitter='best')
y = array([2, 3, 3, 3, 3])
y_values = [array([2, 3, 3, 3, 3]), array([[2, 1, 9],
[3, 6, 8],
[3, 6, 8],
[3, 6, 8],
[3, 6, 8]])]
/home/travis/build/scikit-learn/scikit-learn/sklearn/ensemble/tests/test_bagging.py:785:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/home/travis/build/scikit-learn/scikit-learn/sklearn/pipeline.py:253: in fit
Xt, fit_params = self._fit(X, y, **fit_params)
/home/travis/build/scikit-learn/scikit-learn/sklearn/pipeline.py:218: in _fit
**fit_params_steps[name])
/home/travis/build/scikit-learn/scikit-learn/sklearn/externals/_joblib/memory.py:362: in __call__
return self.func(*args, **kwargs)
/home/travis/build/scikit-learn/scikit-learn/sklearn/pipeline.py:602: in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
/home/travis/build/scikit-learn/scikit-learn/sklearn/base.py:462: in fit_transform
return self.fit(X, y, **fit_params).transform(X)
/home/travis/build/scikit-learn/scikit-learn/sklearn/impute.py:209: in fit
X = self._validate_input(X)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = SimpleImputer(copy=True, fill_value=None, missing_values=nan, strategy='mean',
verbose=0)
X = array([[1, 3, 5],
[2, None, 6],
[2, nan, 6],
[2, inf, 6],
[2, -inf, 6]], dtype=object)
def _validate_input(self, X):
allowed_strategies = ["mean", "median", "most_frequent", "constant"]
if self.strategy not in allowed_strategies:
raise ValueError("Can only use these strategies: {0} "
" got strategy={1}".format(allowed_strategies,
self.strategy))
if self.strategy in ("most_frequent", "constant"):
dtype = None
else:
dtype = FLOAT_DTYPES
if not is_scalar_nan(self.missing_values):
force_all_finite = True
else:
force_all_finite = "allow-nan"
try:
X = check_array(X, accept_sparse='csc', dtype=dtype,
force_all_finite=force_all_finite, copy=self.copy)
except ValueError as ve:
if "could not convert" in str(ve):
raise ValueError("Cannot use {0} strategy with non-numeric "
"data. Received datatype :{1}."
"".format(self.strategy, X.dtype.kind))
else:
> raise ve
E ValueError: Input contains infinity or a value too large for dtype('float64').
X = array([[1, 3, 5],
[2, None, 6],
[2, nan, 6],
[2, inf, 6],
[2, -inf, 6]], dtype=object)
allowed_strategies = ['mean', 'median', 'most_frequent', 'constant']
dtype = (<type 'numpy.float64'>, <type 'numpy.float32'>, <type 'numpy.float16'>)
force_all_finite = 'allow-nan'
self = SimpleImputer(copy=True, fill_value=None, missing_values=nan, strategy='mean',
verbose=0)
ve = ValueError("Input contains infinity or a value too large for dtype('float64').",)