Skip to content

TST test_bagging_regressor/classifier_with_missing_inputs fails with SimpleImputer #11482

Closed
@qinhanmin2014

Description

@qinhanmin2014

See #11480
Git blame shows that we introduced a regression in #11211.
Pinging the author @jeremiedbb and the reviewers @jnothman @glemaitre @ogrisel @jorisvandenbossche.
Below are the logs from test_bagging_regressor_with_missing_inputs:

__________________ test_bagging_regressor_with_missing_inputs __________________
    def test_bagging_regressor_with_missing_inputs():
        # Check that BaggingRegressor can accept X with missing/infinite data
        X = np.array([
            [1, 3, 5],
            [2, None, 6],
            [2, np.nan, 6],
            [2, np.inf, 6],
            [2, np.NINF, 6],
        ])
        y_values = [
            np.array([2, 3, 3, 3, 3]),
            np.array([
                [2, 1, 9],
                [3, 6, 8],
                [3, 6, 8],
                [3, 6, 8],
                [3, 6, 8],
            ])
        ]
        for y in y_values:
            regressor = DecisionTreeRegressor()
            pipeline = make_pipeline(
                SimpleImputer(),
                SimpleImputer(missing_values=np.inf),
                SimpleImputer(missing_values=np.NINF),
                regressor
            )
>           pipeline.fit(X, y).predict(X)
X          = array([[1, 3, 5],
       [2, None, 6],
       [2, nan, 6],
       [2, inf, 6],
       [2, -inf, 6]], dtype=object)
pipeline   = Pipeline(memory=None,
     steps=[('simpleimputer-1', SimpleImputer(copy=True,...tion_leaf=0.0,
           presort=False, random_state=None, splitter='best'))])
regressor  = DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
    ...raction_leaf=0.0,
           presort=False, random_state=None, splitter='best')
y          = array([2, 3, 3, 3, 3])
y_values   = [array([2, 3, 3, 3, 3]), array([[2, 1, 9],
       [3, 6, 8],
       [3, 6, 8],
       [3, 6, 8],
       [3, 6, 8]])]
/home/travis/build/scikit-learn/scikit-learn/sklearn/ensemble/tests/test_bagging.py:785: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/home/travis/build/scikit-learn/scikit-learn/sklearn/pipeline.py:253: in fit
    Xt, fit_params = self._fit(X, y, **fit_params)
/home/travis/build/scikit-learn/scikit-learn/sklearn/pipeline.py:218: in _fit
    **fit_params_steps[name])
/home/travis/build/scikit-learn/scikit-learn/sklearn/externals/_joblib/memory.py:362: in __call__
    return self.func(*args, **kwargs)
/home/travis/build/scikit-learn/scikit-learn/sklearn/pipeline.py:602: in _fit_transform_one
    res = transformer.fit_transform(X, y, **fit_params)
/home/travis/build/scikit-learn/scikit-learn/sklearn/base.py:462: in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
/home/travis/build/scikit-learn/scikit-learn/sklearn/impute.py:209: in fit
    X = self._validate_input(X)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
self = SimpleImputer(copy=True, fill_value=None, missing_values=nan, strategy='mean',
       verbose=0)
X = array([[1, 3, 5],
       [2, None, 6],
       [2, nan, 6],
       [2, inf, 6],
       [2, -inf, 6]], dtype=object)
    def _validate_input(self, X):
        allowed_strategies = ["mean", "median", "most_frequent", "constant"]
        if self.strategy not in allowed_strategies:
            raise ValueError("Can only use these strategies: {0} "
                             " got strategy={1}".format(allowed_strategies,
                                                        self.strategy))
    
        if self.strategy in ("most_frequent", "constant"):
            dtype = None
        else:
            dtype = FLOAT_DTYPES
    
        if not is_scalar_nan(self.missing_values):
            force_all_finite = True
        else:
            force_all_finite = "allow-nan"
    
        try:
            X = check_array(X, accept_sparse='csc', dtype=dtype,
                            force_all_finite=force_all_finite, copy=self.copy)
        except ValueError as ve:
            if "could not convert" in str(ve):
                raise ValueError("Cannot use {0} strategy with non-numeric "
                                 "data. Received datatype :{1}."
                                 "".format(self.strategy, X.dtype.kind))
            else:
>               raise ve
E               ValueError: Input contains infinity or a value too large for dtype('float64').
X          = array([[1, 3, 5],
       [2, None, 6],
       [2, nan, 6],
       [2, inf, 6],
       [2, -inf, 6]], dtype=object)
allowed_strategies = ['mean', 'median', 'most_frequent', 'constant']
dtype      = (<type 'numpy.float64'>, <type 'numpy.float32'>, <type 'numpy.float16'>)
force_all_finite = 'allow-nan'
self       = SimpleImputer(copy=True, fill_value=None, missing_values=nan, strategy='mean',
       verbose=0)
ve         = ValueError("Input contains infinity or a value too large for dtype('float64').",)

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions