Skip to content

Commit 3468e00

Browse files
committed
Merge pull request #4057 from amueller/dtype_object_conversion
[MRG + 2] make check_array convert object to float.
2 parents 0cf9314 + 96d4b3e commit 3468e00

File tree

15 files changed

+87
-36
lines changed

15 files changed

+87
-36
lines changed

doc/whats_new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,10 @@ API changes summary
382382
- `thresh` parameter is deprecated in favor of new `tol` parameter in
383383
:class:`GMM`. See `Enhancements` section for details. By `Hervé Bredin`_.
384384

385+
- Estimators will treat input with dtype object as numeric when possible.
386+
By `Andreas Müller`_.
387+
388+
385389

386390
.. _changes_0_15_2:
387391

sklearn/ensemble/gradient_boosting.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ class in the training data.
126126
"""
127127
def fit(self, X, y, sample_weight=None):
128128
if sample_weight is None:
129-
sample_weight = np.ones_like(y, dtype=np.float)
129+
sample_weight = np.ones_like(y, dtype=np.float64)
130130
class_counts = bincount(y, weights=sample_weight)
131131
self.priors = class_counts / class_counts.sum()
132132

@@ -1146,7 +1146,8 @@ def feature_importances_(self):
11461146

11471147
def _validate_y(self, y):
11481148
self.n_classes_ = 1
1149-
1149+
if y.dtype.kind == 'O':
1150+
y = y.astype(np.float64)
11501151
# Default implementation
11511152
return y
11521153

sklearn/gaussian_process/gaussian_process.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111

1212
from ..base import BaseEstimator, RegressorMixin
1313
from ..metrics.pairwise import manhattan_distances
14-
from ..utils import check_random_state, check_array, check_consistent_length
15-
from ..utils.validation import check_is_fitted
14+
from ..utils import check_random_state, check_array, check_X_y
15+
from ..utils.validation import check_is_fitted
1616
from . import regression_models as regression
1717
from . import correlation_models as correlation
1818

@@ -264,12 +264,10 @@ def fit(self, X, y):
264264
self.random_state = check_random_state(self.random_state)
265265

266266
# Force data to 2D numpy.array
267-
X = check_array(X)
268-
y = np.asarray(y)
267+
X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
269268
self.y_ndim_ = y.ndim
270269
if y.ndim == 1:
271270
y = y[:, np.newaxis]
272-
check_consistent_length(X, y)
273271

274272
# Check shapes of DOE & observations
275273
n_samples, n_features = X.shape
@@ -883,7 +881,7 @@ def _check_params(self, n_samples=None):
883881
"or array of length n_samples.")
884882

885883
# Check optimizer
886-
if not self.optimizer in self._optimizer_types:
884+
if self.optimizer not in self._optimizer_types:
887885
raise ValueError("optimizer should be one of %s"
888886
% self._optimizer_types)
889887

sklearn/linear_model/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from ..externals import six
2626
from ..externals.joblib import Parallel, delayed
2727
from ..base import BaseEstimator, ClassifierMixin, RegressorMixin
28-
from ..utils import as_float_array, check_array
28+
from ..utils import as_float_array, check_array, check_X_y
2929
from ..utils.extmath import safe_sparse_dot
3030
from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale
3131
from ..utils.fixes import sparse_lsqr
@@ -372,8 +372,8 @@ def fit(self, X, y, n_jobs=1):
372372
n_jobs_ = n_jobs
373373
else:
374374
n_jobs_ = self.n_jobs
375-
X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
376-
y = np.asarray(y)
375+
X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
376+
y_numeric=True, multi_output=True)
377377

378378
X, y, X_mean, y_mean, X_std = self._center_data(
379379
X, y, self.fit_intercept, self.normalize, self.copy_X)

sklearn/linear_model/bayes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def fit(self, X, y):
132132
-------
133133
self : returns an instance of self.
134134
"""
135-
X, y = check_X_y(X, y, dtype=np.float)
135+
X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
136136
X, y, X_mean, y_mean, X_std = self._center_data(
137137
X, y, self.fit_intercept, self.normalize, self.copy_X)
138138
n_samples, n_features = X.shape
@@ -342,7 +342,7 @@ def fit(self, X, y):
342342
-------
343343
self : returns an instance of self.
344344
"""
345-
X, y = check_X_y(X, y, dtype=np.float)
345+
X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
346346

347347
n_samples, n_features = X.shape
348348
coef_ = np.zeros(n_features)

sklearn/linear_model/coordinate_descent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,7 @@ def fit(self, X, y):
627627

628628
X, y = check_X_y(X, y, accept_sparse='csc', dtype=np.float64,
629629
order='F', copy=self.copy_X and self.fit_intercept,
630-
multi_output=True)
630+
multi_output=True, y_numeric=True)
631631

632632
X, y, X_mean, y_mean, X_std, precompute, Xy = \
633633
_pre_fit(X, y, None, self.precompute, self.normalize,

sklearn/linear_model/least_angle.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
from .base import LinearModel
2323
from ..base import RegressorMixin
24-
from ..utils import arrayfuncs, as_float_array, check_array, check_X_y
24+
from ..utils import arrayfuncs, as_float_array, check_X_y
2525
from ..cross_validation import _check_cv as check_cv
2626
from ..utils import ConvergenceWarning
2727
from ..externals.joblib import Parallel, delayed
@@ -422,7 +422,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500,
422422
for ii in idx:
423423
for i in range(ii, n_active):
424424
indices[i], indices[i + 1] = indices[i + 1], indices[i]
425-
Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i+1])
425+
Gram[i], Gram[i + 1] = swap(Gram[i], Gram[i + 1])
426426
Gram[:, i], Gram[:, i + 1] = swap(Gram[:, i],
427427
Gram[:, i + 1])
428428

@@ -589,8 +589,7 @@ def fit(self, X, y, Xy=None):
589589
self : object
590590
returns an instance of self.
591591
"""
592-
X = check_array(X)
593-
y = np.asarray(y)
592+
X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
594593
n_features = X.shape[1]
595594

596595
X, y, X_mean, y_mean, X_std = self._center_data(X, y,
@@ -1268,8 +1267,7 @@ def fit(self, X, y, copy_X=True):
12681267
returns an instance of self.
12691268
"""
12701269
self.fit_path = True
1271-
X = check_array(X)
1272-
y = np.asarray(y)
1270+
X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
12731271

12741272
X, y, Xmean, ymean, Xstd = LinearModel._center_data(
12751273
X, y, self.fit_intercept, self.normalize, self.copy_X)

sklearn/linear_model/logistic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
529529
"dual=False, got dual=%s" % dual)
530530
# Preprocessing.
531531
X = check_array(X, accept_sparse='csr', dtype=np.float64)
532-
y = check_array(y, ensure_2d=False, copy=copy)
532+
y = check_array(y, ensure_2d=False, copy=copy, dtype=None)
533533
_, n_features = X.shape
534534
check_consistent_length(X, y)
535535
classes = np.unique(y)
@@ -1318,7 +1318,7 @@ def fit(self, X, y):
13181318
"the primal form.")
13191319

13201320
X = check_array(X, accept_sparse='csr', dtype=np.float64)
1321-
y = check_array(y, ensure_2d=False)
1321+
y = check_array(y, ensure_2d=False, dtype=None)
13221322

13231323
if self.multi_class not in ['ovr', 'multinomial']:
13241324
raise ValueError("multi_class backend should be either "

sklearn/linear_model/omp.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -609,8 +609,7 @@ def fit(self, X, y):
609609
self : object
610610
returns an instance of self.
611611
"""
612-
X = check_array(X)
613-
y = np.asarray(y)
612+
X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
614613
n_features = X.shape[1]
615614

616615
X, y, X_mean, y_mean, X_std, Gram, Xy = \
@@ -805,7 +804,7 @@ def fit(self, X, y):
805804
self : object
806805
returns an instance of self.
807806
"""
808-
X, y = check_X_y(X, y)
807+
X, y = check_X_y(X, y, y_numeric=True)
809808
X = as_float_array(X, copy=False, force_all_finite=False)
810809
cv = check_cv(self.cv, X, y, classifier=False)
811810
max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1])

sklearn/linear_model/randomized_l1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def fit(self, X, y):
8888
self : object
8989
Returns an instance of self.
9090
"""
91-
X, y = check_X_y(X, y, ['csr', 'csc', 'coo'])
91+
X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], y_numeric=True)
9292
X = as_float_array(X, copy=False)
9393
n_samples, n_features = X.shape
9494

0 commit comments

Comments
 (0)