Skip to content

Commit 83a9151

Browse files
GaelVaroquaux authored and amueller committed
BUG: Ridge: sample_weights in intercept
1 parent cc22dc8 commit 83a9151

File tree

2 files changed

+25
-4
lines changed

2 files changed

+25
-4
lines changed

sklearn/linear_model/base.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# License: BSD Style.
1414

1515
from abc import ABCMeta, abstractmethod
16+
import numbers
1617

1718
import numpy as np
1819
import scipy.sparse as sp
@@ -67,27 +68,46 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
6768
return X_data, y, X_mean, y_mean, X_std
6869

6970

70-
def center_data(X, y, fit_intercept, normalize=False, copy=True):
71+
def center_data(X, y, fit_intercept, normalize=False, copy=True,
72+
sample_weight=None):
7173
"""
7274
Centers data to have mean zero along axis 0. This is here because
7375
nearly all linear models will want their data to be centered.
76+
77+
If sample_weight is not None, then the weighted mean of X and y
78+
is zero, and not the mean itself
7479
"""
7580
X = as_float_array(X, copy)
81+
no_sample_weight = (sample_weight is None
82+
or isinstance(sample_weight, numbers.Number))
7683

7784
if fit_intercept:
7885
if sp.issparse(X):
7986
X_mean = np.zeros(X.shape[1])
8087
X_std = np.ones(X.shape[1])
8188
else:
82-
X_mean = X.mean(axis=0)
89+
if no_sample_weight:
90+
X_mean = X.mean(axis=0)
91+
else:
92+
X_mean = (np.sum(X * sample_weight[:, np.newaxis], axis=0)
93+
/ np.sum(sample_weight))
8394
X -= X_mean
8495
if normalize:
8596
X_std = np.sqrt(np.sum(X ** 2, axis=0))
8697
X_std[X_std == 0] = 1
8798
X /= X_std
8899
else:
89100
X_std = np.ones(X.shape[1])
90-
y_mean = y.mean(axis=0)
101+
if no_sample_weight:
102+
y_mean = y.mean(axis=0)
103+
else:
104+
if y.ndim <= 1:
105+
y_mean = (np.sum(y * sample_weight, axis=0)
106+
/ np.sum(sample_weight))
107+
else:
108+
# cater for multi-output problems
109+
y_mean = (np.sum(y * sample_weight[:, np.newaxis], axis=0)
110+
/ np.sum(sample_weight))
91111
y = y - y_mean
92112
else:
93113
X_mean = np.zeros(X.shape[1])

sklearn/linear_model/ridge.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,8 @@ def fit(self, X, y, sample_weight=1.0, solver=None):
210210
y = np.asarray(y, dtype=np.float)
211211

212212
X, y, X_mean, y_mean, X_std = self._center_data(
213-
X, y, self.fit_intercept, self.normalize, self.copy_X)
213+
X, y, self.fit_intercept, self.normalize, self.copy_X,
214+
sample_weight=sample_weight)
214215

215216
self.coef_ = ridge_regression(X, y,
216217
alpha=self.alpha,

0 commit comments

Comments
 (0)