BUG: Ridge: take sample_weight into account when computing the intercept

GaelVaroquaux · amueller · commit 83a91519b50b · 2013-01-20T19:16:19.000+01:00
diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py
@@ -13,6 +13,7 @@
 # License: BSD Style.
 
 from abc import ABCMeta, abstractmethod
+import numbers
 
 import numpy as np
 import scipy.sparse as sp
@@ -67,27 +68,46 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
     return X_data, y, X_mean, y_mean, X_std
 
 
-def center_data(X, y, fit_intercept, normalize=False, copy=True):
+def center_data(X, y, fit_intercept, normalize=False, copy=True,
+                sample_weight=None):
     """
     Centers data to have mean zero along axis 0. This is here because
     nearly all linear models will want their data to be centered.
+
+    If sample_weight is an array (not None or a scalar), X and y are centered
+    with their sample-weighted means instead of their plain means.
     """
     X = as_float_array(X, copy)
+    no_sample_weight = (sample_weight is None
+                        or isinstance(sample_weight, numbers.Number))
 
     if fit_intercept:
         if sp.issparse(X):
             X_mean = np.zeros(X.shape[1])
             X_std = np.ones(X.shape[1])
         else:
-            X_mean = X.mean(axis=0)
+            if no_sample_weight:
+                X_mean = X.mean(axis=0)
+            else:
+                X_mean = (np.sum(X * sample_weight[:, np.newaxis], axis=0)
+                            / np.sum(sample_weight))
             X -= X_mean
             if normalize:
                 X_std = np.sqrt(np.sum(X ** 2, axis=0))
                 X_std[X_std == 0] = 1
                 X /= X_std
             else:
                 X_std = np.ones(X.shape[1])
-        y_mean = y.mean(axis=0)
+        if no_sample_weight:
+            y_mean = y.mean(axis=0)
+        else:
+            if y.ndim <= 1:
+                y_mean = (np.sum(y * sample_weight, axis=0)
+                            / np.sum(sample_weight))
+            else:
+                # cater for multi-output problems
+                y_mean = (np.sum(y * sample_weight[:, np.newaxis], axis=0)
+                            / np.sum(sample_weight))
         y = y - y_mean
     else:
         X_mean = np.zeros(X.shape[1])
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
@@ -210,7 +210,8 @@ def fit(self, X, y, sample_weight=1.0, solver=None):
         y = np.asarray(y, dtype=np.float)
 
         X, y, X_mean, y_mean, X_std = self._center_data(
-            X, y, self.fit_intercept, self.normalize, self.copy_X)
+            X, y, self.fit_intercept, self.normalize, self.copy_X,
+            sample_weight=sample_weight)
 
         self.coef_ = ridge_regression(X, y,
                                       alpha=self.alpha,