|
13 | 13 | # License: BSD Style.
|
14 | 14 |
|
15 | 15 | from abc import ABCMeta, abstractmethod
|
| 16 | +import numbers |
16 | 17 |
|
17 | 18 | import numpy as np
|
18 | 19 | import scipy.sparse as sp
|
@@ -67,27 +68,46 @@ def sparse_center_data(X, y, fit_intercept, normalize=False):
|
67 | 68 | return X_data, y, X_mean, y_mean, X_std
|
68 | 69 |
|
69 | 70 |
|
70 |
| -def center_data(X, y, fit_intercept, normalize=False, copy=True): |
| 71 | +def center_data(X, y, fit_intercept, normalize=False, copy=True, |
| 72 | + sample_weight=None): |
71 | 73 | """
|
72 | 74 | Centers data to have mean zero along axis 0. This is here because
|
73 | 75 | nearly all linear models will want their data to be centered.
|
| 76 | +
|
| 77 | + If sample_weight is not None, then the weighted mean of X and y |
| 78 | + is zero, and not the mean itself |
74 | 79 | """
|
75 | 80 | X = as_float_array(X, copy)
|
| 81 | + no_sample_weight = (sample_weight is None |
| 82 | + or isinstance(sample_weight, numbers.Number)) |
76 | 83 |
|
77 | 84 | if fit_intercept:
|
78 | 85 | if sp.issparse(X):
|
79 | 86 | X_mean = np.zeros(X.shape[1])
|
80 | 87 | X_std = np.ones(X.shape[1])
|
81 | 88 | else:
|
82 |
| - X_mean = X.mean(axis=0) |
| 89 | + if no_sample_weight: |
| 90 | + X_mean = X.mean(axis=0) |
| 91 | + else: |
| 92 | + X_mean = (np.sum(X * sample_weight[:, np.newaxis], axis=0) |
| 93 | + / np.sum(sample_weight)) |
83 | 94 | X -= X_mean
|
84 | 95 | if normalize:
|
85 | 96 | X_std = np.sqrt(np.sum(X ** 2, axis=0))
|
86 | 97 | X_std[X_std == 0] = 1
|
87 | 98 | X /= X_std
|
88 | 99 | else:
|
89 | 100 | X_std = np.ones(X.shape[1])
|
90 |
| - y_mean = y.mean(axis=0) |
| 101 | + if no_sample_weight: |
| 102 | + y_mean = y.mean(axis=0) |
| 103 | + else: |
| 104 | + if y.ndim <= 1: |
| 105 | + y_mean = (np.sum(y * sample_weight, axis=0) |
| 106 | + / np.sum(sample_weight)) |
| 107 | + else: |
| 108 | + # cater for multi-output problems |
| 109 | + y_mean = (np.sum(y * sample_weight[:, np.newaxis], axis=0) |
| 110 | + / np.sum(sample_weight)) |
91 | 111 | y = y - y_mean
|
92 | 112 | else:
|
93 | 113 | X_mean = np.zeros(X.shape[1])
|
|
0 commit comments