26 changes: 20 additions & 6 deletions sklearn/neural_network/_multilayer_perceptron.py
@@ -21,6 +21,7 @@
 from ..preprocessing import LabelBinarizer
 from ..utils import gen_batches, check_random_state
 from ..utils import shuffle
+from ..utils import _safe_indexing
 from ..utils import check_array, check_X_y, column_or_1d
 from ..exceptions import ConvergenceWarning
 from ..utils.extmath import safe_sparse_dot
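The newly imported `_safe_indexing` is what lets the batching code below take rows by integer position from dense arrays and sparse matrices alike. A minimal, standalone sketch of that behaviour (note that `_safe_indexing` is a private scikit-learn helper, so its exact signature may change between versions):

```python
import numpy as np
from scipy import sparse
from sklearn.utils import _safe_indexing  # private helper; subject to change

X_dense = np.arange(12).reshape(4, 3)
X_sparse = sparse.csr_matrix(X_dense)
rows = np.array([2, 0])

# The same call selects rows from either container type.
print(_safe_indexing(X_dense, rows))             # rows 2 and 0 of the ndarray
print(_safe_indexing(X_sparse, rows).toarray())  # same rows of the CSR matrix
```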
@@ -503,6 +504,7 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
             y_val = None
 
         n_samples = X.shape[0]
+        sample_idx = np.arange(n_samples, dtype=int)
 
         if self.batch_size == 'auto':
             batch_size = min(200, n_samples)
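`sample_idx` starts out as the identity ordering; mini-batches are then formed by applying the slices produced by `gen_batches` to this index array rather than to `X` itself. A quick illustration of how `gen_batches` partitions the samples (standalone example, not part of the patch):

```python
import numpy as np
from sklearn.utils import gen_batches

n_samples, batch_size = 7, 3
sample_idx = np.arange(n_samples, dtype=int)

# gen_batches yields contiguous slice objects covering range(n_samples).
print(list(gen_batches(n_samples, batch_size)))
# [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]

for batch_slice in gen_batches(n_samples, batch_size):
    print(sample_idx[batch_slice])  # indices belonging to this mini-batch
```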
@@ -512,12 +514,24 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
         try:
             for it in range(self.max_iter):
                 if self.shuffle:
-                    X, y = shuffle(X, y, random_state=self._random_state)
+                    # Only shuffle the sample indices instead of X and y to
+                    # reduce the memory footprint. These indices will be used
+                    # to slice the X and y.
+                    sample_idx = shuffle(sample_idx,
+                                         random_state=self._random_state)
+
                 accumulated_loss = 0.0
                 for batch_slice in gen_batches(n_samples, batch_size):
-                    activations[0] = X[batch_slice]
+                    if self.shuffle:
+                        X_batch = _safe_indexing(X, sample_idx[batch_slice])
+                        y_batch = y[sample_idx[batch_slice]]
+                    else:
+                        X_batch = X[batch_slice]
+                        y_batch = y[batch_slice]
+
+                    activations[0] = X_batch
                     batch_loss, coef_grads, intercept_grads = self._backprop(
-                        X[batch_slice], y[batch_slice], activations, deltas,
+                        X_batch, y_batch, activations, deltas,
                         coef_grads, intercept_grads)
                     accumulated_loss += batch_loss * (batch_slice.stop -
                                                       batch_slice.start)
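The comment in the hunk above captures the point of the change: the old code rebuilt shuffled copies of `X` and `y` at every epoch, whereas the new code shuffles only an integer index array and gathers one mini-batch at a time. A rough, illustrative sketch of the difference (the array sizes here are made up for the example):

```python
import numpy as np
from sklearn.utils import shuffle

rng = np.random.RandomState(0)
X = rng.rand(10_000, 500)                 # ~40 MB of float64 features
y = rng.randint(0, 2, size=10_000)

# Old approach: each epoch materialises full shuffled copies of X and y.
X_shuf, y_shuf = shuffle(X, y, random_state=rng)    # copies ~40 MB

# New approach: shuffle only a small index array (~80 kB) and pull out
# one mini-batch at a time with fancy indexing.
sample_idx = shuffle(np.arange(X.shape[0]), random_state=rng)
X_batch, y_batch = X[sample_idx[:200]], y[sample_idx[:200]]
```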
@@ -664,7 +678,7 @@ def _predict(self, X):
         y_pred : ndarray of shape (n_samples,) or (n_samples, n_outputs)
             The decision function of the samples for each class in the model.
         """
-        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
+        X = check_array(X, accept_sparse=['csr', 'csc'])
 
         # Make sure self.hidden_layer_sizes is a list
         hidden_layer_sizes = self.hidden_layer_sizes
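Dropping `'coo'` from `accept_sparse` fits the new row-indexed batching: COO matrices do not support row indexing, so `check_array` now converts them to the first accepted format up front. A small standalone check of that behaviour:

```python
import numpy as np
from scipy import sparse
from sklearn.utils import check_array

X_coo = sparse.coo_matrix(np.eye(4))

# COO matrices cannot be sliced row-wise (X_coo[[1, 2]] raises TypeError),
# so check_array converts them to the first format listed in accept_sparse.
X_checked = check_array(X_coo, accept_sparse=['csr', 'csc'])
print(X_checked.format)  # 'csr'
```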
@@ -928,7 +942,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
             n_iter_no_change=n_iter_no_change, max_fun=max_fun)
 
     def _validate_input(self, X, y, incremental):
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'],
                          multi_output=True)
         if y.ndim == 2 and y.shape[1] == 1:
             y = column_or_1d(y, warn=True)
@@ -1336,7 +1350,7 @@ def predict(self, X):
         return y_pred
 
     def _validate_input(self, X, y, incremental):
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'],
                          multi_output=True, y_numeric=True)
         if y.ndim == 2 and y.shape[1] == 1:
             y = column_or_1d(y, warn=True)