From fa2adcf61576ed0a86c829f449f951a75db3f086 Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Wed, 12 Jun 2019 16:47:04 +0200
Subject: [PATCH 01/13] Reduce memory footprint when using stochastic
 optimizers with shuffle

---
 sklearn/neural_network/multilayer_perceptron.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 8a5469df54897..aabfcbaa95764 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -498,6 +498,7 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
             y_val = None
 
         n_samples = X.shape[0]
+        idx = np.arange(n_samples, dtype=int)
 
         if self.batch_size == 'auto':
             batch_size = min(200, n_samples)
@@ -507,12 +508,12 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
         try:
             for it in range(self.max_iter):
                 if self.shuffle:
-                    X, y = shuffle(X, y, random_state=self._random_state)
+                    idx = shuffle(idx, random_state=self._random_state)
                 accumulated_loss = 0.0
                 for batch_slice in gen_batches(n_samples, batch_size):
-                    activations[0] = X[batch_slice]
+                    activations[0] = X[idx[batch_slice]]
                     batch_loss, coef_grads, intercept_grads = self._backprop(
-                        X[batch_slice], y[batch_slice], activations, deltas,
+                        X[idx[batch_slice]], y[idx[batch_slice]], activations, deltas,
                         coef_grads, intercept_grads)
                     accumulated_loss += batch_loss * (batch_slice.stop -
                                                       batch_slice.start)

From a575c726981bc5f783eead8f6470e7dc8a7e8a2f Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Wed, 12 Jun 2019 17:02:58 +0200
Subject: [PATCH 02/13] Reduce the line length for flake8

---
 sklearn/neural_network/multilayer_perceptron.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index aabfcbaa95764..0d3cb2e4f4db5 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -513,7 +513,8 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                 for batch_slice in gen_batches(n_samples, batch_size):
                     activations[0] = X[idx[batch_slice]]
                     batch_loss, coef_grads, intercept_grads = self._backprop(
-                        X[idx[batch_slice]], y[idx[batch_slice]], activations, deltas,
+                        X[idx[batch_slice]], y[idx[batch_slice]],
+                        activations, deltas,
                         coef_grads, intercept_grads)
                     accumulated_loss += batch_loss * (batch_slice.stop -
                                                       batch_slice.start)
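Note (PATCH 01): this commit carries the core idea of the series. `shuffle(X, y)` materializes a full copy of both arrays on every epoch, whereas shuffling only an integer index array costs 8 bytes per sample and defers the copying to one minibatch at a time. A minimal standalone sketch of the technique (the helper name `iter_shuffled_batches` is ours, not sklearn's; the real code uses `gen_batches` and feeds `_backprop` as shown in the diff):

    import numpy as np

    def iter_shuffled_batches(X, y, batch_size, rng):
        """Yield minibatches in shuffled order without copying X or y."""
        n_samples = X.shape[0]
        idx = np.arange(n_samples, dtype=int)
        rng.shuffle(idx)  # permutes only the index array, not the data
        for start in range(0, n_samples, batch_size):
            batch_idx = idx[start:start + batch_size]
            # fancy indexing copies just this one batch
            yield X[batch_idx], y[batch_idx]

    rng = np.random.default_rng(0)
    X = np.random.rand(1000, 20)
    y = np.random.randint(0, 2, size=1000)
    for X_batch, y_batch in iter_shuffled_batches(X, y, 200, rng):
        pass  # one gradient step per batch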
From e66cfa66325d89900b8457bb84383fe1f358c426 Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Mon, 17 Jun 2019 15:09:59 +0200
Subject: [PATCH 03/13] Apply patch for fixed unit tests. Drop
 scipy.sparse.coo_matrix support

---
 sklearn/neural_network/multilayer_perceptron.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 0d3cb2e4f4db5..349117ae46029 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -21,6 +21,7 @@
 from ..utils import gen_batches, check_random_state
 from ..utils import shuffle
 from ..utils import check_array, check_X_y, column_or_1d
+from ..utils import safe_indexing
 from ..exceptions import ConvergenceWarning
 from ..utils.extmath import safe_sparse_dot
 from ..utils.validation import check_is_fitted
@@ -511,9 +512,10 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                     idx = shuffle(idx, random_state=self._random_state)
                 accumulated_loss = 0.0
                 for batch_slice in gen_batches(n_samples, batch_size):
-                    activations[0] = X[idx[batch_slice]]
+                    activations[0] = safe_indexing(X, idx[batch_slice])
                     batch_loss, coef_grads, intercept_grads = self._backprop(
-                        X[idx[batch_slice]], y[idx[batch_slice]],
+                        safe_indexing(X, idx[batch_slice]),
+                        y[idx[batch_slice]],
                         activations, deltas,
                         coef_grads, intercept_grads)
                     accumulated_loss += batch_loss * (batch_slice.stop -
@@ -661,7 +663,7 @@ def _predict(self, X):
         y_pred : array-like, shape (n_samples,) or (n_samples, n_outputs)
             The decision function of the samples for each class in the model.
         """
-        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
+        X = check_array(X, accept_sparse=['csr', 'csc'])
 
         # Make sure self.hidden_layer_sizes is a list
         hidden_layer_sizes = self.hidden_layer_sizes
@@ -917,7 +919,7 @@ def __init__(self, hidden_layer_sizes=(100,), activation="relu",
                          n_iter_no_change=n_iter_no_change)
 
     def _validate_input(self, X, y, incremental):
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'],
                          multi_output=True)
         if y.ndim == 2 and y.shape[1] == 1:
             y = column_or_1d(y, warn=True)
@@ -1317,7 +1319,7 @@ def predict(self, X):
         return y_pred
 
     def _validate_input(self, X, y, incremental):
-        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
+        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'],
                          multi_output=True, y_numeric=True)
         if y.ndim == 2 and y.shape[1] == 1:
             y = column_or_1d(y, warn=True)
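Note (PATCH 03): routing the batch lookup through `safe_indexing` keeps non-ndarray containers working, and `'coo'` is dropped from `accept_sparse` because the COO layout supports no row indexing, which index-based batching now requires; inputs are converted to CSR/CSC instead. A small illustration of that limitation (our own example, reflecting SciPy behavior, not taken from the patch):

    import numpy as np
    from scipy import sparse

    row_idx = np.array([2, 0])
    X_csr = sparse.csr_matrix(np.eye(4))
    X_coo = sparse.coo_matrix(np.eye(4))

    print(X_csr[row_idx].toarray())  # CSR implements row fancy indexing
    try:
        X_coo[row_idx]               # COO does not implement __getitem__
    except TypeError as exc:
        print("coo_matrix:", exc)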
From 85bbab5bb246b52fe90ff6a00df707fe87a7f76d Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Thu, 20 Jun 2019 21:55:19 +0200
Subject: [PATCH 04/13] Do not use safe_indexing due to low speed. Handle
 Pandas DataFrame inside of check_X_y instead.

---
 sklearn/neural_network/multilayer_perceptron.py | 14 +++++++++++---
 sklearn/utils/validation.py                     |  4 ++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 349117ae46029..8269b84cfebe6 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -512,10 +512,18 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                     idx = shuffle(idx, random_state=self._random_state)
                 accumulated_loss = 0.0
                 for batch_slice in gen_batches(n_samples, batch_size):
-                    activations[0] = safe_indexing(X, idx[batch_slice])
+                    # only use integer indexing when it is needed, otherwise use fast-path
+                    if self.shuffle:
+                        X_batch = X[idx[batch_slice]]
+                        y_batch = y[idx[batch_size]]
+                    else:
+                        X_batch = X[batch_slice]
+                        y_batch = y[batch_slice]
+
+                    activations[0] = X_batch
                     batch_loss, coef_grads, intercept_grads = self._backprop(
-                        safe_indexing(X, idx[batch_slice]),
-                        y[idx[batch_slice]],
+                        X_batch,
+                        y_batch,
                         activations, deltas,
                         coef_grads, intercept_grads)
                     accumulated_loss += batch_loss * (batch_slice.stop -
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 32cad0197317b..84c765d82f77a 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -449,6 +449,10 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
     if hasattr(array, "dtypes") and hasattr(array.dtypes, '__array__'):
         dtypes_orig = np.array(array.dtypes)
 
+    # extract the underlying numpy ndarray from a Pandas DataFrame
+    if hasattr(array, "iloc"):
+        array = array.values
+
     if dtype_numeric:
         if dtype_orig is not None and dtype_orig.kind == "O":
             # if input is object, convert to float.
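Note (PATCH 04): the fast path exists because plain slicing of an ndarray returns a view (no copy), while integer-array indexing always copies, so the extra cost is paid only when `shuffle=True`. Also note the typo `y[idx[batch_size]]` in this commit, which should read `y[idx[batch_slice]]`; it is what breaks the unit tests and is only corrected in PATCH 07. A sketch of the view-versus-copy distinction (our own example, not from the patch):

    import numpy as np

    X = np.random.rand(10_000, 50)
    batch = slice(0, 200)
    idx = np.arange(10_000)

    view = X[batch]       # basic slicing: a view into X, no data copied
    copy = X[idx[batch]]  # fancy indexing: a fresh 200-row copy
    assert view.base is X
    assert copy.base is not X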
From 0400df21aa695afe35592c0dc5635d3d73e08d96 Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Thu, 20 Jun 2019 21:58:49 +0200
Subject: [PATCH 05/13] Shorter comment for flake8

---
 sklearn/neural_network/multilayer_perceptron.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 8269b84cfebe6..392048f3451a1 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -512,7 +512,7 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                     idx = shuffle(idx, random_state=self._random_state)
                 accumulated_loss = 0.0
                 for batch_slice in gen_batches(n_samples, batch_size):
-                    # only use integer indexing when it is needed, otherwise use fast-path
+                    # only use integer indexing when it is needed
                     if self.shuffle:
                         X_batch = X[idx[batch_slice]]
                         y_batch = y[idx[batch_size]]

From d53194e0c5e06efd83e6f406988cde30d59d38ee Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Thu, 20 Jun 2019 22:03:10 +0200
Subject: [PATCH 06/13] Do not import unused function safe_indexing

---
 sklearn/neural_network/multilayer_perceptron.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 392048f3451a1..69b27ebdb4570 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -21,7 +21,6 @@
 from ..utils import gen_batches, check_random_state
 from ..utils import shuffle
 from ..utils import check_array, check_X_y, column_or_1d
-from ..utils import safe_indexing
 from ..exceptions import ConvergenceWarning
 from ..utils.extmath import safe_sparse_dot
 from ..utils.validation import check_is_fitted
From df615a712dff464557e589cd7b4ef5fe7de1b337 Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Sat, 22 Jun 2019 12:55:42 +0200
Subject: [PATCH 07/13] Use safe_indexing without take (should fix tests)

---
 sklearn/neural_network/multilayer_perceptron.py | 9 ++++-----
 sklearn/utils/__init__.py                       | 8 ++------
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 69b27ebdb4570..17c718c47c1a9 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -20,6 +20,7 @@
 from ..preprocessing import LabelBinarizer
 from ..utils import gen_batches, check_random_state
 from ..utils import shuffle
+from ..utils import safe_indexing
 from ..utils import check_array, check_X_y, column_or_1d
 from ..exceptions import ConvergenceWarning
 from ..utils.extmath import safe_sparse_dot
@@ -513,17 +514,15 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                 for batch_slice in gen_batches(n_samples, batch_size):
                     # only use integer indexing when it is needed
                     if self.shuffle:
-                        X_batch = X[idx[batch_slice]]
-                        y_batch = y[idx[batch_size]]
+                        X_batch = safe_indexing(X, idx[batch_slice])
+                        y_batch = y[idx[batch_slice]]
                     else:
                         X_batch = X[batch_slice]
                         y_batch = y[batch_slice]
 
                     activations[0] = X_batch
                     batch_loss, coef_grads, intercept_grads = self._backprop(
-                        X_batch,
-                        y_batch,
-                        activations, deltas,
+                        X_batch, y_batch, activations, deltas,
                         coef_grads, intercept_grads)
                     accumulated_loss += batch_loss * (batch_slice.stop -
                                                       batch_slice.start)
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index daf7e7763235d..d5671181ddb99 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -213,12 +213,8 @@ def safe_indexing(X, indices):
                           DataConversionWarning)
         return X.copy().iloc[indices]
     elif hasattr(X, "shape"):
-        if hasattr(X, 'take') and (hasattr(indices, 'dtype') and
-                                   indices.dtype.kind == 'i'):
-            # This is often substantially faster than X[indices]
-            return X.take(indices, axis=0)
-        else:
-            return X[indices]
+        # just use fancy indexing, which is faster than numpy take (see PR 14075)
+        return X[indices]
     else:
         return [X[idx] for idx in indices]
 

From 2fa153571b3266bbd9d60dfb6d33bf195b470c05 Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Sat, 22 Jun 2019 13:38:18 +0200
Subject: [PATCH 08/13] Use slightly shorter comment for flake8

---
 sklearn/utils/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index d5671181ddb99..456582c61e105 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -213,7 +213,7 @@ def safe_indexing(X, indices):
                           DataConversionWarning)
         return X.copy().iloc[indices]
     elif hasattr(X, "shape"):
-        # just use fancy indexing, which is faster than numpy take (see PR 14075)
+        # just use fancy indexing, which is faster than numpy take (PR 14075)
         return X[indices]
     else:
         return [X[idx] for idx in indices]

From 62f662d517f8e29673bdf595852f093eded90091 Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Mon, 24 Jun 2019 11:21:02 +0200
Subject: [PATCH 09/13] Remove not-required pandas Dataframe handling

---
 sklearn/utils/validation.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 84c765d82f77a..32cad0197317b 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -449,10 +449,6 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
     if hasattr(array, "dtypes") and hasattr(array.dtypes, '__array__'):
         dtypes_orig = np.array(array.dtypes)
 
-    # extract the underlying numpy ndarray from a Pandas DataFrame
-    if hasattr(array, "iloc"):
-        array = array.values
-
     if dtype_numeric:
         if dtype_orig is not None and dtype_orig.kind == "O":
             # if input is object, convert to float.
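Note (PATCHES 07-09): this settles the design. `safe_indexing` returns for the X batches so sparse matrices and DataFrames keep working, the `y[idx[batch_size]]` typo is fixed, the `ndarray.take` branch inside `safe_indexing` is dropped in favor of plain fancy indexing (benchmarks on PR 14075 found `take` no faster), and PATCH 09 reverts the now-unneeded `check_array` change from PATCH 04. A rough micro-benchmark sketch of the take-versus-fancy-indexing question (our own example; results vary with NumPy version, array layout, and batch size):

    import numpy as np
    from timeit import timeit

    X = np.random.rand(100_000, 100)
    indices = np.random.permutation(100_000)[:200]

    t_take = timeit(lambda: X.take(indices, axis=0), number=2000)
    t_fancy = timeit(lambda: X[indices], number=2000)
    print(f"take: {t_take:.3f}s  fancy indexing: {t_fancy:.3f}s")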
From dd2e79693de45f3e01cabde96b83b0dab417689f Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Tue, 17 Dec 2019 13:44:04 +0100
Subject: [PATCH 10/13] Update import to avoid FutureWarning

---
 sklearn/neural_network/_multilayer_perceptron.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
index df7583ec9ff5d..9684cb0b5018f 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -21,7 +21,7 @@
 from ..preprocessing import LabelBinarizer
 from ..utils import gen_batches, check_random_state
 from ..utils import shuffle
-from ..utils import safe_indexing
+from ..utils import _safe_indexing
 from ..utils import check_array, check_X_y, column_or_1d
 from ..exceptions import ConvergenceWarning
 from ..utils.extmath import safe_sparse_dot
@@ -519,7 +519,7 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                 for batch_slice in gen_batches(n_samples, batch_size):
                     # only use integer indexing when it is needed
                     if self.shuffle:
-                        X_batch = safe_indexing(X, idx[batch_slice])
+                        X_batch = _safe_indexing(X, idx[batch_slice])
                         y_batch = y[idx[batch_slice]]
                     else:
                         X_batch = X[batch_slice]
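Note (PATCH 10): scikit-learn 0.22 deprecated the public `sklearn.utils.safe_indexing` in favor of the private `_safe_indexing`, so the in-tree code switches to the private name to silence the FutureWarning. Code outside sklearn that needs the helper across versions often uses a fallback import along these lines (a hedged sketch, not part of the patch):

    try:
        # scikit-learn >= 0.22 ships the private helper
        from sklearn.utils import _safe_indexing
    except ImportError:
        # older releases expose only the public name
        from sklearn.utils import safe_indexing as _safe_indexing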
From 516958fafab1f5d925f90e9f1200e9fdd50a535b Mon Sep 17 00:00:00 2001
From: meyer89
Date: Mon, 13 Jan 2020 15:20:56 +0100
Subject: [PATCH 11/13] Apply suggestions from code review

Co-Authored-By: Guillaume Lemaitre
---
 sklearn/neural_network/_multilayer_perceptron.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
index 9684cb0b5018f..a3fdfbb921f11 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -504,7 +504,7 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
             y_val = None
 
         n_samples = X.shape[0]
-        idx = np.arange(n_samples, dtype=int)
+        sample_idx = np.arange(n_samples, dtype=int)
 
         if self.batch_size == 'auto':
             batch_size = min(200, n_samples)
@@ -514,13 +514,15 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
         try:
             for it in range(self.max_iter):
                 if self.shuffle:
-                    idx = shuffle(idx, random_state=self._random_state)
+                    # Only shuffle the sample indices instead of X and y to
+                    # reduce the memory footprint. These indices will be used
+                    # to slice the X and y.
+                    sample_idx = shuffle(sample_idx, random_state=self._random_state)
                 accumulated_loss = 0.0
                 for batch_slice in gen_batches(n_samples, batch_size):
-                    # only use integer indexing when it is needed
                     if self.shuffle:
-                        X_batch = _safe_indexing(X, idx[batch_slice])
-                        y_batch = y[idx[batch_slice]]
+                        X_batch = _safe_indexing(X, sample_idx[batch_slice])
+                        y_batch = y[sample_idx[batch_slice]]
                     else:
                         X_batch = X[batch_slice]
                         y_batch = y[batch_slice]

From 05f1f551a50bb0c9da0399654919097173b82386 Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Mon, 13 Jan 2020 15:25:44 +0100
Subject: [PATCH 12/13] Reduce line length for flake8

---
 sklearn/neural_network/_multilayer_perceptron.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
index e8805db8ccbf1..bf2f70de3df50 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -517,7 +517,9 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                     # Only shuffle the sample indices instead of X and y to
                     # reduce the memory footprint. These indices will be used
                     # to slice the X and y.
-                    sample_idx = shuffle(sample_idx, random_state=self._random_state)
+                    sample_idx = shuffle(sample_idx,
+                                         random_state=self._random_state)
+
                 accumulated_loss = 0.0
                 for batch_slice in gen_batches(n_samples, batch_size):
                     if self.shuffle:

From 3872d4e7256cd7ed186a543fcc90e419de0a88ff Mon Sep 17 00:00:00 2001
From: Marian Meyer
Date: Mon, 13 Jan 2020 15:30:45 +0100
Subject: [PATCH 13/13] Remove whitespace in empty line (flake8)

---
 sklearn/neural_network/_multilayer_perceptron.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
index bf2f70de3df50..9cc66bedb46ce 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -519,7 +519,7 @@ def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                     # to slice the X and y.
                     sample_idx = shuffle(sample_idx,
                                          random_state=self._random_state)
-                    
+
                 accumulated_loss = 0.0
                 for batch_slice in gen_batches(n_samples, batch_size):
                     if self.shuffle:
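Net effect of the series: per epoch, the old loop copied all of X and y through `shuffle(X, y)`, while the new loop shuffles one int index array and materializes a single batch at a time. Back-of-envelope numbers for an illustrative dataset (shapes are our own example, assuming float64 data and int64 indices on a 64-bit platform):

    import numpy as np

    n_samples, n_features, batch_size = 100_000, 100, 200
    X = np.zeros((n_samples, n_features))        # ~80 MB of float64
    y = np.zeros(n_samples)                      # ~0.8 MB

    old_per_epoch = X.nbytes + y.nbytes          # full copies: ~80.8 MB
    new_per_epoch = np.arange(n_samples).nbytes  # index array: ~0.8 MB
    per_batch = X[:batch_size].copy().nbytes     # one materialized batch: ~0.16 MB

    print(f"old: {old_per_epoch / 1e6:.1f} MB/epoch; "
          f"new: {new_per_epoch / 1e6:.1f} MB/epoch + "
          f"{per_batch / 1e6:.2f} MB per batch")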