fix lbfgs rename #7503


Merged: 1 commit, Sep 27, 2016.

This patch corrects the misspelled solver name 'lbgfs' to 'lbfgs' in the documentation, the multilayer perceptron module, and its tests.

8 changes: 4 additions & 4 deletions doc/modules/neural_networks_supervised.rst
@@ -86,7 +86,7 @@ training samples::
>>> from sklearn.neural_network import MLPClassifier
>>> X = [[0., 0.], [1., 1.]]
>>> y = [0, 1]
- >>> clf = MLPClassifier(solver='lbgfs', alpha=1e-5,
+ >>> clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
... hidden_layer_sizes=(5, 2), random_state=1)
...
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
@@ -95,7 +95,7 @@ training samples::
epsilon=1e-08, hidden_layer_sizes=(5, 2), learning_rate='constant',
learning_rate_init=0.001, max_iter=200, momentum=0.9,
nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
- solver='lbgfs', tol=0.0001, validation_fraction=0.1, verbose=False,
+ solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
warm_start=False)

After fitting (training), the model can predict labels for new samples::
@@ -134,7 +134,7 @@ indices where the value is `1` represents the assigned classes of that sample::

>>> X = [[0., 0.], [1., 1.]]
>>> y = [[0, 1], [1, 1]]
- >>> clf = MLPClassifier(solver='lbgfs', alpha=1e-5,
+ >>> clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
... hidden_layer_sizes=(15,), random_state=1)
...
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
@@ -143,7 +143,7 @@ indices where the value is `1` represents the assigned classes of that sample::
epsilon=1e-08, hidden_layer_sizes=(15,), learning_rate='constant',
learning_rate_init=0.001, max_iter=200, momentum=0.9,
nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
- solver='lbgfs', tol=0.0001, validation_fraction=0.1, verbose=False,
+ solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
warm_start=False)
>>> clf.predict([1., 2.])
array([[1, 1]])
26 changes: 13 additions & 13 deletions sklearn/neural_network/multilayer_perceptron.py
@@ -134,7 +134,7 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,
with respect to the different parameters given in the initialization.

Returned gradients are packed in a single vector so it can be used
- in lbgfs
+ in lbfgs

Parameters
----------
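The docstring above refers to packing gradients into a single vector for L-BFGS: every weight matrix and bias vector is flattened and concatenated, because SciPy's L-BFGS interface optimizes over one flat 1-D array. A simplified stand-in for the library's private packing helper (names and details illustrative, not the exact code):

```python
import numpy as np

def pack(coefs, intercepts):
    # Flatten each weight matrix and bias vector, then concatenate,
    # so the optimizer sees a single 1-D parameter array.
    return np.hstack([arr.ravel() for arr in coefs + intercepts])

# Example: two layers of weights plus their biases become one vector.
coefs = [np.zeros((2, 5)), np.zeros((5, 1))]
intercepts = [np.zeros(5), np.zeros(1)]
print(pack(coefs, intercepts).shape)  # (21,)
```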
@@ -345,8 +345,8 @@ def _fit(self, X, y, incremental=False):
# First time training the model
self._initialize(y, layer_units)

- # lbgfs does not support mini-batches
- if self.solver == 'lbgfs':
+ # lbfgs does not support mini-batches
+ if self.solver == 'lbfgs':
batch_size = n_samples
elif self.batch_size == 'auto':
batch_size = min(200, n_samples)
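The hunk above encodes the batch-size policy; pulled out as a plain function it might look like the sketch below (the helper name is hypothetical, the logic mirrors the diff):

```python
def resolve_batch_size(solver, batch_size, n_samples):
    # L-BFGS is a full-batch method: the entire training set is
    # used as one batch regardless of the batch_size setting.
    if solver == 'lbfgs':
        return n_samples
    # Stochastic solvers ('sgd', 'adam') cap the automatic size at 200.
    if batch_size == 'auto':
        return min(200, n_samples)
    return batch_size

print(resolve_batch_size('lbfgs', 'auto', 1000))  # 1000
print(resolve_batch_size('adam', 'auto', 1000))   # 200
```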
@@ -375,7 +375,7 @@ def _fit(self, X, y, incremental=False):
intercept_grads, layer_units, incremental)

# Run the LBFGS solver
- elif self.solver == 'lbgfs':
+ elif self.solver == 'lbfgs':
self._fit_lbfgs(X, y, activations, deltas, coef_grads,
intercept_grads, layer_units)
return self
@@ -422,7 +422,7 @@ def _validate_hyperparameters(self):
if self.learning_rate not in ["constant", "invscaling", "adaptive"]:
raise ValueError("learning rate %s is not supported. " %
self.learning_rate)
- supported_solvers = _STOCHASTIC_SOLVERS + ["lbgfs"]
+ supported_solvers = _STOCHASTIC_SOLVERS + ["lbfgs"]
if self.solver not in supported_solvers:
raise ValueError("The solver %s is not supported. "
" Expected one of: %s" %
@@ -704,10 +704,10 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
- 'relu', the rectified linear unit function,
returns f(x) = max(0, x)

- solver : {'lbgfs', 'sgd', 'adam'}, default 'adam'
+ solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
The solver for weight optimization.

- - 'lbgfs' is an optimizer in the family of quasi-Newton methods.
+ - 'lbfgs' is an optimizer in the family of quasi-Newton methods.

- 'sgd' refers to stochastic gradient descent.

@@ -717,15 +717,15 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
Note: The default solver 'adam' works pretty well on relatively
large datasets (with thousands of training samples or more) in terms of
both training time and validation score.
- For small datasets, however, 'lbgfs' can converge faster and perform
+ For small datasets, however, 'lbfgs' can converge faster and perform
better.

alpha : float, optional, default 0.0001
L2 penalty (regularization term) parameter.

batch_size : int, optional, default 'auto'
Size of minibatches for stochastic optimizers.
- If the solver is 'lbgfs', the classifier will not use minibatch.
+ If the solver is 'lbfgs', the classifier will not use minibatch.
When set to "auto", `batch_size=min(200, n_samples)`

learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
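As the docstring note above says, 'lbfgs' tends to converge faster on small datasets; a minimal, self-contained illustration using the corrected spelling (data and sizes are arbitrary):

```python
from sklearn.neural_network import MLPClassifier

# A tiny XOR-style problem: full-batch L-BFGS handles such small
# datasets well, whereas 'sgd'/'adam' shine on larger ones.
X = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
y = [0, 1, 1, 0]
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X, y)
print(clf.predict([[1., 0.]]))
```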
@@ -1046,10 +1046,10 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
- 'relu', the rectified linear unit function,
returns f(x) = max(0, x)

- solver : {'lbgfs', 'sgd', 'adam'}, default 'adam'
+ solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
The solver for weight optimization.

- - 'lbgfs' is an optimizer in the family of quasi-Newton methods.
+ - 'lbfgs' is an optimizer in the family of quasi-Newton methods.

- 'sgd' refers to stochastic gradient descent.

@@ -1059,15 +1059,15 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
Note: The default solver 'adam' works pretty well on relatively
large datasets (with thousands of training samples or more) in terms of
both training time and validation score.
- For small datasets, however, 'lbgfs' can converge faster and perform
+ For small datasets, however, 'lbfgs' can converge faster and perform
better.

alpha : float, optional, default 0.0001
L2 penalty (regularization term) parameter.

batch_size : int, optional, default 'auto'
Size of minibatches for stochastic optimizers.
- If the solver is 'lbgfs', the classifier will not use minibatch.
+ If the solver is 'lbfgs', the classifier will not use minibatch.
When set to "auto", `batch_size=min(200, n_samples)`

learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
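The same guidance holds for MLPRegressor; a symmetric sketch on synthetic data (all values arbitrary):

```python
import numpy as np
from sklearn.neural_network import MLPRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-1., 1., size=(100, 1))
y = np.sin(3 * X).ravel()  # a smooth 1-D target
reg = MLPRegressor(solver='lbfgs', alpha=1e-5,
                   hidden_layer_sizes=(20,), random_state=1)
reg.fit(X, y)
print(reg.score(X, y))  # R^2 on the training data
```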
18 changes: 9 additions & 9 deletions sklearn/neural_network/tests/test_mlp.py
@@ -176,7 +176,7 @@ def test_gradient():

for activation in ACTIVATION_TYPES:
mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
- solver='lbgfs', alpha=1e-5,
+ solver='lbfgs', alpha=1e-5,
learning_rate_init=0.2, max_iter=1,
random_state=1)
mlp.fit(X, y)
@@ -235,7 +235,7 @@ def test_lbfgs_classification():
expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)

for activation in ACTIVATION_TYPES:
- mlp = MLPClassifier(solver='lbgfs', hidden_layer_sizes=50,
+ mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
activation=activation)
mlp.fit(X_train, y_train)
@@ -250,7 +250,7 @@ def test_lbfgs_regression():
X = Xboston
y = yboston
for activation in ACTIVATION_TYPES:
- mlp = MLPRegressor(solver='lbgfs', hidden_layer_sizes=50,
+ mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
max_iter=150, shuffle=True, random_state=1,
activation=activation)
mlp.fit(X, y)
@@ -287,7 +287,7 @@ def test_multilabel_classification():
# test fit method
X, y = make_multilabel_classification(n_samples=50, random_state=0,
return_indicator=True)
- mlp = MLPClassifier(solver='lbgfs', hidden_layer_sizes=50, alpha=1e-5,
+ mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
max_iter=150, random_state=0, activation='logistic',
learning_rate_init=0.2)
mlp.fit(X, y)
@@ -305,7 +305,7 @@ def test_multioutput_regression():
def test_multioutput_regression():
# Test that multi-output regression works as expected
X, y = make_regression(n_samples=200, n_targets=5)
- mlp = MLPRegressor(solver='lbgfs', hidden_layer_sizes=50, max_iter=200,
+ mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200,
random_state=1)
mlp.fit(X, y)
assert_greater(mlp.score(X, y), 0.9)
@@ -388,8 +388,8 @@ def test_partial_fit_errors():
assert_raises(ValueError,
MLPClassifier(solver='sgd').partial_fit, X, y, classes=[2])

- # lbgfs doesn't support partial_fit
- assert_false(hasattr(MLPClassifier(solver='lbgfs'), 'partial_fit'))
+ # lbfgs doesn't support partial_fit
+ assert_false(hasattr(MLPClassifier(solver='lbfgs'), 'partial_fit'))


def test_params_errors():
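The assertion in the hunk above relies on `partial_fit` being absent when the solver is L-BFGS. One common way to achieve that, and roughly the pattern sklearn used at the time, is a property that raises AttributeError for non-stochastic solvers; a simplified sketch, not the library's exact code:

```python
class Estimator:
    def __init__(self, solver='adam'):
        self.solver = solver

    @property
    def partial_fit(self):
        # Raising AttributeError from the property makes
        # hasattr(est, 'partial_fit') return False.
        if self.solver not in ('sgd', 'adam'):
            raise AttributeError("partial_fit is only available for "
                                 "stochastic solvers")
        return self._partial_fit

    def _partial_fit(self, X, y):
        pass  # incremental update would go here

print(hasattr(Estimator(solver='lbfgs'), 'partial_fit'))  # False
print(hasattr(Estimator(solver='sgd'), 'partial_fit'))    # True
```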
@@ -471,7 +471,7 @@ def test_predict_proba_multilabel():
return_indicator=True)
n_samples, n_classes = Y.shape

- clf = MLPClassifier(solver='lbgfs', hidden_layer_sizes=30,
+ clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=30,
random_state=0)
clf.fit(X, Y)
y_proba = clf.predict_proba(X)
@@ -493,7 +493,7 @@ def test_sparse_matrices():
X = X_digits_binary[:50]
y = y_digits_binary[:50]
X_sparse = csr_matrix(X)
- mlp = MLPClassifier(solver='lbgfs', hidden_layer_sizes=15,
+ mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=15,
random_state=1)
mlp.fit(X, y)
pred1 = mlp.predict(X)