[MRG] added leaky_relu activation and derivative to multilayer_perceptron #10665


Closed · wants to merge 4 commits
43 changes: 41 additions & 2 deletions sklearn/neural_network/_base.py
@@ -75,6 +75,26 @@ def relu(X):
return X


def leaky_relu(X, alpha=0.01):
"""Compute the leaky rectified linear unit function inplace.

Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
The input data.

alpha : float, default 0.01
The slope of the function for x < 0.

Returns
-------
X_new : {array-like, sparse matrix}, shape (n_samples, n_features)
The transformed data.
"""
np.clip(X, alpha*X, np.finfo(X.dtype).max, out=X)
return X
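
As a side note for reviewers, here is a minimal standalone sketch of the clip-based formulation (the `leaky_relu_ref` helper and the sample values are illustrative only, not part of this patch): clipping each element to a lower bound of `alpha * x` leaves positive entries untouched and scales negative entries by `alpha`, i.e. it computes f(x) = max(alpha*x, x) for 0 < alpha < 1.

```python
import numpy as np


def leaky_relu_ref(X, alpha=0.01):
    """Reference implementation mirroring the np.clip approach above."""
    X = np.asarray(X, dtype=float)
    # Element-wise lower bound of alpha * X: positive values are unchanged,
    # negative values are replaced by alpha * X.
    np.clip(X, alpha * X, np.finfo(X.dtype).max, out=X)
    return X


x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(leaky_relu_ref(x.copy()))     # [-0.02  -0.005  0.     0.5    2.   ]
print(np.maximum(x, 0.01 * x))      # same result via the closed form max(alpha*x, x)
```

`np.maximum(X, alpha * X, out=X)` would be an equivalent in-place alternative to the clip call.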


def softmax(X):
"""Compute the K-way softmax function inplace.

@@ -96,7 +116,7 @@ def softmax(X):


ACTIVATIONS = {'identity': identity, 'tanh': tanh, 'logistic': logistic,
-'relu': relu, 'softmax': softmax}
+'relu': relu, 'leaky_relu': leaky_relu, 'softmax': softmax}


def inplace_identity_derivative(Z, delta):
@@ -169,10 +189,29 @@ def inplace_relu_derivative(Z, delta):
delta[Z == 0] = 0


def inplace_leaky_relu_derivative(Z, delta, alpha=0.01):
"""Apply the derivative of the leaky relu function.

It exploits the fact that the derivative is a simple function of the output
value of the leaky rectified linear units activation function.

Parameters
----------
Z : {array-like, sparse matrix}, shape (n_samples, n_features)
The data which was output from the leaky rectified linear units
activation function during the forward pass.

delta : {array-like}, shape (n_samples, n_features)
The backpropagated error signal to be modified inplace.

alpha : float, default 0.01
The slope used for x < 0 in the forward pass.
"""
delta[Z < 0] *= alpha
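
To illustrate the backward step (a toy example with made-up numbers, assuming the multiplicative `*=` form above): the chain rule multiplies the incoming error by the local derivative, which is alpha where the stored activation is negative and 1 elsewhere.

```python
import numpy as np

alpha = 0.01
# Outputs of the leaky ReLU from the forward pass; negative entries mark
# units in the "leaky" regime, since the activation preserves sign.
Z = np.array([[-0.02, 0.5],
              [1.0, -0.005]])
# Backpropagated error signal arriving at this layer.
delta = np.array([[0.3, -0.4],
                  [0.7, 0.2]])

delta[Z < 0] *= alpha   # scale the error where f'(x) = alpha
print(delta)
# [[ 0.003 -0.4  ]
#  [ 0.7    0.002]]
```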


DERIVATIVES = {'identity': inplace_identity_derivative,
'tanh': inplace_tanh_derivative,
'logistic': inplace_logistic_derivative,
-'relu': inplace_relu_derivative}
+'relu': inplace_relu_derivative,
+'leaky_relu': inplace_leaky_relu_derivative}
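
The MLP code looks activation functions and their derivatives up by name in these two dicts, so registering the pair is the only wiring needed in `_base.py`. A rough usage sketch, assuming this branch is installed (the arrays are arbitrary sample values):

```python
import numpy as np
from sklearn.neural_network._base import ACTIVATIONS, DERIVATIVES

X = np.array([[-1.0, 2.0]])
ACTIVATIONS['leaky_relu'](X)           # forward pass, in place -> [[-0.01, 2.0]]

delta = np.ones_like(X)
DERIVATIVES['leaky_relu'](X, delta)    # backward pass, in place -> [[0.01, 1.0]]
print(X, delta)
```

Note that the callers pass only `(X)` and `(Z, delta)`, so `alpha` always falls back to its default of 0.01; exposing it as an `MLPClassifier` parameter would require further plumbing.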


def squared_loss(y_true, y_pred):
9 changes: 7 additions & 2 deletions sklearn/neural_network/multilayer_perceptron.py
@@ -419,7 +419,8 @@ def _validate_hyperparameters(self):
% self.n_iter_no_change)

# raise ValueError if not registered
-supported_activations = ('identity', 'logistic', 'tanh', 'relu')
+supported_activations = ('identity', 'logistic', 'tanh',
+'relu', 'leaky_relu')
if self.activation not in supported_activations:
raise ValueError("The activation '%s' is not supported. Supported "
"activations are %s." % (self.activation,
@@ -699,7 +700,8 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
The ith element represents the number of neurons in the ith
hidden layer.

-activation : {'identity', 'logistic', 'tanh', 'relu'}, default 'relu'
+activation : {'identity', 'logistic', 'tanh', 'relu', 'leaky_relu'},
+default 'relu'
Activation function for the hidden layer.

- 'identity', no-op activation, useful to implement linear bottleneck,
@@ -713,6 +715,9 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):

- 'relu', the rectified linear unit function,
returns f(x) = max(0, x)

- 'leaky_relu', the leaky rectified linear unit function,
returns f(x) = max(alpha*x, x)

solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
The solver for weight optimization.
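
Finally, an end-to-end illustration of the documented option (a throwaway example on synthetic data, assuming this branch is installed; the dataset and hyperparameters are arbitrary). 'leaky_relu' behaves like 'relu' but keeps a small gradient of alpha = 0.01 for negative pre-activations, which is the usual motivation for leaky ReLU (avoiding units that stop updating).

```python
from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier

X, y = make_classification(n_samples=200, n_features=20, random_state=0)

clf = MLPClassifier(hidden_layer_sizes=(50,), activation='leaky_relu',
                    solver='adam', max_iter=300, random_state=0)
clf.fit(X, y)
print(clf.score(X, y))   # training accuracy; the exact value depends on the run
```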