diff --git a/sklearn/neural_network/_base.py b/sklearn/neural_network/_base.py
index 7a1c17c6ff75e..7977a5702b638 100644
--- a/sklearn/neural_network/_base.py
+++ b/sklearn/neural_network/_base.py
@@ -75,6 +75,26 @@ def relu(X):
     return X
 
 
+def leaky_relu(X, alpha=0.01):
+    """Compute the leaky rectified linear unit function inplace.
+
+    Parameters
+    ----------
+    X : {array-like, sparse matrix}, shape (n_samples, n_features)
+        The input data.
+
+    alpha : float, default 0.01
+        The slope of the function for x < 0.
+
+    Returns
+    -------
+    X_new : {array-like, sparse matrix}, shape (n_samples, n_features)
+        The transformed data.
+    """
+    np.clip(X, alpha * X, np.finfo(X.dtype).max, out=X)
+    return X
+
+
 def softmax(X):
     """Compute the K-way softmax function inplace.
 
@@ -96,7 +116,7 @@ def softmax(X):
 
 
 ACTIVATIONS = {'identity': identity, 'tanh': tanh, 'logistic': logistic,
-               'relu': relu, 'softmax': softmax}
+               'relu': relu, 'leaky_relu': leaky_relu, 'softmax': softmax}
 
 
 def inplace_identity_derivative(Z, delta):
@@ -169,10 +189,32 @@ def inplace_relu_derivative(Z, delta):
     delta[Z == 0] = 0
 
 
+def inplace_leaky_relu_derivative(Z, delta, alpha=0.01):
+    """Apply the derivative of the leaky relu function.
+
+    It exploits the fact that the derivative is a simple function of the
+    output value of the leaky rectified linear units activation function.
+
+    Parameters
+    ----------
+    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
+        The data which was output from the leaky rectified linear units
+        activation function during the forward pass.
+
+    delta : {array-like}, shape (n_samples, n_features)
+        The backpropagated error signal to be modified inplace.
+
+    alpha : float, default 0.01
+        The slope of the function for x < 0.
+    """
+    delta[Z < 0] *= alpha
+
+
 DERIVATIVES = {'identity': inplace_identity_derivative,
                'tanh': inplace_tanh_derivative,
                'logistic': inplace_logistic_derivative,
-               'relu': inplace_relu_derivative}
+               'relu': inplace_relu_derivative,
+               'leaky_relu': inplace_leaky_relu_derivative}
 
 
 def squared_loss(y_true, y_pred):
diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index c8c3e18708b9d..c8108fe2c00b0 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -419,7 +419,8 @@ def _validate_hyperparameters(self):
                              % self.n_iter_no_change)
 
         # raise ValueError if not registered
-        supported_activations = ('identity', 'logistic', 'tanh', 'relu')
+        supported_activations = ('identity', 'logistic', 'tanh',
+                                 'relu', 'leaky_relu')
         if self.activation not in supported_activations:
             raise ValueError("The activation '%s' is not supported. Supported "
                              "activations are %s." % (self.activation,
@@ -699,7 +700,8 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
         The ith element represents the number of neurons in the ith
         hidden layer.
 
-    activation : {'identity', 'logistic', 'tanh', 'relu'}, default 'relu'
+    activation : {'identity', 'logistic', 'tanh', 'relu', 'leaky_relu'},
+        default 'relu'
         Activation function for the hidden layer.
 
         - 'identity', no-op activation, useful to implement linear bottleneck,
@@ -713,6 +715,9 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
 
         - 'relu', the rectified linear unit function,
           returns f(x) = max(0, x)
+
+        - 'leaky_relu', the leaky rectified linear unit function,
+          returns f(x) = max(alpha*x, x)
 
     solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
         The solver for weight optimization.
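For reference, here is a minimal standalone sketch (plain NumPy, not part of the patch) of the forward and backward behaviour the new functions are meant to implement. The `leaky_relu` and `inplace_leaky_relu_derivative` names simply mirror the diff above; the bodies below are illustrative, not the patched scikit-learn code.

```python
import numpy as np


def leaky_relu(X, alpha=0.01):
    # f(x) = max(x, alpha * x); for 0 < alpha < 1 this is the same as
    # clipping each value from below at alpha * x, as the patch does
    # with np.clip.
    np.maximum(X, alpha * X, out=X)
    return X


def inplace_leaky_relu_derivative(Z, delta, alpha=0.01):
    # f'(x) = 1 for x > 0 and alpha for x < 0. Since leaky relu preserves
    # the sign of its input, Z < 0 marks exactly the inputs that were
    # negative, so the backpropagated error is scaled by alpha there.
    delta[Z < 0] *= alpha


X = np.array([[-2.0, -0.5, 0.0, 0.5, 2.0]])
Z = leaky_relu(X.copy())
print(Z)       # negative entries become alpha * x: [-0.02, -0.005, 0., 0.5, 2.]

delta = np.ones_like(Z)
inplace_leaky_relu_derivative(Z, delta)
print(delta)   # gradient scaled by alpha where Z < 0: [0.01, 0.01, 1., 1., 1.]
```

With the patch applied, `MLPClassifier(activation='leaky_relu')` should pass `_validate_hyperparameters`. Note that `alpha` stays at its default of 0.01, since `ACTIVATIONS` and `DERIVATIVES` are invoked without it and the estimators do not currently expose it as a hyperparameter.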