Skip to content

Commit a44573c

Browse files
committed
Add ReLU and LeakyReLU activation functions in ml module
1 parent f8ad289 commit a44573c

File tree

3 files changed

+220
-74
lines changed

3 files changed

+220
-74
lines changed

modules/ml/include/opencv2/ml.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1503,14 +1503,18 @@ class CV_EXPORTS_W ANN_MLP : public StatModel
15031503
enum ActivationFunctions {
15041504
/** Identity function: \f$f(x)=x\f$ */
15051505
IDENTITY = 0,
1506-
/** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x}\f$
1506+
/** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$
15071507
@note
15081508
If you are using the default sigmoid activation function with the default parameter values
15091509
fparam1=0 and fparam2=0 then the function used is y = 1.7159\*tanh(2/3 \* x), so the output
15101510
will range from [-1.7159, 1.7159], instead of [0,1].*/
15111511
SIGMOID_SYM = 1,
15121512
/** Gaussian function: \f$f(x)=\beta e^{-\alpha x*x}\f$ */
1513-
GAUSSIAN = 2
1513+
GAUSSIAN = 2,
1514+
/** ReLU function: \f$f(x)=max(0,x)\f$ */
1515+
RELU = 3,
1516+
/** Leaky ReLU function: \f$f(x)=x\f$ for x>0 and \f$f(x)=\alpha x\f$ for x<=0 */
1517+
LEAKYRELU= 4
15141518
};
15151519

15161520
/** Train options */

modules/ml/src/ann_mlp.cpp

Lines changed: 142 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ class ANN_MLPImpl : public ANN_MLP
135135

136136
void setActivationFunction(int _activ_func, double _f_param1, double _f_param2 )
137137
{
138-
if( _activ_func < 0 || _activ_func > GAUSSIAN )
138+
if( _activ_func < 0 || _activ_func > LEAKYRELU)
139139
CV_Error( CV_StsOutOfRange, "Unknown activation function" );
140140

141141
activ_func = _activ_func;
@@ -153,11 +153,23 @@ class ANN_MLPImpl : public ANN_MLP
153153
case GAUSSIAN:
154154
max_val = 1.; min_val = 0.05;
155155
max_val1 = 1.; min_val1 = 0.02;
156-
if( fabs(_f_param1) < FLT_EPSILON )
156+
if (fabs(_f_param1) < FLT_EPSILON)
157157
_f_param1 = 1.;
158-
if( fabs(_f_param2) < FLT_EPSILON )
158+
if (fabs(_f_param2) < FLT_EPSILON)
159159
_f_param2 = 1.;
160160
break;
161+
case RELU:
162+
if (fabs(_f_param1) < FLT_EPSILON)
163+
_f_param1 = 1;
164+
min_val = max_val = min_val1 = max_val1 = 0.;
165+
_f_param2 = 0.;
166+
break;
167+
case LEAKYRELU:
168+
if (fabs(_f_param1) < FLT_EPSILON)
169+
_f_param1 = 0.01;
170+
min_val = max_val = min_val1 = max_val1 = 0.;
171+
_f_param2 = 0.;
172+
break;
161173
default:
162174
min_val = max_val = min_val1 = max_val1 = 0.;
163175
_f_param1 = 1.;
@@ -368,140 +380,194 @@ class ANN_MLPImpl : public ANN_MLP
368380
}
369381
}
370382

371-
void calc_activ_func( Mat& sums, const Mat& w ) const
383+
void calc_activ_func(Mat& sums, const Mat& w) const
372384
{
373-
const double* bias = w.ptr<double>(w.rows-1);
385+
const double* bias = w.ptr<double>(w.rows - 1);
374386
int i, j, n = sums.rows, cols = sums.cols;
375387
double scale = 0, scale2 = f_param2;
376388

377-
switch( activ_func )
389+
switch (activ_func)
378390
{
379-
case IDENTITY:
380-
scale = 1.;
381-
break;
382-
case SIGMOID_SYM:
383-
scale = -f_param1;
384-
break;
385-
case GAUSSIAN:
386-
scale = -f_param1*f_param1;
387-
break;
388-
default:
389-
;
391+
case IDENTITY:
392+
scale = 1.;
393+
break;
394+
case SIGMOID_SYM:
395+
scale = -f_param1;
396+
break;
397+
case GAUSSIAN:
398+
scale = -f_param1*f_param1;
399+
break;
400+
case RELU:
401+
scale = 1;
402+
break;
403+
case LEAKYRELU:
404+
scale = 1;
405+
break;
406+
default:
407+
;
390408
}
391409

392-
CV_Assert( sums.isContinuous() );
410+
CV_Assert(sums.isContinuous());
393411

394-
if( activ_func != GAUSSIAN )
412+
if (activ_func != GAUSSIAN)
395413
{
396-
for( i = 0; i < n; i++ )
414+
for (i = 0; i < n; i++)
397415
{
398416
double* data = sums.ptr<double>(i);
399-
for( j = 0; j < cols; j++ )
417+
for (j = 0; j < cols; j++)
418+
{
400419
data[j] = (data[j] + bias[j])*scale;
420+
if (activ_func == RELU)
421+
if (data[j] < 0)
422+
data[j] = 0;
423+
if (activ_func == LEAKYRELU)
424+
if (data[j] < 0)
425+
data[j] *= f_param1;
426+
}
401427
}
402428

403-
if( activ_func == IDENTITY )
429+
if (activ_func == IDENTITY || activ_func == RELU || activ_func == LEAKYRELU)
404430
return;
405431
}
406432
else
407433
{
408-
for( i = 0; i < n; i++ )
434+
for (i = 0; i < n; i++)
409435
{
410436
double* data = sums.ptr<double>(i);
411-
for( j = 0; j < cols; j++ )
437+
for (j = 0; j < cols; j++)
412438
{
413439
double t = data[j] + bias[j];
414440
data[j] = t*t*scale;
415441
}
416442
}
417443
}
418444

419-
exp( sums, sums );
445+
exp(sums, sums);
420446

421-
if( sums.isContinuous() )
447+
if (sums.isContinuous())
422448
{
423449
cols *= n;
424450
n = 1;
425451
}
426452

427-
switch( activ_func )
453+
switch (activ_func)
428454
{
429-
case SIGMOID_SYM:
430-
for( i = 0; i < n; i++ )
455+
case SIGMOID_SYM:
456+
for (i = 0; i < n; i++)
457+
{
458+
double* data = sums.ptr<double>(i);
459+
for (j = 0; j < cols; j++)
431460
{
432-
double* data = sums.ptr<double>(i);
433-
for( j = 0; j < cols; j++ )
461+
if (!cvIsInf(data[j]))
434462
{
435-
if(!cvIsInf(data[j]))
436-
{
437-
double t = scale2*(1. - data[j])/(1. + data[j]);
438-
data[j] = t;
439-
}
440-
else
441-
{
442-
data[j] = -scale2;
443-
}
463+
double t = scale2*(1. - data[j]) / (1. + data[j]);
464+
data[j] = t;
465+
}
466+
else
467+
{
468+
data[j] = -scale2;
444469
}
445470
}
446-
break;
471+
}
472+
break;
447473

448-
case GAUSSIAN:
449-
for( i = 0; i < n; i++ )
450-
{
451-
double* data = sums.ptr<double>(i);
452-
for( j = 0; j < cols; j++ )
453-
data[j] = scale2*data[j];
454-
}
455-
break;
474+
case GAUSSIAN:
475+
for (i = 0; i < n; i++)
476+
{
477+
double* data = sums.ptr<double>(i);
478+
for (j = 0; j < cols; j++)
479+
data[j] = scale2*data[j];
480+
}
481+
break;
456482

457-
default:
458-
;
483+
default:
484+
;
459485
}
460486
}
461487

462-
void calc_activ_func_deriv( Mat& _xf, Mat& _df, const Mat& w ) const
488+
void calc_activ_func_deriv(Mat& _xf, Mat& _df, const Mat& w) const
463489
{
464-
const double* bias = w.ptr<double>(w.rows-1);
490+
const double* bias = w.ptr<double>(w.rows - 1);
465491
int i, j, n = _xf.rows, cols = _xf.cols;
466492

467-
if( activ_func == IDENTITY )
493+
if (activ_func == IDENTITY)
468494
{
469-
for( i = 0; i < n; i++ )
495+
for (i = 0; i < n; i++)
470496
{
471497
double* xf = _xf.ptr<double>(i);
472498
double* df = _df.ptr<double>(i);
473499

474-
for( j = 0; j < cols; j++ )
500+
for (j = 0; j < cols; j++)
475501
{
476502
xf[j] += bias[j];
477503
df[j] = 1;
478504
}
479505
}
480506
}
481-
else if( activ_func == GAUSSIAN )
507+
else if (activ_func == RELU)
508+
{
509+
for (i = 0; i < n; i++)
510+
{
511+
double* xf = _xf.ptr<double>(i);
512+
double* df = _df.ptr<double>(i);
513+
514+
for (j = 0; j < cols; j++)
515+
{
516+
xf[j] += bias[j];
517+
if (xf[j] < 0)
518+
{
519+
xf[j] = 0;
520+
df[j] = 0;
521+
}
522+
else
523+
df[j] = 1;
524+
}
525+
}
526+
}
527+
else if (activ_func == LEAKYRELU)
528+
{
529+
for (i = 0; i < n; i++)
530+
{
531+
double* xf = _xf.ptr<double>(i);
532+
double* df = _df.ptr<double>(i);
533+
534+
for (j = 0; j < cols; j++)
535+
{
536+
xf[j] += bias[j];
537+
if (xf[j] < 0)
538+
{
539+
xf[j] = f_param1*xf[j];
540+
df[j] = f_param1;
541+
}
542+
else
543+
df[j] = 1;
544+
}
545+
}
546+
}
547+
else if (activ_func == GAUSSIAN)
482548
{
483549
double scale = -f_param1*f_param1;
484550
double scale2 = scale*f_param2;
485-
for( i = 0; i < n; i++ )
551+
for (i = 0; i < n; i++)
486552
{
487553
double* xf = _xf.ptr<double>(i);
488554
double* df = _df.ptr<double>(i);
489555

490-
for( j = 0; j < cols; j++ )
556+
for (j = 0; j < cols; j++)
491557
{
492558
double t = xf[j] + bias[j];
493-
df[j] = t*2*scale2;
559+
df[j] = t * 2 * scale2;
494560
xf[j] = t*t*scale;
495561
}
496562
}
497-
exp( _xf, _xf );
563+
exp(_xf, _xf);
498564

499-
for( i = 0; i < n; i++ )
565+
for (i = 0; i < n; i++)
500566
{
501567
double* xf = _xf.ptr<double>(i);
502568
double* df = _df.ptr<double>(i);
503569

504-
for( j = 0; j < cols; j++ )
570+
for (j = 0; j < cols; j++)
505571
df[j] *= xf[j];
506572
}
507573
}
@@ -510,34 +576,34 @@ class ANN_MLPImpl : public ANN_MLP
510576
double scale = f_param1;
511577
double scale2 = f_param2;
512578

513-
for( i = 0; i < n; i++ )
579+
for (i = 0; i < n; i++)
514580
{
515581
double* xf = _xf.ptr<double>(i);
516582
double* df = _df.ptr<double>(i);
517583

518-
for( j = 0; j < cols; j++ )
584+
for (j = 0; j < cols; j++)
519585
{
520586
xf[j] = (xf[j] + bias[j])*scale;
521587
df[j] = -fabs(xf[j]);
522588
}
523589
}
524590

525-
exp( _df, _df );
591+
exp(_df, _df);
526592

527593
// ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
528594
// ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
529595
// 2*a*exp(-ax)/(1+exp(-ax))^2
530-
scale *= 2*f_param2;
531-
for( i = 0; i < n; i++ )
596+
scale *= 2 * f_param2;
597+
for (i = 0; i < n; i++)
532598
{
533599
double* xf = _xf.ptr<double>(i);
534600
double* df = _df.ptr<double>(i);
535601

536-
for( j = 0; j < cols; j++ )
602+
for (j = 0; j < cols; j++)
537603
{
538604
int s0 = xf[j] > 0 ? 1 : -1;
539-
double t0 = 1./(1. + df[j]);
540-
double t1 = scale*df[j]*t0*t0;
605+
double t0 = 1. / (1. + df[j]);
606+
double t1 = scale*df[j] * t0*t0;
541607
t0 *= scale2*(1. - df[j])*s0;
542608
df[j] = t1;
543609
xf[j] = t0;
@@ -1110,7 +1176,9 @@ class ANN_MLPImpl : public ANN_MLP
11101176
{
11111177
const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
11121178
activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
1113-
activ_func == GAUSSIAN ? "GAUSSIAN" : 0;
1179+
activ_func == GAUSSIAN ? "GAUSSIAN" :
1180+
activ_func == RELU ? "RELU" :
1181+
activ_func == LEAKYRELU ? "LEAKYRELU" : 0;
11141182

11151183
if( activ_func_name )
11161184
fs << "activation_function" << activ_func_name;
@@ -1191,6 +1259,8 @@ class ANN_MLPImpl : public ANN_MLP
11911259
{
11921260
activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
11931261
activ_func_name == "IDENTITY" ? IDENTITY :
1262+
activ_func_name == "RELU" ? RELU :
1263+
activ_func_name == "LEAKYRELU" ? LEAKYRELU :
11941264
activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
11951265
CV_Assert( activ_func >= 0 );
11961266
}

0 commit comments

Comments
 (0)