
Commit 438e456

Merge pull request opencv#10113 from wzw-intel:fusion

2 parents: 75b980a + 45d11dd

File tree: 6 files changed, +613 −449 lines


modules/dnn/src/dnn.cpp

Lines changed: 75 additions & 2 deletions
@@ -1233,12 +1233,13 @@ struct Net::Impl
                 }
             }

-            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU
+            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU/Power
             if ( preferableTarget != DNN_TARGET_OPENCL ||
                  (preferableTarget == DNN_TARGET_OPENCL &&
                   nextData &&
                   (!nextData->type.compare("ReLU") ||
-                   !nextData->type.compare("ChannelsPReLU"))) )
+                   !nextData->type.compare("ChannelsPReLU") ||
+                   !nextData->type.compare("Power"))) )
             {

                 Ptr<ActivationLayer> nextActivLayer;
@@ -1253,6 +1254,78 @@ struct Net::Impl
                     printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                     activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                     ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+
+                    if ( preferableTarget == DNN_TARGET_OPENCL )
+                    {
+                        nextData = &layers[activData->consumers[0].lid];
+                        lpNext = LayerPin(activData->consumers[0].lid, 0);
+                    }
+                }
+            }
+
+            // fuse convlution layer followed by eltwise + relu
+            if ( preferableTarget == DNN_TARGET_OPENCL )
+            {
+                Ptr<EltwiseLayer> nextEltwiseLayer;
+                if( nextData )
+                    nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
+
+                if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 )
+                {
+                    LayerData *eltwiseData = nextData;
+                    // go down from the second input and find the first non-skipped layer.
+                    LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid];
+                    while (downLayerData->skipFlags[DNN_BACKEND_DEFAULT])
+                    {
+                        downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
+                    }
+
+                    // second input layer is current layer.
+                    if ( ld.id == downLayerData->id )
+                    {
+                        // go down from the first input and find the first non-skipped layer
+                        downLayerData = &layers[eltwiseData->inputBlobsId[0].lid];
+                        while (downLayerData->skipFlags[DNN_BACKEND_DEFAULT])
+                        {
+                            if ( !downLayerData->type.compare("Eltwise") )
+                                downLayerData = &layers[downLayerData->inputBlobsId[1].lid];
+                            else
+                                downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
+                        }
+
+                        Ptr<ConvolutionLayer> convLayer;
+                        if( downLayerData )
+                            convLayer = downLayerData->layerInstance.dynamicCast<ConvolutionLayer>();
+
+                        // first input layer is convolution layer
+                        if( !convLayer.empty() )
+                        {
+                            // fuse eltwise + activation layer
+                            LayerData *firstConvLayerData = downLayerData;
+                            {
+                                nextData = &layers[eltwiseData->consumers[0].lid];
+                                lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
+                                Ptr<ActivationLayer> nextActivLayer;
+                                if( nextData )
+                                    nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
+
+                                if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
+                                    (!nextData->type.compare("ReLU") ||
+                                     !nextData->type.compare("ChannelsPReLU") ||
+                                     !nextData->type.compare("Power")) &&
+                                    currLayer->setActivation(nextActivLayer) )
+                                {
+                                    CV_Assert(firstConvLayerData->outputBlobs.size() == 1 && ld.inputBlobs.size() == 1);
+                                    ld.inputBlobs.push_back(&firstConvLayerData->outputBlobs[0]);
+                                    printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                                    printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
+                                    eltwiseData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                                    nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
                 }
             }
         }
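
Taken together, the new block in fuseLayers targets the residual-style pattern Convolution → Eltwise(sum) → ReLU (or ChannelsPReLU/Power): when the eltwise's second input traces back to the current convolution and its first input traces back to another convolution, the eltwise and the following activation are skipped and the other convolution's output blob is appended to the current layer's inputs. As a rough illustration only (the helper name and plain-array types below are hypothetical, not part of the patch), the rewired OpenCL path then behaves per element like:

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical sketch: with the graph rewired as above, one fused pass is
// numerically equivalent to the eltwise sum followed by ReLU applied to the
// convolution result -- three layers collapse into a single output stage.
static void fusedConvEltwiseReLU(const std::vector<float>& convOut,   // conv(x), current layer's result
                                 const std::vector<float>& skipInput, // output of the other (first) convolution
                                 std::vector<float>& dst)
{
    dst.resize(convOut.size());
    for (std::size_t i = 0; i < convOut.size(); i++)
        dst[i] = std::max(convOut[i] + skipInput[i], 0.f); // sum + ReLU in one step
}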

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 39 additions & 4 deletions
@@ -142,6 +142,9 @@ class BaseConvolutionLayerImpl : public ConvolutionLayer
     }
 };

+
+#define IS_POWER_LAYER(layer) \
+    (!layer.empty() && !layer->type.compare("Power"))
 //TODO: simultaneously convolution and bias addition for cache optimization
 class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
 {
@@ -161,6 +164,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
     bool newWeightAndBias;
     bool newActiv;
     ocl4dnnFusedActiv_t activType;
+    float power;
 #endif
     ConvolutionLayerImpl()
     {
@@ -169,6 +173,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
         newWeightAndBias = false;
         newActiv = false;
         activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+        power = 0.f;
 #endif
     }

@@ -225,6 +230,22 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
 #ifdef HAVE_OPENCL
         newActiv = true;
         activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+
+        if (preferableTarget == DNN_TARGET_OPENCL)
+        {
+            Ptr<PowerLayer> activ_power = activ.dynamicCast<PowerLayer>();
+            if (!activ_power.empty())
+            {
+                if (activ_power->scale != 1.f || activ_power->shift != 0.f)
+                    newWeightAndBias = true;
+
+                if (activ_power->scale != 1.f)
+                    weightsMat.release();
+
+                power = activ_power->power;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_POWER;
+            }
+        }
 #endif
         return !activ.empty();
     }
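
For reference, OpenCV's Power layer computes f(x) = (shift + scale·x)^power. The hunk above keeps only the exponent as a runtime activation (OCL4DNN_CONV_FUSED_ACTIV_POWER) and flags the affine part for folding into the convolution weights and bias, which is why weightsMat is released whenever scale != 1. A minimal reference of the activation itself (illustrative sketch, not the library kernel):

#include <cmath>

// f(x) = (shift + scale * x)^power -- the per-element definition of the
// Power activation being fused here (illustration only, not OpenCV code).
static inline float powerActivation(float x, float power, float scale, float shift)
{
    return std::pow(shift + scale * x, power);
}
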
@@ -727,11 +748,12 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 biasvec[k] = biasMat.at<float>(k);
         }

-        if( !bnorm.empty() || !scaleLayer.empty() )
+        if( !bnorm.empty() || !scaleLayer.empty() || IS_POWER_LAYER(activ))
         {
             Mat scale, shift, scale2, shift2;
             const float *scaleptr = 0, *shiftptr = 0;
             const float *scaleptr2 = 0, *shiftptr2 = 0;
+            float a = 1.f, b = 0.f;

             if( !bnorm.empty() )
             {
@@ -758,7 +780,14 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 }
             }

-            if (shiftptr || shiftptr2)
+            if( IS_POWER_LAYER(activ) )
+            {
+                Ptr<PowerLayer> activ_power = activ.dynamicCast<PowerLayer>();
+                a = activ_power->scale;
+                b = activ_power->shift;
+            }
+
+            if (shiftptr || shiftptr2 || b != 0.f)
                 fusedBias = true;

             for( int i = 0; i < outCn; i++ )
@@ -771,9 +800,9 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 int j, wcols = weightsMat.cols;

                 for( j = 0; j < wcols; j++ )
-                    w_i[j] *= (s1*s2);
+                    w_i[j] *= (s1*s2*a);

-                biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
+                biasvec[i] = biasvec[i]*(s1*s2*a) + (delta1*s2*a + delta2*a + b);
             }
         }
         biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
@@ -827,10 +856,15 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 CV_Assert(!reluslope.empty());
                 convolutionOp->setActivPReLU(true, reluslope);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_POWER)
+            {
+                convolutionOp->setActivPower(true, power);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
+                convolutionOp->setActivPower(false, 1.f);
             }
             newActiv = false;
         }
@@ -840,6 +874,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
         int batch_size = inpMat.size[0];

         return convolutionOp->Forward(inpMat,
+                                      inputs.size() == 2 ? inputs[1] : UMat(),
                                       umat_blobs[0],
                                       (hasBias() || fusedBias) ? umat_blobs[1] : UMat(),
                                       outMat,
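
The updated folding loop relies on the identity that the affine chain conv → batch-norm (s1, delta1) → scale layer (s2, delta2) → Power's affine part (a = scale, b = shift) collapses into a single convolution with weights w·(s1·s2·a) and bias bias·(s1·s2·a) + delta1·s2·a + delta2·a + b; only the exponent is left for the OCL kernel. A small self-contained check of that identity, using toy scalar values assumed purely for illustration:

#include <cassert>
#include <cmath>
#include <cstdio>

int main()
{
    // Toy one-tap "convolution" y = w*x + bias, then batch-norm (s1, delta1),
    // a Scale layer (s2, delta2) and the affine part of Power (a, b).
    const float w = 0.5f, bias = 0.2f, x = 3.f;
    const float s1 = 1.1f, delta1 = -0.3f, s2 = 0.9f, delta2 = 0.4f;
    const float a = 2.f, b = 0.1f;

    // Unfused reference: apply each stage in sequence.
    const float ref = a * (((w * x + bias) * s1 + delta1) * s2 + delta2) + b;

    // Folded form used by the patch:
    //   w'    = w * (s1*s2*a)
    //   bias' = bias*(s1*s2*a) + (delta1*s2*a + delta2*a + b)
    const float wf = w * (s1 * s2 * a);
    const float bf = bias * (s1 * s2 * a) + (delta1 * s2 * a + delta2 * a + b);
    const float fused = wf * x + bf;

    assert(std::fabs(ref - fused) < 1e-5f);
    std::printf("reference = %f, fused = %f\n", ref, fused);
    return 0;
}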
