
Commit 438e456

Merge pull request opencv#10113 from wzw-intel:fusion

2 parents: 75b980a + 45d11dd

File tree: 6 files changed, +613 −449 lines


modules/dnn/src/dnn.cpp

Lines changed: 75 additions & 2 deletions
@@ -1233,12 +1233,13 @@ struct Net::Impl
                 }
             }

-            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU
+            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU/Power
             if ( preferableTarget != DNN_TARGET_OPENCL ||
                  (preferableTarget == DNN_TARGET_OPENCL &&
                   nextData &&
                   (!nextData->type.compare("ReLU") ||
-                   !nextData->type.compare("ChannelsPReLU"))) )
+                   !nextData->type.compare("ChannelsPReLU") ||
+                   !nextData->type.compare("Power"))) )
             {

                 Ptr<ActivationLayer> nextActivLayer;
@@ -1253,6 +1254,78 @@ struct Net::Impl
                     printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                     activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                     ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+
+                    if ( preferableTarget == DNN_TARGET_OPENCL )
+                    {
+                        nextData = &layers[activData->consumers[0].lid];
+                        lpNext = LayerPin(activData->consumers[0].lid, 0);
+                    }
+                }
+            }
+
+            // fuse convlution layer followed by eltwise + relu
+            if ( preferableTarget == DNN_TARGET_OPENCL )
+            {
+                Ptr<EltwiseLayer> nextEltwiseLayer;
+                if( nextData )
+                    nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
+
+                if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 )
+                {
+                    LayerData *eltwiseData = nextData;
+                    // go down from the second input and find the first non-skipped layer.
+                    LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid];
+                    while (downLayerData->skipFlags[DNN_BACKEND_DEFAULT])
+                    {
+                        downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
+                    }
+
+                    // second input layer is current layer.
+                    if ( ld.id == downLayerData->id )
+                    {
+                        // go down from the first input and find the first non-skipped layer
+                        downLayerData = &layers[eltwiseData->inputBlobsId[0].lid];
+                        while (downLayerData->skipFlags[DNN_BACKEND_DEFAULT])
+                        {
+                            if ( !downLayerData->type.compare("Eltwise") )
+                                downLayerData = &layers[downLayerData->inputBlobsId[1].lid];
+                            else
+                                downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
+                        }
+
+                        Ptr<ConvolutionLayer> convLayer;
+                        if( downLayerData )
+                            convLayer = downLayerData->layerInstance.dynamicCast<ConvolutionLayer>();
+
+                        // first input layer is convolution layer
+                        if( !convLayer.empty() )
+                        {
+                            // fuse eltwise + activation layer
+                            LayerData *firstConvLayerData = downLayerData;
+                            {
+                                nextData = &layers[eltwiseData->consumers[0].lid];
+                                lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
+                                Ptr<ActivationLayer> nextActivLayer;
+                                if( nextData )
+                                    nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
+
+                                if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
+                                    (!nextData->type.compare("ReLU") ||
+                                     !nextData->type.compare("ChannelsPReLU") ||
+                                     !nextData->type.compare("Power")) &&
+                                    currLayer->setActivation(nextActivLayer) )
+                                {
+                                    CV_Assert(firstConvLayerData->outputBlobs.size() == 1 && ld.inputBlobs.size() == 1);
+                                    ld.inputBlobs.push_back(&firstConvLayerData->outputBlobs[0]);
+                                    printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                                    printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
+                                    eltwiseData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                                    nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
                 }
             }
         }
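
Taken together, the new block in fuseLayers targets the residual-style pattern Convolution → Eltwise(sum) → ReLU (or ChannelsPReLU/Power): when the eltwise's second input traces back to the current convolution and its first input traces back to another convolution, the eltwise and the following activation are skipped and the other convolution's output blob is appended to the current layer's inputs. As a rough illustration only (the helper name and plain-array types below are hypothetical, not part of the patch), the rewired OpenCL path then behaves per element like:

#include <algorithm>
#include <cstddef>
#include <vector>

// Hypothetical sketch: with the graph rewired as above, one fused pass is
// numerically equivalent to the eltwise sum followed by ReLU applied to the
// convolution result -- three layers collapse into a single output stage.
static void fusedConvEltwiseReLU(const std::vector<float>& convOut,   // conv(x), current layer's result
                                 const std::vector<float>& skipInput, // output of the other (first) convolution
                                 std::vector<float>& dst)
{
    dst.resize(convOut.size());
    for (std::size_t i = 0; i < convOut.size(); i++)
        dst[i] = std::max(convOut[i] + skipInput[i], 0.f); // sum + ReLU in one step
}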

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 39 additions & 4 deletions
@@ -142,6 +142,9 @@ class BaseConvolutionLayerImpl : public ConvolutionLayer
     }
 };

+
+#define IS_POWER_LAYER(layer) \
+    (!layer.empty() && !layer->type.compare("Power"))
 //TODO: simultaneously convolution and bias addition for cache optimization
 class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
 {
@@ -161,6 +164,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
     bool newWeightAndBias;
     bool newActiv;
     ocl4dnnFusedActiv_t activType;
+    float power;
 #endif
     ConvolutionLayerImpl()
     {
@@ -169,6 +173,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
         newWeightAndBias = false;
         newActiv = false;
         activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+        power = 0.f;
 #endif
     }

@@ -225,6 +230,22 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
 #ifdef HAVE_OPENCL
         newActiv = true;
         activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+
+        if (preferableTarget == DNN_TARGET_OPENCL)
+        {
+            Ptr<PowerLayer> activ_power = activ.dynamicCast<PowerLayer>();
+            if (!activ_power.empty())
+            {
+                if (activ_power->scale != 1.f || activ_power->shift != 0.f)
+                    newWeightAndBias = true;
+
+                if (activ_power->scale != 1.f)
+                    weightsMat.release();
+
+                power = activ_power->power;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_POWER;
+            }
+        }
 #endif
         return !activ.empty();
     }
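
For reference, OpenCV's Power layer computes f(x) = (shift + scale·x)^power. The hunk above keeps only the exponent as a runtime activation (OCL4DNN_CONV_FUSED_ACTIV_POWER) and flags the affine part for folding into the convolution weights and bias, which is why weightsMat is released whenever scale != 1. A minimal reference of the activation itself (illustrative sketch, not the library kernel):

#include <cmath>

// f(x) = (shift + scale * x)^power -- the per-element definition of the
// Power activation being fused here (illustration only, not OpenCV code).
static inline float powerActivation(float x, float power, float scale, float shift)
{
    return std::pow(shift + scale * x, power);
}
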
@@ -727,11 +748,12 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 biasvec[k] = biasMat.at<float>(k);
         }

-        if( !bnorm.empty() || !scaleLayer.empty() )
+        if( !bnorm.empty() || !scaleLayer.empty() || IS_POWER_LAYER(activ))
         {
             Mat scale, shift, scale2, shift2;
             const float *scaleptr = 0, *shiftptr = 0;
             const float *scaleptr2 = 0, *shiftptr2 = 0;
+            float a = 1.f, b = 0.f;

             if( !bnorm.empty() )
             {
@@ -758,7 +780,14 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 }
             }

-            if (shiftptr || shiftptr2)
+            if( IS_POWER_LAYER(activ) )
+            {
+                Ptr<PowerLayer> activ_power = activ.dynamicCast<PowerLayer>();
+                a = activ_power->scale;
+                b = activ_power->shift;
+            }
+
+            if (shiftptr || shiftptr2 || b != 0.f)
                 fusedBias = true;

             for( int i = 0; i < outCn; i++ )
@@ -771,9 +800,9 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 int j, wcols = weightsMat.cols;

                 for( j = 0; j < wcols; j++ )
-                    w_i[j] *= (s1*s2);
+                    w_i[j] *= (s1*s2*a);

-                biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
+                biasvec[i] = biasvec[i]*(s1*s2*a) + (delta1*s2*a + delta2*a + b);
             }
         }
         biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
@@ -827,10 +856,15 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                 CV_Assert(!reluslope.empty());
                 convolutionOp->setActivPReLU(true, reluslope);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_POWER)
+            {
+                convolutionOp->setActivPower(true, power);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
+                convolutionOp->setActivPower(false, 1.f);
             }
             newActiv = false;
         }
@@ -840,6 +874,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
         int batch_size = inpMat.size[0];

         return convolutionOp->Forward(inpMat,
+                                      inputs.size() == 2 ? inputs[1] : UMat(),
                                       umat_blobs[0],
                                       (hasBias() || fusedBias) ? umat_blobs[1] : UMat(),
                                       outMat,
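
The updated folding loop relies on the identity that the affine chain conv → batch-norm (s1, delta1) → scale layer (s2, delta2) → Power's affine part (a = scale, b = shift) collapses into a single convolution with weights w·(s1·s2·a) and bias bias·(s1·s2·a) + delta1·s2·a + delta2·a + b; only the exponent is left for the OCL kernel. A small self-contained check of that identity, using toy scalar values assumed purely for illustration:

#include <cassert>
#include <cmath>
#include <cstdio>

int main()
{
    // Toy one-tap "convolution" y = w*x + bias, then batch-norm (s1, delta1),
    // a Scale layer (s2, delta2) and the affine part of Power (a, b).
    const float w = 0.5f, bias = 0.2f, x = 3.f;
    const float s1 = 1.1f, delta1 = -0.3f, s2 = 0.9f, delta2 = 0.4f;
    const float a = 2.f, b = 0.1f;

    // Unfused reference: apply each stage in sequence.
    const float ref = a * (((w * x + bias) * s1 + delta1) * s2 + delta2) + b;

    // Folded form used by the patch:
    //   w'    = w * (s1*s2*a)
    //   bias' = bias*(s1*s2*a) + (delta1*s2*a + delta2*a + b)
    const float wf = w * (s1 * s2 * a);
    const float bf = bias * (s1 * s2 * a) + (delta1 * s2 * a + delta2 * a + b);
    const float fused = wf * x + bf;

    assert(std::fabs(ref - fused) < 1e-5f);
    std::printf("reference = %f, fused = %f\n", ref, fused);
    return 0;
}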
