@@ -252,24 +252,13 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
252
252
}
253
253
254
254
Halide::RDom r (0 , kernel.width , 0 , kernel.height , 0 , inpGroupCn);
255
-
255
+ Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width ;
256
+ Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height ;
256
257
Halide::Expr kc = r.z ;
257
- if (group > 1 )
258
+ for ( int i = 1 ; i < group; ++i )
258
259
{
259
- int outCnBound = outGroupCn;
260
- int inpChBound = inpGroupCn;
261
- Halide::Expr shift = select (c < outCnBound, 0 , inpChBound);
262
- for (int i = 2 ; i < group; ++i)
263
- {
264
- outCnBound += outGroupCn;
265
- inpChBound += inpGroupCn;
266
- shift = select (c < outCnBound, shift, inpChBound);
267
- }
268
- kc += shift;
260
+ kc = select (c < outGroupCn * i, kc, inpGroupCn * i + r.z );
269
261
}
270
-
271
- Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width ;
272
- Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height ;
273
262
Halide::Expr topExpr = sum (padded_input (kx, ky, kc, n) *
274
263
weights (r.x , r.y , r.z , c));
275
264
if (hasBias ())
@@ -278,7 +267,6 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
278
267
topExpr += bias (c);
279
268
}
280
269
top (x, y, c, n) = topExpr;
281
- Ptr<BackendNode> pp (new HalideBackendNode ({ padded_input, top }));
282
270
return Ptr<BackendNode>(new HalideBackendNode ({ padded_input, top }));
283
271
#endif // HAVE_HALIDE
284
272
return Ptr<BackendNode>();
@@ -793,7 +781,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
793
781
int inpH = inpShape[2 ];
794
782
int inpW = inpShape[3 ];
795
783
int outCn = outShape[1 ];
796
- int ngroups = inpCn / blobs[0 ].size [1 ];
784
+ int ngroups = inpCn / blobs[0 ].size [0 ];
797
785
int outGroupCn = outCn / ngroups;
798
786
int ksize = outGroupCn * kernel.height * kernel.width ;
799
787
return shape (ksize, inpH * inpW);
@@ -804,7 +792,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
804
792
std::vector<MatShape> &outputs,
805
793
std::vector<MatShape> &internals) const
806
794
{
807
- CV_Assert (!hasBias () || blobs[1 ].total () == (size_t )blobs[ 0 ]. size [ 0 ] );
795
+ CV_Assert (!hasBias () || blobs[1 ].total () == (size_t )numOutput );
808
796
CV_Assert (inputs.size () != 0 );
809
797
810
798
int inpCn = inputs[0 ][1 ];
@@ -813,12 +801,13 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
813
801
814
802
int outH = stride.height * (inpH - 1 ) + kernel.height - 2 * pad.height + adjustPad.height ;
815
803
int outW = stride.width * (inpW - 1 ) + kernel.width - 2 * pad.width + adjustPad.width ;
816
- int outCn = blobs[ 0 ]. size [ 0 ] ;
804
+ int outCn = numOutput ;
817
805
818
- int ngroups = inpCn / blobs[0 ].size [1 ];
806
+ CV_Assert (outCn % blobs[0 ].size [1 ] == 0 );
807
+ int ngroups = outCn / blobs[0 ].size [1 ];
819
808
820
809
CV_Assert (inpCn % ngroups == 0 && outCn % ngroups == 0 );
821
- CV_Assert (blobs[0 ].size [0 ] == outCn && blobs[ 0 ]. size [ 1 ] == inpCn / ngroups );
810
+ CV_Assert (blobs[0 ].size [0 ] == inpCn);
822
811
823
812
int dims[] = {inputs[0 ][0 ], outCn, outH, outW};
824
813
outputs.resize (inputs.size (), shape (dims));
@@ -1073,7 +1062,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
1073
1062
CV_TRACE_FUNCTION ();
1074
1063
CV_TRACE_ARG_VALUE (name, " name" , name.c_str ());
1075
1064
1076
- int outCn = blobs[ 0 ]. size [ 0 ] ;
1065
+ int outCn = numOutput ;
1077
1066
int inpCn = inputs[0 ]->size [1 ];
1078
1067
bool is1x1flag = is1x1 ();
1079
1068
int nstripes = getNumThreads ();
@@ -1086,9 +1075,9 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
1086
1075
1087
1076
for (size_t ii = 0 ; ii < outputs.size (); ii++)
1088
1077
{
1089
- int ngroups = inpCn / blobs[0 ].size [1 ];
1090
- int inpGroupCn = blobs[ 0 ]. size [ 1 ] ;
1091
- int outGroupCn = outCn / ngroups ;
1078
+ int ngroups = outCn / blobs[0 ].size [1 ];
1079
+ int inpGroupCn = inpCn / ngroups ;
1080
+ int outGroupCn = blobs[ 0 ]. size [ 1 ] ;
1092
1081
const Mat& inp = *inputs[ii];
1093
1082
Mat& out = outputs[ii];
1094
1083
int numImg = inp.size [0 ];
@@ -1126,18 +1115,16 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
1126
1115
#ifdef HAVE_HALIDE
1127
1116
Halide::Buffer<float > inputBuffer = halideBuffer (inputs[0 ]);
1128
1117
1129
- int inW, inH, inC, inN, outC = blobs[ 0 ]. size [ 0 ] ;
1118
+ int inW, inH, inC, inN;
1130
1119
getCanonicalSize (inputBuffer, &inW, &inH, &inC, &inN);
1131
-
1132
- if (inC / blobs[0 ].size [1 ] != 1 )
1133
- CV_Error (cv::Error::StsNotImplemented,
1134
- " Halide backend for Deconvolution with group > 1 is not implemented" );
1120
+ const int outGroupCn = blobs[0 ].size [1 ];
1121
+ const int group = numOutput / outGroupCn;
1122
+ const int inpGroupCn = blobs[0 ].size [0 ] / group;
1135
1123
1136
1124
Halide::Var x (" x" ), y (" y" ), c (" c" ), n (" n" );
1137
1125
Halide::Func top = (name.empty () ? Halide::Func () : Halide::Func (name));
1138
1126
Halide::Func padded_input (name + " _constant_exterior" );
1139
- auto weights = wrapToHalideBuffer (blobs[0 ], {kernel.width ,
1140
- kernel.height , outC, inC});
1127
+ auto weights = wrapToHalideBuffer (blobs[0 ]);
1141
1128
1142
1129
Halide::Func dilated_input (" dilated_input" );
1143
1130
dilated_input (x, y, c, n) = 0 .0f ;
@@ -1153,13 +1140,21 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
1153
1140
0 , inC, 0 , inN);
1154
1141
padded_input (x, y, c, n) = bounded (x, y, c, n);
1155
1142
1156
- Halide::RDom r (0 , kernel.width , 0 , kernel.height , 0 , inC);
1157
- Halide::Expr topExpr = sum (
1158
- padded_input (x + pad.width - r.x , y + pad.height - r.y , r.z , n) *
1159
- weights (r.x , r.y , c, r.z ));
1143
+ Halide::RDom r (0 , kernel.width , 0 , kernel.height , 0 , inpGroupCn);
1144
+ Halide::Expr kx = x + pad.width - r.x ;
1145
+ Halide::Expr ky = y + pad.height - r.y ;
1146
+ Halide::Expr kInC = r.z ;
1147
+ Halide::Expr kOutC = c;
1148
+ for (int i = 1 ; i < group; ++i)
1149
+ {
1150
+ kInC = select (c < outGroupCn * i, kInC , inpGroupCn * i + r.z );
1151
+ kOutC = select (c < outGroupCn * i, kOutC , c - outGroupCn * i);
1152
+ }
1153
+ Halide::Expr topExpr = sum (padded_input (kx, ky, kInC , n) *
1154
+ weights (r.x , r.y , kOutC , kInC ));
1160
1155
if (hasBias ())
1161
1156
{
1162
- auto bias = wrapToHalideBuffer (blobs[1 ], {outC });
1157
+ auto bias = wrapToHalideBuffer (blobs[1 ], {numOutput });
1163
1158
topExpr += bias (c);
1164
1159
}
1165
1160
top (x, y, c, n) = topExpr;
@@ -1194,13 +1189,13 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, const Laye
1194
1189
l->dilation .width , l->padMode );
1195
1190
1196
1191
bool bias = params.get <bool >(" bias_term" , true );
1197
- int numOutput = params.get <int >(" num_output" );
1192
+ l-> numOutput = params.get <int >(" num_output" );
1198
1193
int ngroups = params.get <int >(" group" , 1 );
1199
1194
1200
1195
l->adjustPad .height = params.get <int >(" adj_h" , 0 );
1201
1196
l->adjustPad .width = params.get <int >(" adj_w" , 0 );
1202
1197
1203
- CV_Assert (numOutput % ngroups == 0 );
1198
+ CV_Assert (l-> numOutput % ngroups == 0 );
1204
1199
CV_Assert ((bias && l->blobs .size () == 2 ) || (!bias && l->blobs .size () == 1 ));
1205
1200
CV_Assert (l->adjustPad .width < l->stride .width &&
1206
1201
l->adjustPad .height < l->stride .height );
0 commit comments