Skip to content

Commit 68cc2e2

Browse files
committed
Merge pull request opencv#9734 from dkurt:fix_deconv_layer_kernel_layout
2 parents 45365e4 + 6e593cd commit 68cc2e2

File tree

5 files changed

+39
-48
lines changed

5 files changed

+39
-48
lines changed

modules/dnn/include/opencv2/dnn/all_layers.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
199199
public:
200200
Size kernel, stride, pad, dilation, adjustPad;
201201
String padMode;
202+
int numOutput;
202203
};
203204

204205
class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -252,24 +252,13 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
252252
}
253253

254254
Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inpGroupCn);
255-
255+
Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width;
256+
Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height;
256257
Halide::Expr kc = r.z;
257-
if (group > 1)
258+
for (int i = 1; i < group; ++i)
258259
{
259-
int outCnBound = outGroupCn;
260-
int inpChBound = inpGroupCn;
261-
Halide::Expr shift = select(c < outCnBound, 0, inpChBound);
262-
for (int i = 2; i < group; ++i)
263-
{
264-
outCnBound += outGroupCn;
265-
inpChBound += inpGroupCn;
266-
shift = select(c < outCnBound, shift, inpChBound);
267-
}
268-
kc += shift;
260+
kc = select(c < outGroupCn * i, kc, inpGroupCn * i + r.z);
269261
}
270-
271-
Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width;
272-
Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height;
273262
Halide::Expr topExpr = sum(padded_input(kx, ky, kc, n) *
274263
weights(r.x, r.y, r.z, c));
275264
if (hasBias())
@@ -278,7 +267,6 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
278267
topExpr += bias(c);
279268
}
280269
top(x, y, c, n) = topExpr;
281-
Ptr<BackendNode> pp(new HalideBackendNode({ padded_input, top }));
282270
return Ptr<BackendNode>(new HalideBackendNode({ padded_input, top }));
283271
#endif // HAVE_HALIDE
284272
return Ptr<BackendNode>();
@@ -793,7 +781,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
793781
int inpH = inpShape[2];
794782
int inpW = inpShape[3];
795783
int outCn = outShape[1];
796-
int ngroups = inpCn / blobs[0].size[1];
784+
int ngroups = inpCn / blobs[0].size[0];
797785
int outGroupCn = outCn / ngroups;
798786
int ksize = outGroupCn * kernel.height * kernel.width;
799787
return shape(ksize, inpH * inpW);
@@ -804,7 +792,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
804792
std::vector<MatShape> &outputs,
805793
std::vector<MatShape> &internals) const
806794
{
807-
CV_Assert(!hasBias() || blobs[1].total() == (size_t)blobs[0].size[0]);
795+
CV_Assert(!hasBias() || blobs[1].total() == (size_t)numOutput);
808796
CV_Assert(inputs.size() != 0);
809797

810798
int inpCn = inputs[0][1];
@@ -813,12 +801,13 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
813801

814802
int outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
815803
int outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
816-
int outCn = blobs[0].size[0];
804+
int outCn = numOutput;
817805

818-
int ngroups = inpCn / blobs[0].size[1];
806+
CV_Assert(outCn % blobs[0].size[1] == 0);
807+
int ngroups = outCn / blobs[0].size[1];
819808

820809
CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0);
821-
CV_Assert(blobs[0].size[0] == outCn && blobs[0].size[1] == inpCn / ngroups);
810+
CV_Assert(blobs[0].size[0] == inpCn);
822811

823812
int dims[] = {inputs[0][0], outCn, outH, outW};
824813
outputs.resize(inputs.size(), shape(dims));
@@ -1073,7 +1062,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
10731062
CV_TRACE_FUNCTION();
10741063
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
10751064

1076-
int outCn = blobs[0].size[0];
1065+
int outCn = numOutput;
10771066
int inpCn = inputs[0]->size[1];
10781067
bool is1x1flag = is1x1();
10791068
int nstripes = getNumThreads();
@@ -1086,9 +1075,9 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
10861075

10871076
for (size_t ii = 0; ii < outputs.size(); ii++)
10881077
{
1089-
int ngroups = inpCn / blobs[0].size[1];
1090-
int inpGroupCn = blobs[0].size[1];
1091-
int outGroupCn = outCn / ngroups;
1078+
int ngroups = outCn / blobs[0].size[1];
1079+
int inpGroupCn = inpCn / ngroups;
1080+
int outGroupCn = blobs[0].size[1];
10921081
const Mat& inp = *inputs[ii];
10931082
Mat& out = outputs[ii];
10941083
int numImg = inp.size[0];
@@ -1126,18 +1115,16 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
11261115
#ifdef HAVE_HALIDE
11271116
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
11281117

1129-
int inW, inH, inC, inN, outC = blobs[0].size[0];
1118+
int inW, inH, inC, inN;
11301119
getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
1131-
1132-
if (inC / blobs[0].size[1] != 1)
1133-
CV_Error(cv::Error::StsNotImplemented,
1134-
"Halide backend for Deconvolution with group > 1 is not implemented");
1120+
const int outGroupCn = blobs[0].size[1];
1121+
const int group = numOutput / outGroupCn;
1122+
const int inpGroupCn = blobs[0].size[0] / group;
11351123

11361124
Halide::Var x("x"), y("y"), c("c"), n("n");
11371125
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
11381126
Halide::Func padded_input(name + "_constant_exterior");
1139-
auto weights = wrapToHalideBuffer(blobs[0], {kernel.width,
1140-
kernel.height, outC, inC});
1127+
auto weights = wrapToHalideBuffer(blobs[0]);
11411128

11421129
Halide::Func dilated_input("dilated_input");
11431130
dilated_input(x, y, c, n) = 0.0f;
@@ -1153,13 +1140,21 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
11531140
0, inC, 0, inN);
11541141
padded_input(x, y, c, n) = bounded(x, y, c, n);
11551142

1156-
Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inC);
1157-
Halide::Expr topExpr = sum(
1158-
padded_input(x + pad.width - r.x, y + pad.height - r.y, r.z, n) *
1159-
weights(r.x, r.y, c, r.z));
1143+
Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inpGroupCn);
1144+
Halide::Expr kx = x + pad.width - r.x;
1145+
Halide::Expr ky = y + pad.height - r.y;
1146+
Halide::Expr kInC = r.z;
1147+
Halide::Expr kOutC = c;
1148+
for (int i = 1; i < group; ++i)
1149+
{
1150+
kInC = select(c < outGroupCn * i, kInC, inpGroupCn * i + r.z);
1151+
kOutC = select(c < outGroupCn * i, kOutC, c - outGroupCn * i);
1152+
}
1153+
Halide::Expr topExpr = sum(padded_input(kx, ky, kInC, n) *
1154+
weights(r.x, r.y, kOutC, kInC));
11601155
if (hasBias())
11611156
{
1162-
auto bias = wrapToHalideBuffer(blobs[1], {outC});
1157+
auto bias = wrapToHalideBuffer(blobs[1], {numOutput});
11631158
topExpr += bias(c);
11641159
}
11651160
top(x, y, c, n) = topExpr;
@@ -1194,13 +1189,13 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, const Laye
11941189
l->dilation.width, l->padMode);
11951190

11961191
bool bias = params.get<bool>("bias_term", true);
1197-
int numOutput = params.get<int>("num_output");
1192+
l->numOutput = params.get<int>("num_output");
11981193
int ngroups = params.get<int>("group", 1);
11991194

12001195
l->adjustPad.height = params.get<int>("adj_h", 0);
12011196
l->adjustPad.width = params.get<int>("adj_w", 0);
12021197

1203-
CV_Assert(numOutput % ngroups == 0);
1198+
CV_Assert(l->numOutput % ngroups == 0);
12041199
CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
12051200
CV_Assert(l->adjustPad.width < l->stride.width &&
12061201
l->adjustPad.height < l->stride.height);

modules/dnn/src/tensorflow/tf_importer.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,13 +1015,11 @@ void TFImporter::populateNet(Net dstNet)
10151015
}
10161016

10171017
kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
1018-
// Swap just numbers of input and output channels.
1019-
std::swap(layerParams.blobs[0].size[0], layerParams.blobs[0].size[1]);
10201018

10211019
const int* kshape = layerParams.blobs[0].size.p;
10221020
layerParams.set("kernel_h", kshape[2]);
10231021
layerParams.set("kernel_w", kshape[3]);
1024-
layerParams.set("num_output", kshape[0]);
1022+
layerParams.set("num_output", kshape[1]);
10251023

10261024
setStrides(layerParams, layer);
10271025
setPadding(layerParams, layer);

modules/dnn/src/torch/torch_importer.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -798,10 +798,7 @@ struct TorchImporter : public ::cv::dnn::Importer
798798
layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
799799
layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
800800

801-
Mat weights = tensorParams["weight"].second;
802-
CV_Assert(weights.dims == 4);
803-
int reorderedShape[] = { weights.size[1], weights.size[0], weights.size[2], weights.size[3] };
804-
layerParams.blobs.push_back(weights.reshape(1, 4, reorderedShape));
801+
layerParams.blobs.push_back(tensorParams["weight"].second);
805802

806803
bool bias = tensorParams.count("bias");
807804
layerParams.set("bias_term", bias);

modules/dnn/test/test_halide_layers.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ TEST_P(Deconvolution, Accuracy)
129129
Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]);
130130
bool hasBias = get<6>(GetParam());
131131

132-
Mat weights({outChannels, inChannels / group, kernel.height, kernel.width}, CV_32F);
132+
Mat weights({inChannels, outChannels / group, kernel.height, kernel.width}, CV_32F);
133133
randu(weights, -1.0f, 1.0f);
134134

135135
LayerParams lp;
@@ -161,7 +161,7 @@ TEST_P(Deconvolution, Accuracy)
161161

162162
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine(
163163
/*in channels, out channels, group*/
164-
Values(Vec3i(6, 4, 1), Vec3i(6, 9, 1)),
164+
Values(Vec3i(6, 4, 1), Vec3i(6, 9, 3)),
165165
/*in size*/ Values(Size(5, 6)),
166166
/*kernel*/ Values(Size(3, 1), Size(1, 3)),
167167
/*pad*/ Values(Size(1, 0), Size(0, 1)),

0 commit comments

Comments (0)