Skip to content

Commit 68cc2e2

Browse files
committed
Merge pull request opencv#9734 from dkurt:fix_deconv_layer_kernel_layout
2 parents 45365e4 + 6e593cd commit 68cc2e2

File tree

5 files changed

+39
-48
lines changed

5 files changed

+39
-48
lines changed

modules/dnn/include/opencv2/dnn/all_layers.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
199199
public:
200200
Size kernel, stride, pad, dilation, adjustPad;
201201
String padMode;
202+
int numOutput;
202203
};
203204

204205
class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -252,24 +252,13 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
252252
}
253253

254254
Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inpGroupCn);
255-
255+
Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width;
256+
Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height;
256257
Halide::Expr kc = r.z;
257-
if (group > 1)
258+
for (int i = 1; i < group; ++i)
258259
{
259-
int outCnBound = outGroupCn;
260-
int inpChBound = inpGroupCn;
261-
Halide::Expr shift = select(c < outCnBound, 0, inpChBound);
262-
for (int i = 2; i < group; ++i)
263-
{
264-
outCnBound += outGroupCn;
265-
inpChBound += inpGroupCn;
266-
shift = select(c < outCnBound, shift, inpChBound);
267-
}
268-
kc += shift;
260+
kc = select(c < outGroupCn * i, kc, inpGroupCn * i + r.z);
269261
}
270-
271-
Halide::Expr kx = x * stride.width - pad.width + r.x * dilation.width;
272-
Halide::Expr ky = y * stride.height - pad.height + r.y * dilation.height;
273262
Halide::Expr topExpr = sum(padded_input(kx, ky, kc, n) *
274263
weights(r.x, r.y, r.z, c));
275264
if (hasBias())
@@ -278,7 +267,6 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
278267
topExpr += bias(c);
279268
}
280269
top(x, y, c, n) = topExpr;
281-
Ptr<BackendNode> pp(new HalideBackendNode({ padded_input, top }));
282270
return Ptr<BackendNode>(new HalideBackendNode({ padded_input, top }));
283271
#endif // HAVE_HALIDE
284272
return Ptr<BackendNode>();
@@ -793,7 +781,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
793781
int inpH = inpShape[2];
794782
int inpW = inpShape[3];
795783
int outCn = outShape[1];
796-
int ngroups = inpCn / blobs[0].size[1];
784+
int ngroups = inpCn / blobs[0].size[0];
797785
int outGroupCn = outCn / ngroups;
798786
int ksize = outGroupCn * kernel.height * kernel.width;
799787
return shape(ksize, inpH * inpW);
@@ -804,7 +792,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
804792
std::vector<MatShape> &outputs,
805793
std::vector<MatShape> &internals) const
806794
{
807-
CV_Assert(!hasBias() || blobs[1].total() == (size_t)blobs[0].size[0]);
795+
CV_Assert(!hasBias() || blobs[1].total() == (size_t)numOutput);
808796
CV_Assert(inputs.size() != 0);
809797

810798
int inpCn = inputs[0][1];
@@ -813,12 +801,13 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
813801

814802
int outH = stride.height * (inpH - 1) + kernel.height - 2 * pad.height + adjustPad.height;
815803
int outW = stride.width * (inpW - 1) + kernel.width - 2 * pad.width + adjustPad.width;
816-
int outCn = blobs[0].size[0];
804+
int outCn = numOutput;
817805

818-
int ngroups = inpCn / blobs[0].size[1];
806+
CV_Assert(outCn % blobs[0].size[1] == 0);
807+
int ngroups = outCn / blobs[0].size[1];
819808

820809
CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0);
821-
CV_Assert(blobs[0].size[0] == outCn && blobs[0].size[1] == inpCn / ngroups);
810+
CV_Assert(blobs[0].size[0] == inpCn);
822811

823812
int dims[] = {inputs[0][0], outCn, outH, outW};
824813
outputs.resize(inputs.size(), shape(dims));
@@ -1073,7 +1062,7 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
10731062
CV_TRACE_FUNCTION();
10741063
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
10751064

1076-
int outCn = blobs[0].size[0];
1065+
int outCn = numOutput;
10771066
int inpCn = inputs[0]->size[1];
10781067
bool is1x1flag = is1x1();
10791068
int nstripes = getNumThreads();
@@ -1086,9 +1075,9 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
10861075

10871076
for (size_t ii = 0; ii < outputs.size(); ii++)
10881077
{
1089-
int ngroups = inpCn / blobs[0].size[1];
1090-
int inpGroupCn = blobs[0].size[1];
1091-
int outGroupCn = outCn / ngroups;
1078+
int ngroups = outCn / blobs[0].size[1];
1079+
int inpGroupCn = inpCn / ngroups;
1080+
int outGroupCn = blobs[0].size[1];
10921081
const Mat& inp = *inputs[ii];
10931082
Mat& out = outputs[ii];
10941083
int numImg = inp.size[0];
@@ -1126,18 +1115,16 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
11261115
#ifdef HAVE_HALIDE
11271116
Halide::Buffer<float> inputBuffer = halideBuffer(inputs[0]);
11281117

1129-
int inW, inH, inC, inN, outC = blobs[0].size[0];
1118+
int inW, inH, inC, inN;
11301119
getCanonicalSize(inputBuffer, &inW, &inH, &inC, &inN);
1131-
1132-
if (inC / blobs[0].size[1] != 1)
1133-
CV_Error(cv::Error::StsNotImplemented,
1134-
"Halide backend for Deconvolution with group > 1 is not implemented");
1120+
const int outGroupCn = blobs[0].size[1];
1121+
const int group = numOutput / outGroupCn;
1122+
const int inpGroupCn = blobs[0].size[0] / group;
11351123

11361124
Halide::Var x("x"), y("y"), c("c"), n("n");
11371125
Halide::Func top = (name.empty() ? Halide::Func() : Halide::Func(name));
11381126
Halide::Func padded_input(name + "_constant_exterior");
1139-
auto weights = wrapToHalideBuffer(blobs[0], {kernel.width,
1140-
kernel.height, outC, inC});
1127+
auto weights = wrapToHalideBuffer(blobs[0]);
11411128

11421129
Halide::Func dilated_input("dilated_input");
11431130
dilated_input(x, y, c, n) = 0.0f;
@@ -1153,13 +1140,21 @@ class DeConvolutionLayerImpl : public BaseConvolutionLayerImpl
11531140
0, inC, 0, inN);
11541141
padded_input(x, y, c, n) = bounded(x, y, c, n);
11551142

1156-
Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inC);
1157-
Halide::Expr topExpr = sum(
1158-
padded_input(x + pad.width - r.x, y + pad.height - r.y, r.z, n) *
1159-
weights(r.x, r.y, c, r.z));
1143+
Halide::RDom r(0, kernel.width, 0, kernel.height, 0, inpGroupCn);
1144+
Halide::Expr kx = x + pad.width - r.x;
1145+
Halide::Expr ky = y + pad.height - r.y;
1146+
Halide::Expr kInC = r.z;
1147+
Halide::Expr kOutC = c;
1148+
for (int i = 1; i < group; ++i)
1149+
{
1150+
kInC = select(c < outGroupCn * i, kInC, inpGroupCn * i + r.z);
1151+
kOutC = select(c < outGroupCn * i, kOutC, c - outGroupCn * i);
1152+
}
1153+
Halide::Expr topExpr = sum(padded_input(kx, ky, kInC, n) *
1154+
weights(r.x, r.y, kOutC, kInC));
11601155
if (hasBias())
11611156
{
1162-
auto bias = wrapToHalideBuffer(blobs[1], {outC});
1157+
auto bias = wrapToHalideBuffer(blobs[1], {numOutput});
11631158
topExpr += bias(c);
11641159
}
11651160
top(x, y, c, n) = topExpr;
@@ -1194,13 +1189,13 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, const Laye
11941189
l->dilation.width, l->padMode);
11951190

11961191
bool bias = params.get<bool>("bias_term", true);
1197-
int numOutput = params.get<int>("num_output");
1192+
l->numOutput = params.get<int>("num_output");
11981193
int ngroups = params.get<int>("group", 1);
11991194

12001195
l->adjustPad.height = params.get<int>("adj_h", 0);
12011196
l->adjustPad.width = params.get<int>("adj_w", 0);
12021197

1203-
CV_Assert(numOutput % ngroups == 0);
1198+
CV_Assert(l->numOutput % ngroups == 0);
12041199
CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
12051200
CV_Assert(l->adjustPad.width < l->stride.width &&
12061201
l->adjustPad.height < l->stride.height);

modules/dnn/src/tensorflow/tf_importer.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,13 +1015,11 @@ void TFImporter::populateNet(Net dstNet)
10151015
}
10161016

10171017
kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]);
1018-
// Swap just numbers of input and output channels.
1019-
std::swap(layerParams.blobs[0].size[0], layerParams.blobs[0].size[1]);
10201018

10211019
const int* kshape = layerParams.blobs[0].size.p;
10221020
layerParams.set("kernel_h", kshape[2]);
10231021
layerParams.set("kernel_w", kshape[3]);
1024-
layerParams.set("num_output", kshape[0]);
1022+
layerParams.set("num_output", kshape[1]);
10251023

10261024
setStrides(layerParams, layer);
10271025
setPadding(layerParams, layer);

modules/dnn/src/torch/torch_importer.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -798,10 +798,7 @@ struct TorchImporter : public ::cv::dnn::Importer
798798
layerParams.set("adj_h", static_cast<int>(scalarParams.get<double>("adjH")));
799799
layerParams.set("num_output", static_cast<int>(scalarParams.get<double>("nOutputPlane")));
800800

801-
Mat weights = tensorParams["weight"].second;
802-
CV_Assert(weights.dims == 4);
803-
int reorderedShape[] = { weights.size[1], weights.size[0], weights.size[2], weights.size[3] };
804-
layerParams.blobs.push_back(weights.reshape(1, 4, reorderedShape));
801+
layerParams.blobs.push_back(tensorParams["weight"].second);
805802

806803
bool bias = tensorParams.count("bias");
807804
layerParams.set("bias_term", bias);

modules/dnn/test/test_halide_layers.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ TEST_P(Deconvolution, Accuracy)
129129
Size adjPad = Size(get<5>(GetParam())[2], get<5>(GetParam())[3]);
130130
bool hasBias = get<6>(GetParam());
131131

132-
Mat weights({outChannels, inChannels / group, kernel.height, kernel.width}, CV_32F);
132+
Mat weights({inChannels, outChannels / group, kernel.height, kernel.width}, CV_32F);
133133
randu(weights, -1.0f, 1.0f);
134134

135135
LayerParams lp;
@@ -161,7 +161,7 @@ TEST_P(Deconvolution, Accuracy)
161161

162162
INSTANTIATE_TEST_CASE_P(Layer_Test_Halide, Deconvolution, Combine(
163163
/*in channels, out channels, group*/
164-
Values(Vec3i(6, 4, 1), Vec3i(6, 9, 1)),
164+
Values(Vec3i(6, 4, 1), Vec3i(6, 9, 3)),
165165
/*in size*/ Values(Size(5, 6)),
166166
/*kernel*/ Values(Size(3, 1), Size(1, 3)),
167167
/*pad*/ Values(Size(1, 0), Size(0, 1)),

0 commit comments

Comments (0)