
Commit 41b23fd

Merge pull request opencv#9524 from dkurt:dnn_torch_openface

2 parents: 48cc1b3 + 7dc6b1d
File tree: 10 files changed, +328 -34 lines

modules/dnn/include/opencv2/dnn/all_layers.hpp

Lines changed: 16 additions & 0 deletions
@@ -245,6 +245,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         bool globalPooling;
         bool computeMaxIdx;
         String padMode;
+        bool ceilMode;

         static Ptr<PoolingLayer> create(const LayerParams& params);
     };
@@ -257,6 +258,14 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
         static Ptr<SoftmaxLayer> create(const LayerParams& params);
     };

+    class CV_EXPORTS LPNormalizeLayer : public Layer
+    {
+    public:
+        float pnorm, epsilon;
+
+        static Ptr<LPNormalizeLayer> create(const LayerParams& params);
+    };
+
     class CV_EXPORTS InnerProductLayer : public Layer
     {
     public:
@@ -294,6 +303,13 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     {
     public:
         int axis;
+        /**
+         * @brief Add zero padding in case of concatenation of blobs with different
+         * spatial sizes.
+         *
+         * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
+         */
+        bool padding;

         static Ptr<ConcatLayer> create(const LayerParams &params);
     };
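
The three public additions above (PoolingLayer::ceilMode, LPNormalizeLayer, and ConcatLayer::padding) are all driven by LayerParams keys. Below is a minimal sketch of how they could be set when building layers programmatically; the key names follow the parsing code further down in this commit and the existing pooling helpers, while the surrounding set-up (headers, helper function) is only an assumption about typical usage.

// Sketch only: assumes the dnn headers from this branch are available.
#include <opencv2/dnn.hpp>

using namespace cv;
using namespace cv::dnn;

void buildExampleLayers()
{
    LayerParams concatParams;
    concatParams.set("axis", 1);
    concatParams.set("padding", true);            // DepthConcat-style zero padding
    Ptr<ConcatLayer> concat = ConcatLayer::create(concatParams);

    LayerParams poolParams;
    poolParams.set("pool", "max");
    poolParams.set("kernel_size", 3);
    poolParams.set("stride", 2);
    poolParams.set("ceil_mode", false);           // floor rounding of the output size
    Ptr<PoolingLayer> pool = PoolingLayer::create(poolParams);

    LayerParams lpParams;
    lpParams.set("p", 2);                         // pnorm
    lpParams.set("eps", 1e-10);                   // epsilon
    Ptr<LPNormalizeLayer> lpnorm = LPNormalizeLayer::create(lpParams);
}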

modules/dnn/src/dnn.cpp

Lines changed: 1 addition & 1 deletion
@@ -1137,7 +1137,7 @@ struct Net::Impl
             // (and so we eliminate the concatenation layer, because the channels
             // are concatenated implicitly).
             Ptr<ConcatLayer> concatLayer = ld.layerInstance.dynamicCast<ConcatLayer>();
-            if( !concatLayer.empty() && concatLayer->axis == 1 &&
+            if( !concatLayer.empty() && concatLayer->axis == 1 && !concatLayer->padding &&
                 ld.outputBlobs.size() == 1 )
             {
                 Mat& output = ld.outputBlobs[0];

modules/dnn/src/init.cpp

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(InnerProduct, InnerProductLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Softmax, SoftmaxLayer);
     CV_DNN_REGISTER_LAYER_CLASS(MVN, MVNLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(LPNormalize, LPNormalizeLayer);

     CV_DNN_REGISTER_LAYER_CLASS(ReLU, ReLULayer);
     CV_DNN_REGISTER_LAYER_CLASS(ChannelsPReLU, ChannelsPReLULayer);
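
Registering the class under the name "LPNormalize" is what lets importers resolve it from a layer's type string. A hedged sketch of that lookup path follows; the helper function and parameter values are illustrative, not part of the commit.

#include <opencv2/dnn.hpp>

using namespace cv;
using namespace cv::dnn;

// Hypothetical helper: create the new layer the way an importer would,
// by its registered type name rather than by calling the class directly.
Ptr<Layer> makeLpNormalize()
{
    LayerParams lp;
    lp.name = "l2_norm";
    lp.type = "LPNormalize";
    lp.set("p", 2);
    lp.set("eps", 1e-10);
    return LayerFactory::createLayerInstance("LPNormalize", lp);
}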

modules/dnn/src/layers/concat_layer.cpp

Lines changed: 28 additions & 11 deletions
@@ -56,6 +56,7 @@ class ConcatLayerImpl : public ConcatLayer
     {
         setParamsFrom(params);
         axis = params.get<int>("axis", 1);
+        padding = params.get<bool>("padding", false);
     }

     virtual bool getMemoryShapes(const std::vector<MatShape> &inputs,
@@ -64,34 +65,41 @@ class ConcatLayerImpl : public ConcatLayer
                                  std::vector<MatShape> &internals) const
     {
         CV_Assert(inputs.size() > 0);
-        outputs.clear();
-        outputs.push_back(inputs[0]);
+        outputs.resize(1, inputs[0]);
         int cAxis = clamp(axis, inputs[0]);

         int axisSum = 0;
         for (size_t i = 0; i < inputs.size(); i++)
         {
             MatShape curShape = inputs[i];

-            CV_Assert(curShape.size() == outputs.back().size());
-            for (int curAxis = 0; curAxis < outputs.back().size(); curAxis++)
+            if (padding)
             {
-                if (curAxis != cAxis && outputs.back()[curAxis] != curShape[curAxis])
-                    CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
+                for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
+                {
+                    outputs[0][curAxis] = std::max(outputs[0][curAxis], curShape[curAxis]);
+                }
+            }
+            else
+            {
+                CV_Assert(curShape.size() == outputs[0].size());
+                for (int curAxis = 0; curAxis < outputs[0].size(); curAxis++)
+                {
+                    if (curAxis != cAxis && outputs[0][curAxis] != curShape[curAxis])
+                        CV_Error(Error::StsBadSize, "Inconsitent shape for ConcatLayer");
+                }
             }

             axisSum += curShape[cAxis];
         }
-
-        outputs.back()[cAxis] = axisSum;
-
+        outputs[0][cAxis] = axisSum;
         return false;
     }

     virtual bool supportBackend(int backendId)
     {
         return backendId == DNN_BACKEND_DEFAULT ||
-               backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1;  // By channels
+               backendId == DNN_BACKEND_HALIDE && haveHalide() && axis == 1 && !padding;  // By channels
     }

     class ChannelConcatInvoker : public ParallelLoopBody
@@ -174,7 +182,10 @@ class ConcatLayerImpl : public ConcatLayer
         int cAxis = clamp(axis, inputs[0]->dims);
         Mat& outMat = outputs[0];

-        if( cAxis == 1 && outMat.dims == 4 )
+        if (padding)
+            outMat.setTo(0);
+
+        if( cAxis == 1 && outMat.dims == 4 && !padding)
         {
             int nstripes = getNumThreads();
             ChannelConcatInvoker::run(inputs, outMat, nstripes);
@@ -187,6 +198,12 @@ class ConcatLayerImpl : public ConcatLayer
             for (size_t i = 0; i < inputs.size(); i++)
             {
                 ranges[cAxis].end = ranges[cAxis].start + inputs[i]->size[cAxis];
+                for (int j = 0; j < outMat.dims; ++j)
+                {
+                    if (j == cAxis) continue;
+                    ranges[j].start = (outMat.size[j] - inputs[i]->size[j]) / 2;
+                    ranges[j].end = ranges[j].start + inputs[i]->size[j];
+                }
                 inputs[i]->copyTo(outMat(&ranges[0]));
                 ranges[cAxis].start = ranges[cAxis].end;
             }
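
In padding mode the output canvas is the element-wise maximum of all input shapes along every non-concatenation axis, the canvas is zero-filled, and each input is copied into a centered sub-range. The following standalone sketch distils that geometry from the hunks above; the helper names are ours, not part of the commit.

#include <algorithm>
#include <vector>

typedef std::vector<int> Shape;

// Output shape of a padded concat: sum along cAxis, max along every other axis.
Shape concatOutputShape(const std::vector<Shape>& inputs, int cAxis)
{
    Shape out = inputs[0];
    int axisSum = 0;
    for (size_t i = 0; i < inputs.size(); ++i)
    {
        for (size_t a = 0; a < out.size(); ++a)
            out[a] = std::max(out[a], inputs[i][a]);
        axisSum += inputs[i][cAxis];
    }
    out[cAxis] = axisSum;
    return out;
}

// Centered start offset of one input along a non-concatenation axis,
// matching ranges[j].start = (outMat.size[j] - inputs[i]->size[j]) / 2.
int centeredStart(int outSize, int inSize)
{
    return (outSize - inSize) / 2;
}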

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 1 addition & 1 deletion
@@ -187,7 +187,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
         }

         int ngroups = inpCn / blobs[0].size[1];
-        CV_Assert(inpCn % ngroups == 0 && outCn % ngroups == 0);
+        CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0);

         int dims[] = {inputs[0][0], outCn, out.height, out.width};
         outputs.resize(inputs.size(), shape(dims));
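
The added ngroups > 0 term matters because the integer division inpCn / blobs[0].size[1] truncates to zero whenever the loaded weights expect more input channels per group than the network supplies; without the guard, the following modulo would divide by zero instead of failing cleanly. A tiny illustration with made-up channel counts (not from the commit):

#include <cassert>

// Hypothetical mismatched model: 3 input channels, weights built for 64.
void ngroupsGuardExample()
{
    int inpCn = 3, weightInputChannels = 64, outCn = 64;
    int ngroups = inpCn / weightInputChannels;   // truncates to 0
    // The new first clause fails fast; without it, inpCn % ngroups
    // would be a division by zero.
    assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0);
}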
Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "../precomp.hpp"
+#include "layers_common.hpp"
+#include <iostream>
+namespace cv { namespace dnn {
+
+class LPNormalizeLayerImpl : public LPNormalizeLayer
+{
+public:
+
+    LPNormalizeLayerImpl(const LayerParams& params)
+    {
+        setParamsFrom(params);
+        pnorm = params.get<float>("p", 2);
+        epsilon = params.get<float>("eps", 1e-10f);
+        CV_Assert(pnorm > 0);
+    }
+
+    bool getMemoryShapes(const std::vector<MatShape> &inputs,
+                         const int requiredOutputs,
+                         std::vector<MatShape> &outputs,
+                         std::vector<MatShape> &internals) const
+    {
+        Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
+        if (pnorm != 1 && pnorm != 2)
+        {
+            internals.resize(1, inputs[0]);
+        }
+        return true;
+    }
+
+    virtual bool supportBackend(int backendId)
+    {
+        return backendId == DNN_BACKEND_DEFAULT;
+    }
+
+    void forward(std::vector<Mat*> &inputs, std::vector<Mat> &outputs, std::vector<Mat> &internals)
+    {
+        CV_TRACE_FUNCTION();
+        CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+
+        CV_Assert(inputs[0]->total() == outputs[0].total());
+        float norm;
+        if (pnorm == 1)
+            norm = cv::norm(*inputs[0], NORM_L1);
+        else if (pnorm == 2)
+            norm = cv::norm(*inputs[0], NORM_L2);
+        else
+        {
+            pow(abs(*inputs[0]), pnorm, internals[0]);
+            norm = pow(sum(internals[0])[0], 1.0f / pnorm);
+        }
+        multiply(*inputs[0], 1.0f / (norm + epsilon), outputs[0]);
+    }
+
+    int64 getFLOPS(const std::vector<MatShape> &inputs,
+                   const std::vector<MatShape> &) const
+    {
+        int64 flops = 0;
+        for (int i = 0; i < inputs.size(); i++)
+            flops += 3 * total(inputs[i]);
+        return flops;
+    }
+};
+
+Ptr<LPNormalizeLayer> LPNormalizeLayer::create(const LayerParams& params)
+{
+    return Ptr<LPNormalizeLayer>(new LPNormalizeLayerImpl(params));
+}
+
+}  // namespace dnn
+}  // namespace cv
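
For reference, the forward pass for the default p = 2 amounts to dividing the whole blob by its global L2 norm plus epsilon. Here is a minimal sketch of the same arithmetic on a plain cv::Mat, outside the layer machinery; the function name and standalone setting are our own, not part of the commit.

#include <opencv2/core.hpp>

// dst = src / (||src||_2 + eps), mirroring multiply(*inputs[0], 1/(norm+eps), outputs[0]).
cv::Mat lpNormalizeL2(const cv::Mat& src, double epsilon = 1e-10)
{
    double norm = cv::norm(src, cv::NORM_L2);
    cv::Mat dst;
    cv::multiply(src, 1.0 / (norm + epsilon), dst);
    return dst;
}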

modules/dnn/src/layers/pooling_layer.cpp

Lines changed: 5 additions & 6 deletions
@@ -54,7 +54,6 @@ namespace cv
 namespace dnn
 {

-//TODO: add ceil_mode param
 class PoolingLayerImpl : public PoolingLayer
 {
 public:
@@ -79,6 +78,7 @@ class PoolingLayerImpl : public PoolingLayer
         getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
                                pad.height, pad.width, stride.height, stride.width, padMode);
         setParamsFrom(params);
+        ceilMode = params.get<bool>("ceil_mode", true);
     }

     void finalize(const std::vector<Mat*> &inputs, std::vector<Mat> &outputs)
@@ -572,11 +572,10 @@ class PoolingLayerImpl : public PoolingLayer
         }
         else if (padMode.empty())
         {
-            //Yeah, something strange Caffe scheme-)
-            out.height = static_cast<int>(ceil(static_cast<float>(in.height + 2 * pad.height -
-                                                                  kernel.height) / stride.height)) + 1;
-            out.width = static_cast<int>(ceil(static_cast<float>(in.width + 2 * pad.width -
-                                                                 kernel.width) / stride.width)) + 1;
+            float height = (float)(in.height + 2 * pad.height - kernel.height) / stride.height;
+            float width = (float)(in.width + 2 * pad.width - kernel.width) / stride.width;
+            out.height = 1 + (ceilMode ? ceil(height) : floor(height));
+            out.width = 1 + (ceilMode ? ceil(width) : floor(width));

             if (pad.height || pad.width)
             {
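
The rewritten output-size formula differs from the old one only in the rounding step, which is now selectable. A quick numeric check of the switch (helper name and values are illustrative): with in = 8, pad = 0, kernel = 3, stride = 2 the ratio is 2.5, so ceil_mode=true yields an output extent of 4 while ceil_mode=false yields 3; with in = 7 both modes give 3.

#include <cmath>

// Same arithmetic as the patched branch above, factored into a helper.
int poolOutSize(int in, int pad, int kernel, int stride, bool ceilMode)
{
    float v = (float)(in + 2 * pad - kernel) / stride;
    return 1 + (int)(ceilMode ? std::ceil(v) : std::floor(v));
}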

modules/dnn/src/layers/reshape_layer.cpp

Lines changed: 13 additions & 4 deletions
@@ -75,12 +75,21 @@ static void computeShapeByReshapeMask(const MatShape &srcShape,
     if (explicitMask)
     {
         int maskTotal = total(maskShape);
-        for (int i = srcRange.start + 1; i < srcRange.end; ++i)
+        // Go from the end of mask until we collect required total.
+        bool matched = false;
+        for (int i = srcRange.end - 1; i >= srcRange.start; --i)
         {
-            if (total(srcShape, i, srcRange.end) != maskTotal)
+            if (matched)
             {
-                srcRange.start = i - 1;
-                break;
+                if (i == 0 || total(srcShape, i, srcRange.end) != maskTotal)
+                {
+                    srcRange.start = i + 1;
+                    break;
+                }
+            }
+            else
+            {
+                matched = total(srcShape, i, srcRange.end) == maskTotal;
             }
         }
         CV_Assert(total(srcShape, srcRange.start, srcRange.end) == maskTotal);
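
The new backwards scan anchors the explicit reshape mask at the end of the source range: it walks from the last axis toward the front until the running product of trailing dimensions equals the mask total, then keeps extending only while that total is preserved. A worked trace under assumed shapes (not taken from the commit): for srcShape = {2, 3, 4, 5} and maskTotal = 60, the trailing products are 5, 20, 60, 120, so srcRange.start ends up at 1 and the mask replaces the trailing {3, 4, 5} block. The sketch below mirrors that scan in isolation; the helper names are ours.

#include <vector>

// Product of shape[a..b), like the dnn total() helper used in the hunk.
int totalRange(const std::vector<int>& shape, int a, int b)
{
    int p = 1;
    for (int i = a; i < b; ++i)
        p *= shape[i];
    return p;
}

// Mirror of the new scan: start index of the trailing block of srcShape whose
// total equals maskTotal (srcRange assumed to span the whole shape).
int findMaskedBlockStart(const std::vector<int>& srcShape, int maskTotal)
{
    int start = 0, end = (int)srcShape.size();
    bool matched = false;
    for (int i = end - 1; i >= 0; --i)
    {
        if (matched)
        {
            if (i == 0 || totalRange(srcShape, i, end) != maskTotal)
            {
                start = i + 1;
                break;
            }
        }
        else
            matched = totalRange(srcShape, i, end) == maskTotal;
    }
    return start;   // {2, 3, 4, 5} with maskTotal = 60 gives 1
}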
