Skip to content

Commit 58b890b — "Dilated convolution import from TensorFlow" (1 parent: 9640bbe)

File tree

6 files changed: +59 −16 lines

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class BaseConvolutionLayerImpl : public ConvolutionLayer
8181

8282
Size outSize = Size(outputs[0].size[3], outputs[0].size[2]);
8383
getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize,
84-
kernel, stride, padMode, pad);
84+
kernel, stride, padMode, dilation, pad);
8585
}
8686

8787
bool hasBias() const
@@ -183,7 +183,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
183183
}
184184
else
185185
{
186-
getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, out);
186+
getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, dilation, out);
187187
}
188188

189189
int ngroups = inpCn / blobs[0].size[1];

modules/dnn/src/layers/layers_common.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,12 @@ void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &ke
167167
// we pad more on the right and bottom than on the top and left.
168168
void getConvPoolOutParams(const Size& inp, const Size &kernel,
169169
const Size &stride, const String &padMode,
170-
Size& out)
170+
const Size &dilation, Size& out)
171171
{
172172
if (padMode == "VALID")
173173
{
174-
out.height = (inp.height - kernel.height + stride.height) / stride.height;
175-
out.width = (inp.width- kernel.width + stride.width) / stride.width;
174+
out.height = (inp.height - (dilation.height * (kernel.height - 1) + 1) + stride.height) / stride.height;
175+
out.width = (inp.width - (dilation.width * (kernel.width - 1) + 1) + stride.width) / stride.width;
176176
}
177177
else if (padMode == "SAME")
178178
{
@@ -187,16 +187,16 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel,
187187

188188
void getConvPoolPaddings(const Size& inp, const Size& out,
189189
const Size &kernel, const Size &stride,
190-
const String &padMode, Size &pad)
190+
const String &padMode, const Size &dilation, Size &pad)
191191
{
192192
if (padMode == "VALID")
193193
{
194194
pad = cv::Size(0,0);
195195
}
196196
else if (padMode == "SAME")
197197
{
198-
int Ph = std::max(0, (out.height - 1) * stride.height + kernel.height - inp.height);
199-
int Pw = std::max(0, (out.width - 1) * stride.width + kernel.width - inp.width);
198+
int Ph = std::max(0, (out.height - 1) * stride.height + (dilation.height * (kernel.height - 1) + 1) - inp.height);
199+
int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width);
200200
// For odd values of total padding, add more padding at the 'right'
201201
// side of the given dimension.
202202
pad = cv::Size(Pw / 2, Ph / 2);

modules/dnn/src/layers/layers_common.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel
6464

6565
void getConvPoolOutParams(const Size& inp, const Size &kernel,
6666
const Size &stride, const String &padMode,
67-
Size& out);
67+
const Size &dilation, Size& out);
6868

6969
void getConvPoolPaddings(const Size& inp, const Size& out,
7070
const Size &kernel, const Size &stride,
71-
const String &padMode, Size &pad);
71+
const String &padMode, const Size &dilation, Size &pad);
7272

7373
}
7474
}

modules/dnn/src/layers/pooling_layer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ class PoolingLayerImpl : public PoolingLayer
9393
kernel = inp;
9494
}
9595

96-
getConvPoolPaddings(inp, out, kernel, stride, padMode, pad);
96+
getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad);
9797
}
9898

9999
virtual bool supportBackend(int backendId)
@@ -592,8 +592,7 @@ class PoolingLayerImpl : public PoolingLayer
592592
}
593593
else
594594
{
595-
getConvPoolOutParams(in, kernel, stride,
596-
padMode, out);
595+
getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);
597596
}
598597

599598
outputs.resize(type == MAX ? 2 * inputs.size() : inputs.size());

modules/dnn/src/tensorflow/tf_importer.cpp

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ static Mat getTensorContent(const tensorflow::TensorProto &tensor)
8888
return Mat(1, content.size() / sizeof(float), CV_32FC1, (void*)content.c_str()).clone();
8989
case tensorflow::DT_DOUBLE:
9090
return Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()).clone();
91+
case tensorflow::DT_INT32:
92+
return Mat(1, content.size() / sizeof(int32_t), CV_32SC1, (void*)content.c_str()).clone();
9193
case tensorflow::DT_HALF:
9294
{
9395
Mat halfs;
@@ -563,16 +565,46 @@ void TFImporter::populateNet(Net dstNet)
563565

564566
for (int li = 0; li < layersSize; li++)
565567
{
566-
const tensorflow::NodeDef &layer = net.node(li);
568+
tensorflow::NodeDef layer = net.node(li);
567569
String name = layer.name();
568570
String type = layer.op();
569571
LayerParams layerParams;
570572

571573
if(layers_to_ignore.find(li) != layers_to_ignore.end())
572574
continue;
573575

574-
if (type == "Conv2D")
576+
if (type == "Conv2D" || type == "SpaceToBatchND")
575577
{
578+
// The first node of dilated convolution subgraph.
579+
// Extract input node, dilation rate and paddings.
580+
std::string input = layer.input(0);
581+
if (type == "SpaceToBatchND")
582+
{
583+
// op: "SpaceToBatchND"
584+
// input: "input"
585+
// input: "SpaceToBatchND/block_shape"
586+
// input: "SpaceToBatchND/paddings"
587+
CV_Assert(layer.input_size() == 3);
588+
589+
DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
590+
CV_Assert(dilation.size() == 2 && dilation.get<int>(0) == dilation.get<int>(1));
591+
layerParams.set("dilation", dilation.get<int>(0));
592+
593+
Mat paddings;
594+
parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
595+
596+
// paddings is a 2x2 matrix: [[top, bot], [left, right]]
597+
layerParams.set("pad_h", paddings.at<float>(0));
598+
layerParams.set("pad_w", paddings.at<float>(2));
599+
600+
StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
601+
CV_Assert(next_layers.size() == 1);
602+
layer = net.node(next_layers[0].second);
603+
layers_to_ignore[next_layers[0].second] = next_layers[0].first;
604+
name = layer.name();
605+
type = layer.op();
606+
}
607+
576608
layerParams.set("bias_term", false);
577609
layerParams.blobs.resize(1);
578610

@@ -597,11 +629,21 @@ void TFImporter::populateNet(Net dstNet)
597629
setStrides(layerParams, layer);
598630
setPadding(layerParams, layer);
599631

632+
// The final node of dilated convolution subgraph.
633+
next_layers = getNextLayers(net, name, "BatchToSpaceND");
634+
if (!next_layers.empty())
635+
{
636+
layerParams.set("pad_mode", ""); // We use padding values.
637+
CV_Assert(next_layers.size() == 1);
638+
ExcludeLayer(net, next_layers[0].second, 0, false);
639+
layers_to_ignore[next_layers[0].second] = next_layers[0].first;
640+
}
641+
600642
int id = dstNet.addLayer(name, "Convolution", layerParams);
601643
layer_id[name] = id;
602644

603645
// one input only
604-
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
646+
connect(layer_id, dstNet, parsePin(input), id, 0);
605647
}
606648
else if (type == "BiasAdd" || type == "Add")
607649
{

modules/dnn/test/test_tf_importer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ static void runTensorFlowNet(const std::string& prefix,
9696
TEST(Test_TensorFlow, single_conv)
9797
{
9898
runTensorFlowNet("single_conv");
99+
runTensorFlowNet("atrous_conv2d_valid");
100+
runTensorFlowNet("atrous_conv2d_same");
99101
}
100102

101103
TEST(Test_TensorFlow, padding)

0 commit comments

Comments (0)