Skip to content

Commit 58b890b — "Dilated convolution import from TensorFlow" (1 parent: 9640bbe)

File tree

6 files changed: +59 −16 lines

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class BaseConvolutionLayerImpl : public ConvolutionLayer
8181

8282
Size outSize = Size(outputs[0].size[3], outputs[0].size[2]);
8383
getConvPoolPaddings(Size(input.size[3], input.size[2]), outSize,
84-
kernel, stride, padMode, pad);
84+
kernel, stride, padMode, dilation, pad);
8585
}
8686

8787
bool hasBias() const
@@ -183,7 +183,7 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
183183
}
184184
else
185185
{
186-
getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, out);
186+
getConvPoolOutParams(Size(inpW, inpH), kernel, stride, padMode, dilation, out);
187187
}
188188

189189
int ngroups = inpCn / blobs[0].size[1];

modules/dnn/src/layers/layers_common.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -167,12 +167,12 @@ void getConvolutionKernelParams(const LayerParams &params, int &kernelH, int &ke
167167
// we pad more on the right and bottom than on the top and left.
168168
void getConvPoolOutParams(const Size& inp, const Size &kernel,
169169
const Size &stride, const String &padMode,
170-
Size& out)
170+
const Size &dilation, Size& out)
171171
{
172172
if (padMode == "VALID")
173173
{
174-
out.height = (inp.height - kernel.height + stride.height) / stride.height;
175-
out.width = (inp.width- kernel.width + stride.width) / stride.width;
174+
out.height = (inp.height - (dilation.height * (kernel.height - 1) + 1) + stride.height) / stride.height;
175+
out.width = (inp.width - (dilation.width * (kernel.width - 1) + 1) + stride.width) / stride.width;
176176
}
177177
else if (padMode == "SAME")
178178
{
@@ -187,16 +187,16 @@ void getConvPoolOutParams(const Size& inp, const Size &kernel,
187187

188188
void getConvPoolPaddings(const Size& inp, const Size& out,
189189
const Size &kernel, const Size &stride,
190-
const String &padMode, Size &pad)
190+
const String &padMode, const Size &dilation, Size &pad)
191191
{
192192
if (padMode == "VALID")
193193
{
194194
pad = cv::Size(0,0);
195195
}
196196
else if (padMode == "SAME")
197197
{
198-
int Ph = std::max(0, (out.height - 1) * stride.height + kernel.height - inp.height);
199-
int Pw = std::max(0, (out.width - 1) * stride.width + kernel.width - inp.width);
198+
int Ph = std::max(0, (out.height - 1) * stride.height + (dilation.height * (kernel.height - 1) + 1) - inp.height);
199+
int Pw = std::max(0, (out.width - 1) * stride.width + (dilation.width * (kernel.width - 1) + 1) - inp.width);
200200
// For odd values of total padding, add more padding at the 'right'
201201
// side of the given dimension.
202202
pad = cv::Size(Pw / 2, Ph / 2);

modules/dnn/src/layers/layers_common.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,11 @@ void getPoolingKernelParams(const LayerParams &params, int &kernelH, int &kernel
6464

6565
void getConvPoolOutParams(const Size& inp, const Size &kernel,
6666
const Size &stride, const String &padMode,
67-
Size& out);
67+
const Size &dilation, Size& out);
6868

6969
void getConvPoolPaddings(const Size& inp, const Size& out,
7070
const Size &kernel, const Size &stride,
71-
const String &padMode, Size &pad);
71+
const String &padMode, const Size &dilation, Size &pad);
7272

7373
}
7474
}

modules/dnn/src/layers/pooling_layer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ class PoolingLayerImpl : public PoolingLayer
9393
kernel = inp;
9494
}
9595

96-
getConvPoolPaddings(inp, out, kernel, stride, padMode, pad);
96+
getConvPoolPaddings(inp, out, kernel, stride, padMode, Size(1, 1), pad);
9797
}
9898

9999
virtual bool supportBackend(int backendId)
@@ -592,8 +592,7 @@ class PoolingLayerImpl : public PoolingLayer
592592
}
593593
else
594594
{
595-
getConvPoolOutParams(in, kernel, stride,
596-
padMode, out);
595+
getConvPoolOutParams(in, kernel, stride, padMode, Size(1, 1), out);
597596
}
598597

599598
outputs.resize(type == MAX ? 2 * inputs.size() : inputs.size());

modules/dnn/src/tensorflow/tf_importer.cpp

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ static Mat getTensorContent(const tensorflow::TensorProto &tensor)
8888
return Mat(1, content.size() / sizeof(float), CV_32FC1, (void*)content.c_str()).clone();
8989
case tensorflow::DT_DOUBLE:
9090
return Mat(1, content.size() / sizeof(double), CV_64FC1, (void*)content.c_str()).clone();
91+
case tensorflow::DT_INT32:
92+
return Mat(1, content.size() / sizeof(int32_t), CV_32SC1, (void*)content.c_str()).clone();
9193
case tensorflow::DT_HALF:
9294
{
9395
Mat halfs;
@@ -563,16 +565,46 @@ void TFImporter::populateNet(Net dstNet)
563565

564566
for (int li = 0; li < layersSize; li++)
565567
{
566-
const tensorflow::NodeDef &layer = net.node(li);
568+
tensorflow::NodeDef layer = net.node(li);
567569
String name = layer.name();
568570
String type = layer.op();
569571
LayerParams layerParams;
570572

571573
if(layers_to_ignore.find(li) != layers_to_ignore.end())
572574
continue;
573575

574-
if (type == "Conv2D")
576+
if (type == "Conv2D" || type == "SpaceToBatchND")
575577
{
578+
// The first node of dilated convolution subgraph.
579+
// Extract input node, dilation rate and paddings.
580+
std::string input = layer.input(0);
581+
if (type == "SpaceToBatchND")
582+
{
583+
// op: "SpaceToBatchND"
584+
// input: "input"
585+
// input: "SpaceToBatchND/block_shape"
586+
// input: "SpaceToBatchND/paddings"
587+
CV_Assert(layer.input_size() == 3);
588+
589+
DictValue dilation = parseDims(getConstBlob(layer, value_id, 1));
590+
CV_Assert(dilation.size() == 2 && dilation.get<int>(0) == dilation.get<int>(1));
591+
layerParams.set("dilation", dilation.get<int>(0));
592+
593+
Mat paddings;
594+
parseTensor<int>(getConstBlob(layer, value_id, 2), paddings);
595+
596+
// paddings is a 2x2 matrix: [[top, bot], [left, right]]
597+
layerParams.set("pad_h", paddings.at<float>(0));
598+
layerParams.set("pad_w", paddings.at<float>(2));
599+
600+
StrIntVector next_layers = getNextLayers(net, name, "Conv2D");
601+
CV_Assert(next_layers.size() == 1);
602+
layer = net.node(next_layers[0].second);
603+
layers_to_ignore[next_layers[0].second] = next_layers[0].first;
604+
name = layer.name();
605+
type = layer.op();
606+
}
607+
576608
layerParams.set("bias_term", false);
577609
layerParams.blobs.resize(1);
578610

@@ -597,11 +629,21 @@ void TFImporter::populateNet(Net dstNet)
597629
setStrides(layerParams, layer);
598630
setPadding(layerParams, layer);
599631

632+
// The final node of dilated convolution subgraph.
633+
next_layers = getNextLayers(net, name, "BatchToSpaceND");
634+
if (!next_layers.empty())
635+
{
636+
layerParams.set("pad_mode", ""); // We use padding values.
637+
CV_Assert(next_layers.size() == 1);
638+
ExcludeLayer(net, next_layers[0].second, 0, false);
639+
layers_to_ignore[next_layers[0].second] = next_layers[0].first;
640+
}
641+
600642
int id = dstNet.addLayer(name, "Convolution", layerParams);
601643
layer_id[name] = id;
602644

603645
// one input only
604-
connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0);
646+
connect(layer_id, dstNet, parsePin(input), id, 0);
605647
}
606648
else if (type == "BiasAdd" || type == "Add")
607649
{

modules/dnn/test/test_tf_importer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ static void runTensorFlowNet(const std::string& prefix,
9696
TEST(Test_TensorFlow, single_conv)
9797
{
9898
runTensorFlowNet("single_conv");
99+
runTensorFlowNet("atrous_conv2d_valid");
100+
runTensorFlowNet("atrous_conv2d_same");
99101
}
100102

101103
TEST(Test_TensorFlow, padding)

0 commit comments

Comments (0)