
Commit ff037eb

Merge pull request opencv#9845 from dkurt:fast_neural_style_models
2 parents 91c0b13 + 4b52b8d

File tree: 7 files changed (+218 additions, -26 deletions)

modules/dnn/include/opencv2/dnn/all_layers.hpp

Lines changed: 1 addition & 0 deletions
@@ -377,6 +377,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
  * starting from the first one. The rest of dimensions won't
  * be padded.
  * @param value Value to be padded. Defaults to zero.
+ * @param type Padding type: 'constant', 'reflect'
  * @param input_dims Torch's parameter. If @p input_dims is not equal to the
  *        actual input dimensionality then the `[0]th` dimension
  *        is considered as a batch dimension and @p paddings are shifted

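The new `type` parameter defaults to 'constant', so existing models keep their behaviour; 'reflect' switches the layer to spatial reflection padding. A minimal sketch of building such a layer directly from LayerParams (parameter names come from the diff above; the paddings array is an illustrative assumption of begin/end pairs per dimension, the way the Torch importer below fills them):

    // Hypothetical example: create a reflect-padding layer from LayerParams.
    // The padding values (2 pixels on each spatial border of an NCHW blob)
    // are illustrative only.
    #include <opencv2/dnn.hpp>

    cv::Ptr<cv::dnn::PaddingLayer> makeReflectPadding()
    {
        cv::dnn::LayerParams lp;
        int paddings[] = {0, 0, 0, 0, 2, 2, 2, 2};  // N, C unpadded; H, W padded by 2
        lp.set("paddings", cv::dnn::DictValue::arrayInt<int*>(&paddings[0], 8));
        lp.set("type", "reflect");  // the new parameter; omit it to keep constant padding
        return cv::dnn::PaddingLayer::create(lp);
    }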
modules/dnn/include/opencv2/dnn/shape_utils.hpp

Lines changed: 9 additions & 5 deletions
@@ -112,16 +112,12 @@ static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const
 static inline Mat getPlane(const Mat &m, int n, int cn)
 {
     CV_Assert(m.dims > 2);
-    Range range[CV_MAX_DIM];
     int sz[CV_MAX_DIM];
     for(int i = 2; i < m.dims; i++)
     {
         sz[i-2] = m.size.p[i];
-        range[i] = Range::all();
     }
-    range[0] = Range(n, n+1);
-    range[1] = Range(cn, cn+1);
-    return m(range).reshape(1, m.dims-2, sz);
+    return Mat(m.dims - 2, sz, m.type(), (void*)m.ptr<float>(n, cn));
 }

 static inline MatShape shape(const int* dims, const int n = 4)
@@ -191,6 +187,14 @@ inline int clamp(int ax, const MatShape& shape)
     return clamp(ax, (int)shape.size());
 }

+inline Range clamp(const Range& r, int axisSize)
+{
+    Range clamped(std::max(r.start, 0),
+                  r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1);
+    CV_Assert(clamped.start < clamped.end, clamped.end <= axisSize);
+    return clamped;
+}
+
 CV__DNN_EXPERIMENTAL_NS_END
 }
 }

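The new `clamp(Range, int)` overload adopts a Python-like convention for non-positive end values: `end <= 0` is interpreted as `axisSize + end + 1`. A few worked examples of that formula (the values are illustrative, not part of the patch; the helper lives in the dnn shape-utils header, inside the experimental dnn namespace):

    // Worked examples for clamp(const Range&, int axisSize) from shape_utils.hpp.
    #include <opencv2/dnn.hpp>
    #include <opencv2/dnn/shape_utils.hpp>
    #include <cassert>

    void clampExamples()
    {
        using namespace cv::dnn;
        assert(clamp(cv::Range(2, 100), 10) == cv::Range(2, 10)); // positive end: clip to the axis size
        assert(clamp(cv::Range(3, -1), 10)  == cv::Range(3, 10)); // -1: 10 + (-1) + 1 = 10, i.e. up to the end
        assert(clamp(cv::Range(0, -3), 10)  == cv::Range(0, 8));  // -3: 10 - 3 + 1 = 8, drops the last two elements
    }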
modules/dnn/src/layers/padding_layer.cpp

Lines changed: 42 additions & 2 deletions
@@ -10,6 +10,7 @@ Implementation of padding layer, which adds paddings to input blob.
 */

 #include "../precomp.hpp"
+#include "layers_common.hpp"
 #include "op_halide.hpp"
 #include <vector>

@@ -26,6 +27,7 @@ class PaddingLayerImpl : public PaddingLayer
         setParamsFrom(params);
         paddingValue = params.get<float>("value", 0);
         inputDims = params.get<int>("input_dims", -1);
+        paddingType = params.get<String>("type", "constant");

         CV_Assert(params.has("paddings"));
         const DictValue& paddingsParam = params.get("paddings");
@@ -94,8 +96,45 @@ class PaddingLayerImpl : public PaddingLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());

-        outputs[0].setTo(paddingValue);
-        inputs[0]->copyTo(outputs[0](dstRanges));
+        if (paddingType == "constant")
+        {
+            outputs[0].setTo(paddingValue);
+            inputs[0]->copyTo(outputs[0](dstRanges));
+        }
+        else if (paddingType == "reflect")
+        {
+            CV_Assert(inputs.size() == 1);
+            CV_Assert(outputs.size() == 1);
+            CV_Assert(inputs[0]->dims == 4);
+            CV_Assert(outputs[0].dims == 4);
+
+            if (inputs[0]->size[0] != outputs[0].size[0] || inputs[0]->size[1] != outputs[0].size[1])
+                CV_Error(Error::StsNotImplemented, "Only spatial reflection padding is supported.");
+
+            const int inpHeight = inputs[0]->size[2];
+            const int inpWidth = inputs[0]->size[3];
+            const int outHeight = outputs[0].size[2];
+            const int outWidth = outputs[0].size[3];
+            const int padTop = dstRanges[2].start;
+            const int padBottom = outHeight - dstRanges[2].end;
+            const int padLeft = dstRanges[3].start;
+            const int padRight = outWidth - dstRanges[3].end;
+            CV_Assert(padTop < inpHeight, padBottom < inpHeight,
+                      padLeft < inpWidth, padRight < inpWidth);
+
+            for (size_t n = 0; n < inputs[0]->size[0]; ++n)
+            {
+                for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch)
+                {
+                    copyMakeBorder(getPlane(*inputs[0], n, ch),
+                                   getPlane(outputs[0], n, ch),
+                                   padTop, padBottom, padLeft, padRight,
+                                   BORDER_REFLECT_101);
+                }
+            }
+        }
+        else
+            CV_Error(Error::StsNotImplemented, "Unknown padding type: " + paddingType);
     }

     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
@@ -124,6 +163,7 @@ class PaddingLayerImpl : public PaddingLayer
     std::vector<Range> dstRanges;
     int inputDims;
     float paddingValue;
+    std::string paddingType;
 };

 Ptr<PaddingLayer> PaddingLayer::create(const LayerParams &params)

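The reflect branch delegates the actual border handling to the core copyMakeBorder routine with BORDER_REFLECT_101, applied plane by plane. As a reminder of what that border type produces, here is a standalone sketch using ordinary OpenCV core functionality (unrelated to the layer machinery itself):

    // BORDER_REFLECT_101 mirrors around the edge pixel without repeating it:
    // padding the row [1 2 3 4] by two on each side yields [3 2 1 2 3 4 3 2].
    #include <opencv2/core.hpp>
    #include <iostream>

    void reflectBorderDemo()
    {
        float data[] = {1, 2, 3, 4};
        cv::Mat row(1, 4, CV_32F, data);
        cv::Mat padded;
        cv::copyMakeBorder(row, padded, 0, 0, 2, 2, cv::BORDER_REFLECT_101);
        std::cout << padded << std::endl;  // [3, 2, 1, 2, 3, 4, 3, 2]
    }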
modules/dnn/src/layers/slice_layer.cpp

Lines changed: 20 additions & 12 deletions
@@ -58,7 +58,7 @@ class SliceLayerImpl : public SliceLayer
         axis = params.get<int>("axis", 1);
         if (params.has("slice_point"))
         {
-            CV_Assert(!params.has("begin") && !params.has("size"));
+            CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
             const DictValue &indicesValue = params.get("slice_point");
             sliceRanges.resize(indicesValue.size() + 1,
                                std::vector<Range>(axis + 1, Range::all()));
@@ -71,24 +71,34 @@ class SliceLayerImpl : public SliceLayer
             }
             sliceRanges.back()[axis].start = prevSlice;
         }
-        else if (params.has("begin") && params.has("size"))
+        else if (params.has("begin"))
         {
+            CV_Assert(params.has("size") ^ params.has("end"));
             const DictValue &begins = params.get("begin");
-            const DictValue &sizes = params.get("size");
-            CV_Assert(begins.size() == sizes.size());
+            const DictValue &sizesOrEnds = params.has("size") ? params.get("size") : params.get("end");
+            CV_Assert(begins.size() == sizesOrEnds.size());

             sliceRanges.resize(1);
             sliceRanges[0].resize(begins.size(), Range::all());
             for (int i = 0; i < begins.size(); ++i)
             {
                 int start = begins.get<int>(i);
-                int size = sizes.get<int>(i);
+                int sizeOrEnd = sizesOrEnds.get<int>(i);  // It may be negative to reverse indexation.
                 CV_Assert(start >= 0);
-                CV_Assert(size == -1 || size > 0);  // -1 value means range [start, axis_size).

                 sliceRanges[0][i].start = start;
-                if (size > 0)
-                    sliceRanges[0][i].end = start + size;
+                if (params.has("size"))
+                {
+                    int size = sizeOrEnd;
+                    CV_Assert(size == -1 || size > 0);  // -1 value means range [start, axis_size).
+                    sliceRanges[0][i].end = size > 0 ? start + size : -1;  // We'll finalize a negative value later.
+                }
+                else
+                {
+                    int end = sizeOrEnd;
+                    CV_Assert(end < 0 || end > start);  // End index is excluded.
+                    sliceRanges[0][i].end = end;  // We'll finalize a negative value later.
+                }
             }
         }
     }
@@ -109,8 +119,7 @@ class SliceLayerImpl : public SliceLayer
             CV_Assert(sliceRanges[i].size() <= inpShape.size());
             for (int j = 0; j < sliceRanges[i].size(); ++j)
             {
-                outputs[i][j] = std::min(sliceRanges[i][j].end, inpShape[j]) -
-                                std::max(sliceRanges[i][j].start, 0);
+                outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
             }
         }
     }
@@ -152,8 +161,7 @@ class SliceLayerImpl : public SliceLayer
             // Clamp.
             for (int j = 0; j < sliceRanges[i].size(); ++j)
             {
-                sliceRanges[i][j].start = std::max(0, sliceRanges[i][j].start);
-                sliceRanges[i][j].end = std::min(sliceRanges[i][j].end, inpShape[j]);
+                sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);
             }
             // Fill the rest of ranges.
             for (int j = sliceRanges[i].size(); j < inpShape.size(); ++j)

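With this change the Slice layer accepts either "begin"/"size" (as before) or "begin"/"end", where a negative end counts from the back of the axis via the new clamp helper. As an illustration, a hedged sketch of the parameters the Torch importer below emits for a ShaveImage(2) module (the helper name is hypothetical):

    // Hypothetical helper: the "begin"/"end" arrays a ShaveImage(size) module
    // maps to. For a 1x3x32x32 input, end = -1 resolves to the full axis and
    // end = -size - 1 = -3 resolves to 32 - 3 + 1 = 30, so each spatial border
    // loses `size` pixels.
    #include <opencv2/dnn.hpp>

    cv::dnn::LayerParams shaveImageAsSlice(int size)
    {
        cv::dnn::LayerParams lp;
        lp.type = "Slice";
        int begins[] = {0, 0, size, size};
        int ends[] = {-1, -1, -size - 1, -size - 1};
        lp.set("begin", cv::dnn::DictValue::arrayInt<int*>(&begins[0], 4));
        lp.set("end", cv::dnn::DictValue::arrayInt<int*>(&ends[0], 4));
        return lp;  // resulting ranges for 1x3x32x32: [0,1) x [0,3) x [2,30) x [2,30)
    }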
modules/dnn/src/torch/torch_importer.cpp

Lines changed: 51 additions & 7 deletions
@@ -617,7 +617,7 @@ struct TorchImporter : public ::cv::dnn::Importer
             curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
             readObject();
         }
-        else if (nnName == "SpatialBatchNormalization")
+        else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization")
         {
             newModule->apiType = "BatchNorm";
             readTorchTable(scalarParams, tensorParams);
@@ -626,19 +626,31 @@ struct TorchImporter : public ::cv::dnn::Importer
             float eps = float(scalarParams.get<double>("eps"));
             layerParams.set("eps", eps);

-            CV_Assert((tensorParams.count("running_var") || tensorParams.count("running_std")) &&
-                      tensorParams.count("running_mean"));
-            layerParams.blobs.push_back(tensorParams["running_mean"].second);
+            if (tensorParams.count("running_mean"))
+            {
+                layerParams.blobs.push_back(tensorParams["running_mean"].second);
+            }
+            else
+            {
+                CV_Assert(scalarParams.has("nOutput"));
+                layerParams.blobs.push_back(Mat::zeros(1, scalarParams.get<int>("nOutput"), CV_32F));
+            }
+
             if (tensorParams.count("running_var"))
             {
                 layerParams.blobs.push_back(tensorParams["running_var"].second);
             }
-            else
+            else if (tensorParams.count("running_std"))
             {
                 layerParams.blobs.push_back(tensorParams["running_std"].second);
                 pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
                 subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
             }
+            else
+            {
+                CV_Assert(scalarParams.has("nOutput"));
+                layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));
+            }

             if (tensorParams.count("weight"))
             {
@@ -652,6 +664,16 @@ struct TorchImporter : public ::cv::dnn::Importer
                 layerParams.blobs.push_back(tensorParams["bias"].second);
             }

+            if (nnName == "InstanceNormalization")
+            {
+                cv::Ptr<Module> mvnModule(new Module(nnName));
+                mvnModule->apiType = "MVN";
+                curModule->modules.push_back(mvnModule);
+
+                layerParams.blobs[0].setTo(0);  // batch norm's mean
+                layerParams.blobs[1].setTo(1);  // batch norm's std
+            }
+
             curModule->modules.push_back(newModule);
         }
         else if (nnName == "PReLU")
@@ -691,7 +713,9 @@ struct TorchImporter : public ::cv::dnn::Importer
             layerParams.set("scale", scale);
             curModule->modules.push_back(newModule);
         }
-        else if (nnName == "Identity")
+        // TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
+        // It's a loss function that has an Identity forward.
+        else if (nnName == "Identity" || nnName == "TotalVariation")
         {
             readTorchTable(scalarParams, tensorParams);
             newModule->apiType = "Identity";
@@ -866,7 +890,7 @@ struct TorchImporter : public ::cv::dnn::Importer
             layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
             curModule->modules.push_back(newModule);
         }
-        else if (nnName == "SpatialZeroPadding")
+        else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")
         {
             readTorchTable(scalarParams, tensorParams);
             CV_Assert(scalarParams.has("pad_l"), scalarParams.has("pad_r"),
@@ -889,6 +913,26 @@ struct TorchImporter : public ::cv::dnn::Importer
             paddings[5] = padRight;
             layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
             layerParams.set("input_dims", 3);
+
+            if (nnName == "SpatialReflectionPadding")
+                layerParams.set("type", "reflect");
+
+            curModule->modules.push_back(newModule);
+        }
+        else if (nnName == "ShaveImage")
+        {
+            // ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
+            // It may be mapped to Slice layer.
+            readTorchTable(scalarParams, tensorParams);
+            CV_Assert(scalarParams.has("size"));
+            int size = scalarParams.get<int>("size");
+
+            int begins[] = {0, 0, size, size};
+            int ends[] = {-1, -1, -size - 1, -size - 1};
+
+            newModule->apiType = "Slice";
+            layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));
+            layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
             curModule->modules.push_back(newModule);
         }
         else

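The InstanceNormalization branch above reuses the BatchNorm path: an MVN module normalizes each plane to zero mean and unit variance, and the BatchNorm mean/variance blobs are forced to 0 and 1 so that only the learned weight and bias remain as an affine step. A small numerical sketch of what that pair computes for a single H x W plane (plain cv::Mat arithmetic, independent of the importer; eps handling omitted):

    // Per-plane instance normalization, assuming a CV_32F plane x:
    // y = weight * (x - mean(x)) / std(x) + bias
    #include <opencv2/core.hpp>

    cv::Mat instanceNormPlane(const cv::Mat& x, float weight, float bias)
    {
        cv::Scalar mean, stddev;
        cv::meanStdDev(x, mean, stddev);                 // statistics over this plane only
        cv::Mat normalized = (x - mean[0]) / stddev[0];  // what the MVN layer contributes
        return weight * normalized + bias;               // what the degenerate BatchNorm contributes
    }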
modules/dnn/test/test_torch_importer.cpp

Lines changed: 44 additions & 0 deletions
@@ -231,6 +231,7 @@ TEST(Torch_Importer, net_padding)
 {
     runTorchNet("net_padding", DNN_TARGET_CPU, "", false, true);
     runTorchNet("net_spatial_zero_padding", DNN_TARGET_CPU, "", false, true);
+    runTorchNet("net_spatial_reflection_padding", DNN_TARGET_CPU, "", false, true);
 }

 TEST(Torch_Importer, ENet_accuracy)
@@ -338,6 +339,49 @@ OCL_TEST(Torch_Importer, ENet_accuracy)
     }
 }

+// Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style
+// th fast_neural_style.lua \
+//   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
+//   -output_image lena.png \
+//   -median_filter 0 \
+//   -image_size 0 \
+//   -model models/eccv16/starry_night.t7
+// th fast_neural_style.lua \
+//   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
+//   -output_image lena.png \
+//   -median_filter 0 \
+//   -image_size 0 \
+//   -model models/instance_norm/feathers.t7
+TEST(Torch_Importer, FastNeuralStyle_accuracy)
+{
+    std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
+                            "dnn/fast_neural_style_instance_norm_feathers.t7"};
+    std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
+
+    for (int i = 0; i < 2; ++i)
+    {
+        const string model = findDataFile(models[i], false);
+        Net net = readNetFromTorch(model);
+
+        Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
+        Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);
+
+        net.setInput(inputBlob);
+        Mat out = net.forward();
+
+        // Deprocessing.
+        getPlane(out, 0, 0) += 103.939;
+        getPlane(out, 0, 1) += 116.779;
+        getPlane(out, 0, 2) += 123.68;
+        out = cv::min(cv::max(0, out), 255);
+
+        Mat ref = imread(findDataFile(targets[i]));
+        Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
+
+        normAssert(out, refBlob, "", 0.5, 1.1);
+    }
+}
+
 }

 #endif

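Outside the test harness, running one of these models follows the same pattern. A hedged sketch (model and image paths are placeholders; the pre/post-processing mirrors the test above, and the final repacking into an 8-bit image is an assumption about how the output would typically be saved):

    // Minimal style-transfer driver for a fast-neural-style .t7 model.
    #include <opencv2/dnn.hpp>
    #include <opencv2/dnn/shape_utils.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <string>
    #include <vector>

    void stylize(const std::string& modelPath, const std::string& imgPath)
    {
        cv::dnn::Net net = cv::dnn::readNetFromTorch(modelPath);

        cv::Mat img = cv::imread(imgPath);
        cv::Mat blob = cv::dnn::blobFromImage(img, 1.0, cv::Size(),
                                              cv::Scalar(103.939, 116.779, 123.68), false);
        net.setInput(blob);
        cv::Mat out = net.forward();

        // Undo the mean subtraction and clip to [0, 255], as in the test above.
        cv::dnn::getPlane(out, 0, 0) += 103.939;
        cv::dnn::getPlane(out, 0, 1) += 116.779;
        cv::dnn::getPlane(out, 0, 2) += 123.68;
        out = cv::min(cv::max(0, out), 255);

        // Repack the 1xCxHxW blob into an 8-bit BGR image and save it.
        std::vector<cv::Mat> channels;
        channels.push_back(cv::dnn::getPlane(out, 0, 0));
        channels.push_back(cv::dnn::getPlane(out, 0, 1));
        channels.push_back(cv::dnn::getPlane(out, 0, 2));
        cv::Mat result;
        cv::merge(channels, result);
        result.convertTo(result, CV_8UC3);
        cv::imwrite("stylized.png", result);
    }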