Layers for fast-neural-style models: https://github.com/jcjohnson/fast-neural-style

dkurt · dkurt · commit 4b52b8df348b · 2017-10-27T14:26:45.000+03:00
diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -377,6 +377,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
      *                 starting from the first one. The rest of dimensions won't
      *                 be padded.
      * @param value Value to be padded. Defaults to zero.
+     * @param type Padding type: 'constant', 'reflect'
      * @param input_dims Torch's parameter. If @p input_dims is not equal to the
      *                   actual input dimensionality then the `[0]th` dimension
      *                   is considered as a batch dimension and @p paddings are shifted
diff --git a/modules/dnn/include/opencv2/dnn/shape_utils.hpp b/modules/dnn/include/opencv2/dnn/shape_utils.hpp
@@ -112,16 +112,12 @@ static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const
 static inline Mat getPlane(const Mat &m, int n, int cn)  // Sub-array of m at index n of axis 0 and index cn of axis 1.
 {
     CV_Assert(m.dims > 2);
-    Range range[CV_MAX_DIM];
     int sz[CV_MAX_DIM];  // Sizes of the trailing axes (2 .. m.dims-1), shifted to start at 0.
     for(int i = 2; i < m.dims; i++)
     {
         sz[i-2] = m.size.p[i];
-        range[i] = Range::all();
     }
-    range[0] = Range(n, n+1);
-    range[1] = Range(cn, cn+1);
-    return m(range).reshape(1, m.dims-2, sz);
+    return Mat(m.dims - 2, sz, m.type(), (void*)m.ptr<float>(n, cn));  // NOTE(review): header over m's data at (n, cn); ptr<float> only supplies the address, element type is m.type(). Presumably no copy and assumes the plane is stored contiguously - confirm.
 }
 
 static inline MatShape shape(const int* dims, const int n = 4)
@@ -191,6 +187,14 @@ inline int clamp(int ax, const MatShape& shape)
     return clamp(ax, (int)shape.size());
 }
 
+inline Range clamp(const Range& r, int axisSize)  // Resolves r against an axis of axisSize elements.
+{
+    Range clamped(std::max(r.start, 0),  // Start is floored at 0.
+                  r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1);  // Positive end is capped at axisSize; end <= 0 maps to axisSize + end + 1 (-1 -> axisSize).
+    CV_Assert(clamped.start < clamped.end, clamped.end <= axisSize);  // Rejects empty ranges; end == 0 resolves to axisSize + 1 and fails here.
+    return clamped;
+}
+
 CV__DNN_EXPERIMENTAL_NS_END
 }
 }
diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp
@@ -10,6 +10,7 @@ Implementation of padding layer, which adds paddings to input blob.
 */
 
 #include "../precomp.hpp"
+#include "layers_common.hpp"
 #include "op_halide.hpp"
 #include <vector>
 
@@ -26,6 +27,7 @@ class PaddingLayerImpl : public PaddingLayer
         setParamsFrom(params);
         paddingValue = params.get<float>("value", 0);
         inputDims = params.get<int>("input_dims", -1);
+        paddingType = params.get<String>("type", "constant");
 
         CV_Assert(params.has("paddings"));
         const DictValue& paddingsParam = params.get("paddings");
@@ -94,8 +96,45 @@ class PaddingLayerImpl : public PaddingLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        outputs[0].setTo(paddingValue);
-        inputs[0]->copyTo(outputs[0](dstRanges));
+        if (paddingType == "constant")
+        {
+            outputs[0].setTo(paddingValue);
+            inputs[0]->copyTo(outputs[0](dstRanges));
+        }
+        else if (paddingType == "reflect")
+        {
+            CV_Assert(inputs.size() == 1);
+            CV_Assert(outputs.size() == 1);
+            CV_Assert(inputs[0]->dims == 4);
+            CV_Assert(outputs[0].dims == 4);
+
+            if (inputs[0]->size[0] != outputs[0].size[0] || inputs[0]->size[1] != outputs[0].size[1])
+                CV_Error(Error::StsNotImplemented, "Only spatial reflection padding is supported.");
+
+            const int inpHeight = inputs[0]->size[2];
+            const int inpWidth = inputs[0]->size[3];
+            const int outHeight = outputs[0].size[2];
+            const int outWidth = outputs[0].size[3];
+            const int padTop = dstRanges[2].start;
+            const int padBottom = outHeight - dstRanges[2].end;
+            const int padLeft = dstRanges[3].start;
+            const int padRight = outWidth - dstRanges[3].end;
+            CV_Assert(padTop < inpHeight, padBottom < inpHeight,
+                      padLeft < inpWidth, padRight < inpWidth);
+
+            for (size_t n = 0; n < inputs[0]->size[0]; ++n)
+            {
+                for (size_t ch = 0; ch < inputs[0]->size[1]; ++ch)
+                {
+                    copyMakeBorder(getPlane(*inputs[0], n, ch),
+                                   getPlane(outputs[0], n, ch),
+                                   padTop, padBottom, padLeft, padRight,
+                                   BORDER_REFLECT_101);
+                }
+            }
+        }
+        else
+            CV_Error(Error::StsNotImplemented, "Unknown padding type: " + paddingType);
     }
 
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
@@ -124,6 +163,7 @@ class PaddingLayerImpl : public PaddingLayer
     std::vector<Range> dstRanges;
     int inputDims;
     float paddingValue;
+    std::string paddingType;
 };
 
 Ptr<PaddingLayer> PaddingLayer::create(const LayerParams &params)
diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp
@@ -58,7 +58,7 @@ class SliceLayerImpl : public SliceLayer
         axis = params.get<int>("axis", 1);
         if (params.has("slice_point"))
         {
-            CV_Assert(!params.has("begin") && !params.has("size"));
+            CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));
             const DictValue &indicesValue = params.get("slice_point");
             sliceRanges.resize(indicesValue.size() + 1,
                                std::vector<Range>(axis + 1, Range::all()));
@@ -71,24 +71,34 @@ class SliceLayerImpl : public SliceLayer
             }
             sliceRanges.back()[axis].start = prevSlice;
         }
-        else if (params.has("begin") && params.has("size"))
+        else if (params.has("begin"))
         {
+            CV_Assert(params.has("size") ^ params.has("end"));
             const DictValue &begins = params.get("begin");
-            const DictValue &sizes = params.get("size");
-            CV_Assert(begins.size() == sizes.size());
+            const DictValue &sizesOrEnds = params.has("size") ? params.get("size") : params.get("end");
+            CV_Assert(begins.size() == sizesOrEnds.size());
 
             sliceRanges.resize(1);
             sliceRanges[0].resize(begins.size(), Range::all());
             for (int i = 0; i < begins.size(); ++i)
             {
                 int start = begins.get<int>(i);
-                int size = sizes.get<int>(i);
+                int sizeOrEnd = sizesOrEnds.get<int>(i);  // It may be negative to reverse indexation.
                 CV_Assert(start >= 0);
-                CV_Assert(size == -1 || size > 0);  // -1 value means range [start, axis_size).
 
                 sliceRanges[0][i].start = start;
-                if (size > 0)
-                    sliceRanges[0][i].end = start + size;
+                if (params.has("size"))  // "size" form: the end index is derived from start and size.
+                {
+                    int size = sizeOrEnd;
+                    CV_Assert(size == -1 || size > 0);  // -1 value means range [start, axis_size).
+                    sliceRanges[0][i].end = size > 0 ? start + size : -1;  // Keyed on size (not start); -1 is finalized later.
+                }
+                else
+                {
+                    int end = sizeOrEnd;
+                    CV_Assert(end < 0 || end > start);  // End index is excluded.
+                    sliceRanges[0][i].end = end;  // We'll finalize a negative value later.
+                }
             }
         }
     }
@@ -109,8 +119,7 @@ class SliceLayerImpl : public SliceLayer
                 CV_Assert(sliceRanges[i].size() <= inpShape.size());
                 for (int j = 0; j < sliceRanges[i].size(); ++j)
                 {
-                    outputs[i][j] = std::min(sliceRanges[i][j].end, inpShape[j]) -
-                                    std::max(sliceRanges[i][j].start, 0);
+                    outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();
                 }
             }
         }
@@ -152,8 +161,7 @@ class SliceLayerImpl : public SliceLayer
             // Clamp.
             for (int j = 0; j < sliceRanges[i].size(); ++j)
             {
-                sliceRanges[i][j].start = std::max(0, sliceRanges[i][j].start);
-                sliceRanges[i][j].end = std::min(sliceRanges[i][j].end, inpShape[j]);
+                sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);
             }
             // Fill the rest of ranges (up to the input's number of dimensions).
             for (int j = sliceRanges[i].size(); j < (int)inpShape.size(); ++j)
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
@@ -617,7 +617,7 @@ struct TorchImporter : public ::cv::dnn::Importer
                 curModule->modules.push_back(cv::Ptr<Module>(new Module(nnName, "Sigmoid")));
                 readObject();
             }
-            else if (nnName == "SpatialBatchNormalization")
+            else if (nnName == "SpatialBatchNormalization" || nnName == "InstanceNormalization")
             {
                 newModule->apiType = "BatchNorm";
                 readTorchTable(scalarParams, tensorParams);
@@ -626,19 +626,31 @@ struct TorchImporter : public ::cv::dnn::Importer
                 float eps = float(scalarParams.get<double>("eps"));
                 layerParams.set("eps", eps);
 
-                CV_Assert((tensorParams.count("running_var") || tensorParams.count("running_std")) &&
-                          tensorParams.count("running_mean"));
-                layerParams.blobs.push_back(tensorParams["running_mean"].second);
+                if (tensorParams.count("running_mean"))
+                {
+                    layerParams.blobs.push_back(tensorParams["running_mean"].second);
+                }
+                else
+                {
+                    CV_Assert(scalarParams.has("nOutput"));
+                    layerParams.blobs.push_back(Mat::zeros(1, scalarParams.get<int>("nOutput"), CV_32F));
+                }
+
                 if (tensorParams.count("running_var"))
                 {
                     layerParams.blobs.push_back(tensorParams["running_var"].second);
                 }
-                else
+                else if (tensorParams.count("running_std"))
                 {
                     layerParams.blobs.push_back(tensorParams["running_std"].second);
                     pow(layerParams.blobs.back(), -2, layerParams.blobs.back());
                     subtract(layerParams.blobs.back(), eps, layerParams.blobs.back());
                 }
+                else
+                {
+                    CV_Assert(scalarParams.has("nOutput"));
+                    layerParams.blobs.push_back(Mat::ones(1, scalarParams.get<int>("nOutput"), CV_32F));
+                }
 
                 if (tensorParams.count("weight"))
                 {
@@ -652,6 +664,16 @@ struct TorchImporter : public ::cv::dnn::Importer
                     layerParams.blobs.push_back(tensorParams["bias"].second);
                 }
 
+                if (nnName == "InstanceNormalization")
+                {
+                    cv::Ptr<Module> mvnModule(new Module(nnName));
+                    mvnModule->apiType = "MVN";
+                    curModule->modules.push_back(mvnModule);
+
+                    layerParams.blobs[0].setTo(0);  // batch norm's mean
+                    layerParams.blobs[1].setTo(1);  // batch norm's std
+                }
+
                 curModule->modules.push_back(newModule);
             }
             else if (nnName == "PReLU")
@@ -691,7 +713,9 @@ struct TorchImporter : public ::cv::dnn::Importer
                 layerParams.set("scale", scale);
                 curModule->modules.push_back(newModule);
             }
-            else if (nnName == "Identity")
+            // TotalVariation layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
+            // It's a loss function that has an Identity forward.
+            else if (nnName == "Identity" || nnName == "TotalVariation")
             {
                 readTorchTable(scalarParams, tensorParams);
                 newModule->apiType = "Identity";
@@ -866,7 +890,7 @@ struct TorchImporter : public ::cv::dnn::Importer
                 layerParams.set("scale", scalarParams.get<float>("constant_scalar"));
                 curModule->modules.push_back(newModule);
             }
-            else if (nnName == "SpatialZeroPadding")
+            else if (nnName == "SpatialZeroPadding" || nnName == "SpatialReflectionPadding")
             {
                 readTorchTable(scalarParams, tensorParams);
                 CV_Assert(scalarParams.has("pad_l"), scalarParams.has("pad_r"),
@@ -889,6 +913,26 @@ struct TorchImporter : public ::cv::dnn::Importer
                 paddings[5] = padRight;
                 layerParams.set("paddings", DictValue::arrayInt<int*>(&paddings[0], paddings.size()));
                 layerParams.set("input_dims", 3);
+
+                if (nnName == "SpatialReflectionPadding")
+                    layerParams.set("type", "reflect");
+
+                curModule->modules.push_back(newModule);
+            }
+            else if (nnName == "ShaveImage")
+            {
+                // ShaveImage layer is from fast-neural-style project: https://github.com/jcjohnson/fast-neural-style
+                // It may be mapped to Slice layer.
+                readTorchTable(scalarParams, tensorParams);
+                CV_Assert(scalarParams.has("size"));
+                int size = scalarParams.get<int>("size");
+
+                int begins[] = {0, 0, size, size};
+                int ends[] = {-1, -1, -size - 1, -size - 1};
+
+                newModule->apiType = "Slice";
+                layerParams.set("begin", DictValue::arrayInt<int*>(&begins[0], 4));
+                layerParams.set("end", DictValue::arrayInt<int*>(&ends[0], 4));
                 curModule->modules.push_back(newModule);
             }
             else
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
@@ -231,6 +231,7 @@ TEST(Torch_Importer, net_padding)
 {
     runTorchNet("net_padding", DNN_TARGET_CPU, "", false, true);
     runTorchNet("net_spatial_zero_padding", DNN_TARGET_CPU, "", false, true);
+    runTorchNet("net_spatial_reflection_padding", DNN_TARGET_CPU, "", false, true);
 }
 
 TEST(Torch_Importer, ENet_accuracy)
@@ -338,6 +339,49 @@ OCL_TEST(Torch_Importer, ENet_accuracy)
     }
 }
 
+// Check accuracy of style transfer models from https://github.com/jcjohnson/fast-neural-style
+// th fast_neural_style.lua \
+//   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
+//   -output_image lena.png \
+//   -median_filter 0 \
+//   -image_size 0 \
+//   -model models/eccv16/starry_night.t7
+// th fast_neural_style.lua \
+//   -input_image ~/opencv_extra/testdata/dnn/googlenet_1.png \
+//   -output_image lena.png \
+//   -median_filter 0 \
+//   -image_size 0 \
+//   -model models/instance_norm/feathers.t7
+TEST(Torch_Importer, FastNeuralStyle_accuracy)
+{
+    std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
+                            "dnn/fast_neural_style_instance_norm_feathers.t7"};
+    std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};  // Reference image for models[i] is targets[i].
+
+    for (int i = 0; i < 2; ++i)
+    {
+        const string model = findDataFile(models[i], false);
+        Net net = readNetFromTorch(model);
+
+        Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
+        Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);  // NOTE(review): the Scalar is presumably a per-channel mean removed from the input - confirm.
+
+        net.setInput(inputBlob);
+        Mat out = net.forward();
+
+        // Deprocessing: add the same per-channel constants back to planes 0..2, then clip to [0, 255].
+        getPlane(out, 0, 0) += 103.939;
+        getPlane(out, 0, 1) += 116.779;
+        getPlane(out, 0, 2) += 123.68;
+        out = cv::min(cv::max(0, out), 255);
+
+        Mat ref = imread(findDataFile(targets[i]));
+        Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
+
+        normAssert(out, refBlob, "", 0.5, 1.1);  // NOTE(review): 0.5 and 1.1 look like the allowed mean / max differences - confirm against normAssert.
+    }
+}
+
 }
 
 #endif
diff --git a/samples/dnn/fast_neural_style.py b/samples/dnn/fast_neural_style.py

Original file line number	Diff line number	Diff line change
`@@ -112,16 +112,12 @@ static inline Mat slice(const Mat &m, const _Range &r0, const _Range &r1, const`
`112`	`112`	`static inline Mat getPlane(const Mat &m, int n, int cn)`
`113`	`113`	`{`
`114`	`114`	`CV_Assert(m.dims > 2);`
`115`		`- Range range[CV_MAX_DIM];`
`116`	`115`	`int sz[CV_MAX_DIM];`
`117`	`116`	`for(int i = 2; i < m.dims; i++)`
`118`	`117`	`{`
`119`	`118`	`sz[i-2] = m.size.p[i];`
`120`		`- range[i] = Range::all();`
`121`	`119`	`}`
`122`		`- range[0] = Range(n, n+1);`
`123`		`- range[1] = Range(cn, cn+1);`
`124`		`- return m(range).reshape(1, m.dims-2, sz);`
	`120`	`+ return Mat(m.dims - 2, sz, m.type(), (void*)m.ptr<float>(n, cn));`
`125`	`121`	`}`
`126`	`122`
`127`	`123`	`static inline MatShape shape(const int* dims, const int n = 4)`
`@@ -191,6 +187,14 @@ inline int clamp(int ax, const MatShape& shape)`
`191`	`187`	`return clamp(ax, (int)shape.size());`
`192`	`188`	`}`
`193`	`189`
	`190`	`+inline Range clamp(const Range& r, int axisSize)`
	`191`	`+{`
	`192`	`+ Range clamped(std::max(r.start, 0),`
	`193`	`+ r.end > 0 ? std::min(r.end, axisSize) : axisSize + r.end + 1);`
	`194`	`+ CV_Assert(clamped.start < clamped.end, clamped.end <= axisSize);`
	`195`	`+ return clamped;`
	`196`	`+}`
	`197`	`+`
`194`	`198`	`CV__DNN_EXPERIMENTAL_NS_END`
`195`	`199`	`}`
`196`	`200`	`}`
Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,7 @@ class SliceLayerImpl : public SliceLayer`
`58`	`58`	`axis = params.get<int>("axis", 1);`
`59`	`59`	`if (params.has("slice_point"))`
`60`	`60`	`{`
`61`		`- CV_Assert(!params.has("begin") && !params.has("size"));`
	`61`	`+ CV_Assert(!params.has("begin") && !params.has("size") && !params.has("end"));`
`62`	`62`	`const DictValue &indicesValue = params.get("slice_point");`
`63`	`63`	`sliceRanges.resize(indicesValue.size() + 1,`
`64`	`64`	`std::vector<Range>(axis + 1, Range::all()));`
`@@ -71,24 +71,34 @@ class SliceLayerImpl : public SliceLayer`
`71`	`71`	`}`
`72`	`72`	`sliceRanges.back()[axis].start = prevSlice;`
`73`	`73`	`}`
`74`		`- else if (params.has("begin") && params.has("size"))`
	`74`	`+ else if (params.has("begin"))`
`75`	`75`	`{`
	`76`	`+ CV_Assert(params.has("size") ^ params.has("end"));`
`76`	`77`	`const DictValue &begins = params.get("begin");`
`77`		`- const DictValue &sizes = params.get("size");`
`78`		`- CV_Assert(begins.size() == sizes.size());`
	`78`	`+ const DictValue &sizesOrEnds = params.has("size") ? params.get("size") : params.get("end");`
	`79`	`+ CV_Assert(begins.size() == sizesOrEnds.size());`
`79`	`80`
`80`	`81`	`sliceRanges.resize(1);`
`81`	`82`	`sliceRanges[0].resize(begins.size(), Range::all());`
`82`	`83`	`for (int i = 0; i < begins.size(); ++i)`
`83`	`84`	`{`
`84`	`85`	`int start = begins.get<int>(i);`
`85`		`- int size = sizes.get<int>(i);`
	`86`	`+ int sizeOrEnd = sizesOrEnds.get<int>(i); // It may be negative to reverse indexation.`
`86`	`87`	`CV_Assert(start >= 0);`
`87`		`- CV_Assert(size == -1 \|\| size > 0); // -1 value means range [start, axis_size).`
`88`	`88`
`89`	`89`	`sliceRanges[0][i].start = start;`
`90`		`- if (size > 0)`
`91`		`- sliceRanges[0][i].end = start + size;`
	`90`	`+ if (params.has("size"))`
	`91`	`+ {`
	`92`	`+ int size = sizeOrEnd;`
	`93`	`+ CV_Assert(size == -1 \|\| size > 0); // -1 value means range [start, axis_size).`
	`94`	`+ sliceRanges[0][i].end = size > 0 ? start + size : -1; // We'll finalize a negative value later.`
	`95`	`+ }`
	`96`	`+ else`
	`97`	`+ {`
	`98`	`+ int end = sizeOrEnd;`
	`99`	`+ CV_Assert(end < 0 \|\| end > start); // End index is excluded.`
	`100`	`+ sliceRanges[0][i].end = end; // We'll finalize a negative value later.`
	`101`	`+ }`
`92`	`102`	`}`
`93`	`103`	`}`
`94`	`104`	`}`
`@@ -109,8 +119,7 @@ class SliceLayerImpl : public SliceLayer`
`109`	`119`	`CV_Assert(sliceRanges[i].size() <= inpShape.size());`
`110`	`120`	`for (int j = 0; j < sliceRanges[i].size(); ++j)`
`111`	`121`	`{`
`112`		`- outputs[i][j] = std::min(sliceRanges[i][j].end, inpShape[j]) -`
`113`		`- std::max(sliceRanges[i][j].start, 0);`
	`122`	`+ outputs[i][j] = clamp(sliceRanges[i][j], inpShape[j]).size();`
`114`	`123`	`}`
`115`	`124`	`}`
`116`	`125`	`}`
`@@ -152,8 +161,7 @@ class SliceLayerImpl : public SliceLayer`
`152`	`161`	`// Clamp.`
`153`	`162`	`for (int j = 0; j < sliceRanges[i].size(); ++j)`
`154`	`163`	`{`
`155`		`- sliceRanges[i][j].start = std::max(0, sliceRanges[i][j].start);`
`156`		`- sliceRanges[i][j].end = std::min(sliceRanges[i][j].end, inpShape[j]);`
	`164`	`+ sliceRanges[i][j] = clamp(sliceRanges[i][j], inpShape[j]);`
`157`	`165`	`}`
`158`	`166`	`// Fill the rest of ranges.`
`159`	`167`	`for (int j = sliceRanges[i].size(); j < (int)inpShape.size(); ++j)`