Merge pull request opencv#10164 from pengli:dnn

alalek · alalek · commit cc2ee923e460 · 2017-11-29T12:05:10.000Z
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
@@ -1196,7 +1196,8 @@ struct Net::Impl
             // some other layers.
 
             // TODO: OpenCL target support more fusion styles.
-            if ( preferableTarget == DNN_TARGET_OPENCL && ld.layerInstance->type.compare("Convolution") )
+            if ( preferableTarget == DNN_TARGET_OPENCL &&
+                 (!cv::ocl::useOpenCL() || ld.layerInstance->type.compare("Convolution")) )
                 continue;
 
             Ptr<Layer>& currLayer = ld.layerInstance;
@@ -1214,7 +1215,10 @@ struct Net::Impl
                     {
                         printf_(("\tfused with %s\n", nextBNormLayer->name.c_str()));
                         bnormData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        if ( preferableTarget == DNN_TARGET_OPENCL )
+                            ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
+                        else
+                            ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                         if( bnormData->consumers.size() == 1 )
                         {
                             nextData = &layers[bnormData->consumers[0].lid];
@@ -1234,7 +1238,10 @@ struct Net::Impl
                     {
                         printf_(("\tfused with %s\n", nextScaleLayer->name.c_str()));
                         scaleData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        if ( preferableTarget == DNN_TARGET_OPENCL )
+                            ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
+                        else
+                            ld.outputBlobs = layers[lpNext.lid].outputBlobs;
                         if( scaleData->consumers.size() == 1 )
                         {
                             nextData = &layers[scaleData->consumers[0].lid];
@@ -1263,7 +1270,10 @@ struct Net::Impl
                         LayerData *activData = nextData;
                         printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                         activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                        if ( preferableTarget == DNN_TARGET_OPENCL )
+                            ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
+                        else
+                            ld.outputBlobs = layers[lpNext.lid].outputBlobs;
 
                         if ( preferableTarget == DNN_TARGET_OPENCL )
                         {
@@ -1325,13 +1335,13 @@ struct Net::Impl
                                              !nextData->type.compare("Power")) &&
                                             currLayer->setActivation(nextActivLayer) )
                                     {
-                                        CV_Assert(firstConvLayerData->outputBlobs.size() == 1 && ld.inputBlobs.size() == 1);
-                                        ld.inputBlobs.push_back(&firstConvLayerData->outputBlobs[0]);
+                                        CV_Assert(firstConvLayerData->umat_outputBlobs.size() == 1 && ld.umat_inputBlobs.size() == 1);
+                                        ld.umat_inputBlobs.push_back(firstConvLayerData->umat_outputBlobs[0]);
                                         printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
                                         printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                                         eltwiseData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                                         nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
-                                        ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                                        ld.umat_outputBlobs = layers[lpNext.lid].umat_outputBlobs;
                                     }
                                 }
                             }
diff --git a/modules/dnn/test/test_caffe_importer.cpp b/modules/dnn/test/test_caffe_importer.cpp
@@ -42,6 +42,8 @@
 #include "test_precomp.hpp"
 #include "npy_blob.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
+#include <opencv2/core/ocl.hpp>
+#include <opencv2/ts/ocl_test.hpp>
 
 namespace cvtest
 {
@@ -119,6 +121,43 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
 
 INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_AlexNet, testing::Values(true, false));
 
+typedef testing::TestWithParam<tuple<bool> > Reproducibility_OCL_AlexNet;
+OCL_TEST_P(Reproducibility_OCL_AlexNet, Accuracy)
+{
+    bool readFromMemory = get<0>(GetParam());
+    Net net;
+    {
+        const string proto = findDataFile("dnn/bvlc_alexnet.prototxt", false);
+        const string model = findDataFile("dnn/bvlc_alexnet.caffemodel", false);
+        if (readFromMemory)
+        {
+            string dataProto;
+            ASSERT_TRUE(readFileInMemory(proto, dataProto));
+            string dataModel;
+            ASSERT_TRUE(readFileInMemory(model, dataModel));
+
+            net = readNetFromCaffe(dataProto.c_str(), dataProto.size(),
+                                   dataModel.c_str(), dataModel.size());
+        }
+        else
+            net = readNetFromCaffe(proto, model);
+        ASSERT_FALSE(net.empty());
+    }
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    Mat sample = imread(_tf("grace_hopper_227.png"));
+    ASSERT_TRUE(!sample.empty());
+
+    net.setInput(blobFromImage(sample, 1.0f, Size(227, 227), Scalar(), false), "data");
+    Mat out = net.forward("prob");
+    Mat ref = blobFromNPY(_tf("caffe_alexnet_prob.npy"));
+    normAssert(ref, out);
+}
+
+OCL_INSTANTIATE_TEST_CASE_P(Test_Caffe, Reproducibility_OCL_AlexNet, testing::Values(true, false));
+
 #if !defined(_WIN32) || defined(_WIN64)
 TEST(Reproducibility_FCN, Accuracy)
 {
@@ -201,6 +240,38 @@ TEST(Reproducibility_MobileNet_SSD, Accuracy)
     }
 }
 
+OCL_TEST(Reproducibility_MobileNet_SSD, Accuracy)
+{
+    const string proto = findDataFile("dnn/MobileNetSSD_deploy.prototxt", false);
+    const string model = findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false);
+    Net net = readNetFromCaffe(proto, model);
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    Mat sample = imread(_tf("street.png"));
+
+    Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
+    net.setInput(inp);
+    Mat out = net.forward();
+
+    Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy"));
+    normAssert(ref, out);
+
+    // Check that detections aren't preserved.
+    inp.setTo(0.0f);
+    net.setInput(inp);
+    out = net.forward();
+
+    const int numDetections = out.size[2];
+    ASSERT_NE(numDetections, 0);
+    for (int i = 0; i < numDetections; ++i)
+    {
+        float confidence = out.ptr<float>(0, 0, i)[2];
+        ASSERT_EQ(confidence, 0);
+    }
+}
+
 TEST(Reproducibility_ResNet50, Accuracy)
 {
     Net net = readNetFromCaffe(findDataFile("dnn/ResNet-50-deploy.prototxt", false),
@@ -216,6 +287,24 @@ TEST(Reproducibility_ResNet50, Accuracy)
     normAssert(ref, out);
 }
 
+OCL_TEST(Reproducibility_ResNet50, Accuracy)
+{
+    Net net = readNetFromCaffe(findDataFile("dnn/ResNet-50-deploy.prototxt", false),
+                               findDataFile("dnn/ResNet-50-model.caffemodel", false));
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(224,224), Scalar(), false);
+    ASSERT_TRUE(!input.empty());
+
+    net.setInput(input);
+    Mat out = net.forward();
+
+    Mat ref = blobFromNPY(_tf("resnet50_prob.npy"));
+    normAssert(ref, out);
+}
+
 TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
 {
     Net net = readNetFromCaffe(findDataFile("dnn/squeezenet_v1.1.prototxt", false),
@@ -231,6 +320,24 @@ TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
     normAssert(ref, out);
 }
 
+OCL_TEST(Reproducibility_SqueezeNet_v1_1, Accuracy)
+{
+    Net net = readNetFromCaffe(findDataFile("dnn/squeezenet_v1.1.prototxt", false),
+                               findDataFile("dnn/squeezenet_v1.1.caffemodel", false));
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    Mat input = blobFromImage(imread(_tf("googlenet_0.png")), 1.0f, Size(227,227), Scalar(), false);
+    ASSERT_TRUE(!input.empty());
+
+    net.setInput(input);
+    Mat out = net.forward();
+
+    Mat ref = blobFromNPY(_tf("squeezenet_v1.1_prob.npy"));
+    normAssert(ref, out);
+}
+
 TEST(Reproducibility_AlexNet_fp16, Accuracy)
 {
     const float l1 = 1e-5;
diff --git a/modules/dnn/test/test_darknet_importer.cpp b/modules/dnn/test/test_darknet_importer.cpp
@@ -184,6 +184,68 @@ TEST(Reproducibility_TinyYoloVoc, Accuracy)
     normAssert(ref, detection);
 }
 
+OCL_TEST(Reproducibility_YoloVoc, Accuracy)
+{
+    Net net;
+    {
+        const string cfg = findDataFile("dnn/yolo-voc.cfg", false);
+        const string model = findDataFile("dnn/yolo-voc.weights", false);
+        net = readNetFromDarknet(cfg, model);
+        ASSERT_FALSE(net.empty());
+    }
+
+    net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+    net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+    // dog416.png is dog.jpg resized to 416x416 and saved in the lossless PNG format
+    Mat sample = imread(_tf("dog416.png"));
+    ASSERT_TRUE(!sample.empty());
+
+    Size inputSize(416, 416);
+
+    if (sample.size() != inputSize)
+        resize(sample, sample, inputSize);
+
+    net.setInput(blobFromImage(sample, 1 / 255.F), "data");
+    Mat out = net.forward("detection_out");
+
+    Mat detection;
+    const float confidenceThreshold = 0.24;
+
+    for (int i = 0; i < out.rows; i++) {
+        const int probability_index = 5;
+        const int probability_size = out.cols - probability_index;
+        float *prob_array_ptr = &out.at<float>(i, probability_index);
+        size_t objectClass = std::max_element(prob_array_ptr, prob_array_ptr + probability_size) - prob_array_ptr;
+        float confidence = out.at<float>(i, (int)objectClass + probability_index);
+
+        if (confidence > confidenceThreshold)
+            detection.push_back(out.row(i));
+    }
+
+    // obtained by: ./darknet detector test ./cfg/voc.data  ./cfg/yolo-voc.cfg ./yolo-voc.weights -thresh 0.24 ./dog416.png
+    // There are 3 objects (6-car, 1-bicycle, 11-dog) with 25 values for each:
+    // { relative_center_x, relative_center_y, relative_width, relative_height, unused_t0, probability_for_each_class[20] }
+    float ref_array[] = {
+        0.740161F, 0.214100F, 0.325575F, 0.173418F, 0.750769F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
+        0.000000F, 0.000000F, 0.750469F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
+        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
+
+        0.501618F, 0.504757F, 0.461713F, 0.481310F, 0.783550F, 0.000000F, 0.780879F, 0.000000F, 0.000000F,
+        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
+        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
+
+        0.279968F, 0.638651F, 0.282737F, 0.600284F, 0.901864F, 0.000000F, 0.000000F, 0.000000F, 0.000000F,
+        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.901615F,
+        0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F, 0.000000F
+    };
+
+    const int number_of_objects = 3;
+    Mat ref(number_of_objects, sizeof(ref_array) / (number_of_objects * sizeof(float)), CV_32FC1, &ref_array);
+
+    normAssert(ref, detection);
+}
+
 TEST(Reproducibility_YoloVoc, Accuracy)
 {
     Net net;
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
@@ -382,6 +382,39 @@ TEST(Torch_Importer, FastNeuralStyle_accuracy)
     }
 }
 
+OCL_TEST(Torch_Importer, FastNeuralStyle_accuracy)
+{
+    std::string models[] = {"dnn/fast_neural_style_eccv16_starry_night.t7",
+                            "dnn/fast_neural_style_instance_norm_feathers.t7"};
+    std::string targets[] = {"dnn/lena_starry_night.png", "dnn/lena_feathers.png"};
+
+    for (int i = 0; i < 2; ++i)
+    {
+        const string model = findDataFile(models[i], false);
+        Net net = readNetFromTorch(model);
+
+        net.setPreferableBackend(DNN_BACKEND_DEFAULT);
+        net.setPreferableTarget(DNN_TARGET_OPENCL);
+
+        Mat img = imread(findDataFile("dnn/googlenet_1.png", false));
+        Mat inputBlob = blobFromImage(img, 1.0, Size(), Scalar(103.939, 116.779, 123.68), false);
+
+        net.setInput(inputBlob);
+        Mat out = net.forward();
+
+        // Deprocessing.
+        getPlane(out, 0, 0) += 103.939;
+        getPlane(out, 0, 1) += 116.779;
+        getPlane(out, 0, 2) += 123.68;
+        out = cv::min(cv::max(0, out), 255);
+
+        Mat ref = imread(findDataFile(targets[i]));
+        Mat refBlob = blobFromImage(ref, 1.0, Size(), Scalar(), false);
+
+        normAssert(out, refBlob, "", 0.5, 1.1);
+    }
+}
+
 }
 
 #endif