Merge pull request opencv#10207 from cabelo:cabelo-opencv

alalek · web-flow · commit 0042bacd887d · 2017-12-05T21:23:23.000+03:00
Repair: incorrect display of class in DNN
diff --git a/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown b/doc/tutorials/dnn/dnn_yolo/dnn_yolo.markdown
@@ -0,0 +1,58 @@
+YOLO DNNs  {#tutorial_dnn_yolo}
+===============================
+
+Introduction
+------------
+
+In this tutorial you will learn how to use the opencv_dnn module with the yolo_object_detection sample, which runs the OpenCV dnn module in real time on a capture device, a video file, or an image.
+
+We will demonstrate the results of this example on the following picture.
+![Picture example](images/yolo.jpg)
+
+Examples
+--------
+
+VIDEO DEMO:
+@youtube{NHtRlndE2cg}
+
+Source Code
+-----------
+
+The latest version of sample source code can be downloaded [here](https://github.com/opencv/opencv/blob/master/samples/dnn/yolo_object_detection.cpp).
+
+@include dnn/yolo_object_detection.cpp
+
+How to compile from the command line with pkg-config
+----------------------------------------------------
+
+@code{.bash}
+
+$ g++ yolo_object_detection.cpp -o yolo_object_detection `pkg-config --cflags --libs opencv`
+
+@endcode
+
+Execute with a webcam:
+
+@code{.bash}
+
+$ yolo_object_detection -camera_device=0  -cfg=[PATH-TO-DARKNET]/cfg/yolo.cfg -model=[PATH-TO-DARKNET]/yolo.weights   -class_names=[PATH-TO-DARKNET]/data/coco.names
+
+@endcode
+
+Execute with an image:
+
+@code{.bash}
+
+$ yolo_object_detection -source=[PATH-IMAGE]  -cfg=[PATH-TO-DARKNET]/cfg/yolo.cfg -model=[PATH-TO-DARKNET]/yolo.weights   -class_names=[PATH-TO-DARKNET]/data/coco.names
+
+@endcode
+
+Execute with a video file:
+
+@code{.bash}
+
+$ yolo_object_detection -source=[PATH-TO-VIDEO] -cfg=[PATH-TO-DARKNET]/cfg/yolo.cfg -model=[PATH-TO-DARKNET]/yolo.weights   -class_names=[PATH-TO-DARKNET]/data/coco.names
+
+@endcode
+
+Please send questions and suggestions by email to Alessandro de Oliveira Faria (cabelo@opensuse.org) or to the OpenCV Team.
diff --git a/doc/tutorials/dnn/dnn_yolo/images/yolo.jpg b/doc/tutorials/dnn/dnn_yolo/images/yolo.jpg
diff --git a/doc/tutorials/dnn/table_of_content_dnn.markdown b/doc/tutorials/dnn/table_of_content_dnn.markdown
@@ -15,12 +15,20 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
 
     *Author:* Dmitry Kurtaev
 
-    This tutorial guidelines how to run your models in OpenCV deep learning module using Halide language backend
+    This tutorial provides guidelines on how to run your models in the OpenCV deep learning module using the Halide language backend.
 
 -   @subpage tutorial_dnn_halide_scheduling
 
     *Compatibility:* \> OpenCV 3.3
 
     *Author:* Dmitry Kurtaev
 
-    In this tutorial we describe the ways to schedule your networks using Halide backend in OpenCV deep learning module.
+    In this tutorial we describe the ways to schedule your networks using Halide backend in OpenCV deep learning module.
+
+-   @subpage tutorial_dnn_yolo
+
+    *Compatibility:* \> OpenCV 3.3.1
+
+    *Author:* Alessandro de Oliveira Faria
+
+    In this tutorial you will learn how to use the opencv_dnn module with the yolo_object_detection sample on a capture device, a video file, or an image.
diff --git a/samples/dnn/yolo_object_detection.cpp b/samples/dnn/yolo_object_detection.cpp
@@ -1,36 +1,36 @@
+// Brief sample of using the OpenCV dnn module in real time with a capture device, a video file, or an image.
+// VIDEO DEMO: https://www.youtube.com/watch?v=NHtRlndE2cg
+
 #include <opencv2/dnn.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/imgproc.hpp>
 #include <opencv2/highgui.hpp>
-using namespace cv;
-using namespace cv::dnn;
-
 #include <fstream>
 #include <iostream>
 #include <algorithm>
 #include <cstdlib>
+
 using namespace std;
+using namespace cv;
+using namespace cv::dnn;
 
 const size_t network_width = 416;
 const size_t network_height = 416;
 
-const char* about = "This sample uses You only look once (YOLO)-Detector "
-                    "(https://arxiv.org/abs/1612.08242) "
-                    "to detect objects on camera/video/image.\n"
-                    "Models can be downloaded here: "
-                    "https://pjreddie.com/darknet/yolo/\n"
-                    "Default network is 416x416.\n"
-                    "Class names can be downloaded here: "
-                    "https://github.com/pjreddie/darknet/tree/master/data\n";
-
-const char* params
-    = "{ help           | false | print usage         }"
-      "{ cfg            |       | model configuration }"
-      "{ model          |       | model weights       }"
-      "{ camera_device  | 0     | camera device number}"
-      "{ video          |       | video or image for detection}"
-      "{ min_confidence | 0.24  | min confidence      }"
-      "{ class_names    |       | class names         }";
+static const char* about =
+"This sample uses You only look once (YOLO)-Detector (https://arxiv.org/abs/1612.08242) to detect objects on camera/video/image.\n"
+"Models can be downloaded here: https://pjreddie.com/darknet/yolo/\n"
+"Default network is 416x416.\n"
+"Class names can be downloaded here: https://github.com/pjreddie/darknet/tree/master/data\n";
+
+static const char* params =
+"{ help           | false | print usage         }"
+"{ cfg            |       | model configuration }"
+"{ model          |       | model weights       }"
+"{ camera_device  | 0     | camera device number}"
+"{ source         |       | video or image for detection}"
+"{ min_confidence | 0.24  | min confidence      }"
+"{ class_names    |       | File with class names, [PATH-TO-DARKNET]/data/coco.names }";
 
 int main(int argc, char** argv)
 {
@@ -61,7 +61,7 @@ int main(int argc, char** argv)
     }
 
     VideoCapture cap;
-    if (parser.get<String>("video").empty())
+    if (parser.get<String>("source").empty())
     {
         int cameraDevice = parser.get<int>("camera_device");
         cap = VideoCapture(cameraDevice);
@@ -73,7 +73,7 @@ int main(int argc, char** argv)
     }
     else
     {
-        cap.open(parser.get<String>("video"));
+        cap.open(parser.get<String>("source"));
         if(!cap.isOpened())
         {
             cout << "Couldn't open image or video: " << parser.get<String>("video") << endl;
@@ -86,7 +86,7 @@ int main(int argc, char** argv)
     if (classNamesFile.is_open())
     {
         string className = "";
-        while (classNamesFile >> className)
+        while (std::getline(classNamesFile, className))
             classNamesVec.push_back(className);
     }
 
@@ -119,14 +119,14 @@ int main(int argc, char** argv)
 
         //! [Make forward pass]
         Mat detectionMat = net.forward("detection_out");   //compute output
-       //! [Make forward pass]
+        //! [Make forward pass]
 
-       vector<double> layersTimings;
-       double freq = getTickFrequency() / 1000;
-       double time = net.getPerfProfile(layersTimings) / freq;
-       ostringstream ss;
-       ss << "FPS: " << 1000/time << " ; time: " << time << " ms";
-       putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255));
+        vector<double> layersTimings;
+        double freq = getTickFrequency() / 1000;
+        double time = net.getPerfProfile(layersTimings) / freq;
+        ostringstream ss;
+        ss << "FPS: " << 1000/time << " ; time: " << time << " ms";
+        putText(frame, ss.str(), Point(20,20), 0, 0.5, Scalar(0,0,255));
 
         float confidenceThreshold = parser.get<float>("min_confidence");
         for (int i = 0; i < detectionMat.rows; i++)
@@ -163,10 +163,10 @@ int main(int argc, char** argv)
                     String label = String(classNamesVec[objectClass]) + ": " + conf;
                     int baseLine = 0;
                     Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
-                    rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom - labelSize.height),
+                    rectangle(frame, Rect(Point(xLeftBottom, yLeftBottom ),
                                           Size(labelSize.width, labelSize.height + baseLine)),
                               Scalar(255, 255, 255), CV_FILLED);
-                    putText(frame, label, Point(xLeftBottom, yLeftBottom),
+                    putText(frame, label, Point(xLeftBottom, yLeftBottom+labelSize.height),
                             FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
                 }
                 else
@@ -181,7 +181,7 @@ int main(int argc, char** argv)
             }
         }
 
-        imshow("detections", frame);
+        imshow("YOLO: Detections", frame);
         if (waitKey(1) >= 0) break;
     }