Merge pull request opencv#9862 from sovrasov:dnn_nms

vpisarev · vpisarev · commit bc937753854d · 2017-10-27T11:19:57.000Z
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -734,6 +734,21 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
      */
     CV_EXPORTS_W void shrinkCaffeModel(const String& src, const String& dst);
 
+    /** @brief Performs non maximum suppression given boxes and corresponding scores.
+
+     * @param bboxes a set of bounding boxes to apply NMS.
+     * @param scores a set of corresponding confidences.
+     * @param score_threshold a threshold used to filter boxes by score.
+     * @param nms_threshold a threshold used in non maximum suppression.
+     * @param indices the kept indices of bboxes after NMS.
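+     * @param eta a coefficient in adaptive threshold formula: \f$nms\_threshold_{i+1}=eta\cdot nms\_threshold_i\f$. The threshold is reduced only after a box is kept, and only while `eta < 1` and the current threshold is above 0.5.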
+     * @param top_k if `>0`, keep at most @p top_k picked indices.
+     */
+    CV_EXPORTS_W void NMSBoxes(const std::vector<Rect>& bboxes, const std::vector<float>& scores,
+                               const float score_threshold, const float nms_threshold,
+                               CV_OUT std::vector<int>& indices,
+                               const float eta = 1.f, const int top_k = 0);
+
 
 //! @}
 CV__DNN_EXPERIMENTAL_NS_END
diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp
@@ -45,6 +45,7 @@
 #include <float.h>
 #include <string>
 #include <caffe.pb.h>
+#include "../nms.inl.hpp"
 
 namespace cv
 {
@@ -61,6 +62,8 @@ static inline bool SortScorePairDescend(const std::pair<float, T>& pair1,
     return pair1.first > pair2.first;
 }
 
+static inline float caffe_box_overlap(const caffe::NormalizedBBox& a, const caffe::NormalizedBBox& b);
+
 } // namespace
 
 class DetectionOutputLayerImpl : public DetectionOutputLayer
@@ -308,7 +311,8 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
             LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label);
             if (label_bboxes == decodeBBoxes.end())
                 CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
-            ApplyNMSFast(label_bboxes->second, scores, _confidenceThreshold, _nmsThreshold, 1.0, _topK, indices[c]);
+            NMSFast_(label_bboxes->second, scores, _confidenceThreshold, _nmsThreshold, 1.0, _topK,
+                indices[c], util::caffe_box_overlap);
             numDetections += indices[c].size();
         }
         if (_keepTopK > -1 && numDetections > (size_t)_keepTopK)
@@ -619,75 +623,6 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
         }
     }
 
-    // Do non maximum suppression given bboxes and scores.
-    // Inspired by Piotr Dollar's NMS implementation in EdgeBox.
-    // https://goo.gl/jV3JYS
-    //    bboxes: a set of bounding boxes.
-    //    scores: a set of corresponding confidences.
-    //    score_threshold: a threshold used to filter detection results.
-    //    nms_threshold: a threshold used in non maximum suppression.
-    //    top_k: if not -1, keep at most top_k picked indices.
-    //    indices: the kept indices of bboxes after nms.
-    static void ApplyNMSFast(const std::vector<caffe::NormalizedBBox>& bboxes,
-          const std::vector<float>& scores, const float score_threshold,
-          const float nms_threshold, const float eta, const int top_k,
-          std::vector<int>& indices)
-    {
-        CV_Assert(bboxes.size() == scores.size());
-
-        // Get top_k scores (with corresponding indices).
-        std::vector<std::pair<float, int> > score_index_vec;
-        GetMaxScoreIndex(scores, score_threshold, top_k, score_index_vec);
-
-        // Do nms.
-        float adaptive_threshold = nms_threshold;
-        indices.clear();
-        while (score_index_vec.size() != 0) {
-            const int idx = score_index_vec.front().second;
-            bool keep = true;
-            for (int k = 0; k < (int)indices.size() && keep; ++k) {
-                const int kept_idx = indices[k];
-                float overlap = JaccardOverlap<true>(bboxes[idx], bboxes[kept_idx]);
-                keep = overlap <= adaptive_threshold;
-            }
-            if (keep)
-                indices.push_back(idx);
-            score_index_vec.erase(score_index_vec.begin());
-            if (keep && eta < 1 && adaptive_threshold > 0.5) {
-              adaptive_threshold *= eta;
-            }
-        }
-    }
-
-    // Get max scores with corresponding indices.
-    //    scores: a set of scores.
-    //    threshold: only consider scores higher than the threshold.
-    //    top_k: if -1, keep all; otherwise, keep at most top_k.
-    //    score_index_vec: store the sorted (score, index) pair.
-    static void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold, const int top_k,
-                          std::vector<std::pair<float, int> >& score_index_vec)
-    {
-        CV_DbgAssert(score_index_vec.empty());
-        // Generate index score pairs.
-        for (size_t i = 0; i < scores.size(); ++i)
-        {
-            if (scores[i] > threshold)
-            {
-                score_index_vec.push_back(std::make_pair(scores[i], i));
-            }
-        }
-
-        // Sort the score pair according to the scores in descending order
-        std::stable_sort(score_index_vec.begin(), score_index_vec.end(),
-                         util::SortScorePairDescend<int>);
-
-        // Keep top_k scores if needed.
-        if (top_k > -1 && top_k < (int)score_index_vec.size())
-        {
-            score_index_vec.resize(top_k);
-        }
-    }
-
     // Compute the jaccard (intersection over union IoU) overlap between two bboxes.
     template<bool normalized>
     static float JaccardOverlap(const caffe::NormalizedBBox& bbox1,
@@ -733,6 +668,11 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
     }
 };
 
+float util::caffe_box_overlap(const caffe::NormalizedBBox& a, const caffe::NormalizedBBox& b)
+{
+    return DetectionOutputLayerImpl::JaccardOverlap<true>(a, b);
+}
+
 const std::string DetectionOutputLayerImpl::_layerName = std::string("DetectionOutput");
 
 Ptr<DetectionOutputLayer> DetectionOutputLayer::create(const LayerParams &params)
diff --git a/modules/dnn/src/nms.cpp b/modules/dnn/src/nms.cpp
@@ -0,0 +1,33 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "precomp.hpp"
+#include <nms.inl.hpp>
+
+namespace cv
+{
+namespace dnn
+{
+CV__DNN_EXPERIMENTAL_NS_BEGIN
+
+static inline float rectOverlap(const Rect& a, const Rect& b)
+{
+    return 1.f - static_cast<float>(jaccardDistance(a, b));
+}
+
+void NMSBoxes(const std::vector<Rect>& bboxes, const std::vector<float>& scores,
+                          const float score_threshold, const float nms_threshold,
+                          std::vector<int>& indices, const float eta, const int top_k)
+{
+    CV_Assert(bboxes.size() == scores.size(), score_threshold >= 0,
+        nms_threshold >= 0, eta > 0);
+    NMSFast_(bboxes, scores, score_threshold, nms_threshold, eta, top_k, indices, rectOverlap);
+}
+
+CV__DNN_EXPERIMENTAL_NS_END
+}// dnn
+}// cv
diff --git a/modules/dnn/src/nms.inl.hpp b/modules/dnn/src/nms.inl.hpp
@@ -0,0 +1,100 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#ifndef OPENCV_DNN_NMS_INL_HPP
+#define OPENCV_DNN_NMS_INL_HPP
+
+#include <opencv2/dnn.hpp>
+
+namespace cv {
+namespace dnn {
+
+namespace
+{
+
+template <typename T>
+static inline bool SortScorePairDescend(const std::pair<float, T>& pair1,
+                          const std::pair<float, T>& pair2)
+{
+    return pair1.first > pair2.first;
+}
+
+} // namespace
+
+// Get max scores with corresponding indices.
+//    scores: a set of scores.
+//    threshold: only consider scores higher than the threshold.
+//    top_k: if <= 0, keep all; otherwise, keep at most top_k.
+//    score_index_vec: store the sorted (score, index) pair.
+inline void GetMaxScoreIndex(const std::vector<float>& scores, const float threshold, const int top_k,
+                      std::vector<std::pair<float, int> >& score_index_vec)
+{
+    CV_DbgAssert(score_index_vec.empty());
+    // Generate index score pairs.
+    for (size_t i = 0; i < scores.size(); ++i)
+    {
+        if (scores[i] > threshold)
+        {
+            score_index_vec.push_back(std::make_pair(scores[i], i));
+        }
+    }
+
+    // Sort the score pair according to the scores in descending order
+    std::stable_sort(score_index_vec.begin(), score_index_vec.end(),
+                     SortScorePairDescend<int>);
+
+    // Keep top_k scores if needed.
+    if (top_k > 0 && top_k < (int)score_index_vec.size())
+    {
+        score_index_vec.resize(top_k);
+    }
+}
+
+// Do non maximum suppression given bboxes and scores.
+// Inspired by Piotr Dollar's NMS implementation in EdgeBox.
+// https://goo.gl/jV3JYS
+//    bboxes: a set of bounding boxes.
+//    scores: a set of corresponding confidences.
+//    score_threshold: a threshold used to filter detection results.
+//    nms_threshold: a threshold used in non maximum suppression.
+//    top_k: if > 0, only the top_k highest-scoring boxes are considered, so at most top_k indices are kept.
+//    indices: the kept indices of bboxes after nms.
+template <typename BoxType>
+inline void NMSFast_(const std::vector<BoxType>& bboxes,
+      const std::vector<float>& scores, const float score_threshold,
+      const float nms_threshold, const float eta, const int top_k,
+      std::vector<int>& indices, float (*computeOverlap)(const BoxType&, const BoxType&))
+{
+    CV_Assert(bboxes.size() == scores.size());
+
+    // Get top_k scores (with corresponding indices).
+    std::vector<std::pair<float, int> > score_index_vec;
+    GetMaxScoreIndex(scores, score_threshold, top_k, score_index_vec);
+
+    // Do nms.
+    float adaptive_threshold = nms_threshold;
+    indices.clear();
+    for (size_t i = 0; i < score_index_vec.size(); ++i) {
+        const int idx = score_index_vec[i].second;
+        bool keep = true;
+        for (int k = 0; k < (int)indices.size() && keep; ++k) {
+            const int kept_idx = indices[k];
+            float overlap = computeOverlap(bboxes[idx], bboxes[kept_idx]);
+            keep = overlap <= adaptive_threshold;
+        }
+        if (keep)
+            indices.push_back(idx);
+        if (keep && eta < 1 && adaptive_threshold > 0.5) {
+          adaptive_threshold *= eta;
+        }
+    }
+}
+
+}// dnn
+}// cv
+
+#endif
diff --git a/modules/dnn/test/test_nms.cpp b/modules/dnn/test/test_nms.cpp
@@ -0,0 +1,41 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2017, Intel Corporation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+
+#include "test_precomp.hpp"
+
+namespace cvtest
+{
+
+TEST(NMS, Accuracy)
+{
+    //reference results obtained using tf.image.non_max_suppression with iou_threshold=0.5
+    std::string dataPath = findDataFile("dnn/nms_reference.yml");
+    FileStorage fs(dataPath, FileStorage::READ);
+
+    std::vector<Rect> bboxes;
+    std::vector<float> scores;
+    std::vector<int> ref_indices;
+
+    fs["boxes"] >> bboxes;
+    fs["probs"] >> scores;
+    fs["output"] >> ref_indices;
+
+    const float nms_thresh = .5f;
+    const float score_thresh = .01f;
+    std::vector<int> indices;
+    cv::dnn::NMSBoxes(bboxes, scores, score_thresh, nms_thresh, indices);
+
+    ASSERT_EQ(ref_indices.size(), indices.size());
+
+    std::sort(indices.begin(), indices.end());
+    std::sort(ref_indices.begin(), ref_indices.end());
+
+    for(size_t i = 0; i < indices.size(); i++)
+        ASSERT_EQ(indices[i], ref_indices[i]);
+}
+
+}//cvtest