pythonwebcoder
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp
Lines changed: 22 additions & 6 deletions
diff --git a/modules/dnn/src/layers/prior_box_layer.cpp b/modules/dnn/src/layers/prior_box_layer.cpp
Lines changed: 29 additions & 10 deletions
@@ -629,7 +629,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     /** @brief Reads a network model stored in Tensorflow model file.
       * @details This is shortcut consisting from createTensorflowImporter and Net::populateNet calls.
       */
-    CV_EXPORTS_W Net readNetFromTensorflow(const String &model);
+    CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());
 
     /** @brief Reads a network model stored in Torch model file.
       * @details This is shortcut consisting from createTorchImporter and Net::populateNet calls.
 
@@ -81,6 +81,8 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
 
     float _nmsThreshold;
     int _topK;
+    // Whether predicted bounding boxes are represented in YXHW instead of XYWH layout.
+    bool _locPredTransposed;
 
     enum { _numAxes = 4 };
     static const std::string _layerName;
@@ -148,6 +150,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
         _keepTopK = getParameter<int>(params, "keep_top_k");
         _confidenceThreshold = getParameter<float>(params, "confidence_threshold", 0, false, -FLT_MAX);
         _topK = getParameter<int>(params, "top_k", 0, false, -1);
+        _locPredTransposed = getParameter<bool>(params, "loc_pred_transposed", 0, false, false);
 
         getCodeType(params);
 
@@ -209,7 +212,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
             // Retrieve all location predictions
             std::vector<LabelBBox> allLocationPredictions;
             GetLocPredictions(locationData, num, numPriors, _numLocClasses,
-                              _shareLocation, allLocationPredictions);
+                              _shareLocation, _locPredTransposed, allLocationPredictions);
 
             // Retrieve all confidences
             GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
@@ -540,11 +543,14 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
     //    num_loc_classes: number of location classes. It is 1 if share_location is
     //      true; and is equal to number of classes needed to predict otherwise.
     //    share_location: if true, all classes share the same location prediction.
+    //    loc_pred_transposed: if true, the four bounding box values are laid out
+    //                         as [y,x,height,width]; otherwise as [x,y,width,height].
     //    loc_preds: stores the location prediction, where each item contains
     //      location prediction for an image.
     static void GetLocPredictions(const float* locData, const int num,
                            const int numPredsPerClass, const int numLocClasses,
-                           const bool shareLocation, std::vector<LabelBBox>& locPreds)
+                           const bool shareLocation, const bool locPredTransposed,
+                           std::vector<LabelBBox>& locPreds)
     {
         locPreds.clear();
         if (shareLocation)
@@ -566,10 +572,20 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
                         labelBBox[label].resize(numPredsPerClass);
                     }
                     caffe::NormalizedBBox& bbox = labelBBox[label][p];
-                    bbox.set_xmin(locData[startIdx + c * 4]);
-                    bbox.set_ymin(locData[startIdx + c * 4 + 1]);
-                    bbox.set_xmax(locData[startIdx + c * 4 + 2]);
-                    bbox.set_ymax(locData[startIdx + c * 4 + 3]);
+                    if (locPredTransposed)
+                    {
+                        bbox.set_ymin(locData[startIdx + c * 4]);
+                        bbox.set_xmin(locData[startIdx + c * 4 + 1]);
+                        bbox.set_ymax(locData[startIdx + c * 4 + 2]);
+                        bbox.set_xmax(locData[startIdx + c * 4 + 3]);
+                    }
+                    else
+                    {
+                        bbox.set_xmin(locData[startIdx + c * 4]);
+                        bbox.set_ymin(locData[startIdx + c * 4 + 1]);
+                        bbox.set_xmax(locData[startIdx + c * 4 + 2]);
+                        bbox.set_ymax(locData[startIdx + c * 4 + 3]);
+                    }
                 }
             }
         }
 
@@ -124,6 +124,20 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         }
     }
 
+    // Loads the "scales" layer parameter into _scales, one float per entry.
+    // If getParameterDict reports nothing was retrieved, _scales is left as it was.
+    void getScales(const LayerParams &params)
+    {
+        DictValue scalesParameter;
+        if (!getParameterDict(params, "scales", scalesParameter))
+            return;
+
+        const int numScales = scalesParameter.size();
+        _scales.resize(numScales);
+        for (int idx = 0; idx < numScales; ++idx)
+            _scales[idx] = scalesParameter.get<float>(idx);
+    }
+
     void getVariance(const LayerParams &params)
     {
         DictValue varianceParameter;
@@ -169,13 +183,14 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         _flip = getParameter<bool>(params, "flip");
         _clip = getParameter<bool>(params, "clip");
 
+        _scales.clear();
         _aspectRatios.clear();
-        _aspectRatios.push_back(1.);
 
         getAspectRatios(params);
         getVariance(params);
+        getScales(params);
 
-        _numPriors = _aspectRatios.size();
+        _numPriors = _aspectRatios.size() + 1;  // + 1 for an aspect ratio 1.0
 
         _maxSize = -1;
         if (params.has("max_size"))
@@ -231,6 +246,11 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        if (_scales.empty())
+            _scales.resize(_numPriors, 1.0f);
+        else
+            CV_Assert(_scales.size() == _numPriors);
+
         int _layerWidth = inputs[0]->size[3];
         int _layerHeight = inputs[0]->size[2];
 
@@ -256,7 +276,7 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         {
             for (size_t w = 0; w < _layerWidth; ++w)
             {
-                _boxWidth = _boxHeight = _minSize;
+                _boxWidth = _boxHeight = _minSize * _scales[0];
 
                 float center_x = (w + 0.5) * stepX;
                 float center_y = (h + 0.5) * stepY;
@@ -272,7 +292,7 @@ class PriorBoxLayerImpl : public PriorBoxLayer
                 if (_maxSize > 0)
                 {
                     // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
-                    _boxWidth = _boxHeight = sqrt(_minSize * _maxSize);
+                    _boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1];
                     // xmin
                     outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                     // ymin
@@ -284,15 +304,13 @@ class PriorBoxLayerImpl : public PriorBoxLayer
                 }
 
                 // rest of priors
+                CV_Assert((_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size());
                 for (size_t r = 0; r < _aspectRatios.size(); ++r)
                 {
                     float ar = _aspectRatios[r];
-                    if (fabs(ar - 1.) < 1e-6)
-                    {
-                        continue;
-                    }
-                    _boxWidth = _minSize * sqrt(ar);
-                    _boxHeight = _minSize / sqrt(ar);
+                    float scale = _scales[(_maxSize > 0 ? 2 : 1) + r];
+                    _boxWidth = _minSize * sqrt(ar) * scale;
+                    _boxHeight = _minSize / sqrt(ar) * scale;
                     // xmin
                     outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                     // ymin
@@ -363,6 +381,7 @@ class PriorBoxLayerImpl : public PriorBoxLayer
 
     std::vector<float> _aspectRatios;
     std::vector<float> _variance;
+    std::vector<float> _scales;
 
     bool _flip;
     bool _clip;