
Commit 0460452
Merge pull request opencv#9750 from dkurt:feature_dnn_tf_text_graph
2 parents: 7d55c09 + e4aa39f

File tree: 10 files changed, +538 −85 lines

modules/dnn/include/opencv2/dnn/dnn.hpp

Lines changed: 1 addition & 1 deletion
@@ -629,7 +629,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     /** @brief Reads a network model stored in Tensorflow model file.
      *  @details This is shortcut consisting from createTensorflowImporter and Net::populateNet calls.
      */
-    CV_EXPORTS_W Net readNetFromTensorflow(const String &model);
+    CV_EXPORTS_W Net readNetFromTensorflow(const String &model, const String &config = String());
 
     /** @brief Reads a network model stored in Torch model file.
      *  @details This is shortcut consisting from createTorchImporter and Net::populateNet calls.
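With the added config argument, the TensorFlow importer can take both a binary .pb graph and an optional text graph definition. A minimal usage sketch, assuming hypothetical file names frozen_inference_graph.pb and graph.pbtxt:

#include <opencv2/dnn.hpp>

int main()
{
    // As before: load only the binary frozen graph.
    cv::dnn::Net net = cv::dnn::readNetFromTensorflow("frozen_inference_graph.pb");

    // New: additionally pass a text graph (.pbtxt) describing the network topology.
    cv::dnn::Net netWithConfig =
        cv::dnn::readNetFromTensorflow("frozen_inference_graph.pb", "graph.pbtxt");

    return 0;
}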

modules/dnn/src/layers/detection_output_layer.cpp

Lines changed: 22 additions & 6 deletions
@@ -81,6 +81,8 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
 
     float _nmsThreshold;
     int _topK;
+    // Whenever predicted bounding boxes are respresented in YXHW instead of XYWH layout.
+    bool _locPredTransposed;
 
     enum { _numAxes = 4 };
     static const std::string _layerName;
@@ -148,6 +150,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
         _keepTopK = getParameter<int>(params, "keep_top_k");
         _confidenceThreshold = getParameter<float>(params, "confidence_threshold", 0, false, -FLT_MAX);
         _topK = getParameter<int>(params, "top_k", 0, false, -1);
+        _locPredTransposed = getParameter<bool>(params, "loc_pred_transposed", 0, false, false);
 
         getCodeType(params);
 
@@ -209,7 +212,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
         // Retrieve all location predictions
         std::vector<LabelBBox> allLocationPredictions;
         GetLocPredictions(locationData, num, numPriors, _numLocClasses,
-                          _shareLocation, allLocationPredictions);
+                          _shareLocation, _locPredTransposed, allLocationPredictions);
 
         // Retrieve all confidences
         GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
@@ -540,11 +543,14 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
     // num_loc_classes: number of location classes. It is 1 if share_location is
     //    true; and is equal to number of classes needed to predict otherwise.
     // share_location: if true, all classes share the same location prediction.
+    // loc_pred_transposed: if true, represent four bounding box values as
+    //    [y,x,height,width] or [x,y,width,height] otherwise.
     // loc_preds: stores the location prediction, where each item contains
     //    location prediction for an image.
     static void GetLocPredictions(const float* locData, const int num,
                                   const int numPredsPerClass, const int numLocClasses,
-                                  const bool shareLocation, std::vector<LabelBBox>& locPreds)
+                                  const bool shareLocation, const bool locPredTransposed,
+                                  std::vector<LabelBBox>& locPreds)
     {
         locPreds.clear();
         if (shareLocation)
@@ -566,10 +572,20 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
                     labelBBox[label].resize(numPredsPerClass);
                 }
                 caffe::NormalizedBBox& bbox = labelBBox[label][p];
-                bbox.set_xmin(locData[startIdx + c * 4]);
-                bbox.set_ymin(locData[startIdx + c * 4 + 1]);
-                bbox.set_xmax(locData[startIdx + c * 4 + 2]);
-                bbox.set_ymax(locData[startIdx + c * 4 + 3]);
+                if (locPredTransposed)
+                {
+                    bbox.set_ymin(locData[startIdx + c * 4]);
+                    bbox.set_xmin(locData[startIdx + c * 4 + 1]);
+                    bbox.set_ymax(locData[startIdx + c * 4 + 2]);
+                    bbox.set_xmax(locData[startIdx + c * 4 + 3]);
+                }
+                else
+                {
+                    bbox.set_xmin(locData[startIdx + c * 4]);
+                    bbox.set_ymin(locData[startIdx + c * 4 + 1]);
+                    bbox.set_xmax(locData[startIdx + c * 4 + 2]);
+                    bbox.set_ymax(locData[startIdx + c * 4 + 3]);
+                }
             }
         }
     }
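The loc_pred_transposed flag only changes the order in which the four regression values of each box are read. A standalone sketch of that reordering (Box and decodeBox are illustrative helpers, not part of the layer):

#include <cstddef>

struct Box { float xmin, ymin, xmax, ymax; };

// Reads four consecutive location values starting at locData[offset].
// When locPredTransposed is true they are taken as [ymin, xmin, ymax, xmax],
// otherwise as [xmin, ymin, xmax, ymax], mirroring the branch added above.
static Box decodeBox(const float* locData, size_t offset, bool locPredTransposed)
{
    Box b;
    if (locPredTransposed)
    {
        b.ymin = locData[offset];
        b.xmin = locData[offset + 1];
        b.ymax = locData[offset + 2];
        b.xmax = locData[offset + 3];
    }
    else
    {
        b.xmin = locData[offset];
        b.ymin = locData[offset + 1];
        b.xmax = locData[offset + 2];
        b.ymax = locData[offset + 3];
    }
    return b;
}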

modules/dnn/src/layers/prior_box_layer.cpp

Lines changed: 29 additions & 10 deletions
@@ -124,6 +124,20 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         }
     }
 
+    void getScales(const LayerParams &params)
+    {
+        DictValue scalesParameter;
+        bool scalesRetieved = getParameterDict(params, "scales", scalesParameter);
+        if (scalesRetieved)
+        {
+            _scales.resize(scalesParameter.size());
+            for (int i = 0; i < scalesParameter.size(); ++i)
+            {
+                _scales[i] = scalesParameter.get<float>(i);
+            }
+        }
+    }
+
     void getVariance(const LayerParams &params)
     {
         DictValue varianceParameter;
@@ -169,13 +183,14 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         _flip = getParameter<bool>(params, "flip");
         _clip = getParameter<bool>(params, "clip");
 
+        _scales.clear();
         _aspectRatios.clear();
-        _aspectRatios.push_back(1.);
 
         getAspectRatios(params);
         getVariance(params);
+        getScales(params);
 
-        _numPriors = _aspectRatios.size();
+        _numPriors = _aspectRatios.size() + 1;  // + 1 for an aspect ratio 1.0
 
         _maxSize = -1;
         if (params.has("max_size"))
@@ -231,6 +246,11 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
+        if (_scales.empty())
+            _scales.resize(_numPriors, 1.0f);
+        else
+            CV_Assert(_scales.size() == _numPriors);
+
         int _layerWidth = inputs[0]->size[3];
         int _layerHeight = inputs[0]->size[2];
 
@@ -256,7 +276,7 @@ class PriorBoxLayerImpl : public PriorBoxLayer
         {
             for (size_t w = 0; w < _layerWidth; ++w)
             {
-                _boxWidth = _boxHeight = _minSize;
+                _boxWidth = _boxHeight = _minSize * _scales[0];
 
                 float center_x = (w + 0.5) * stepX;
                 float center_y = (h + 0.5) * stepY;
@@ -272,7 +292,7 @@ class PriorBoxLayerImpl : public PriorBoxLayer
                 if (_maxSize > 0)
                 {
                     // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
-                    _boxWidth = _boxHeight = sqrt(_minSize * _maxSize);
+                    _boxWidth = _boxHeight = sqrt(_minSize * _maxSize) * _scales[1];
                     // xmin
                     outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                     // ymin
@@ -284,15 +304,13 @@ class PriorBoxLayerImpl : public PriorBoxLayer
                 }
 
                 // rest of priors
+                CV_Assert((_maxSize > 0 ? 2 : 1) + _aspectRatios.size() == _scales.size());
                 for (size_t r = 0; r < _aspectRatios.size(); ++r)
                 {
                     float ar = _aspectRatios[r];
-                    if (fabs(ar - 1.) < 1e-6)
-                    {
-                        continue;
-                    }
-                    _boxWidth = _minSize * sqrt(ar);
-                    _boxHeight = _minSize / sqrt(ar);
+                    float scale = _scales[(_maxSize > 0 ? 2 : 1) + r];
+                    _boxWidth = _minSize * sqrt(ar) * scale;
+                    _boxHeight = _minSize / sqrt(ar) * scale;
                     // xmin
                     outputPtr[idx++] = (center_x - _boxWidth / 2.) / _imageWidth;
                     // ymin
@@ -363,6 +381,7 @@ class PriorBoxLayerImpl : public PriorBoxLayer
 
     std::vector<float> _aspectRatios;
     std::vector<float> _variance;
+    std::vector<float> _scales;
 
     bool _flip;
     bool _clip;
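The new scales vector is indexed in the same order the priors are emitted: entry 0 scales the min_size prior, entry 1 the sqrt(min_size * max_size) prior when max_size is set, and the remaining entries scale the aspect-ratio priors. A small sketch of that mapping (computePriorSizes is an illustrative helper, not layer code):

#include <cmath>
#include <utility>
#include <vector>

// Returns (width, height) for every prior of one grid cell, applying the
// per-prior scales in the same order the layer generates boxes.
static std::vector<std::pair<float, float> > computePriorSizes(
    float minSize, float maxSize,
    const std::vector<float>& aspectRatios,
    const std::vector<float>& scales)
{
    std::vector<std::pair<float, float> > sizes;

    // First prior: square box of side min_size, scaled by scales[0].
    sizes.push_back(std::make_pair(minSize * scales[0], minSize * scales[0]));

    // Second prior, only if max_size is given: side sqrt(min_size * max_size), scaled by scales[1].
    if (maxSize > 0)
    {
        float s = std::sqrt(minSize * maxSize) * scales[1];
        sizes.push_back(std::make_pair(s, s));
    }

    // Remaining priors: one per aspect ratio, each scaled by its matching entry.
    size_t offset = (maxSize > 0) ? 2 : 1;
    for (size_t r = 0; r < aspectRatios.size(); ++r)
    {
        float ar = aspectRatios[r];
        float scale = scales[offset + r];
        sizes.push_back(std::make_pair(minSize * std::sqrt(ar) * scale,
                                       minSize / std::sqrt(ar) * scale));
    }
    return sizes;
}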
