@@ -81,6 +81,8 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
81
81
82
82
float _nmsThreshold;
83
83
int _topK;
84
+ // Whether predicted bounding boxes are represented in YXHW instead of XYWH layout.
85
+ bool _locPredTransposed;
84
86
85
87
enum { _numAxes = 4 };
86
88
static const std::string _layerName;
@@ -148,6 +150,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
148
150
_keepTopK = getParameter<int >(params, " keep_top_k" );
149
151
_confidenceThreshold = getParameter<float >(params, " confidence_threshold" , 0 , false , -FLT_MAX);
150
152
_topK = getParameter<int >(params, " top_k" , 0 , false , -1 );
153
+ _locPredTransposed = getParameter<bool >(params, " loc_pred_transposed" , 0 , false , false );
151
154
152
155
getCodeType (params);
153
156
@@ -209,7 +212,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
209
212
// Retrieve all location predictions
210
213
std::vector<LabelBBox> allLocationPredictions;
211
214
GetLocPredictions (locationData, num, numPriors, _numLocClasses,
212
- _shareLocation, allLocationPredictions);
215
+ _shareLocation, _locPredTransposed, allLocationPredictions);
213
216
214
217
// Retrieve all confidences
215
218
GetConfidenceScores (confidenceData, num, numPriors, _numClasses, allConfidenceScores);
@@ -540,11 +543,14 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
540
543
// num_loc_classes: number of location classes. It is 1 if share_location is
541
544
// true; and is equal to number of classes needed to predict otherwise.
542
545
// share_location: if true, all classes share the same location prediction.
546
+ // loc_pred_transposed: if true, the four bounding box values are stored as
547
+ // [y,x,height,width]; otherwise they are stored as [x,y,width,height].
543
548
// loc_preds: stores the location prediction, where each item contains
544
549
// location prediction for an image.
545
550
static void GetLocPredictions (const float * locData, const int num,
546
551
const int numPredsPerClass, const int numLocClasses,
547
- const bool shareLocation, std::vector<LabelBBox>& locPreds)
552
+ const bool shareLocation, const bool locPredTransposed,
553
+ std::vector<LabelBBox>& locPreds)
548
554
{
549
555
locPreds.clear ();
550
556
if (shareLocation)
@@ -566,10 +572,20 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
566
572
labelBBox[label].resize (numPredsPerClass);
567
573
}
568
574
caffe::NormalizedBBox& bbox = labelBBox[label][p];
569
- bbox.set_xmin (locData[startIdx + c * 4 ]);
570
- bbox.set_ymin (locData[startIdx + c * 4 + 1 ]);
571
- bbox.set_xmax (locData[startIdx + c * 4 + 2 ]);
572
- bbox.set_ymax (locData[startIdx + c * 4 + 3 ]);
575
+ if (locPredTransposed)
576
+ {
577
+ bbox.set_ymin (locData[startIdx + c * 4 ]);
578
+ bbox.set_xmin (locData[startIdx + c * 4 + 1 ]);
579
+ bbox.set_ymax (locData[startIdx + c * 4 + 2 ]);
580
+ bbox.set_xmax (locData[startIdx + c * 4 + 3 ]);
581
+ }
582
+ else
583
+ {
584
+ bbox.set_xmin (locData[startIdx + c * 4 ]);
585
+ bbox.set_ymin (locData[startIdx + c * 4 + 1 ]);
586
+ bbox.set_xmax (locData[startIdx + c * 4 + 2 ]);
587
+ bbox.set_ymax (locData[startIdx + c * 4 + 3 ]);
588
+ }
573
589
}
574
590
}
575
591
}
0 commit comments