Skip to content

Commit 75b980a

Browse files
committed
Merge pull request opencv#10120 from dkurt:remove_caffe_header_from_layer
2 parents 3cbe60c + 6c5dd5c commit 75b980a

File tree

1 file changed

+119
-104
lines changed

1 file changed

+119
-104
lines changed

modules/dnn/src/layers/detection_output_layer.cpp

Lines changed: 119 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
#include "layers_common.hpp"
4545
#include <float.h>
4646
#include <string>
47-
#include <caffe.pb.h>
4847
#include "../nms.inl.hpp"
4948

5049
namespace cv
@@ -55,14 +54,35 @@ namespace dnn
5554
namespace util
5655
{
5756

57+
// Axis-aligned bounding box described by two corners, (xmin, ymin) and (xmax, ymax),
// together with an optional cached size value guarded by an explicit "has size" flag.
// Used throughout this file in place of caffe::NormalizedBBox.
class NormalizedBBox
58+
{
59+
public:
60+
// Corner coordinates; read and written directly by callers.
float xmin, ymin, xmax, ymax;
61+
62+
// Starts with all corners at 0 and no cached size.
NormalizedBBox()
63+
: xmin(0), ymin(0), xmax(0), ymax(0), has_size_(false), size_(0) {}
64+
65+
// Returns the stored size value; this is 0 until set_size() is called and after clear_size().
float size() const { return size_; }
66+
67+
// True once set_size() has been called and clear_size() has not been called since.
bool has_size() const { return has_size_; }
68+
69+
// Stores the given size and marks it as present.
void set_size(float value) { size_ = value; has_size_ = true; }
70+
71+
// Resets the stored size to 0 and marks it as absent.
void clear_size() { size_ = 0; has_size_ = false; }
72+
73+
private:
74+
// Whether size_ currently holds a value supplied through set_size().
bool has_size_;
75+
// Cached size value; not recomputed automatically when the corners change.
float size_;
76+
};
77+
5878
// Comparison predicate on (score, payload) pairs: returns true when pair1's score
// (the .first member) is strictly greater than pair2's. The payload (.second) is ignored.
// Presumably passed to a sort routine to order by descending score; the call site is
// not visible in this excerpt.
template <typename T>
5979
static inline bool SortScorePairDescend(const std::pair<float, T>& pair1,
6080
const std::pair<float, T>& pair2)
6181
{
6282
return pair1.first > pair2.first;
6383
}
6484

65-
static inline float caffe_box_overlap(const caffe::NormalizedBBox& a, const caffe::NormalizedBBox& b);
85+
static inline float caffe_box_overlap(const util::NormalizedBBox& a, const util::NormalizedBBox& b);
6686

6787
} // namespace
6888

@@ -75,8 +95,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
7595

7696
int _backgroundLabelId;
7797

78-
typedef caffe::PriorBoxParameter_CodeType CodeType;
79-
CodeType _codeType;
98+
cv::String _codeType;
8099

81100
bool _varianceEncodedInTarget;
82101
int _keepTopK;
@@ -90,7 +109,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
90109
enum { _numAxes = 4 };
91110
static const std::string _layerName;
92111

93-
typedef std::map<int, std::vector<caffe::NormalizedBBox> > LabelBBox;
112+
typedef std::map<int, std::vector<util::NormalizedBBox> > LabelBBox;
94113

95114
bool getParameterDict(const LayerParams &params,
96115
const std::string &parameterName,
@@ -135,12 +154,10 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
135154
// Reads the "code_type" layer parameter, lower-cases it, and stores the selected
// box encoding in _codeType.
// Diff note: lines marked '-' are the previous enum-based version, lines marked '+'
// are the replacement that stores a string instead.
void getCodeType(const LayerParams &params)
136155
{
137156
// Lower-casing makes the comparison below case-insensitive.
String codeTypeString = params.get<String>("code_type").toLowerCase();
138-
if (codeTypeString == "corner")
139-
_codeType = caffe::PriorBoxParameter_CodeType_CORNER;
140-
else if (codeTypeString == "center_size")
141-
_codeType = caffe::PriorBoxParameter_CodeType_CENTER_SIZE;
157+
// New version: only "center_size" is recognised explicitly.
if (codeTypeString == "center_size")
158+
_codeType = "CENTER_SIZE";
142159
else
143-
_codeType = caffe::PriorBoxParameter_CodeType_CORNER;
160+
// Any other value (including "corner") falls back to the corner encoding.
_codeType = "CORNER";
144161
}
145162

146163
DetectionOutputLayerImpl(const LayerParams &params)
@@ -229,7 +246,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
229246
GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
230247

231248
// Retrieve all prior bboxes
232-
std::vector<caffe::NormalizedBBox> priorBBoxes;
249+
std::vector<util::NormalizedBBox> priorBBoxes;
233250
std::vector<std::vector<float> > priorVariances;
234251
GetPriorBBoxes(priorData, numPriors, priorBBoxes, priorVariances);
235252

@@ -310,7 +327,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
310327
GetConfidenceScores(confidenceData, num, numPriors, _numClasses, allConfidenceScores);
311328

312329
// Retrieve all prior bboxes
313-
std::vector<caffe::NormalizedBBox> priorBBoxes;
330+
std::vector<util::NormalizedBBox> priorBBoxes;
314331
std::vector<std::vector<float> > priorVariances;
315332
GetPriorBBoxes(priorData, numPriors, priorBBoxes, priorVariances);
316333

@@ -370,14 +387,14 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
370387
for (size_t j = 0; j < indices.size(); ++j, ++count)
371388
{
372389
int idx = indices[j];
373-
const caffe::NormalizedBBox& decode_bbox = label_bboxes->second[idx];
390+
const util::NormalizedBBox& decode_bbox = label_bboxes->second[idx];
374391
outputsData[count * 7] = i;
375392
outputsData[count * 7 + 1] = label;
376393
outputsData[count * 7 + 2] = scores[idx];
377-
outputsData[count * 7 + 3] = decode_bbox.xmin();
378-
outputsData[count * 7 + 4] = decode_bbox.ymin();
379-
outputsData[count * 7 + 5] = decode_bbox.xmax();
380-
outputsData[count * 7 + 6] = decode_bbox.ymax();
394+
outputsData[count * 7 + 3] = decode_bbox.xmin;
395+
outputsData[count * 7 + 4] = decode_bbox.ymin;
396+
outputsData[count * 7 + 5] = decode_bbox.xmax;
397+
outputsData[count * 7 + 6] = decode_bbox.ymax;
381398
}
382399
}
383400
return count;
@@ -454,9 +471,9 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
454471

455472
// Compute bbox size
456473
template<bool normalized>
457-
static float BBoxSize(const caffe::NormalizedBBox& bbox)
474+
static float BBoxSize(const util::NormalizedBBox& bbox)
458475
{
459-
if (bbox.xmax() < bbox.xmin() || bbox.ymax() < bbox.ymin())
476+
if (bbox.xmax < bbox.xmin || bbox.ymax < bbox.ymin)
460477
{
461478
return 0; // If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
462479
}
@@ -468,8 +485,8 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
468485
}
469486
else
470487
{
471-
float width = bbox.xmax() - bbox.xmin();
472-
float height = bbox.ymax() - bbox.ymin();
488+
float width = bbox.xmax - bbox.xmin;
489+
float height = bbox.ymax - bbox.ymin;
473490
if (normalized)
474491
{
475492
return width * height;
@@ -487,66 +504,64 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
487504
// Decode a bbox according to a prior bbox
//
// Template parameter:
//   variance_encoded_in_target - when true the four values of `bbox` are used as-is;
//                                when false each is first multiplied by the matching
//                                entry of `prior_variance` (indices 0..3).
// Parameters:
//   prior_bbox     - reference box the prediction is relative to.
//   prior_variance - four scaling factors, only read when variance_encoded_in_target is false.
//   code_type      - "CORNER" or "CENTER_SIZE" in the new ('+') version; any other value
//                    raises Error::StsBadArg.
//   clip_bbox      - when true every decoded corner is clamped to [0, 1].
//   bbox           - the encoded prediction.
//   decode_bbox    - output; corners are overwritten and its cached size is recomputed.
//
// Diff note: lines marked '-' are the previous caffe::NormalizedBBox / enum-switch version,
// lines marked '+' are the replacement using util::NormalizedBBox and string comparison.
488505
template<bool variance_encoded_in_target>
489506
static void DecodeBBox(
490-
const caffe::NormalizedBBox& prior_bbox, const std::vector<float>& prior_variance,
491-
const CodeType code_type,
492-
const bool clip_bbox, const caffe::NormalizedBBox& bbox,
493-
caffe::NormalizedBBox& decode_bbox)
507+
const util::NormalizedBBox& prior_bbox, const std::vector<float>& prior_variance,
508+
const cv::String& code_type,
509+
const bool clip_bbox, const util::NormalizedBBox& bbox,
510+
util::NormalizedBBox& decode_bbox)
494511
{
495-
float bbox_xmin = variance_encoded_in_target ? bbox.xmin() : prior_variance[0] * bbox.xmin();
496-
float bbox_ymin = variance_encoded_in_target ? bbox.ymin() : prior_variance[1] * bbox.ymin();
497-
float bbox_xmax = variance_encoded_in_target ? bbox.xmax() : prior_variance[2] * bbox.xmax();
498-
float bbox_ymax = variance_encoded_in_target ? bbox.ymax() : prior_variance[3] * bbox.ymax();
499-
switch(code_type)
500-
{
501-
case caffe::PriorBoxParameter_CodeType_CORNER:
502-
decode_bbox.set_xmin(prior_bbox.xmin() + bbox_xmin);
503-
decode_bbox.set_ymin(prior_bbox.ymin() + bbox_ymin);
504-
decode_bbox.set_xmax(prior_bbox.xmax() + bbox_xmax);
505-
decode_bbox.set_ymax(prior_bbox.ymax() + bbox_ymax);
506-
break;
507-
case caffe::PriorBoxParameter_CodeType_CENTER_SIZE:
508-
{
509-
float prior_width = prior_bbox.xmax() - prior_bbox.xmin();
510-
CV_Assert(prior_width > 0);
511-
float prior_height = prior_bbox.ymax() - prior_bbox.ymin();
512-
CV_Assert(prior_height > 0);
513-
float prior_center_x = (prior_bbox.xmin() + prior_bbox.xmax()) * .5;
514-
float prior_center_y = (prior_bbox.ymin() + prior_bbox.ymax()) * .5;
515-
516-
float decode_bbox_center_x, decode_bbox_center_y;
517-
float decode_bbox_width, decode_bbox_height;
518-
decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
519-
decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
520-
decode_bbox_width = exp(bbox_xmax) * prior_width;
521-
decode_bbox_height = exp(bbox_ymax) * prior_height;
522-
decode_bbox.set_xmin(decode_bbox_center_x - decode_bbox_width * .5);
523-
decode_bbox.set_ymin(decode_bbox_center_y - decode_bbox_height * .5);
524-
decode_bbox.set_xmax(decode_bbox_center_x + decode_bbox_width * .5);
525-
decode_bbox.set_ymax(decode_bbox_center_y + decode_bbox_height * .5);
526-
break;
527-
}
528-
default:
529-
CV_ErrorNoReturn(Error::StsBadArg, "Unknown type.");
530-
};
512+
// Apply the per-coordinate variance unless it is already folded into the prediction.
float bbox_xmin = variance_encoded_in_target ? bbox.xmin : prior_variance[0] * bbox.xmin;
513+
float bbox_ymin = variance_encoded_in_target ? bbox.ymin : prior_variance[1] * bbox.ymin;
514+
float bbox_xmax = variance_encoded_in_target ? bbox.xmax : prior_variance[2] * bbox.xmax;
515+
float bbox_ymax = variance_encoded_in_target ? bbox.ymax : prior_variance[3] * bbox.ymax;
516+
// CORNER: the prediction is a per-corner offset added to the prior's corners.
if (code_type == "CORNER")
517+
{
518+
decode_bbox.xmin = prior_bbox.xmin + bbox_xmin;
519+
decode_bbox.ymin = prior_bbox.ymin + bbox_ymin;
520+
decode_bbox.xmax = prior_bbox.xmax + bbox_xmax;
521+
decode_bbox.ymax = prior_bbox.ymax + bbox_ymax;
522+
}
523+
// CENTER_SIZE: the first two values shift the prior's centre (scaled by the prior's
// width/height); the last two are exponentiated and scale the prior's width/height.
else if (code_type == "CENTER_SIZE")
524+
{
525+
float prior_width = prior_bbox.xmax - prior_bbox.xmin;
526+
// A degenerate prior (non-positive extent) is rejected.
CV_Assert(prior_width > 0);
527+
float prior_height = prior_bbox.ymax - prior_bbox.ymin;
528+
CV_Assert(prior_height > 0);
529+
float prior_center_x = (prior_bbox.xmin + prior_bbox.xmax) * .5;
530+
float prior_center_y = (prior_bbox.ymin + prior_bbox.ymax) * .5;
531+
532+
float decode_bbox_center_x, decode_bbox_center_y;
533+
float decode_bbox_width, decode_bbox_height;
534+
decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
535+
decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
536+
decode_bbox_width = exp(bbox_xmax) * prior_width;
537+
decode_bbox_height = exp(bbox_ymax) * prior_height;
538+
// Convert centre/size back to corner form.
decode_bbox.xmin = decode_bbox_center_x - decode_bbox_width * .5;
539+
decode_bbox.ymin = decode_bbox_center_y - decode_bbox_height * .5;
540+
decode_bbox.xmax = decode_bbox_center_x + decode_bbox_width * .5;
541+
decode_bbox.ymax = decode_bbox_center_y + decode_bbox_height * .5;
542+
}
543+
else
544+
CV_ErrorNoReturn(Error::StsBadArg, "Unknown type.");
545+
531546
if (clip_bbox)
532547
{
533-
// Clip the caffe::NormalizedBBox such that the range for each corner is [0, 1]
534-
decode_bbox.set_xmin(std::max(std::min(decode_bbox.xmin(), 1.f), 0.f));
535-
decode_bbox.set_ymin(std::max(std::min(decode_bbox.ymin(), 1.f), 0.f));
536-
decode_bbox.set_xmax(std::max(std::min(decode_bbox.xmax(), 1.f), 0.f));
537-
decode_bbox.set_ymax(std::max(std::min(decode_bbox.ymax(), 1.f), 0.f));
548+
// Clip the util::NormalizedBBox such that the range for each corner is [0, 1]
549+
decode_bbox.xmin = std::max(std::min(decode_bbox.xmin, 1.f), 0.f);
550+
decode_bbox.ymin = std::max(std::min(decode_bbox.ymin, 1.f), 0.f);
551+
decode_bbox.xmax = std::max(std::min(decode_bbox.xmax, 1.f), 0.f);
552+
decode_bbox.ymax = std::max(std::min(decode_bbox.ymax, 1.f), 0.f);
538553
}
539554
// Drop any previously cached size, then store the size computed from the final corners.
decode_bbox.clear_size();
540555
decode_bbox.set_size(BBoxSize<true>(decode_bbox));
541556
}
542557

543558
// Decode a set of bboxes according to a set of prior bboxes
544559
static void DecodeBBoxes(
545-
const std::vector<caffe::NormalizedBBox>& prior_bboxes,
560+
const std::vector<util::NormalizedBBox>& prior_bboxes,
546561
const std::vector<std::vector<float> >& prior_variances,
547-
const CodeType code_type, const bool variance_encoded_in_target,
548-
const bool clip_bbox, const std::vector<caffe::NormalizedBBox>& bboxes,
549-
std::vector<caffe::NormalizedBBox>& decode_bboxes)
562+
const cv::String& code_type, const bool variance_encoded_in_target,
563+
const bool clip_bbox, const std::vector<util::NormalizedBBox>& bboxes,
564+
std::vector<util::NormalizedBBox>& decode_bboxes)
550565
{
551566
CV_Assert(prior_bboxes.size() == prior_variances.size());
552567
CV_Assert(prior_bboxes.size() == bboxes.size());
@@ -569,11 +584,11 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
569584

570585
// Decode all bboxes in a batch
571586
static void DecodeBBoxesAll(const std::vector<LabelBBox>& all_loc_preds,
572-
const std::vector<caffe::NormalizedBBox>& prior_bboxes,
587+
const std::vector<util::NormalizedBBox>& prior_bboxes,
573588
const std::vector<std::vector<float> >& prior_variances,
574589
const int num, const bool share_location,
575590
const int num_loc_classes, const int background_label_id,
576-
const CodeType code_type, const bool variance_encoded_in_target,
591+
const cv::String& code_type, const bool variance_encoded_in_target,
577592
const bool clip, std::vector<LabelBBox>& all_decode_bboxes)
578593
{
579594
CV_Assert(all_loc_preds.size() == num);
@@ -602,22 +617,22 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
602617
// Get prior bounding boxes from prior_data
603618
// prior_data: 1 x 2 x num_priors * 4 x 1 blob.
604619
// num_priors: number of priors.
605-
// prior_bboxes: stores all the prior bboxes in the format of caffe::NormalizedBBox.
620+
// prior_bboxes: stores all the prior bboxes in the format of util::NormalizedBBox.
606621
// prior_variances: stores all the variances needed by prior bboxes.
607622
static void GetPriorBBoxes(const float* priorData, const int& numPriors,
608-
std::vector<caffe::NormalizedBBox>& priorBBoxes,
623+
std::vector<util::NormalizedBBox>& priorBBoxes,
609624
std::vector<std::vector<float> >& priorVariances)
610625
{
611626
priorBBoxes.clear(); priorBBoxes.resize(numPriors);
612627
priorVariances.clear(); priorVariances.resize(numPriors);
613628
for (int i = 0; i < numPriors; ++i)
614629
{
615630
int startIdx = i * 4;
616-
caffe::NormalizedBBox& bbox = priorBBoxes[i];
617-
bbox.set_xmin(priorData[startIdx]);
618-
bbox.set_ymin(priorData[startIdx + 1]);
619-
bbox.set_xmax(priorData[startIdx + 2]);
620-
bbox.set_ymax(priorData[startIdx + 3]);
631+
util::NormalizedBBox& bbox = priorBBoxes[i];
632+
bbox.xmin = priorData[startIdx];
633+
bbox.ymin = priorData[startIdx + 1];
634+
bbox.xmax = priorData[startIdx + 2];
635+
bbox.ymax = priorData[startIdx + 3];
621636
bbox.set_size(BBoxSize<true>(bbox));
622637
}
623638

@@ -667,20 +682,20 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
667682
{
668683
labelBBox[label].resize(numPredsPerClass);
669684
}
670-
caffe::NormalizedBBox& bbox = labelBBox[label][p];
685+
util::NormalizedBBox& bbox = labelBBox[label][p];
671686
if (locPredTransposed)
672687
{
673-
bbox.set_ymin(locData[startIdx + c * 4]);
674-
bbox.set_xmin(locData[startIdx + c * 4 + 1]);
675-
bbox.set_ymax(locData[startIdx + c * 4 + 2]);
676-
bbox.set_xmax(locData[startIdx + c * 4 + 3]);
688+
bbox.ymin = locData[startIdx + c * 4];
689+
bbox.xmin = locData[startIdx + c * 4 + 1];
690+
bbox.ymax = locData[startIdx + c * 4 + 2];
691+
bbox.xmax = locData[startIdx + c * 4 + 3];
677692
}
678693
else
679694
{
680-
bbox.set_xmin(locData[startIdx + c * 4]);
681-
bbox.set_ymin(locData[startIdx + c * 4 + 1]);
682-
bbox.set_xmax(locData[startIdx + c * 4 + 2]);
683-
bbox.set_ymax(locData[startIdx + c * 4 + 3]);
695+
bbox.xmin = locData[startIdx + c * 4];
696+
bbox.ymin = locData[startIdx + c * 4 + 1];
697+
bbox.xmax = locData[startIdx + c * 4 + 2];
698+
bbox.ymax = locData[startIdx + c * 4 + 3];
684699
}
685700
}
686701
}
@@ -717,30 +732,30 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
717732

718733
// Compute the jaccard (intersection over union IoU) overlap between two bboxes.
719734
template<bool normalized>
720-
static float JaccardOverlap(const caffe::NormalizedBBox& bbox1,
721-
const caffe::NormalizedBBox& bbox2)
735+
static float JaccardOverlap(const util::NormalizedBBox& bbox1,
736+
const util::NormalizedBBox& bbox2)
722737
{
723-
caffe::NormalizedBBox intersect_bbox;
724-
if (bbox2.xmin() > bbox1.xmax() || bbox2.xmax() < bbox1.xmin() ||
725-
bbox2.ymin() > bbox1.ymax() || bbox2.ymax() < bbox1.ymin())
738+
util::NormalizedBBox intersect_bbox;
739+
if (bbox2.xmin > bbox1.xmax || bbox2.xmax < bbox1.xmin ||
740+
bbox2.ymin > bbox1.ymax || bbox2.ymax < bbox1.ymin)
726741
{
727742
// Return [0, 0, 0, 0] if there is no intersection.
728-
intersect_bbox.set_xmin(0);
729-
intersect_bbox.set_ymin(0);
730-
intersect_bbox.set_xmax(0);
731-
intersect_bbox.set_ymax(0);
743+
intersect_bbox.xmin = 0;
744+
intersect_bbox.ymin = 0;
745+
intersect_bbox.xmax = 0;
746+
intersect_bbox.ymax = 0;
732747
}
733748
else
734749
{
735-
intersect_bbox.set_xmin(std::max(bbox1.xmin(), bbox2.xmin()));
736-
intersect_bbox.set_ymin(std::max(bbox1.ymin(), bbox2.ymin()));
737-
intersect_bbox.set_xmax(std::min(bbox1.xmax(), bbox2.xmax()));
738-
intersect_bbox.set_ymax(std::min(bbox1.ymax(), bbox2.ymax()));
750+
intersect_bbox.xmin = std::max(bbox1.xmin, bbox2.xmin);
751+
intersect_bbox.ymin = std::max(bbox1.ymin, bbox2.ymin);
752+
intersect_bbox.xmax = std::min(bbox1.xmax, bbox2.xmax);
753+
intersect_bbox.ymax = std::min(bbox1.ymax, bbox2.ymax);
739754
}
740755

741756
float intersect_width, intersect_height;
742-
intersect_width = intersect_bbox.xmax() - intersect_bbox.xmin();
743-
intersect_height = intersect_bbox.ymax() - intersect_bbox.ymin();
757+
intersect_width = intersect_bbox.xmax - intersect_bbox.xmin;
758+
intersect_height = intersect_bbox.ymax - intersect_bbox.ymin;
744759
if (intersect_width > 0 && intersect_height > 0)
745760
{
746761
if (!normalized)
@@ -760,7 +775,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
760775
}
761776
};
762777

763-
// Out-of-class definition of the overlap helper declared in namespace util.
// Forwards both boxes to DetectionOutputLayerImpl::JaccardOverlap with the
// `normalized` template argument set to true and returns its result.
// Diff note: the first signature below is the removed ('-') caffe::NormalizedBBox version,
// the second is the added ('+') util::NormalizedBBox version.
float util::caffe_box_overlap(const caffe::NormalizedBBox& a, const caffe::NormalizedBBox& b)
778+
float util::caffe_box_overlap(const util::NormalizedBBox& a, const util::NormalizedBBox& b)
764779
{
765780
return DetectionOutputLayerImpl::JaccardOverlap<true>(a, b);
766781
}

0 commit comments

Comments
 (0)