45
45
#include < float.h>
46
46
#include < string>
47
47
#include " ../nms.inl.hpp"
48
+ #include " opencl_kernels_dnn.hpp"
48
49
49
50
namespace cv
50
51
{
@@ -211,11 +212,160 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
211
212
return false ;
212
213
}
213
214
215
+ #ifdef HAVE_OPENCL
216
+ // Decode all bboxes in a batch
217
+ bool ocl_DecodeBBoxesAll (UMat& loc_mat, UMat& prior_mat,
218
+ const int num, const int numPriors, const bool share_location,
219
+ const int num_loc_classes, const int background_label_id,
220
+ const cv::String& code_type, const bool variance_encoded_in_target,
221
+ const bool clip, std::vector<LabelBBox>& all_decode_bboxes)
222
+ {
223
+ UMat outmat = UMat (loc_mat.dims , loc_mat.size , CV_32F);
224
+ size_t nthreads = loc_mat.total ();
225
+ String kernel_name;
226
+
227
+ if (code_type == " CORNER" )
228
+ kernel_name = " DecodeBBoxesCORNER" ;
229
+ else if (code_type == " CENTER_SIZE" )
230
+ kernel_name = " DecodeBBoxesCENTER_SIZE" ;
231
+ else
232
+ return false ;
233
+
234
+ for (int i = 0 ; i < num; ++i)
235
+ {
236
+ ocl::Kernel kernel (kernel_name.c_str (), ocl::dnn::detection_output_oclsrc);
237
+ kernel.set (0 , (int )nthreads);
238
+ kernel.set (1 , ocl::KernelArg::PtrReadOnly (loc_mat));
239
+ kernel.set (2 , ocl::KernelArg::PtrReadOnly (prior_mat));
240
+ kernel.set (3 , (int )variance_encoded_in_target);
241
+ kernel.set (4 , (int )numPriors);
242
+ kernel.set (5 , (int )share_location);
243
+ kernel.set (6 , (int )num_loc_classes);
244
+ kernel.set (7 , (int )background_label_id);
245
+ kernel.set (8 , (int )clip);
246
+ kernel.set (9 , ocl::KernelArg::PtrWriteOnly (outmat));
247
+
248
+ if (!kernel.run (1 , &nthreads, NULL , false ))
249
+ return false ;
250
+ }
251
+
252
+ all_decode_bboxes.clear ();
253
+ all_decode_bboxes.resize (num);
254
+ {
255
+ Mat mat = outmat.getMat (ACCESS_READ);
256
+ const float * decode_data = mat.ptr <float >();
257
+ for (int i = 0 ; i < num; ++i)
258
+ {
259
+ LabelBBox& decode_bboxes = all_decode_bboxes[i];
260
+ for (int c = 0 ; c < num_loc_classes; ++c)
261
+ {
262
+ int label = share_location ? -1 : c;
263
+ decode_bboxes[label].resize (numPriors);
264
+ for (int p = 0 ; p < numPriors; ++p)
265
+ {
266
+ int startIdx = p * num_loc_classes * 4 ;
267
+ util::NormalizedBBox& bbox = decode_bboxes[label][p];
268
+ bbox.xmin = decode_data[startIdx + c * 4 ];
269
+ bbox.ymin = decode_data[startIdx + c * 4 + 1 ];
270
+ bbox.xmax = decode_data[startIdx + c * 4 + 2 ];
271
+ bbox.ymax = decode_data[startIdx + c * 4 + 3 ];
272
+ }
273
+ }
274
+ }
275
+ }
276
+ return true ;
277
+ }
278
+
279
+ void ocl_GetConfidenceScores (const UMat& inp1, const int num,
280
+ const int numPredsPerClass, const int numClasses,
281
+ std::vector<Mat>& confPreds)
282
+ {
283
+ int shape[] = { numClasses, numPredsPerClass };
284
+ for (int i = 0 ; i < num; i++)
285
+ confPreds.push_back (Mat (2 , shape, CV_32F));
286
+
287
+ UMat umat = inp1.reshape (1 , num * numPredsPerClass);
288
+ for (int i = 0 ; i < num; ++i)
289
+ {
290
+ Range ranges[] = { Range (i * numPredsPerClass, (i + 1 ) * numPredsPerClass), Range::all () };
291
+ transpose (umat (ranges), confPreds[i]);
292
+ }
293
+ }
294
+
295
+ bool forward_ocl (InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
296
+ {
297
+ std::vector<UMat> inputs;
298
+ std::vector<UMat> outputs;
299
+
300
+ inps.getUMatVector (inputs);
301
+ outs.getUMatVector (outputs);
302
+
303
+ std::vector<LabelBBox> allDecodedBBoxes;
304
+ std::vector<Mat> allConfidenceScores;
305
+
306
+ int num = inputs[0 ].size [0 ];
307
+
308
+ // extract predictions from input layers
309
+ {
310
+ int numPriors = inputs[2 ].size [2 ] / 4 ;
311
+
312
+ // Retrieve all confidences
313
+ ocl_GetConfidenceScores (inputs[1 ], num, numPriors, _numClasses, allConfidenceScores);
314
+
315
+ // Decode all loc predictions to bboxes
316
+ bool ret = ocl_DecodeBBoxesAll (inputs[0 ], inputs[2 ], num, numPriors,
317
+ _shareLocation, _numLocClasses, _backgroundLabelId,
318
+ _codeType, _varianceEncodedInTarget, false ,
319
+ allDecodedBBoxes);
320
+ if (!ret)
321
+ return false ;
322
+ }
323
+
324
+ size_t numKept = 0 ;
325
+ std::vector<std::map<int , std::vector<int > > > allIndices;
326
+ for (int i = 0 ; i < num; ++i)
327
+ {
328
+ numKept += processDetections_ (allDecodedBBoxes[i], allConfidenceScores[i], allIndices);
329
+ }
330
+
331
+ if (numKept == 0 )
332
+ {
333
+ // Set confidences to zeros.
334
+ Range ranges[] = {Range::all (), Range::all (), Range::all (), Range (2 , 3 )};
335
+ outputs[0 ](ranges).setTo (0 );
336
+ return true ;
337
+ }
338
+ int outputShape[] = {1 , 1 , (int )numKept, 7 };
339
+ UMat umat = UMat (4 , outputShape, CV_32F);
340
+ {
341
+ Mat mat = umat.getMat (ACCESS_WRITE);
342
+ float * outputsData = mat.ptr <float >();
343
+
344
+ size_t count = 0 ;
345
+ for (int i = 0 ; i < num; ++i)
346
+ {
347
+ count += outputDetections_ (i, &outputsData[count * 7 ],
348
+ allDecodedBBoxes[i], allConfidenceScores[i],
349
+ allIndices[i]);
350
+ }
351
+ CV_Assert (count == numKept);
352
+ }
353
+ outputs.clear ();
354
+ outputs.push_back (umat);
355
+ outs.assign (outputs);
356
+ return true ;
357
+ }
358
+ #endif
359
+
214
360
// Layer entry point working on generic input/output array wrappers.
//
// When the preferable target is OpenCL and the performance check on the
// default device passes, forward_ocl is attempted through CV_OCL_RUN;
// otherwise, or when that attempt does not succeed, execution continues
// with Layer::forward_fallback.
// NOTE(review): early return on success is provided by the CV_OCL_RUN macro,
// which is defined outside this file - confirm against its definition.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
{
    CV_TRACE_FUNCTION();
    CV_TRACE_ARG_VALUE(name, "name", name.c_str());

    CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
               OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
               forward_ocl(inputs_arr, outputs_arr, internals_arr))

    Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
}
221
371
@@ -225,7 +375,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
225
375
CV_TRACE_ARG_VALUE (name, " name" , name.c_str ());
226
376
227
377
std::vector<LabelBBox> allDecodedBBoxes;
228
- std::vector<std::vector<std::vector< float > > > allConfidenceScores;
378
+ std::vector<Mat > allConfidenceScores;
229
379
230
380
int num = inputs[0 ]->size [0 ];
231
381
@@ -286,17 +436,17 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
286
436
287
437
size_t outputDetections_ (
288
438
const int i, float * outputsData,
289
- const LabelBBox& decodeBBoxes, const std::vector<std::vector< float > > & confidenceScores,
439
+ const LabelBBox& decodeBBoxes, Mat & confidenceScores,
290
440
const std::map<int , std::vector<int > >& indicesMap
291
441
)
292
442
{
293
443
size_t count = 0 ;
294
444
for (std::map<int , std::vector<int > >::const_iterator it = indicesMap.begin (); it != indicesMap.end (); ++it)
295
445
{
296
446
int label = it->first ;
297
- if (confidenceScores.size () <= label)
447
+ if (confidenceScores.rows <= label)
298
448
CV_ErrorNoReturn_ (cv::Error::StsError, (" Could not find confidence predictions for label %d" , label));
299
- const std::vector<float >& scores = confidenceScores[ label] ;
449
+ const std::vector<float >& scores = confidenceScores. row ( label) ;
300
450
int locLabel = _shareLocation ? -1 : label;
301
451
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find (locLabel);
302
452
if (label_bboxes == decodeBBoxes.end ())
@@ -320,7 +470,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
320
470
}
321
471
322
472
size_t processDetections_ (
323
- const LabelBBox& decodeBBoxes, const std::vector<std::vector< float > > & confidenceScores,
473
+ const LabelBBox& decodeBBoxes, Mat & confidenceScores,
324
474
std::vector<std::map<int , std::vector<int > > >& allIndices
325
475
)
326
476
{
@@ -330,10 +480,10 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
330
480
{
331
481
if (c == _backgroundLabelId)
332
482
continue ; // Ignore background class.
333
- if (c >= confidenceScores.size () )
483
+ if (c >= confidenceScores.rows )
334
484
CV_ErrorNoReturn_ (cv::Error::StsError, (" Could not find confidence predictions for label %d" , c));
335
485
336
- const std::vector<float >& scores = confidenceScores[c] ;
486
+ const std::vector<float > scores = confidenceScores. row (c) ;
337
487
int label = _shareLocation ? -1 : c;
338
488
339
489
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find (label);
@@ -351,9 +501,9 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
351
501
{
352
502
int label = it->first ;
353
503
const std::vector<int >& labelIndices = it->second ;
354
- if (label >= confidenceScores.size () )
504
+ if (label >= confidenceScores.rows )
355
505
CV_ErrorNoReturn_ (cv::Error::StsError, (" Could not find location predictions for label %d" , label));
356
- const std::vector<float >& scores = confidenceScores[ label] ;
506
+ const std::vector<float >& scores = confidenceScores. row ( label) ;
357
507
for (size_t j = 0 ; j < labelIndices.size (); ++j)
358
508
{
359
509
size_t idx = labelIndices[j];
@@ -630,20 +780,20 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
630
780
// confidence prediction for an image.
631
781
static void GetConfidenceScores (const float * confData, const int num,
632
782
const int numPredsPerClass, const int numClasses,
633
- std::vector<std::vector<std::vector< float > > >& confPreds)
783
+ std::vector<Mat >& confPreds)
634
784
{
635
- confPreds.clear (); confPreds.resize (num);
785
+ int shape[] = { numClasses, numPredsPerClass };
786
+ for (int i = 0 ; i < num; i++)
787
+ confPreds.push_back (Mat (2 , shape, CV_32F));
788
+
636
789
for (int i = 0 ; i < num; ++i, confData += numPredsPerClass * numClasses)
637
790
{
638
- std::vector<std::vector<float > >& labelScores = confPreds[i];
639
- labelScores.resize (numClasses);
791
+ Mat labelScores = confPreds[i];
640
792
for (int c = 0 ; c < numClasses; ++c)
641
793
{
642
- std::vector<float >& classLabelScores = labelScores[c];
643
- classLabelScores.resize (numPredsPerClass);
644
794
for (int p = 0 ; p < numPredsPerClass; ++p)
645
795
{
646
- classLabelScores[p] = confData[p * numClasses + c];
796
+ labelScores. at < float >(c, p) = confData[p * numClasses + c];
647
797
}
648
798
}
649
799
}
0 commit comments