Skip to content

Commit 6074f92

Browse files
committed
Merge pull request opencv#10228 from pengli:dnn_new
2 parents 0b688cd + 59cbaca commit 6074f92

File tree

10 files changed

+797
-21
lines changed

10 files changed

+797
-21
lines changed

modules/dnn/src/layers/concat_layer.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,12 +185,13 @@ class ConcatLayerImpl : public ConcatLayer
185185
outs.getUMatVector(outputs);
186186

187187
int cAxis = clamp(axis, inputs[0].dims);
188-
if (!(cAxis == 1 && outputs[0].dims == 4 && !padding))
188+
if (padding)
189189
return false;
190190

191191
int bottom_concat_axis;
192-
int concat_size = inputs[0].size[2] * inputs[0].size[3];
193-
int top_concat_axis = outputs[0].size[1];
192+
int concat_size = total(shape(inputs[0]), cAxis + 1);
193+
int top_concat_axis = outputs[0].size[cAxis];
194+
int num_concats = total(shape(inputs[0]), 0, cAxis);
194195
int offset_concat_axis = 0;
195196
UMat& outMat = outputs[0];
196197
String buildopt = String("-DDtype=") + ocl::typeToStr(inputs[0].type()) + String(" ");
@@ -202,12 +203,12 @@ class ConcatLayerImpl : public ConcatLayer
202203
return false;
203204

204205
UMat& inpMat = inputs[i];
205-
bottom_concat_axis = inputs[i].size[1];
206+
bottom_concat_axis = inputs[i].size[cAxis];
206207
size_t nthreads = inputs[i].total();
207208

208209
kernel.set(0, (int)nthreads);
209210
kernel.set(1, ocl::KernelArg::PtrReadOnly(inpMat));
210-
kernel.set(2, (int)inputs[i].size[0]);
211+
kernel.set(2, (int)num_concats);
211212
kernel.set(3, (int)concat_size);
212213
kernel.set(4, (int)top_concat_axis);
213214
kernel.set(5, (int)bottom_concat_axis);

modules/dnn/src/layers/detection_output_layer.cpp

Lines changed: 166 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <float.h>
4646
#include <string>
4747
#include "../nms.inl.hpp"
48+
#include "opencl_kernels_dnn.hpp"
4849

4950
namespace cv
5051
{
@@ -211,11 +212,160 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
211212
return false;
212213
}
213214

215+
#ifdef HAVE_OPENCL
216+
// Decode all bboxes in a batch
217+
bool ocl_DecodeBBoxesAll(UMat& loc_mat, UMat& prior_mat,
218+
const int num, const int numPriors, const bool share_location,
219+
const int num_loc_classes, const int background_label_id,
220+
const cv::String& code_type, const bool variance_encoded_in_target,
221+
const bool clip, std::vector<LabelBBox>& all_decode_bboxes)
222+
{
223+
UMat outmat = UMat(loc_mat.dims, loc_mat.size, CV_32F);
224+
size_t nthreads = loc_mat.total();
225+
String kernel_name;
226+
227+
if (code_type == "CORNER")
228+
kernel_name = "DecodeBBoxesCORNER";
229+
else if (code_type == "CENTER_SIZE")
230+
kernel_name = "DecodeBBoxesCENTER_SIZE";
231+
else
232+
return false;
233+
234+
for (int i = 0; i < num; ++i)
235+
{
236+
ocl::Kernel kernel(kernel_name.c_str(), ocl::dnn::detection_output_oclsrc);
237+
kernel.set(0, (int)nthreads);
238+
kernel.set(1, ocl::KernelArg::PtrReadOnly(loc_mat));
239+
kernel.set(2, ocl::KernelArg::PtrReadOnly(prior_mat));
240+
kernel.set(3, (int)variance_encoded_in_target);
241+
kernel.set(4, (int)numPriors);
242+
kernel.set(5, (int)share_location);
243+
kernel.set(6, (int)num_loc_classes);
244+
kernel.set(7, (int)background_label_id);
245+
kernel.set(8, (int)clip);
246+
kernel.set(9, ocl::KernelArg::PtrWriteOnly(outmat));
247+
248+
if (!kernel.run(1, &nthreads, NULL, false))
249+
return false;
250+
}
251+
252+
all_decode_bboxes.clear();
253+
all_decode_bboxes.resize(num);
254+
{
255+
Mat mat = outmat.getMat(ACCESS_READ);
256+
const float* decode_data = mat.ptr<float>();
257+
for (int i = 0; i < num; ++i)
258+
{
259+
LabelBBox& decode_bboxes = all_decode_bboxes[i];
260+
for (int c = 0; c < num_loc_classes; ++c)
261+
{
262+
int label = share_location ? -1 : c;
263+
decode_bboxes[label].resize(numPriors);
264+
for (int p = 0; p < numPriors; ++p)
265+
{
266+
int startIdx = p * num_loc_classes * 4;
267+
util::NormalizedBBox& bbox = decode_bboxes[label][p];
268+
bbox.xmin = decode_data[startIdx + c * 4];
269+
bbox.ymin = decode_data[startIdx + c * 4 + 1];
270+
bbox.xmax = decode_data[startIdx + c * 4 + 2];
271+
bbox.ymax = decode_data[startIdx + c * 4 + 3];
272+
}
273+
}
274+
}
275+
}
276+
return true;
277+
}
278+
279+
void ocl_GetConfidenceScores(const UMat& inp1, const int num,
280+
const int numPredsPerClass, const int numClasses,
281+
std::vector<Mat>& confPreds)
282+
{
283+
int shape[] = { numClasses, numPredsPerClass };
284+
for (int i = 0; i < num; i++)
285+
confPreds.push_back(Mat(2, shape, CV_32F));
286+
287+
UMat umat = inp1.reshape(1, num * numPredsPerClass);
288+
for (int i = 0; i < num; ++i)
289+
{
290+
Range ranges[] = { Range(i * numPredsPerClass, (i + 1) * numPredsPerClass), Range::all() };
291+
transpose(umat(ranges), confPreds[i]);
292+
}
293+
}
294+
295+
// OpenCL implementation of the detection output layer.
//
// Reads the location blob (input 0), the confidence blob (input 1) and the
// prior blob (input 2), decodes the boxes on the device, then selects and
// writes the kept detections on the host.
//
// Returns false when the box decoding step reports failure, true otherwise.
// The internals argument is not used.
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs, outputs;
    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);

    // Batch size comes from the location blob, prior count from the prior blob
    // (four values per prior).
    const int num = inputs[0].size[0];
    const int numPriors = inputs[2].size[2] / 4;

    // Retrieve all confidences
    std::vector<Mat> allConfidenceScores;
    ocl_GetConfidenceScores(inputs[1], num, numPriors, _numClasses, allConfidenceScores);

    // Decode all loc predictions to bboxes
    std::vector<LabelBBox> allDecodedBBoxes;
    if (!ocl_DecodeBBoxesAll(inputs[0], inputs[2], num, numPriors,
                             _shareLocation, _numLocClasses, _backgroundLabelId,
                             _codeType, _varianceEncodedInTarget, false,
                             allDecodedBBoxes))
    {
        return false;
    }

    // Pick the detections to keep for every image and count them.
    std::vector<std::map<int, std::vector<int> > > allIndices;
    size_t numKept = 0;
    for (int img = 0; img < num; ++img)
        numKept += processDetections_(allDecodedBBoxes[img], allConfidenceScores[img], allIndices);

    if (numKept == 0)
    {
        // Set confidences to zeros.
        Range ranges[] = {Range::all(), Range::all(), Range::all(), Range(2, 3)};
        outputs[0](ranges).setTo(0);
        return true;
    }

    // One row of 7 floats per kept detection.
    int outputShape[] = {1, 1, (int)numKept, 7};
    UMat umat(4, outputShape, CV_32F);
    {
        // Host mapping of the result buffer; released at the end of this scope.
        Mat mat = umat.getMat(ACCESS_WRITE);
        float* outputsData = mat.ptr<float>();

        size_t written = 0;
        for (int img = 0; img < num; ++img)
        {
            float* dst = &outputsData[written * 7];
            written += outputDetections_(img, dst,
                                         allDecodedBBoxes[img], allConfidenceScores[img],
                                         allIndices[img]);
        }
        CV_Assert(written == numKept);
    }

    // Replace the layer outputs with the freshly sized result blob.
    outputs.assign(1, umat);
    outs.assign(outputs);
    return true;
}
358+
#endif
359+
214360
// Layer entry point: offers the work to forward_ocl() first and otherwise
// hands the same three array arguments to Layer::forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
215361
{
216362
CV_TRACE_FUNCTION();
217363
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
218364

365+
// The OpenCL path is only considered when the preferable target is
// DNN_TARGET_OPENCL and the performance check on the default device being an
// Intel device passes.
// NOTE(review): CV_OCL_RUN presumably returns from this function when
// forward_ocl() succeeds -- confirm against the macro definition.
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
366+
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
367+
forward_ocl(inputs_arr, outputs_arr, internals_arr))
368+
219369
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
220370
}
221371

@@ -225,7 +375,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
225375
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
226376

227377
std::vector<LabelBBox> allDecodedBBoxes;
228-
std::vector<std::vector<std::vector<float> > > allConfidenceScores;
378+
std::vector<Mat> allConfidenceScores;
229379

230380
int num = inputs[0]->size[0];
231381

@@ -286,17 +436,17 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
286436

287437
size_t outputDetections_(
288438
const int i, float* outputsData,
289-
const LabelBBox& decodeBBoxes, const std::vector<std::vector<float> >& confidenceScores,
439+
const LabelBBox& decodeBBoxes, Mat& confidenceScores,
290440
const std::map<int, std::vector<int> >& indicesMap
291441
)
292442
{
293443
size_t count = 0;
294444
for (std::map<int, std::vector<int> >::const_iterator it = indicesMap.begin(); it != indicesMap.end(); ++it)
295445
{
296446
int label = it->first;
297-
if (confidenceScores.size() <= label)
447+
if (confidenceScores.rows <= label)
298448
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", label));
299-
const std::vector<float>& scores = confidenceScores[label];
449+
const std::vector<float>& scores = confidenceScores.row(label);
300450
int locLabel = _shareLocation ? -1 : label;
301451
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(locLabel);
302452
if (label_bboxes == decodeBBoxes.end())
@@ -320,7 +470,7 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
320470
}
321471

322472
size_t processDetections_(
323-
const LabelBBox& decodeBBoxes, const std::vector<std::vector<float> >& confidenceScores,
473+
const LabelBBox& decodeBBoxes, Mat& confidenceScores,
324474
std::vector<std::map<int, std::vector<int> > >& allIndices
325475
)
326476
{
@@ -330,10 +480,10 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
330480
{
331481
if (c == _backgroundLabelId)
332482
continue; // Ignore background class.
333-
if (c >= confidenceScores.size())
483+
if (c >= confidenceScores.rows)
334484
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find confidence predictions for label %d", c));
335485

336-
const std::vector<float>& scores = confidenceScores[c];
486+
const std::vector<float> scores = confidenceScores.row(c);
337487
int label = _shareLocation ? -1 : c;
338488

339489
LabelBBox::const_iterator label_bboxes = decodeBBoxes.find(label);
@@ -351,9 +501,9 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
351501
{
352502
int label = it->first;
353503
const std::vector<int>& labelIndices = it->second;
354-
if (label >= confidenceScores.size())
504+
if (label >= confidenceScores.rows)
355505
CV_ErrorNoReturn_(cv::Error::StsError, ("Could not find location predictions for label %d", label));
356-
const std::vector<float>& scores = confidenceScores[label];
506+
const std::vector<float>& scores = confidenceScores.row(label);
357507
for (size_t j = 0; j < labelIndices.size(); ++j)
358508
{
359509
size_t idx = labelIndices[j];
@@ -630,20 +780,20 @@ class DetectionOutputLayerImpl : public DetectionOutputLayer
630780
// confidence prediction for an image.
631781
static void GetConfidenceScores(const float* confData, const int num,
632782
const int numPredsPerClass, const int numClasses,
633-
std::vector<std::vector<std::vector<float> > >& confPreds)
783+
std::vector<Mat>& confPreds)
634784
{
635-
confPreds.clear(); confPreds.resize(num);
785+
int shape[] = { numClasses, numPredsPerClass };
786+
for (int i = 0; i < num; i++)
787+
confPreds.push_back(Mat(2, shape, CV_32F));
788+
636789
for (int i = 0; i < num; ++i, confData += numPredsPerClass * numClasses)
637790
{
638-
std::vector<std::vector<float> >& labelScores = confPreds[i];
639-
labelScores.resize(numClasses);
791+
Mat labelScores = confPreds[i];
640792
for (int c = 0; c < numClasses; ++c)
641793
{
642-
std::vector<float>& classLabelScores = labelScores[c];
643-
classLabelScores.resize(numPredsPerClass);
644794
for (int p = 0; p < numPredsPerClass; ++p)
645795
{
646-
classLabelScores[p] = confData[p * numClasses + c];
796+
labelScores.at<float>(c, p) = confData[p * numClasses + c];
647797
}
648798
}
649799
}

modules/dnn/src/layers/permute_layer.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "layers_common.hpp"
4545
#include <float.h>
4646
#include <algorithm>
47+
#include "opencl_kernels_dnn.hpp"
4748

4849
namespace cv
4950
{
@@ -173,6 +174,24 @@ class PermuteLayerImpl : public PermuteLayer
173174
CV_Assert((int)_numAxes == inp0.dims);
174175

175176
computeStrides(shape(*inputs[0]), shape(outputs[0]));
177+
178+
#ifdef HAVE_OPENCL
179+
if (uorder.empty())
180+
{
181+
std::vector<int> orderVec(_order.begin(), _order.end());;
182+
Mat morder(1, orderVec.size(), CV_32SC1, &orderVec[0]);
183+
184+
std::vector<int> oldStrideVec(_oldStride.begin(), _oldStride.end());
185+
Mat mold_stride(1, _oldStride.size(), CV_32SC1, &oldStrideVec[0]);
186+
187+
std::vector<int> newStrideVec(_newStride.begin(), _newStride.end());
188+
Mat mnew_stride(1, newStrideVec.size(), CV_32SC1, &newStrideVec[0]);
189+
190+
morder.copyTo(uorder);
191+
mold_stride.copyTo(uold_stride);
192+
mnew_stride.copyTo(unew_stride);
193+
}
194+
#endif
176195
}
177196

178197
class PermuteInvoker : public ParallelLoopBody
@@ -247,11 +266,47 @@ class PermuteLayerImpl : public PermuteLayer
247266
}
248267
};
249268

269+
#ifdef HAVE_OPENCL
270+
// OpenCL implementation of the permute layer.
//
// Launches the "permute" kernel once per input blob, writing into the output
// blob with the same index. Returns false when no permutation is needed or a
// kernel launch fails, true otherwise. The internals argument is not used.
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> srcBlobs, dstBlobs;
    inps.getUMatVector(srcBlobs);
    outs.getUMatVector(dstBlobs);

    if (!_needsPermute)
        return false;

    const size_t blobCount = srcBlobs.size();
    for (size_t idx = 0; idx != blobCount; ++idx)
    {
        ocl::Kernel permuteKernel("permute", ocl::dnn::permute_oclsrc);

        // Arguments: element count, source, axis order, old strides,
        // new strides, number of axes, destination.
        permuteKernel.set(0, (int)_count);
        permuteKernel.set(1, ocl::KernelArg::PtrReadOnly(srcBlobs[idx]));
        permuteKernel.set(2, ocl::KernelArg::PtrReadOnly(uorder));
        permuteKernel.set(3, ocl::KernelArg::PtrReadOnly(uold_stride));
        permuteKernel.set(4, ocl::KernelArg::PtrReadOnly(unew_stride));
        permuteKernel.set(5, (int)_numAxes);
        permuteKernel.set(6, ocl::KernelArg::PtrWriteOnly(dstBlobs[idx]));

        // One-dimensional launch with _count work items.
        const bool launched = permuteKernel.run(1, &_count, NULL, false);
        if (!launched)
            return false;
    }

    return true;
}
299+
#endif
300+
250301
// Layer entry point: offers the work to forward_ocl() first and otherwise
// hands the same three array arguments to Layer::forward_fallback().
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
251302
{
252303
CV_TRACE_FUNCTION();
253304
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
254305

306+
// The OpenCL path is only considered when the preferable target is
// DNN_TARGET_OPENCL and the performance check on the default device being an
// Intel device passes.
// NOTE(review): CV_OCL_RUN presumably returns from this function when
// forward_ocl() succeeds -- confirm against the macro definition.
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
307+
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
308+
forward_ocl(inputs_arr, outputs_arr, internals_arr))
309+
255310
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
256311
}
257312

@@ -325,6 +380,10 @@ class PermuteLayerImpl : public PermuteLayer
325380
std::vector<size_t> _newStride;
326381
bool _needsPermute;
327382

383+
#ifdef HAVE_OPENCL
384+
UMat uorder, uold_stride, unew_stride;
385+
#endif
386+
328387
size_t _numAxes;
329388
};
330389

0 commit comments

Comments
 (0)