Skip to content

Commit 66feea6

Browse files
committed
region layer ocl implementation
Signed-off-by: Li Peng <peng.li@intel.com>
1 parent 7707c9b commit 66feea6

File tree

2 files changed

+182
-0
lines changed

2 files changed

+182
-0
lines changed

modules/dnn/src/layers/region_layer.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include <opencv2/dnn/shape_utils.hpp>
4545
#include <opencv2/dnn/all_layers.hpp>
4646
#include <iostream>
47+
#include "opencl_kernels_dnn.hpp"
4748

4849
namespace cv
4950
{
@@ -114,11 +115,83 @@ class RegionLayerImpl : public RegionLayer
114115
}
115116
}
116117

118+
#ifdef HAVE_OPENCL
119+
/**
 * @brief OpenCL forward pass of the region layer.
 *
 * For every output blob this launches the "logistic_activ" kernel, then
 * (only when useSoftmax is set) the "softmax_activ" kernel, and finally runs
 * do_nms_sort() on the host when nmsThreshold > 0.
 *
 * @param inps      Input blobs; fetched as a vector of UMat.
 * @param outs      Output blobs; fetched as a vector of UMat.
 * @param internals Not used by this implementation.
 * @return true if every launched kernel reported success, false as soon as a
 *         launch fails. NOTE(review): the caller is expected to fall back to
 *         the generic implementation on false - confirm against CV_OCL_RUN.
 *
 * Raises cv::Error::StsNotImplemented when useSoftmaxTree is set.
 */
bool forward_ocl(InputArrayOfArrays inps, OutputArrayOfArrays outs, OutputArrayOfArrays internals)
{
    std::vector<UMat> inputs;
    std::vector<UMat> outputs;

    inps.getUMatVector(inputs);
    outs.getUMatVector(outputs);

    if (useSoftmaxTree) { // Yolo 9000
        CV_Error(cv::Error::StsNotImplemented, "Yolo9000 is not implemented");
        return false; // kept for compilers that do not treat CV_Error as noreturn
    }

    CV_Assert(inputs.size() >= 1);
    // The loop below indexes inputs[] by output index, so every output needs
    // a matching input.
    CV_Assert(inputs.size() >= outputs.size());

    int const cell_size = classes + coords + 1;
    UMat blob_umat = blobs[0].getUMat(ACCESS_READ);

    for (size_t ii = 0; ii < outputs.size(); ii++)
    {
        UMat& inpBlob = inputs[ii];
        UMat& outBlob = outputs[ii];

        int rows = inpBlob.size[1];
        int cols = inpBlob.size[2];

        // One work-item per (row, col, anchor) cell; shared by both kernels.
        size_t global = rows*cols*anchors;

        ocl::Kernel logistic_kernel("logistic_activ", ocl::dnn::region_oclsrc);
        logistic_kernel.set(0, (int)global);
        logistic_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
        logistic_kernel.set(2, (int)cell_size);
        logistic_kernel.set(3, ocl::KernelArg::PtrWriteOnly(outBlob));
        // A failed launch must not be reported as success: the softmax kernel
        // reads the value this kernel writes at offset 4 of each cell.
        if (!logistic_kernel.run(1, &global, NULL, false))
            return false;

        if (useSoftmax)
        {
            // Yolo v2
            // softmax activation for Probability, for each grid cell (X x Y x Anchor-index)
            ocl::Kernel softmax_kernel("softmax_activ", ocl::dnn::region_oclsrc);
            softmax_kernel.set(0, (int)global);
            softmax_kernel.set(1, ocl::KernelArg::PtrReadOnly(inpBlob));
            softmax_kernel.set(2, ocl::KernelArg::PtrReadOnly(blob_umat));
            softmax_kernel.set(3, (int)cell_size);
            softmax_kernel.set(4, (int)classes);
            softmax_kernel.set(5, (int)classfix);
            softmax_kernel.set(6, (int)rows);
            softmax_kernel.set(7, (int)cols);
            softmax_kernel.set(8, (int)anchors);
            softmax_kernel.set(9, (float)thresh);
            softmax_kernel.set(10, ocl::KernelArg::PtrWriteOnly(outBlob));
            if (!softmax_kernel.run(1, &global, NULL, false))
                return false;
        }

        if (nmsThreshold > 0) {
            // NMS runs on the host over the kernel results, so the mapping
            // must be readable as well as writable. NOTE(review): do_nms_sort
            // is defined outside this view; it is assumed to read and modify
            // the buffer in place - confirm.
            Mat mat = outBlob.getMat(ACCESS_RW);
            float *dstData = mat.ptr<float>();
            do_nms_sort(dstData, rows*cols*anchors, nmsThreshold);
        }
    }

    return true;
}
184+
#endif
185+
117186
// Entry point for the layer's forward pass.
// Emits trace information, then offers the work to forward_ocl() through
// CV_OCL_RUN, and finally calls Layer::forward_fallback().
// NOTE(review): CV_OCL_RUN is defined outside this file; presumably it returns
// from this function when the condition holds and forward_ocl() returns true,
// so the fallback only runs otherwise - confirm against the macro definition.
void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr)
118187
{
119188
CV_TRACE_FUNCTION();
120189
CV_TRACE_ARG_VALUE(name, "name", name.c_str());
121190

191+
// The OpenCL path is attempted only when the OpenCL target was requested and
// the default device reports itself as an Intel device.
CV_OCL_RUN((preferableTarget == DNN_TARGET_OPENCL) &&
192+
OCL_PERFORMANCE_CHECK(ocl::Device::getDefault().isIntel()),
193+
forward_ocl(inputs_arr, outputs_arr, internals_arr))
194+
122195
// Generic implementation provided by the Layer base class.
Layer::forward_fallback(inputs_arr, outputs_arr, internals_arr);
123196
}
124197

modules/dnn/src/opencl/region.cl

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/*M///////////////////////////////////////////////////////////////////////////////////////
2+
//
3+
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4+
//
5+
// By downloading, copying, installing or using the software you agree to this license.
6+
// If you do not agree to this license, do not download, install,
7+
// copy or use the software.
8+
//
9+
//
10+
// License Agreement
11+
// For Open Source Computer Vision Library
12+
//
13+
// Copyright (c) 2016-2017 Fabian David Tschopp, all rights reserved.
14+
// Third party copyrights are property of their respective owners.
15+
//
16+
// Redistribution and use in source and binary forms, with or without modification,
17+
// are permitted provided that the following conditions are met:
18+
//
19+
// * Redistribution's of source code must retain the above copyright notice,
20+
// this list of conditions and the following disclaimer.
21+
//
22+
// * Redistribution's in binary form must reproduce the above copyright notice,
23+
// this list of conditions and the following disclaimer in the documentation
24+
// and/or other materials provided with the distribution.
25+
//
26+
// * The name of the copyright holders may not be used to endorse or promote products
27+
// derived from this software without specific prior written permission.
28+
//
29+
// This software is provided by the copyright holders and contributors "as is" and
30+
// any express or implied warranties, including, but not limited to, the implied
31+
// warranties of merchantability and fitness for a particular purpose are disclaimed.
32+
// In no event shall the Intel Corporation or contributors be liable for any direct,
33+
// indirect, incidental, special, exemplary, or consequential damages
34+
// (including, but not limited to, procurement of substitute goods or services;
35+
// loss of use, data, or profits; or business interruption) however caused
36+
// and on any theory of liability, whether in contract, strict liability,
37+
// or tort (including negligence or otherwise) arising in any way out of
38+
// the use of this software, even if advised of the possibility of such damage.
39+
//
40+
//M*/
41+
42+
#define Dtype float
43+
44+
// Applies the logistic function 1 / (1 + exp(-x)) to the element at offset 4
// of every cell and stores the result at the same position in dst.
//   count     - number of cells to process
//   src       - input buffer, read at cell_size * i + 4
//   cell_size - number of elements per cell
//   dst       - output buffer, written at cell_size * i + 4
// Each work-item starts at its global id and advances by the global size
// until count is reached; other elements of dst are left untouched.
__kernel void logistic_activ(const int count,
                             __global const Dtype* src,
                             const int cell_size,
                             __global Dtype* dst)
{
    int cell = get_global_id(0);
    while (cell < count)
    {
        const int pos = cell_size * cell + 4;
        const Dtype value = src[pos];
        dst[pos] = 1.f / (1.f + exp(-value));
        cell += get_global_size(0);
    }
}
56+
57+
// Per-cell post-processing for the region layer.
// For each cell (index in [0, count)) this kernel:
//   1. computes a max-shifted softmax over the `classes` values starting at
//      offset 5 of the cell,
//   2. rewrites elements 0..3 of the cell in dst from src, using the cell's
//      grid position (x, y), the logistic function and biasData,
//   3. multiplies each softmax value by the value already stored at
//      dst[offset 4] and zeroes results that do not exceed thresh.
// Parameters:
//   count     - number of cells to process
//   src       - input buffer
//   biasData  - read at [2 * a] and [2 * a + 1] for anchor index a
//   cell_size - number of elements per cell
//   classes   - number of class scores per cell
//   classfix  - when -1, a dst[offset 4] value below 0.5 is treated as 0
//   rows/cols - grid dimensions used to normalise the box values
//   anchors   - number of anchors per grid position
//   thresh    - class probabilities not greater than this are set to 0
//   dst       - output buffer; dst[offset 4] must already hold its final
//               value when this kernel runs
__kernel void softmax_activ(const int count,
                            __global const Dtype* src,
                            __global const Dtype* biasData,
                            const int cell_size,
                            const int classes,
                            const int classfix,
                            const int rows,
                            const int cols,
                            const int anchors,
                            const float thresh,
                            __global Dtype* dst)
{
    int cell = get_global_id(0);
    while (cell < count)
    {
        const int base = cell * cell_size;
        __global const Dtype* scores = src + base + 5;
        __global Dtype* probs = dst + base + 5;

        // Largest score, subtracted before exp() below.
        float peak = -FLT_MAX;
        for (int c = 0; c < classes; ++c)
            peak = fmax(peak, scores[c]);

        // Unnormalised exponentials go to dst; their sum is kept for later.
        float total = 0;
        for (int c = 0; c < classes; ++c)
        {
            const float e = exp((scores[c] - peak));
            total += e;
            probs[c] = e;
        }

        // Recover grid position and anchor index from the flat cell index.
        const int grid = cell / anchors;
        const int gy = grid / cols;
        const int gx = grid % cols;
        const int anchor = cell - anchors * (gx + gy * cols);

        float scale = dst[base + 4];
        if (classfix == -1 && scale < 0.5f)
            scale = 0;

        const float rawX = src[base + 0];
        const float rawY = src[base + 1];
        const float sigX = 1.f / (1.f + exp(-rawX));
        const float sigY = 1.f / (1.f + exp(-rawY));

        dst[base + 0] = (gx + sigX) / cols;
        dst[base + 1] = (gy + sigY) / rows;
        dst[base + 2] = exp(src[base + 2]) * biasData[2 * anchor] / cols;
        dst[base + 3] = exp(src[base + 3]) * biasData[2 * anchor + 1] / rows;

        // Normalise, scale and threshold the class probabilities.
        for (int c = 0; c < classes; ++c)
        {
            const float p = scale * probs[c] / total;
            if (p > thresh)
                probs[c] = p;
            else
                probs[c] = 0;
        }

        cell += get_global_size(0);
    }
}

0 commit comments

Comments
 (0)