Skip to content

Commit 04edc8f

Browse files
wzw-intel and pli2-intel
authored and committed
cleanup ocl4dnn spatial convolution kernels
remove unused macros and half definition macros, also remove unused ocl::Queue

Signed-off-by: Li Peng <peng.li@intel.com>
1 parent 55260a8 commit 04edc8f

File tree

6 files changed

+0
-21
lines changed

6 files changed

+0
-21
lines changed

modules/dnn/src/ocl4dnn/src/math_functions.cpp

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -65,8 +65,6 @@ ocl::Image2D ocl4dnnGEMMCopyBufferToImage(UMat buffer, int offset,
6565
int padded_width, int height,
6666
int width, int ld)
6767
{
68-
ocl::Context ctx = ocl::Context::getDefault();
69-
ocl::Queue queue = ocl::Queue::getDefault();
7068
ocl::Image2D image;
7169

7270
if (!is_matrix_a && transpose)
@@ -192,9 +190,6 @@ static bool ocl4dnnFastImageGEMM(const CBLAS_TRANSPOSE TransA,
192190
// just padding one line is enough as the sub group block read
193191
// will clamp to edge according to the spec.
194192

195-
ocl::Context ctx = ocl::Context::getDefault();
196-
ocl::Queue queue = ocl::Queue::getDefault();
197-
198193
ocl::Image2D ImA;
199194
ocl::Image2D ImB;
200195

@@ -446,7 +441,6 @@ bool ocl4dnnGEMV<float>(const CBLAS_TRANSPOSE TransA,
446441
const int32_t offx, const float beta, UMat y,
447442
const int32_t offy)
448443
{
449-
ocl::Queue queue = ocl::Queue::getDefault();
450444
bool ret = false;
451445

452446
if (TransA == CblasNoTrans)
@@ -507,8 +501,6 @@ bool ocl4dnnAXPY(const int32_t N, const Dtype alpha,
507501
const UMat X, const int32_t offX, UMat Y,
508502
const int32_t offY)
509503
{
510-
ocl::Context ctx = ocl::Context::getDefault();
511-
512504
ocl::Kernel oclk_axpy(CL_KERNEL_SELECT("axpy"), cv::ocl::dnn::math_oclsrc);
513505
if (oclk_axpy.empty())
514506
return false;

modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -184,8 +184,6 @@ void OCL4DNNConvSpatial<Dtype>::collectCommonInformation()
184184
addDef("as_Dtype2", "as_float2");
185185
addDef("as_Dtype4", "as_float4");
186186
addDef("as_Dtype8", "as_float8");
187-
addDef("Dtype_ID", (int)CV_32F);
188-
addDef("Dtype_SIZE", (int)sizeof(Dtype));
189187
}
190188

191189
typedef enum {

modules/dnn/src/ocl4dnn/src/ocl4dnn_lrn.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,6 @@ bool OCL4DNNLRN<Dtype>::Forward(const UMat& bottom, UMat& top)
9292
template<typename Dtype>
9393
bool OCL4DNNLRN<Dtype>::crossChannelForward(const UMat& bottom, UMat& top)
9494
{
95-
ocl::Queue queue = ocl::Queue::getDefault();
9695
CHECK_EQ(phase_test_, true) << "Only support forward inference.";
9796

9897
cl_uint argIdx = 0;

modules/dnn/src/ocl4dnn/src/ocl4dnn_pool.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,6 @@ bool OCL4DNNPool<Dtype>::Forward(const UMat& bottom,
9797
UMat& top_mask)
9898
{
9999
bool ret = true;
100-
ocl::Queue queue = ocl::Queue::getDefault();
101100
size_t global[] = { 128 * 128 };
102101
size_t local[] = { 128 };
103102
cl_uint argIdx = 0;

modules/dnn/src/ocl4dnn/src/ocl4dnn_softmax.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,6 @@ template<typename Dtype>
8383
bool OCL4DNNSoftmax<Dtype>::Forward(const UMat& bottom, UMat& top)
8484
{
8585
bool ret = false;
86-
ocl::Queue queue = ocl::Queue::getDefault();
8786
bool intel_subgroup = ocl::Device::getDefault().intelSubgroupsSupport();
8887
if (intel_subgroup && inner_num_ < 128)
8988
{

modules/dnn/src/opencl/conv_layer_spatial.cl

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,6 @@
8282
#define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))
8383

8484
#if defined(convolve_simd) || defined(Conv_Interleaved)
85-
#if Dtype_SIZE == 4
8685
#define INT_TYPE uint
8786
#define INT_TYPE2 uint2
8887
#define INT_TYPE4 uint4
@@ -91,9 +90,6 @@
9190
#define SUB_GROUP_BLOCK_READ4 intel_sub_group_block_read4
9291
#define SUB_GROUP_BLOCK_READ8 intel_sub_group_block_read8
9392
#define SUB_GROUP_BLOCK_READ intel_sub_group_block_read
94-
#else
95-
#error "Unsupported type"
96-
#endif
9793
#endif
9894

9995
#ifdef KERNEL_BASIC
@@ -176,11 +172,7 @@ __kernel void ConvolveBasic(
176172

177173
#elif defined KERNEL_IDLF
178174

179-
#if TYPE == TYPE_HALF
180-
#define VLOAD4(_v, _p) do { (_v).s0 = *(_p); (_v).s1 = *(_p + 1); (_v).s2 = *(_p + 2); (_v).s3 = *(_p + 3); } while(0)
181-
#else
182175
#define VLOAD4(_v, _p) do { _v = vload4(0, _p); } while(0)
183-
#endif
184176

185177
// Each work-item computes a OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT region of one output map.
186178
// Each work-group (which will be mapped to 1 SIMD16/SIMD8 EU thread) will compute 16/8 different feature maps, but each feature map is for the same region of the input image.

0 commit comments

Comments
 (0)