Skip to content

Commit c93fb14

Browse files
committed
Merge pull request opencv#7653 from pengli:deriv
2 parents ad888c8 + 8d4a7d3 commit c93fb14

File tree

3 files changed

+319
-0
lines changed

3 files changed

+319
-0
lines changed

modules/imgproc/src/deriv.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,66 @@ static bool ipp_sobel(InputArray _src, OutputArray _dst, int ddepth, int dx, int
517517
}
518518
#endif
519519

520+
#ifdef HAVE_OPENCL
521+
namespace cv
522+
{
523+
static bool ocl_sepFilter3x3_8UC1(InputArray _src, OutputArray _dst, int ddepth,
524+
InputArray _kernelX, InputArray _kernelY, double delta, int borderType)
525+
{
526+
const ocl::Device & dev = ocl::Device::getDefault();
527+
int type = _src.type(), sdepth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
528+
529+
if ( !(dev.isIntel() && (type == CV_8UC1) && (ddepth == CV_8U) &&
530+
(_src.offset() == 0) && (_src.step() % 4 == 0) &&
531+
(_src.cols() % 16 == 0) && (_src.rows() % 2 == 0)) )
532+
return false;
533+
534+
Mat kernelX = _kernelX.getMat().reshape(1, 1);
535+
if (kernelX.cols % 2 != 1)
536+
return false;
537+
Mat kernelY = _kernelY.getMat().reshape(1, 1);
538+
if (kernelY.cols % 2 != 1)
539+
return false;
540+
541+
if (ddepth < 0)
542+
ddepth = sdepth;
543+
544+
Size size = _src.size();
545+
size_t globalsize[2] = { 0, 0 };
546+
size_t localsize[2] = { 0, 0 };
547+
548+
globalsize[0] = size.width / 16;
549+
globalsize[1] = size.height / 2;
550+
551+
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
552+
char build_opts[1024];
553+
sprintf(build_opts, "-D %s %s%s", borderMap[borderType],
554+
ocl::kernelToStr(kernelX, CV_32F, "KERNEL_MATRIX_X").c_str(),
555+
ocl::kernelToStr(kernelY, CV_32F, "KERNEL_MATRIX_Y").c_str());
556+
557+
ocl::Kernel kernel("sepFilter3x3_8UC1_cols16_rows2", cv::ocl::imgproc::sepFilter3x3_oclsrc, build_opts);
558+
if (kernel.empty())
559+
return false;
560+
561+
UMat src = _src.getUMat();
562+
_dst.create(size, CV_MAKETYPE(ddepth, cn));
563+
if (!(_dst.offset() == 0 && _dst.step() % 4 == 0))
564+
return false;
565+
UMat dst = _dst.getUMat();
566+
567+
int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(src));
568+
idxArg = kernel.set(idxArg, (int)src.step);
569+
idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
570+
idxArg = kernel.set(idxArg, (int)dst.step);
571+
idxArg = kernel.set(idxArg, (int)dst.rows);
572+
idxArg = kernel.set(idxArg, (int)dst.cols);
573+
idxArg = kernel.set(idxArg, static_cast<float>(delta));
574+
575+
return kernel.run(2, globalsize, (localsize[0] == 0) ? NULL : localsize, false);
576+
}
577+
}
578+
#endif
579+
520580
void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
521581
int ksize, double scale, double delta, int borderType )
522582
{
@@ -554,6 +614,11 @@ void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
554614
else
555615
ky *= scale;
556616
}
617+
618+
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && ksize == 3 &&
619+
(size_t)_src.rows() > ky.total() && (size_t)_src.cols() > kx.total(),
620+
ocl_sepFilter3x3_8UC1(_src, _dst, ddepth, kx, ky, delta, borderType));
621+
557622
sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
558623
}
559624

@@ -593,6 +658,11 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
593658
else
594659
ky *= scale;
595660
}
661+
662+
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 &&
663+
(size_t)_src.rows() > ky.total() && (size_t)_src.cols() > kx.total(),
664+
ocl_sepFilter3x3_8UC1(_src, _dst, ddepth, kx, ky, delta, borderType));
665+
596666
sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType );
597667
}
598668

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
// DIG(a) turns one filter tap into an initializer-list element. The
// KERNEL_MATRIX_X / KERNEL_MATRIX_Y build options are presumably sequences of
// DIG(...) tokens produced by the host - confirm against ocl::kernelToStr.
#define DIG(a) a,
__constant float kx[] = { KERNEL_MATRIX_X };
__constant float ky[] = { KERNEL_MATRIX_Y };

// Contribution of one filter tap to 16 horizontally adjacent output pixels.
//   row : which of the two output rows is being computed (0 or 1)
//   y   : vertical tap index (0..2)
//   x   : horizontal tap index (0..2)
// Expects a `uchar16 arr[12]` in scope at the point of use, laid out as four
// source rows times three horizontal shifts. Every argument is parenthesised so
// that compound expressions can be passed safely.
#define OP(row, y, x) (convert_float16(arr[((y) + (row)) * 3 + (x)]) * ky[(y)] * kx[(x)])
10+
11+
// Separable 3x3 filter for single-channel 8-bit images.
//
// Each work-item produces a tile of 16 columns x 2 rows:
//   get_global_id(0) selects the 16-pixel column block,
//   get_global_id(1) selects the pair of output rows.
//
// src / dst  : image data accessed as 32-bit words (rows are read and written
//              with vload4 / vstore4, i.e. 16 pixels at a time)
// src_step,
// dst_step   : row pitch in bytes; divided by 4 to index the uint pointers
// rows, cols : image size in pixels; the border tests below compare against
//              (rows - 2) and (block_x + 1) * 16 == cols, so they rely on an
//              even height and a width that is a multiple of 16
// delta      : added to every filtered value before saturating to 8 bits
//
// Exactly one of BORDER_CONSTANT, BORDER_REFLECT_101, BORDER_REPLICATE or
// BORDER_REFLECT has to be defined through the build options.
__kernel void sepFilter3x3_8UC1_cols16_rows2(__global const uint* src, int src_step,
                                             __global uint* dst, int dst_step,
                                             int rows, int cols, float delta)
{
    const int block_x = get_global_id(0);
    const int y = get_global_id(1) * 2;

    if ((block_x * 16) >= cols || y >= rows) return;

    // Position of this tile relative to the image borders.
    const int first_pair = (y == 0);
    const int last_pair = (y == (rows - 2));
    const int line_begin = (block_x == 0);
    const int line_end = ((block_x + 1) * 16 == cols);

    // Word index of the tile in source row y. Indexing from row y (instead of
    // row y - 1) avoids forming a pointer in front of the buffer when y == 0.
    const int src_step4 = src_step / 4;
    const int row_index = block_x * 4 + y * src_step4;

    // line[0..3] hold 16 pixels each of source rows y-1, y, y+1 and y+2.
    uint4 line[4];
    line[1] = vload4(0, src + row_index);
    line[2] = vload4(0, src + row_index + src_step4);

    // Byte view of the source for the single pixels left and right of the tile.
    __global const uchar *src_p = (__global const uchar *)src;

    // Byte index of the first tile pixel in source row y - 1.
    const int base = block_x * 16 + (y - 1) * src_step;

    // Left (a, d, g, j) and right (c, f, i, l) neighbours of rows y-1 .. y+2.
    uchar a, c;
    uchar d, f;
    uchar g, i;
    uchar j, l;

#ifdef BORDER_CONSTANT
    line[0] = first_pair ? (uint4)0 : vload4(0, src + row_index - src_step4);
    line[3] = last_pair ? (uint4)0 : vload4(0, src + row_index + 2 * src_step4);

    a = (line_begin || first_pair) ? 0 : src_p[base - 1];
    c = (line_end || first_pair) ? 0 : src_p[base + 16];

    d = line_begin ? 0 : src_p[base + src_step - 1];
    f = line_end ? 0 : src_p[base + src_step + 16];

    g = line_begin ? 0 : src_p[base + 2 * src_step - 1];
    i = line_end ? 0 : src_p[base + 2 * src_step + 16];

    j = (line_begin || last_pair) ? 0 : src_p[base + 3 * src_step - 1];
    l = (line_end || last_pair) ? 0 : src_p[base + 3 * src_step + 16];

#elif defined BORDER_REFLECT_101
    // Mirror without repeating the edge: row -1 maps to row 1, row `rows` maps
    // to row rows - 2, column -1 maps to column 1 and column `cols` maps to
    // column cols - 2.
    line[0] = first_pair ? line[2] : vload4(0, src + row_index - src_step4);
    line[3] = last_pair ? line[1] : vload4(0, src + row_index + 2 * src_step4);

    const int top_off = first_pair ? (2 * src_step) : 0;
    const int bottom_off = last_pair ? (1 * src_step) : (3 * src_step);
    const int left_off = line_begin ? 1 : -1;
    const int right_off = line_end ? 14 : 16;

#elif defined (BORDER_REPLICATE) || defined(BORDER_REFLECT)
    // For a one-pixel border both modes repeat the edge row / column.
    line[0] = first_pair ? line[1] : vload4(0, src + row_index - src_step4);
    line[3] = last_pair ? line[2] : vload4(0, src + row_index + 2 * src_step4);

    const int top_off = first_pair ? (1 * src_step) : 0;
    const int bottom_off = last_pair ? (2 * src_step) : (3 * src_step);
    const int left_off = line_begin ? 0 : -1;
    const int right_off = line_end ? 15 : 16;

#else
    // Fail the build instead of filtering with uninitialised border pixels.
#error "sepFilter3x3_8UC1_cols16_rows2: unsupported or missing border mode"
#endif

#ifndef BORDER_CONSTANT
    // Shared by the mirrored / replicated modes: only the offsets differ.
    a = src_p[base + top_off + left_off];
    c = src_p[base + top_off + right_off];

    d = src_p[base + src_step + left_off];
    f = src_p[base + src_step + right_off];

    g = src_p[base + 2 * src_step + left_off];
    i = src_p[base + 2 * src_step + right_off];

    j = src_p[base + bottom_off + left_off];
    l = src_p[base + bottom_off + right_off];
#endif

    const uchar16 b = as_uchar16(line[0]);
    const uchar16 e = as_uchar16(line[1]);
    const uchar16 h = as_uchar16(line[2]);
    const uchar16 k = as_uchar16(line[3]);

    // arr[r * 3 + s]: source row r (0..3) shifted so that element n holds the
    // pixel at column n - 1 (s == 0), n (s == 1) or n + 1 (s == 2) of the tile.
    // The name `arr` is referenced by the OP macro.
    uchar16 arr[12];

    arr[0] = (uchar16)(a, b.s0123, b.s456789ab, b.scde);
    arr[1] = b;
    arr[2] = (uchar16)(b.s123, b.s4567, b.s89abcdef, c);
    arr[3] = (uchar16)(d, e.s0123, e.s456789ab, e.scde);
    arr[4] = e;
    arr[5] = (uchar16)(e.s123, e.s4567, e.s89abcdef, f);
    arr[6] = (uchar16)(g, h.s0123, h.s456789ab, h.scde);
    arr[7] = h;
    arr[8] = (uchar16)(h.s123, h.s4567, h.s89abcdef, i);
    arr[9] = (uchar16)(j, k.s0123, k.s456789ab, k.scde);
    arr[10] = k;
    arr[11] = (uchar16)(k.s123, k.s4567, k.s89abcdef, l);

    // sum[0] is output row y, sum[1] is output row y + 1.
    float16 sum[2];

    sum[0] = OP(0, 0, 0) + OP(0, 0, 1) + OP(0, 0, 2) +
             OP(0, 1, 0) + OP(0, 1, 1) + OP(0, 1, 2) +
             OP(0, 2, 0) + OP(0, 2, 1) + OP(0, 2, 2);

    sum[1] = OP(1, 0, 0) + OP(1, 0, 1) + OP(1, 0, 2) +
             OP(1, 1, 0) + OP(1, 1, 1) + OP(1, 1, 2) +
             OP(1, 2, 0) + OP(1, 2, 1) + OP(1, 2, 2);

    // Add delta, round to nearest even and saturate to 0..255, then repack the
    // 16 bytes of each row into four 32-bit words for the store.
    const uint4 out_row0 = as_uint4(convert_uchar16_sat_rte(sum[0] + delta));
    const uint4 out_row1 = as_uint4(convert_uchar16_sat_rte(sum[1] + delta));

    const int dst_step4 = dst_step / 4;
    const int dst_index = block_x * 4 + y * dst_step4;
    vstore4(out_row0, 0, dst + dst_index);
    vstore4(out_row1, 0, dst + dst_index + dst_step4);
}

modules/imgproc/test/ocl/test_filters.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,84 @@ OCL_TEST_P(SobelTest, Mat)
187187
}
188188
}
189189

190+
// Fixture for derivative-filter tests whose inputs are at least 16 columns wide
// (width a multiple of 16 whenever the random width is 16 or more) and have an
// even height.
PARAM_TEST_CASE(Deriv3x3_cols16_rows2_Base, MatType,
                int, // kernel size
                Size, // dx, dy
                BorderType, // border type
                double, // optional parameter
                bool, // roi or not
                int) // width multiplier
{
    int type, borderType, ksize;
    Size size; // derivative orders: width is dx, height is dy
    double param;
    bool useRoi;
    int widthMultiple;

    TEST_DECLARE_INPUT_PARAMETER(src);
    TEST_DECLARE_OUTPUT_PARAMETER(dst);

    virtual void SetUp()
    {
        type = GET_PARAM(0);
        ksize = GET_PARAM(1);
        size = GET_PARAM(2);
        borderType = GET_PARAM(3);
        param = GET_PARAM(4);
        useRoi = GET_PARAM(5);
        widthMultiple = GET_PARAM(6);
    }

    // Creates random source and destination matrices (optionally embedded in a
    // larger parent when useRoi is set) and uploads them to the UMat copies.
    void random_roi()
    {
        // Lower bound for the random ROI. Kept in a local so that `size`, which
        // carries the (dx, dy) test parameter, is not overwritten.
        const Size minSize(3, 3);

        Size roiSize = randomSize(minSize.width, MAX_VALUE, minSize.height, MAX_VALUE);
        // Clear the low four bits of the width (multiple of 16), at least 16.
        roiSize.width = std::max(minSize.width + 13, roiSize.width & (~0xf));
        // Clear the low bit of the height (even), at least 4.
        roiSize.height = std::max(minSize.height + 1, roiSize.height & (~0x1));

        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, roiSize, srcBorder, type, 5, 256);

        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -60, 70);

        UMAT_UPLOAD_INPUT_PARAMETER(src);
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst);
    }

    // Compares the two results with an absolute tolerance of 1.
    void Near()
    {
        Near(1, false);
    }

    // Compares the two results with the given tolerance, either relative or
    // absolute.
    void Near(double threshold, bool relative)
    {
        if (relative)
            OCL_EXPECT_MATS_NEAR_RELATIVE(dst, threshold);
        else
            OCL_EXPECT_MATS_NEAR(dst, threshold);
    }
};
249+
250+
typedef Deriv3x3_cols16_rows2_Base Sobel3x3_cols16_rows2;
251+
252+
OCL_TEST_P(Sobel3x3_cols16_rows2, Mat)
253+
{
254+
int dx = size.width, dy = size.height;
255+
double scale = param;
256+
257+
for (int j = 0; j < test_loop_times; j++)
258+
{
259+
random_roi();
260+
261+
OCL_OFF(cv::Sobel(src_roi, dst_roi, -1, dx, dy, ksize, scale, /* delta */0, borderType));
262+
OCL_ON(cv::Sobel(usrc_roi, udst_roi, -1, dx, dy, ksize, scale, /* delta */0, borderType));
263+
264+
Near();
265+
}
266+
}
267+
190268
/////////////////////////////////////////////////////////////////////////////////////////////////
191269
// Scharr
192270

@@ -208,6 +286,24 @@ OCL_TEST_P(ScharrTest, Mat)
208286
}
209287
}
210288

289+
typedef Deriv3x3_cols16_rows2_Base Scharr3x3_cols16_rows2;
290+
291+
OCL_TEST_P(Scharr3x3_cols16_rows2, Mat)
292+
{
293+
int dx = size.width, dy = size.height;
294+
double scale = param;
295+
296+
for (int j = 0; j < test_loop_times; j++)
297+
{
298+
random_roi();
299+
300+
OCL_OFF(cv::Scharr(src_roi, dst_roi, -1, dx, dy, scale, /* delta */ 0, borderType));
301+
OCL_ON(cv::Scharr(usrc_roi, udst_roi, -1, dx, dy, scale, /* delta */ 0, borderType));
302+
303+
Near();
304+
}
305+
}
306+
211307
/////////////////////////////////////////////////////////////////////////////////////////////////
212308
// GaussianBlur
213309

@@ -552,6 +648,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, SobelTest, Combine(
552648
Bool(),
553649
Values(1))); // not used
554650

651+
// Parameter sets for the Sobel3x3_cols16_rows2 test. The double parameter is
// forwarded to cv::Sobel as `scale` by the test body; cv::Sobel multiplies one
// of the derivative kernels by it, so 0.0 would zero the filter and make both
// outputs trivially equal. 1.0 keeps the kernels unscaled.
OCL_INSTANTIATE_TEST_CASE_P(Filter, Sobel3x3_cols16_rows2, Combine(
                            Values((MatType)CV_8UC1),
                            Values(3), // kernel size
                            Values(Size(1, 0), Size(1, 1), Size(2, 0), Size(2, 1)), // dx, dy
                            FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
                            Values(1.0), // kernel scale
                            Bool(), // roi or not
                            Values(1))); // width multiplier, not used
659+
555660
OCL_INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
556661
FILTER_TYPES,
557662
Values(0), // not used
@@ -561,6 +666,15 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, ScharrTest, Combine(
561666
Bool(),
562667
Values(1))); // not used
563668

669+
// Parameter sets for the Scharr3x3_cols16_rows2 test.
OCL_INSTANTIATE_TEST_CASE_P(Filter, Scharr3x3_cols16_rows2, Combine(
670+
FILTER_TYPES,
671+
Values(0), // kernel size, not used by the Scharr test
672+
Values(Size(0, 1), Size(1, 0)), // dx, dy
673+
FILTER_BORDER_SET_NO_WRAP_NO_ISOLATED,
674+
Values(1.0, 0.2), // kernel scale
675+
Bool(), // roi or not
676+
Values(1))); // width multiplier, not used
677+
564678
OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
565679
FILTER_TYPES,
566680
Values(3, 5), // kernel size

0 commit comments

Comments
 (0)