Skip to content

Commit 5ce38e5

Browse files
vpisarev authored and alalek committed
Merge pull request opencv#10223 from vpisarev:ocl_mac_fixes
* fixed OpenCL functions on Mac, so that the tests pass
* fixed compile warnings; temporarily disabled OCL branch of TV L1 optical flow on mac
* fixed other few warnings on macos
1 parent a3ec2ac commit 5ce38e5

File tree

4 files changed

+98
-10
lines changed

4 files changed

+98
-10
lines changed

modules/core/src/ocl.cpp

Lines changed: 84 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4172,13 +4172,13 @@ class AlignedDataPtr2D
41724172
size_t step_;
41734173

41744174
public:
4175-
AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment)
4175+
AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment, size_t extrabytes=0)
41764176
: size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
41774177
{
41784178
CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
4179-
if (((size_t)ptr_ & (alignment - 1)) != 0)
4179+
if (ptr == 0 || ((size_t)ptr_ & (alignment - 1)) != 0)
41804180
{
4181-
allocatedPtr_ = new uchar[size_ + alignment - 1];
4181+
allocatedPtr_ = new uchar[size_ + extrabytes + alignment - 1];
41824182
ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
41834183
if (readAccess)
41844184
{
@@ -4978,6 +4978,25 @@ class OpenCLAllocator : public MatAllocator
49784978
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
49794979
srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
49804980
}
4981+
#ifdef __APPLE__
4982+
else
4983+
{
4984+
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
4985+
size_t new_srcrawofs = srcrawofs & ~(padding-1);
4986+
size_t membuf_ofs = srcrawofs - new_srcrawofs;
4987+
AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_srcstep[0], new_srcstep[0],
4988+
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
4989+
uchar* ptr = alignedPtr.getAlignedPtr();
4990+
4991+
CV_Assert(new_srcstep[0] >= new_sz[0]);
4992+
total = alignSize(new_srcstep[0]*new_sz[1] + membuf_ofs, padding);
4993+
total = std::min(total, u->size - new_srcrawofs);
4994+
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
4995+
new_srcrawofs, total, ptr, 0, 0, 0));
4996+
for( size_t i = 0; i < new_sz[1]; i++ )
4997+
memcpy( (uchar*)dstptr + i*new_dststep[0], ptr + i*new_srcstep[0] + membuf_ofs, new_sz[0]);
4998+
}
4999+
#else
49815000
else
49825001
{
49835002
AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
@@ -4989,6 +5008,7 @@ class OpenCLAllocator : public MatAllocator
49895008
new_dststep[0], 0,
49905009
ptr, 0, 0, 0));
49915010
}
5011+
#endif
49925012
}
49935013
}
49945014

@@ -5095,6 +5115,30 @@ class OpenCLAllocator : public MatAllocator
50955115
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
50965116
dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0));
50975117
}
5118+
#ifdef __APPLE__
5119+
else
5120+
{
5121+
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5122+
size_t new_dstrawofs = dstrawofs & ~(padding-1);
5123+
size_t membuf_ofs = dstrawofs - new_dstrawofs;
5124+
AlignedDataPtr2D<false, false> alignedPtr(0, new_sz[1], new_dststep[0], new_dststep[0],
5125+
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5126+
uchar* ptr = alignedPtr.getAlignedPtr();
5127+
5128+
CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
5129+
total = alignSize(new_dststep[0]*new_sz[1] + membuf_ofs, padding);
5130+
total = std::min(total, u->size - new_dstrawofs);
5131+
/*printf("new_sz0=%d, new_sz1=%d, membuf_ofs=%d, total=%d (%08x), new_dstrawofs=%d (%08x)\n",
5132+
(int)new_sz[0], (int)new_sz[1], (int)membuf_ofs,
5133+
(int)total, (int)total, (int)new_dstrawofs, (int)new_dstrawofs);*/
5134+
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
5135+
new_dstrawofs, total, ptr, 0, 0, 0));
5136+
for( size_t i = 0; i < new_sz[1]; i++ )
5137+
memcpy( ptr + i*new_dststep[0] + membuf_ofs, (uchar*)srcptr + i*new_srcstep[0], new_sz[0]);
5138+
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
5139+
new_dstrawofs, total, ptr, 0, 0, 0));
5140+
}
5141+
#else
50985142
else
50995143
{
51005144
AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
@@ -5106,6 +5150,7 @@ class OpenCLAllocator : public MatAllocator
51065150
new_srcstep[0], 0,
51075151
ptr, 0, 0, 0));
51085152
}
5153+
#endif
51095154
}
51105155
u->markHostCopyObsolete(true);
51115156
#ifdef HAVE_OPENCL_SVM
@@ -5247,6 +5292,41 @@ class OpenCLAllocator : public MatAllocator
52475292
CV_OCL_CHECK(retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
52485293
srcrawofs, dstrawofs, total, 0, 0, 0));
52495294
}
5295+
#ifdef __APPLE__
5296+
else
5297+
{
5298+
const size_t padding = CV_OPENCL_DATA_PTR_ALIGNMENT;
5299+
size_t new_srcrawofs = srcrawofs & ~(padding-1);
5300+
size_t srcmembuf_ofs = srcrawofs - new_srcrawofs;
5301+
size_t new_dstrawofs = dstrawofs & ~(padding-1);
5302+
size_t dstmembuf_ofs = dstrawofs - new_dstrawofs;
5303+
5304+
AlignedDataPtr2D<false, false> srcBuf(0, new_sz[1], new_srcstep[0], new_srcstep[0],
5305+
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5306+
AlignedDataPtr2D<false, false> dstBuf(0, new_sz[1], new_dststep[0], new_dststep[0],
5307+
CV_OPENCL_DATA_PTR_ALIGNMENT, padding*2);
5308+
uchar* srcptr = srcBuf.getAlignedPtr();
5309+
uchar* dstptr = dstBuf.getAlignedPtr();
5310+
5311+
CV_Assert(new_dststep[0] >= new_sz[0] && new_srcstep[0] >= new_sz[0]);
5312+
5313+
size_t src_total = alignSize(new_srcstep[0]*new_sz[1] + srcmembuf_ofs, padding);
5314+
src_total = std::min(src_total, src->size - new_srcrawofs);
5315+
size_t dst_total = alignSize(new_dststep[0]*new_sz[1] + dstmembuf_ofs, padding);
5316+
dst_total = std::min(dst_total, dst->size - new_dstrawofs);
5317+
5318+
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)src->handle, CL_TRUE,
5319+
new_srcrawofs, src_total, srcptr, 0, 0, 0));
5320+
CV_OCL_CHECK(clEnqueueReadBuffer(q, (cl_mem)dst->handle, CL_TRUE,
5321+
new_dstrawofs, dst_total, dstptr, 0, 0, 0));
5322+
5323+
for( size_t i = 0; i < new_sz[1]; i++ )
5324+
memcpy( dstptr + dstmembuf_ofs + i*new_dststep[0],
5325+
srcptr + srcmembuf_ofs + i*new_srcstep[0], new_sz[0]);
5326+
CV_OCL_CHECK(clEnqueueWriteBuffer(q, (cl_mem)dst->handle, CL_TRUE,
5327+
new_dstrawofs, dst_total, dstptr, 0, 0, 0));
5328+
}
5329+
#else
52505330
else
52515331
{
52525332
CV_OCL_CHECK(retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
@@ -5255,6 +5335,7 @@ class OpenCLAllocator : public MatAllocator
52555335
new_dststep[0], 0,
52565336
0, 0, 0));
52575337
}
5338+
#endif
52585339
}
52595340
if (retval == CL_SUCCESS)
52605341
{

modules/core/src/stat.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3359,6 +3359,11 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr
33593359
normType &= ~NORM_RELATIVE;
33603360
bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
33613361

3362+
#ifdef __APPLE__
3363+
if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty())
3364+
return false;
3365+
#endif
3366+
33623367
if (normsum)
33633368
{
33643369
if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?

modules/imgproc/src/morph.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1403,6 +1403,7 @@ void morph(int op, int src_type, int dst_type,
14031403

14041404
#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
14051405

1406+
#ifndef __APPLE__
14061407
static bool ocl_morph3x3_8UC1( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor,
14071408
int op, int actual_op = -1, InputArray _extraMat = noArray())
14081409
{
@@ -1628,16 +1629,15 @@ static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kerne
16281629
}
16291630

16301631
return kernel.run(2, globalsize, NULL, false);
1631-
16321632
}
1633+
#endif
16331634

16341635
static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
16351636
Point anchor, int iterations, int op, int borderType,
16361637
const Scalar &, int actual_op = -1, InputArray _extraMat = noArray())
16371638
{
16381639
const ocl::Device & dev = ocl::Device::getDefault();
1639-
int type = _src.type(), depth = CV_MAT_DEPTH(type),
1640-
cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
1640+
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
16411641
Mat kernel = _kernel.getMat();
16421642
Size ksize = !kernel.empty() ? kernel.size() : Size(3, 3), ssize = _src.size();
16431643

@@ -1664,14 +1664,13 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
16641664
iterations = 1;
16651665
}
16661666

1667+
#ifndef __APPLE__
1668+
int esz = CV_ELEM_SIZE(type);
16671669
// try to use OpenCL kernel adapted for small morph kernel
1668-
if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) &&
1670+
if (dev.isIntel() &&
16691671
((ksize.width < 5 && ksize.height < 5 && esz <= 4) ||
16701672
(ksize.width == 5 && ksize.height == 5 && cn == 1)) &&
16711673
(iterations == 1)
1672-
#if defined __APPLE__
1673-
&& cn == 1
1674-
#endif
16751674
)
16761675
{
16771676
if (ocl_morph3x3_8UC1(_src, _dst, kernel, anchor, op, actual_op, _extraMat))
@@ -1680,6 +1679,7 @@ static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
16801679
if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
16811680
return true;
16821681
}
1682+
#endif
16831683

16841684
if (iterations == 0 || kernel.rows*kernel.cols == 1)
16851685
{

modules/video/src/tvl1flow.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,9 +392,11 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray
392392
{
393393
CV_INSTRUMENT_REGION()
394394

395+
#ifndef __APPLE__
395396
CV_OCL_RUN(_flow.isUMat() &&
396397
ocl::Image2D::isFormatSupported(CV_32F, 1, false),
397398
calc_ocl(_I0, _I1, _flow))
399+
#endif
398400

399401
Mat I0 = _I0.getMat();
400402
Mat I1 = _I1.getMat();

0 commit comments

Comments
 (0)