Skip to content

Commit ad70ab4

Browse files
committed
ocl: workaround for getUMat()
1 parent cea2daf commit ad70ab4

File tree

6 files changed

+159
-43
lines changed

6 files changed

+159
-43
lines changed

modules/core/include/opencv2/core/mat.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ struct CV_EXPORTS UMatData
497497
void* userdata;
498498
int allocatorFlags_;
499499
int mapcount;
500+
UMatData* originalUMatData;
500501
};
501502

502503

modules/core/src/matrix.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -208,17 +208,14 @@ class StdMatAllocator : public MatAllocator
208208
if(!u)
209209
return;
210210

211-
CV_Assert(u->urefcount >= 0);
212-
CV_Assert(u->refcount >= 0);
213-
if(u->refcount == 0)
211+
CV_Assert(u->urefcount == 0);
212+
CV_Assert(u->refcount == 0);
213+
if( !(u->flags & UMatData::USER_ALLOCATED) )
214214
{
215-
if( !(u->flags & UMatData::USER_ALLOCATED) )
216-
{
217-
fastFree(u->origdata);
218-
u->origdata = 0;
219-
}
220-
delete u;
215+
fastFree(u->origdata);
216+
u->origdata = 0;
221217
}
218+
delete u;
222219
}
223220
};
224221

modules/core/src/ocl.cpp

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4453,8 +4453,11 @@ class OpenCLAllocator : public MatAllocator
44534453
#endif
44544454
{
44554455
tempUMatFlags = UMatData::TEMP_UMAT;
4456-
handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
4457-
u->size, u->origdata, &retval);
4456+
if (u->origdata == cv::alignPtr(u->origdata, 4)) // There are OpenCL runtime issues for less aligned data
4457+
{
4458+
handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
4459+
u->size, u->origdata, &retval);
4460+
}
44584461
if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
44594462
{
44604463
handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
@@ -4510,17 +4513,17 @@ class OpenCLAllocator : public MatAllocator
45104513
if(!u)
45114514
return;
45124515

4513-
CV_Assert(u->urefcount >= 0);
4514-
CV_Assert(u->refcount >= 0);
4516+
CV_Assert(u->urefcount == 0);
4517+
CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
45154518

4516-
CV_Assert(u->handle != 0 && u->urefcount == 0);
4519+
CV_Assert(u->handle != 0);
45174520
CV_Assert(u->mapcount == 0);
45184521
if(u->tempUMat())
45194522
{
45204523
CV_Assert(u->origdata);
45214524
// UMatDataAutoLock lock(u);
45224525

4523-
if( u->hostCopyObsolete() && u->refcount > 0 )
4526+
if (u->hostCopyObsolete())
45244527
{
45254528
#ifdef HAVE_OPENCL_SVM
45264529
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
@@ -4579,14 +4582,23 @@ class OpenCLAllocator : public MatAllocator
45794582
void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
45804583
(CL_MAP_READ | CL_MAP_WRITE),
45814584
0, u->size, 0, 0, 0, &retval);
4585+
CV_Assert(u->origdata == data);
45824586
CV_OclDbgAssert(retval == CL_SUCCESS);
4587+
if (u->originalUMatData)
4588+
{
4589+
CV_Assert(u->originalUMatData->data == data);
4590+
}
45834591
CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
45844592
CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
45854593
}
45864594
}
45874595
}
45884596
u->markHostCopyObsolete(false);
45894597
}
4598+
else
4599+
{
4600+
// nothing
4601+
}
45904602
#ifdef HAVE_OPENCL_SVM
45914603
if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
45924604
{
@@ -4612,16 +4624,12 @@ class OpenCLAllocator : public MatAllocator
46124624
if(u->data && u->copyOnMap() && u->data != u->origdata)
46134625
fastFree(u->data);
46144626
u->data = u->origdata;
4615-
if(u->refcount == 0)
4616-
{
4617-
u->currAllocator->deallocate(u);
4618-
u = NULL;
4619-
}
4627+
u->currAllocator->deallocate(u);
4628+
u = NULL;
46204629
}
46214630
else
46224631
{
46234632
CV_Assert(u->origdata == NULL);
4624-
CV_Assert(u->refcount == 0);
46254633
if(u->data && u->copyOnMap() && u->data != u->origdata)
46264634
{
46274635
fastFree(u->data);
@@ -4670,17 +4678,13 @@ class OpenCLAllocator : public MatAllocator
46704678
delete u;
46714679
u = NULL;
46724680
}
4673-
CV_Assert(u == NULL || u->refcount);
4681+
CV_Assert(u == NULL);
46744682
}
46754683

4684+
// synchronized call (external UMatDataAutoLock, see UMat::getMat)
46764685
void map(UMatData* u, int accessFlags) const
46774686
{
4678-
if(!u)
4679-
return;
4680-
4681-
CV_Assert( u->handle != 0 );
4682-
4683-
UMatDataAutoLock autolock(u);
4687+
CV_Assert(u && u->handle);
46844688

46854689
if(accessFlags & ACCESS_WRITE)
46864690
u->markDeviceCopyObsolete(true);

modules/core/src/umatrix.cpp

Lines changed: 82 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ UMatData::UMatData(const MatAllocator* allocator)
6767
handle = 0;
6868
userdata = 0;
6969
allocatorFlags_ = 0;
70+
originalUMatData = NULL;
7071
}
7172

7273
UMatData::~UMatData()
@@ -80,6 +81,50 @@ UMatData::~UMatData()
8081
handle = 0;
8182
userdata = 0;
8283
allocatorFlags_ = 0;
84+
if (originalUMatData)
85+
{
86+
UMatData* u = originalUMatData;
87+
CV_XADD(&(u->urefcount), -1);
88+
CV_XADD(&(u->refcount), -1);
89+
bool showWarn = false;
90+
if (u->refcount == 0)
91+
{
92+
if (u->urefcount > 0)
93+
showWarn = true;
94+
// simulate Mat::deallocate
95+
if (u->mapcount != 0)
96+
{
97+
(u->currAllocator ? u->currAllocator : /* TODO allocator ? allocator :*/ Mat::getStdAllocator())->unmap(u);
98+
}
99+
else
100+
{
101+
// we don't do "map", so we can't do "unmap"
102+
}
103+
}
104+
if (u->refcount == 0 && u->urefcount == 0) // oops, we need to free resources
105+
{
106+
showWarn = true;
107+
// simulate UMat::deallocate
108+
u->currAllocator->deallocate(u);
109+
}
110+
#ifndef NDEBUG
111+
if (showWarn)
112+
{
113+
static int warn_message_showed = 0;
114+
if (warn_message_showed++ < 100)
115+
{
116+
fflush(stdout);
117+
fprintf(stderr, "\n! OPENCV warning: getUMat()/getMat() call chain possible problem."
118+
"\n! Base object is dead, while nested/derived object is still alive or processed."
119+
"\n! Please check lifetime of UMat/Mat objects!\n");
120+
fflush(stderr);
121+
}
122+
}
123+
#else
124+
(void)showWarn;
125+
#endif
126+
originalUMatData = NULL;
127+
}
83128
}
84129

85130
void UMatData::lock()
@@ -222,35 +267,61 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
222267
UMat hdr;
223268
if(!data)
224269
return hdr;
225-
CV_Assert((!u || u->mapcount==0) && "Don't get UMat from temp-Mat!");
270+
Size wholeSize;
271+
Point ofs;
272+
locateROI(wholeSize, ofs);
273+
Size sz(cols, rows);
274+
if (ofs.x != 0 || ofs.y != 0)
275+
{
276+
Mat src = *this;
277+
int dtop = ofs.y;
278+
int dbottom = wholeSize.height - src.rows - ofs.y;
279+
int dleft = ofs.x;
280+
int dright = wholeSize.width - src.cols - ofs.x;
281+
src.adjustROI(dtop, dbottom, dleft, dright);
282+
return src.getUMat(accessFlags, usageFlags)(cv::Rect(ofs.x, ofs.y, sz.width, sz.height));
283+
}
284+
CV_Assert(data == datastart);
285+
226286
accessFlags |= ACCESS_RW;
227-
UMatData* temp_u = u;
228-
if(!temp_u)
287+
UMatData* new_u = NULL;
229288
{
230289
MatAllocator *a = allocator, *a0 = getStdAllocator();
231290
if(!a)
232291
a = a0;
233-
temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
292+
new_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
234293
}
235294
bool allocated = false;
236295
try
237296
{
238-
allocated = UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
297+
allocated = UMat::getStdAllocator()->allocate(new_u, accessFlags, usageFlags);
239298
}
240299
catch (const cv::Exception& e)
241300
{
242301
fprintf(stderr, "Exception: %s\n", e.what());
243302
}
244303
if (!allocated)
245304
{
246-
allocated = getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
305+
allocated = getStdAllocator()->allocate(new_u, accessFlags, usageFlags);
247306
CV_Assert(allocated);
248307
}
308+
if (u != NULL)
309+
{
310+
#ifdef HAVE_OPENCL
311+
if (ocl::useOpenCL() && new_u->currAllocator == ocl::getOpenCLAllocator())
312+
{
313+
CV_Assert(new_u->tempUMat());
314+
}
315+
#endif
316+
new_u->originalUMatData = u;
317+
CV_XADD(&(u->refcount), 1);
318+
CV_XADD(&(u->urefcount), 1);
319+
}
249320
hdr.flags = flags;
250321
setSize(hdr, dims, size.p, step.p);
251322
finalizeHdr(hdr);
252-
hdr.u = temp_u;
253-
hdr.offset = data - datastart;
323+
hdr.u = new_u;
324+
hdr.offset = 0; //data - datastart;
254325
hdr.addref();
255326
return hdr;
256327
}
@@ -639,7 +710,6 @@ Mat UMat::getMat(int accessFlags) const
639710
{
640711
if(!u)
641712
return Mat();
642-
CV_Assert(!u->tempUMat() && "Don't get Mat from temp UMat! Use copyTo().");
643713
// TODO Support ACCESS_READ (ACCESS_WRITE) without unnecessary data transfers
644714
accessFlags |= ACCESS_RW;
645715
UMatDataAutoLock autolock(u);
@@ -668,10 +738,10 @@ void* UMat::handle(int accessFlags) const
668738
if( !u )
669739
return 0;
670740

671-
// check flags: if CPU copy is newer, copy it back to GPU.
672-
if( u->deviceCopyObsolete() )
741+
CV_Assert(u->refcount == 0);
742+
CV_Assert(!u->deviceCopyObsolete() || u->copyOnMap());
743+
if (u->deviceCopyObsolete())
673744
{
674-
CV_Assert(u->refcount == 0 || u->origdata);
675745
u->currAllocator->unmap(u);
676746
}
677747

modules/core/test/test_umat.cpp

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ TEST_P(UMatBasicTests, GetUMat)
263263
}
264264
}
265265

266-
INSTANTIATE_TEST_CASE_P(UMat, UMatBasicTests, Combine(testing::Values(CV_8U), testing::Values(1, 2),
266+
INSTANTIATE_TEST_CASE_P(UMat, UMatBasicTests, Combine(testing::Values(CV_8U, CV_64F), testing::Values(1, 2),
267267
testing::Values(cv::Size(1, 1), cv::Size(1, 128), cv::Size(128, 1), cv::Size(128, 128), cv::Size(640, 480)), Bool()));
268268

269269
//////////////////////////////////////////////////////////////// Reshape ////////////////////////////////////////////////////////////////////////
@@ -1084,7 +1084,7 @@ TEST(UMat, unmap_in_class)
10841084
Mat dst;
10851085
m.convertTo(dst, CV_32FC1);
10861086
// some additional CPU-based per-pixel processing into dst
1087-
intermediateResult = dst.getUMat(ACCESS_READ);
1087+
intermediateResult = dst.getUMat(ACCESS_READ); // this violates lifetime of base(dst) / derived (intermediateResult) objects. Use copyTo?
10881088
std::cout << "data processed..." << std::endl;
10891089
} // problem is here: dst::~Mat()
10901090
std::cout << "leave ProcessData()" << std::endl;
@@ -1285,4 +1285,48 @@ TEST(UMat, mat_umat_sync)
12851285
ASSERT_EQ(0, countNonZero(uDiff));
12861286
}
12871287

1288+
TEST(UMat, testTempObjects_UMat)
1289+
{
1290+
UMat u(10, 10, CV_8UC1, Scalar(1));
1291+
{
1292+
UMat u2 = u.getMat(ACCESS_RW).getUMat(ACCESS_RW);
1293+
u2.setTo(Scalar(255));
1294+
}
1295+
1296+
UMat uDiff;
1297+
compare(u, 255, uDiff, CMP_NE);
1298+
ASSERT_EQ(0, countNonZero(uDiff));
1299+
}
1300+
1301+
TEST(UMat, testTempObjects_Mat)
1302+
{
1303+
Mat m(10, 10, CV_8UC1, Scalar(1));
1304+
{
1305+
Mat m2;
1306+
ASSERT_ANY_THROW(m2 = m.getUMat(ACCESS_RW).getMat(ACCESS_RW));
1307+
}
1308+
}
1309+
1310+
TEST(UMat, testWrongLifetime_UMat)
1311+
{
1312+
UMat u(10, 10, CV_8UC1, Scalar(1));
1313+
{
1314+
UMat u2 = u.getMat(ACCESS_RW).getUMat(ACCESS_RW);
1315+
u.release(); // base object
1316+
u2.release(); // derived object, should show warning message
1317+
}
1318+
}
1319+
1320+
TEST(UMat, testWrongLifetime_Mat)
1321+
{
1322+
Mat m(10, 10, CV_8UC1, Scalar(1));
1323+
{
1324+
UMat u = m.getUMat(ACCESS_RW);
1325+
Mat m2 = u.getMat(ACCESS_RW);
1326+
m.release(); // base object
1327+
m2.release(); // map of derived object
1328+
u.release(); // derived object, should show warning message
1329+
}
1330+
}
1331+
12881332
} } // namespace cvtest::ocl

modules/video/src/lkpyramid.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,7 @@ namespace cv
10091009
idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
10101010
idxArg = kernel.set(idxArg, (int)iters); // int c_iters
10111011
idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
1012-
return kernel.run(2, globalThreads, localThreads, false);
1012+
return kernel.run(2, globalThreads, localThreads, true); // sync=true because ocl::Image2D lifetime is not handled well for temp UMat
10131013
}
10141014
private:
10151015
inline static bool isDeviceCPU()

0 commit comments

Comments
 (0)