Skip to content

Commit cc7f9f5

Browse files
committed
Fix an issue with Kernel object reset/release on consecutive Kernel::run calls
Kernel::run launches OCL GPU kernels and sets an event callback function to decrease the ref count of UMat or remove UMat when the launched workloads are completed. However, some OCL kernels require multiple calls of the Kernel::run function with some kernel parameter changes (e.g., input and output buffer offset) to get the final computation result. In that case, the current implementation requires unnecessary synchronization and cleanupUMats. This fix requires the user to specify whether there will be more work or not. If there is no remaining computation, Kernel::run will reset the kernel object. Signed-off-by: Woo, Insoo <insoo.woo@intel.com>
1 parent 12569dc commit cc7f9f5

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

modules/core/include/opencv2/core/ocl.hpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,9 +578,17 @@ class CV_EXPORTS Kernel
578578
@param localsize work-group size for each dimension.
579579
@param sync specify whether to wait for OpenCL computation to finish before return.
580580
@param q command queue
581+
@param moreWorkDone specify whether there will be remaining work to be computed (more Kernel::run calls).
582+
Use this when a computation requires multiple kernel executions with changing input and output buffer offsets to get
583+
the final computation results.
584+
kernel.setArg(0, ..);
585+
kernel.setArg(1, offset);
586+
kernel.run(..., q, true);
587+
kernel.setArg(1, offset+256);
588+
kernel.run(..., q, false);
581589
*/
582590
bool run(int dims, size_t globalsize[],
583-
size_t localsize[], bool sync, const Queue& q=Queue());
591+
size_t localsize[], bool sync, const Queue& q=Queue(), bool moreWorkDone = false);
584592
bool runTask(bool sync, const Queue& q=Queue());
585593

586594
size_t workGroupSize() const;

modules/core/src/ocl.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3185,7 +3185,7 @@ struct Kernel::Impl
31853185

31863186
void cleanupUMats()
31873187
{
3188-
for( int i = 0; i < MAX_ARRS; i++ )
3188+
for( int i = 0; i < nu; i++ )
31893189
if( u[i] )
31903190
{
31913191
if( CV_XADD(&u[i]->urefcount, -1) == 1 )
@@ -3446,9 +3446,8 @@ int Kernel::set(int i, const KernelArg& arg)
34463446
return i+1;
34473447
}
34483448

3449-
34503449
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
3451-
bool sync, const Queue& q)
3450+
bool sync, const Queue& q, bool moreWorkDone)
34523451
{
34533452
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
34543453

@@ -3469,11 +3468,11 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
34693468
}
34703469
if( total == 0 )
34713470
return true;
3472-
if( p->haveTempDstUMats )
3471+
if( p->haveTempDstUMats && !moreWorkDone)
34733472
sync = true;
34743473
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
34753474
offset, globalsize, _localsize, 0, 0,
3476-
sync ? 0 : &p->e);
3475+
sync ? 0 : (moreWorkDone? 0: &p->e ));
34773476
#if CV_OPENCL_SHOW_RUN_ERRORS
34783477
if (retval != CL_SUCCESS)
34793478
{
@@ -3484,9 +3483,9 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
34843483
if( sync || retval != CL_SUCCESS )
34853484
{
34863485
CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
3487-
p->cleanupUMats();
3486+
if (!moreWorkDone) p->cleanupUMats();
34883487
}
3489-
else
3488+
else if (!moreWorkDone)
34903489
{
34913490
p->addref();
34923491
CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);

0 commit comments

Comments
 (0)