Skip to content

Commit 1283d62

Browse files
committed
ocl: Kernel::runProfiling()
1 parent d9ab314 commit 1283d62

File tree

2 files changed

+62
-15
lines changed

2 files changed

+62
-15
lines changed

modules/core/include/opencv2/core/ocl.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,12 @@ class CV_EXPORTS Kernel
573573
size_t localsize[], bool sync, const Queue& q=Queue());
574574
bool runTask(bool sync, const Queue& q=Queue());
575575

576+
/** @brief Similar to a synchronized run() call, but additionally returns the kernel execution time
577+
* A separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE)
578+
* @return Execution time in nanoseconds, or a negative number on error
579+
*/
580+
int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue());
581+
576582
size_t workGroupSize() const;
577583
size_t preferedWorkGroupSizeMultiple() const;
578584
bool compileWorkGroupSize(size_t wsz[]) const;

modules/core/src/ocl.cpp

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,6 +2094,9 @@ struct Kernel::Impl
20942094
release();
20952095
}
20962096

2097+
bool run(int dims, size_t _globalsize[], size_t _localsize[],
2098+
bool sync, int64* timeNS, const Queue& q);
2099+
20972100
~Impl()
20982101
{
20992102
if(handle)
@@ -2321,19 +2324,15 @@ int Kernel::set(int i, const KernelArg& arg)
23212324
return i+1;
23222325
}
23232326

2324-
23252327
bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
23262328
bool sync, const Queue& q)
23272329
{
2328-
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
2329-
2330-
if(!p || !p->handle || p->isInProgress)
2330+
if (!p)
23312331
return false;
23322332

2333-
cl_command_queue qq = getQueue(q);
23342333
size_t globalsize[CV_MAX_DIM] = {1,1,1};
23352334
size_t total = 1;
2336-
CV_Assert(_globalsize != 0);
2335+
CV_Assert(_globalsize != NULL);
23372336
for (int i = 0; i < dims; i++)
23382337
{
23392338
size_t val = _localsize ? _localsize[i] :
@@ -2345,29 +2344,60 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
23452344
globalsize[i] = divUp(_globalsize[i], (unsigned int)val) * val;
23462345
}
23472346
CV_Assert(total > 0);
2348-
if( p->haveTempDstUMats )
2347+
2348+
return p->run(dims, globalsize, _localsize, sync, NULL, q);
2349+
}
2350+
2351+
2352+
bool Kernel::Impl::run(int dims, size_t globalsize[], size_t localsize[],
2353+
bool sync, int64* timeNS, const Queue& q)
2354+
{
2355+
CV_INSTRUMENT_REGION_OPENCL_RUN(p->name.c_str());
2356+
2357+
if (!handle || isInProgress)
2358+
return false;
2359+
2360+
cl_command_queue qq = getQueue(q);
2361+
if (haveTempDstUMats)
2362+
sync = true;
2363+
if (timeNS)
23492364
sync = true;
23502365
cl_event asyncEvent = 0;
2351-
cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
2352-
NULL, globalsize, _localsize, 0, 0,
2353-
sync ? 0 : &asyncEvent);
2366+
cl_int retval = clEnqueueNDRangeKernel(qq, handle, (cl_uint)dims,
2367+
NULL, globalsize, localsize, 0, 0,
2368+
(sync && !timeNS) ? 0 : &asyncEvent);
23542369
#if CV_OPENCL_SHOW_RUN_ERRORS
23552370
if (retval != CL_SUCCESS)
23562371
{
23572372
printf("OpenCL program returns error: %d\n", retval);
23582373
fflush(stdout);
23592374
}
23602375
#endif
2361-
if( sync || retval != CL_SUCCESS )
2376+
if (sync || retval != CL_SUCCESS)
23622377
{
23632378
CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
2364-
p->cleanupUMats();
2379+
if (timeNS)
2380+
{
2381+
if (retval == CL_SUCCESS)
2382+
{
2383+
clWaitForEvents(1, &asyncEvent);
2384+
cl_ulong startTime, stopTime;
2385+
CV_Assert(CL_SUCCESS == clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_START, sizeof(startTime), &startTime, NULL));
2386+
CV_Assert(CL_SUCCESS == clGetEventProfilingInfo(asyncEvent, CL_PROFILING_COMMAND_END, sizeof(stopTime), &stopTime, NULL));
2387+
*timeNS = (int64)(stopTime - startTime);
2388+
}
2389+
else
2390+
{
2391+
*timeNS = -1;
2392+
}
2393+
}
2394+
cleanupUMats();
23652395
}
23662396
else
23672397
{
2368-
p->addref();
2369-
p->isInProgress = true;
2370-
CV_OclDbgAssert(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
2398+
addref();
2399+
isInProgress = true;
2400+
CV_OclDbgAssert(clSetEventCallback(asyncEvent, CL_COMPLETE, oclCleanupCallback, this) == CL_SUCCESS);
23712401
}
23722402
if (asyncEvent)
23732403
clReleaseEvent(asyncEvent);
@@ -2398,6 +2428,17 @@ bool Kernel::runTask(bool sync, const Queue& q)
23982428
return retval == CL_SUCCESS;
23992429
}
24002430

2431+
int64 Kernel::runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q_)
2432+
{
2433+
CV_Assert(p && p->handle && !p->isInProgress);
2434+
Queue q = q_.ptr() ? q_ : Queue::getDefault();
2435+
CV_Assert(q.ptr());
2436+
q.finish(); // call clFinish() on base queue
2437+
Queue profilingQueue = q.getProfilingQueue();
2438+
int64 timeNs = -1;
2439+
bool res = p->run(dims, globalsize, localsize, true, &timeNs, profilingQueue);
2440+
return res ? timeNs : -1;
2441+
}
24012442

24022443
size_t Kernel::workGroupSize() const
24032444
{

0 commit comments

Comments
 (0)