|
51 | 51 | #include <inttypes.h>
|
52 | 52 | #endif
|
53 | 53 |
|
| 54 | +#include <opencv2/core/utils/configuration.private.hpp> |
| 55 | + |
54 | 56 | #include "opencv2/core/ocl_genbase.hpp"
|
| 57 | +#include "opencl_kernels_core.hpp" |
55 | 58 |
|
56 | 59 | #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
|
57 | 60 | #define CV_OPENCL_SHOW_RUN_ERRORS 0
|
@@ -4718,6 +4721,102 @@ const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
|
4718 | 4721 | return buf;
|
4719 | 4722 | }
|
4720 | 4723 |
|
/** @brief Maps a numeric OpenCL / clBLAS status code to a human-readable string.
 *
 * Codes in the range 0..-72 are the core OpenCL status codes, -1000..-1005 are
 * Khronos extension codes and -1024..-1007 are clBLAS status codes.
 *
 * @param errorCode status value returned by an OpenCL or clBLAS call.
 * @return pointer to a string literal (never NULL, must not be freed);
 *         "Unknown OpenCL error" for codes that are not in the table.
 */
const char* getOpenCLErrorString(int errorCode)
{
    switch (errorCode)
    {
    // --- core OpenCL runtime errors ---
    case 0: return "CL_SUCCESS";
    case -1: return "CL_DEVICE_NOT_FOUND";
    case -2: return "CL_DEVICE_NOT_AVAILABLE";
    case -3: return "CL_COMPILER_NOT_AVAILABLE";
    case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
    case -5: return "CL_OUT_OF_RESOURCES";
    case -6: return "CL_OUT_OF_HOST_MEMORY";
    case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
    case -8: return "CL_MEM_COPY_OVERLAP";
    case -9: return "CL_IMAGE_FORMAT_MISMATCH";
    case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
    case -11: return "CL_BUILD_PROGRAM_FAILURE";
    case -12: return "CL_MAP_FAILURE";
    case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
    case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
    case -15: return "CL_COMPILE_PROGRAM_FAILURE";
    case -16: return "CL_LINKER_NOT_AVAILABLE";
    case -17: return "CL_LINK_PROGRAM_FAILURE";
    case -18: return "CL_DEVICE_PARTITION_FAILED";
    case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
    // --- core OpenCL argument validation errors ---
    case -30: return "CL_INVALID_VALUE";
    case -31: return "CL_INVALID_DEVICE_TYPE";
    case -32: return "CL_INVALID_PLATFORM";
    case -33: return "CL_INVALID_DEVICE";
    case -34: return "CL_INVALID_CONTEXT";
    case -35: return "CL_INVALID_QUEUE_PROPERTIES";
    case -36: return "CL_INVALID_COMMAND_QUEUE";
    case -37: return "CL_INVALID_HOST_PTR";
    case -38: return "CL_INVALID_MEM_OBJECT";
    case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
    case -40: return "CL_INVALID_IMAGE_SIZE";
    case -41: return "CL_INVALID_SAMPLER";
    case -42: return "CL_INVALID_BINARY";
    case -43: return "CL_INVALID_BUILD_OPTIONS";
    case -44: return "CL_INVALID_PROGRAM";
    case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
    case -46: return "CL_INVALID_KERNEL_NAME";
    case -47: return "CL_INVALID_KERNEL_DEFINITION";
    case -48: return "CL_INVALID_KERNEL";
    case -49: return "CL_INVALID_ARG_INDEX";
    case -50: return "CL_INVALID_ARG_VALUE";
    case -51: return "CL_INVALID_ARG_SIZE";
    case -52: return "CL_INVALID_KERNEL_ARGS";
    case -53: return "CL_INVALID_WORK_DIMENSION";
    case -54: return "CL_INVALID_WORK_GROUP_SIZE";
    case -55: return "CL_INVALID_WORK_ITEM_SIZE";
    case -56: return "CL_INVALID_GLOBAL_OFFSET";
    case -57: return "CL_INVALID_EVENT_WAIT_LIST";
    case -58: return "CL_INVALID_EVENT";
    case -59: return "CL_INVALID_OPERATION";
    case -60: return "CL_INVALID_GL_OBJECT";
    case -61: return "CL_INVALID_BUFFER_SIZE";
    case -62: return "CL_INVALID_MIP_LEVEL";
    case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
    case -64: return "CL_INVALID_PROPERTY";
    case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
    case -66: return "CL_INVALID_COMPILER_OPTIONS";
    case -67: return "CL_INVALID_LINKER_OPTIONS";
    case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
    case -69: return "CL_INVALID_PIPE_SIZE";
    case -70: return "CL_INVALID_DEVICE_QUEUE";
    // Added in OpenCL 2.2.
    case -71: return "CL_INVALID_SPEC_ID";
    case -72: return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
    // --- Khronos extension errors ---
    case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
    case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
    case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
    case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
    case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
    case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
    // --- clBLAS status codes ---
    // NOTE(review): -1007..-1009 appear to be shared with the
    // cl_khr_d3d11_sharing error codes; the clBLAS meaning is kept here -
    // confirm which one callers of this function expect.
    case -1024: return "clBLAS: Functionality is not implemented";
    case -1023: return "clBLAS: Library is not initialized yet";
    case -1022: return "clBLAS: Matrix A is not a valid memory object";
    case -1021: return "clBLAS: Matrix B is not a valid memory object";
    case -1020: return "clBLAS: Matrix C is not a valid memory object";
    case -1019: return "clBLAS: Vector X is not a valid memory object";
    case -1018: return "clBLAS: Vector Y is not a valid memory object";
    case -1017: return "clBLAS: An input dimension (M:N:K) is invalid";
    case -1016: return "clBLAS: Leading dimension A must not be less than the "
                       "size of the first dimension";
    case -1015: return "clBLAS: Leading dimension B must not be less than the "
                       "size of the second dimension";
    case -1014: return "clBLAS: Leading dimension C must not be less than the "
                       "size of the third dimension";
    case -1013: return "clBLAS: The increment for a vector X must not be 0";
    case -1012: return "clBLAS: The increment for a vector Y must not be 0";
    case -1011: return "clBLAS: The memory object for Matrix A is too small";
    case -1010: return "clBLAS: The memory object for Matrix B is too small";
    case -1009: return "clBLAS: The memory object for Matrix C is too small";
    case -1008: return "clBLAS: The memory object for Vector X is too small";
    case -1007: return "clBLAS: The memory object for Vector Y is too small";
    default: return "Unknown OpenCL error";
    }
}

| 4819 | + |
4721 | 4820 | template <typename T>
|
4722 | 4821 | static std::string kerToStr(const Mat & k)
|
4723 | 4822 | {
|
@@ -5134,4 +5233,175 @@ bool internal::isCLBuffer(UMat& u)
|
5134 | 5233 | return true;
|
5135 | 5234 | }
|
5136 | 5235 |
|
| 5236 | +struct Timer::Impl |
| 5237 | +{ |
| 5238 | + const Queue queue; |
| 5239 | + |
| 5240 | + Impl(const Queue& q) |
| 5241 | + : queue(q) |
| 5242 | + , initted_(false) |
| 5243 | + , running_(false) |
| 5244 | + , has_run_at_least_once_(false) |
| 5245 | + { |
| 5246 | + init(); |
| 5247 | + } |
| 5248 | + |
| 5249 | + ~Impl() |
| 5250 | + { |
| 5251 | + clWaitForEvents(1, &start_gpu_cl_); |
| 5252 | + clWaitForEvents(1, &stop_gpu_cl_); |
| 5253 | + clReleaseEvent(start_gpu_cl_); |
| 5254 | + clReleaseEvent(stop_gpu_cl_); |
| 5255 | + } |
| 5256 | + |
| 5257 | + void start() |
| 5258 | + { |
| 5259 | +#ifdef HAVE_OPENCL |
| 5260 | + if (!running()) |
| 5261 | + { |
| 5262 | + clWaitForEvents(1, &start_gpu_cl_); |
| 5263 | + clReleaseEvent(start_gpu_cl_); |
| 5264 | + ocl::Kernel kernel("null_kernel_float", ocl::core::benchmark_oclsrc); |
| 5265 | + float arg = 0; |
| 5266 | + clSetKernelArg((cl_kernel)kernel.ptr(), 0, sizeof(arg), &arg); |
| 5267 | + clEnqueueTask((cl_command_queue)queue.ptr(), (cl_kernel)kernel.ptr(), 0, |
| 5268 | + NULL, &start_gpu_cl_); |
| 5269 | + clFinish((cl_command_queue)queue.ptr()); |
| 5270 | + running_ = true; |
| 5271 | + has_run_at_least_once_ = true; |
| 5272 | + } |
| 5273 | +#endif |
| 5274 | + } |
| 5275 | + |
| 5276 | + void stop() |
| 5277 | + { |
| 5278 | +#ifdef HAVE_OPENCL |
| 5279 | + if (running()) |
| 5280 | + { |
| 5281 | + clWaitForEvents(1, &stop_gpu_cl_); |
| 5282 | + clReleaseEvent(stop_gpu_cl_); |
| 5283 | + ocl::Kernel kernel("null_kernel_float", ocl::core::benchmark_oclsrc); |
| 5284 | + float arg = 0; |
| 5285 | + clSetKernelArg((cl_kernel)kernel.ptr(), 0, sizeof(arg), &arg); |
| 5286 | + clEnqueueTask((cl_command_queue)queue.ptr(), (cl_kernel)kernel.ptr(), 0, |
| 5287 | + NULL, &stop_gpu_cl_); |
| 5288 | + clFinish((cl_command_queue)queue.ptr()); |
| 5289 | + running_ = false; |
| 5290 | + } |
| 5291 | +#endif |
| 5292 | + } |
| 5293 | + |
| 5294 | + float microSeconds() |
| 5295 | + { |
| 5296 | +#ifdef HAVE_OPENCL |
| 5297 | + if (!has_run_at_least_once()) |
| 5298 | + { |
| 5299 | + return 0; |
| 5300 | + } |
| 5301 | + if (running()) |
| 5302 | + { |
| 5303 | + stop(); |
| 5304 | + } |
| 5305 | + cl_ulong startTime, stopTime; |
| 5306 | + clWaitForEvents(1, &stop_gpu_cl_); |
| 5307 | + clGetEventProfilingInfo(start_gpu_cl_, CL_PROFILING_COMMAND_END, |
| 5308 | + sizeof startTime, &startTime, NULL); |
| 5309 | + clGetEventProfilingInfo(stop_gpu_cl_, CL_PROFILING_COMMAND_START, |
| 5310 | + sizeof stopTime, &stopTime, NULL); |
| 5311 | + double us = static_cast<double>(stopTime - startTime) / 1000.0; |
| 5312 | + elapsed_microseconds_ = static_cast<float>(us); |
| 5313 | + return elapsed_microseconds_; |
| 5314 | +#else |
| 5315 | + return 0; |
| 5316 | +#endif |
| 5317 | + } |
| 5318 | + |
| 5319 | + float milliSeconds() |
| 5320 | + { |
| 5321 | +#ifdef HAVE_OPENCL |
| 5322 | + if (!has_run_at_least_once()) |
| 5323 | + { |
| 5324 | + return 0; |
| 5325 | + } |
| 5326 | + if (running()) |
| 5327 | + { |
| 5328 | + stop(); |
| 5329 | + } |
| 5330 | + cl_ulong startTime = 0, stopTime = 0; |
| 5331 | + clGetEventProfilingInfo(start_gpu_cl_, CL_PROFILING_COMMAND_END, |
| 5332 | + sizeof startTime, &startTime, NULL); |
| 5333 | + clGetEventProfilingInfo(stop_gpu_cl_, CL_PROFILING_COMMAND_START, |
| 5334 | + sizeof stopTime, &stopTime, NULL); |
| 5335 | + double ms = static_cast<double>(stopTime - startTime) / 1000000.0; |
| 5336 | + elapsed_milliseconds_ = static_cast<float>(ms); |
| 5337 | + return elapsed_milliseconds_; |
| 5338 | +#else |
| 5339 | + return 0; |
| 5340 | +#endif |
| 5341 | + } |
| 5342 | + |
| 5343 | + float seconds() |
| 5344 | + { |
| 5345 | + return milliSeconds() / 1000.f; |
| 5346 | + } |
| 5347 | + |
| 5348 | + void init() |
| 5349 | + { |
| 5350 | + CV_Assert(queue.getImpl() && queue.getImpl()->isProfilingQueue_); |
| 5351 | + if (!initted()) |
| 5352 | + { |
| 5353 | + start_gpu_cl_ = 0; |
| 5354 | + stop_gpu_cl_ = 0; |
| 5355 | + initted_ = true; |
| 5356 | + } |
| 5357 | + } |
| 5358 | + |
| 5359 | + inline bool initted() { return initted_; } |
| 5360 | + inline bool running() { return running_; } |
| 5361 | + inline bool has_run_at_least_once() { return has_run_at_least_once_; } |
| 5362 | + |
| 5363 | + bool initted_; |
| 5364 | + bool running_; |
| 5365 | + bool has_run_at_least_once_; |
| 5366 | + float elapsed_milliseconds_; |
| 5367 | + float elapsed_microseconds_; |
| 5368 | + cl_event start_gpu_cl_; |
| 5369 | + cl_event stop_gpu_cl_; |
| 5370 | +}; |
| 5371 | + |
| 5372 | +Timer::Timer(const Queue& q) |
| 5373 | +{ |
| 5374 | + p = new Impl(q); |
| 5375 | +} |
| 5376 | + |
| 5377 | +Timer::~Timer() |
| 5378 | +{ |
| 5379 | + if(p) |
| 5380 | + { |
| 5381 | + delete p; |
| 5382 | + p = 0; |
| 5383 | + } |
| 5384 | +} |
| 5385 | + |
| 5386 | +void Timer::start() |
| 5387 | +{ |
| 5388 | + if(p) |
| 5389 | + p->start(); |
| 5390 | +} |
| 5391 | + |
| 5392 | +void Timer::stop() |
| 5393 | +{ |
| 5394 | + if(p) |
| 5395 | + p->stop(); |
| 5396 | +} |
| 5397 | + |
| 5398 | +float Timer::microSeconds() |
| 5399 | +{ return p ? p->microSeconds() : 0; } |
| 5400 | + |
| 5401 | +float Timer::milliSeconds() |
| 5402 | +{ return p ? p->milliSeconds() : 0; } |
| 5403 | + |
| 5404 | +float Timer::seconds() |
| 5405 | +{ return p ? p->seconds() : 0; } |
| 5406 | + |
5137 | 5407 | }}
|
0 commit comments