Skip to content

Commit 55dabd2

Browse files
committed
Merge pull request opencv#10097 from wzw-intel:tuning_time
2 parents 1fbdca8 + 88e6daa commit 55dabd2

File tree

2 files changed: 11 additions (+11) and 13 deletions (-13).

2 files changed: 11 additions (+11) and 13 deletions (-13).

modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -217,8 +217,7 @@ class OCL4DNNConvSpatial
217217
bool convolve(const UMat &bottom, UMat &top,
218218
const UMat &weight, const UMat &bias,
219219
int32_t numImages,
220-
kernelConfig* config,
221-
const cv::ocl::Queue& queue);
220+
kernelConfig* config);
222221
float timedConvolve(const UMat &bottom, UMat &top,
223222
const UMat &weight, const UMat &bias,
224223
int32_t numImages, kernelConfig* config);

modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp

Lines changed: 10 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -381,7 +381,7 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
381381
prepareKernel(bottom, top, weight, bias, numImages);
382382
if (bestKernelConfig.empty())
383383
return false;
384-
return convolve(bottom, top, weight, bias, numImages, bestKernelConfig, cv::ocl::Queue::getDefault());
384+
return convolve(bottom, top, weight, bias, numImages, bestKernelConfig);
385385
}
386386

387387
template<typename Dtype>
@@ -392,7 +392,7 @@ void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &ver
392392
options_.str(""); options_.clear(); // clear contents and state flags
393393
createBasicKernel(1, 1, 1);
394394
kernel_index_ = kernelQueue.size() - 1;
395-
convolve(bottom, verifyTop, weight, bias, numImages, kernelQueue[kernel_index_], cv::ocl::Queue::getDefault());
395+
convolve(bottom, verifyTop, weight, bias, numImages, kernelQueue[kernel_index_]);
396396
CV_Assert(phash.find(kernelQueue[kernel_index_]->kernelName) != phash.end());
397397
//unloadProgram(kernelQueue[kernel_index_]->kernelName);
398398
kernelQueue.pop_back();
@@ -649,8 +649,7 @@ void OCL4DNNConvSpatial<float>::CreateSubBuffer(const UMat& buffer, UMat& sub_bu
649649
template<>
650650
bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
651651
const UMat &weight, const UMat &bias,
652-
int32_t numImages, kernelConfig* config,
653-
const cv::ocl::Queue& queue)
652+
int32_t numImages, kernelConfig* config)
654653
{
655654
ocl::Program program;
656655
phash_t::iterator it = phash.find(config->kernelName);
@@ -926,17 +925,17 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
926925
const UMat &weight, const UMat &bias,
927926
int32_t numImages, kernelConfig* config)
928927
{
929-
cv::ocl::Queue profilingQueue;
928+
cv::ocl::Queue queue;
930929
try
931930
{
932-
profilingQueue = cv::ocl::Queue::getDefault().getProfilingQueue();
931+
queue = cv::ocl::Queue::getDefault();
933932
}
934933
catch (const cv::Exception&)
935934
{
936935
static int warn_ = 0;
937936
if (!warn_)
938937
{
939-
std::cout << "OpenCV(ocl4dnn): Can't create OpenCL profiling queue for auto-tuning." << std::endl;
938+
std::cout << "OpenCV(ocl4dnn): Can't get OpenCL default queue for auto-tuning." << std::endl;
940939
warn_ = true;
941940
}
942941
return 1e6;
@@ -945,16 +944,16 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
945944
// warm up.
946945
bool saved_tuned = tuned_;
947946
tuned_ = false;
948-
convolve(bottom, top, weight, bias, numImages, config, profilingQueue);
947+
convolve(bottom, top, weight, bias, numImages, config);
949948

950-
cv::ocl::Timer timer(profilingQueue);
949+
cv::ocl::Timer timer(queue);
951950
timer.start();
952951
bool res = true;;
953952
dbgPrint(std::cout << "Benchmarking kernel: " << config->kernelName << std::endl);
954953
tuned_ = true;
955954
int loop_cnt = 4;
956955
for (int i = 0; i < loop_cnt; i++) {
957-
res = convolve(bottom, top, weight, bias, numImages, config, profilingQueue);
956+
res = convolve(bottom, top, weight, bias, numImages, config);
958957
if (!res)
959958
break;
960959
}
@@ -1009,7 +1008,7 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
10091008
top.zeros(4, sz, CV_32FC1);
10101009
bool saved_tuned = tuned_;
10111010
tuned_ = false;
1012-
convolve(bottom, top, weight, bias, numImages, config, cv::ocl::Queue::getDefault());
1011+
convolve(bottom, top, weight, bias, numImages, config);
10131012
tuned_ = saved_tuned;
10141013

10151014
float *data = (float *)top.getMat(ACCESS_READ).ptr<float>();

0 commit comments

Comments
 (0)