Skip to content

Commit 4709b9d

Browse files
Add cuda::streams to by_rows and 8UC1 functions
Fix opencv#8177
1 parent dd3655f commit 4709b9d

File tree

2 files changed

+78
-38
lines changed

2 files changed

+78
-38
lines changed

modules/cudaobjdetect/src/cuda/hog.cu

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -518,8 +518,10 @@ namespace cv { namespace cuda { namespace device
518518

519519

520520
template <int nthreads>
521-
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
522-
const float* block_hists, PtrStepf descriptors)
521+
__global__ void extract_descrs_by_rows_kernel(const int img_block_width,
522+
const int win_block_stride_x, const int win_block_stride_y,
523+
const float* block_hists,
524+
PtrStepf descriptors)
523525
{
524526
// Get left top corner of the window in src
525527
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
@@ -538,8 +540,14 @@ namespace cv { namespace cuda { namespace device
538540
}
539541

540542

541-
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
542-
int height, int width, float* block_hists, int cell_size_x, int ncells_block_x, PtrStepSzf descriptors)
543+
void extract_descrs_by_rows(int win_height, int win_width,
544+
int block_stride_y, int block_stride_x,
545+
int win_stride_y, int win_stride_x,
546+
int height, int width,
547+
float* block_hists, int cell_size_x,
548+
int ncells_block_x,
549+
PtrStepSzf descriptors,
550+
const cudaStream_t& stream)
543551
{
544552
const int nthreads = 256;
545553

@@ -551,17 +559,16 @@ namespace cv { namespace cuda { namespace device
551559
dim3 grid(img_win_width, img_win_height);
552560

553561
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
554-
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
555-
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
556-
cudaSafeCall( cudaGetLastError() );
562+
extract_descrs_by_rows_kernel<nthreads><<<grid, threads, 0, stream>>>(img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
557563

558-
cudaSafeCall( cudaDeviceSynchronize() );
564+
cudaSafeCall( cudaGetLastError() );
559565
}
560566

561567

562568
template <int nthreads>
563-
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
564-
const int win_block_stride_y, const float* block_hists,
569+
__global__ void extract_descrs_by_cols_kernel(const int img_block_width,
570+
const int win_block_stride_x, const int win_block_stride_y,
571+
const float* block_hists,
565572
PtrStepf descriptors)
566573
{
567574
// Get left top corner of the window in src
@@ -792,8 +799,12 @@ namespace cv { namespace cuda { namespace device
792799
}
793800

794801

795-
void compute_gradients_8UC1(int nbins, int height, int width, const PtrStepSzb& img,
796-
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
802+
void compute_gradients_8UC1(int nbins,
803+
int height, int width, const PtrStepSzb& img,
804+
float angle_scale,
805+
PtrStepSzf grad, PtrStepSzb qangle,
806+
bool correct_gamma,
807+
const cudaStream_t& stream)
797808
{
798809
(void)nbins;
799810
const int nthreads = 256;
@@ -802,13 +813,11 @@ namespace cv { namespace cuda { namespace device
802813
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
803814

804815
if (correct_gamma)
805-
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
816+
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
806817
else
807-
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
818+
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
808819

809820
cudaSafeCall( cudaGetLastError() );
810-
811-
cudaSafeCall( cudaDeviceSynchronize() );
812821
}
813822

814823

modules/cudaobjdetect/src/hog.cpp

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -64,19 +64,29 @@ namespace cv { namespace cuda { namespace device
6464
{
6565
namespace hog
6666
{
67-
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
67+
void set_up_constants(int nbins,
68+
int block_stride_x, int block_stride_y,
6869
int nblocks_win_x, int nblocks_win_y,
6970
int ncells_block_x, int ncells_block_y,
7071
const cudaStream_t& stream);
7172

72-
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
73-
int height, int width, const PtrStepSzf& grad,
74-
const PtrStepSzb& qangle, float sigma, float* block_hists,
75-
int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y,
73+
void compute_hists(int nbins,
74+
int block_stride_x, int block_stride_y,
75+
int height, int width,
76+
const PtrStepSzf& grad, const PtrStepSzb& qangle,
77+
float sigma,
78+
float* block_hists,
79+
int cell_size_x, int cell_size_y,
80+
int ncells_block_x, int ncells_block_y,
7681
const cudaStream_t& stream);
7782

78-
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
79-
int height, int width, float* block_hists, float threshold, int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y,
83+
void normalize_hists(int nbins,
84+
int block_stride_x, int block_stride_y,
85+
int height, int width,
86+
float* block_hists,
87+
float threshold,
88+
int cell_size_x, int cell_size_y,
89+
int ncells_block_x, int ncells_block_y,
8090
const cudaStream_t& stream);
8191

8292
void classify_hists(int win_height, int win_width, int block_stride_y,
@@ -85,21 +95,37 @@ namespace cv { namespace cuda { namespace device
8595
float threshold, int cell_size_x, int ncells_block_x, unsigned char* labels);
8696

8797
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
88-
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
89-
float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences);
90-
91-
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
92-
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
93-
cv::cuda::PtrStepSzf descriptors);
94-
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
95-
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
98+
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
99+
float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences);
100+
101+
void extract_descrs_by_rows(int win_height, int win_width,
102+
int block_stride_y, int block_stride_x,
103+
int win_stride_y, int win_stride_x,
104+
int height, int width,
105+
float* block_hists,
106+
int cell_size_x, int ncells_block_x,
107+
cv::cuda::PtrStepSzf descriptors,
108+
const cudaStream_t& stream);
109+
void extract_descrs_by_cols(int win_height, int win_width,
110+
int block_stride_y, int block_stride_x,
111+
int win_stride_y, int win_stride_x,
112+
int height, int width,
113+
float* block_hists,
114+
int cell_size_x, int ncells_block_x,
96115
cv::cuda::PtrStepSzf descriptors,
97116
const cudaStream_t& stream);
98117

99-
void compute_gradients_8UC1(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
100-
float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma);
101-
void compute_gradients_8UC4(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
102-
float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma,
118+
void compute_gradients_8UC1(int nbins,
119+
int height, int width, const cv::cuda::PtrStepSzb& img,
120+
float angle_scale,
121+
cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle,
122+
bool correct_gamma,
123+
const cudaStream_t& stream);
124+
void compute_gradients_8UC4(int nbins,
125+
int height, int width, const cv::cuda::PtrStepSzb& img,
126+
float angle_scale,
127+
cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle,
128+
bool correct_gamma,
103129
const cudaStream_t& stream);
104130

105131
void resize_8UC1(const cv::cuda::PtrStepSzb& src, cv::cuda::PtrStepSzb dst);
@@ -483,7 +509,8 @@ namespace
483509
img.rows, img.cols,
484510
block_hists.ptr<float>(),
485511
cell_size_.width, cells_per_block_.width,
486-
descriptors);
512+
descriptors,
513+
StreamAccessor::getStream(stream));
487514
break;
488515
case DESCR_FORMAT_COL_BY_COL:
489516
hog::extract_descrs_by_cols(win_size_.height, win_size_.width,
@@ -524,8 +551,12 @@ namespace
524551
switch (img.type())
525552
{
526553
case CV_8UC1:
527-
hog::compute_gradients_8UC1(nbins_, img.rows, img.cols, img,
528-
angleScale, grad, qangle, gamma_correction_);
554+
hog::compute_gradients_8UC1(nbins_,
555+
img.rows, img.cols, img,
556+
angleScale,
557+
grad, qangle,
558+
gamma_correction_,
559+
StreamAccessor::getStream(stream));
529560
break;
530561
case CV_8UC4:
531562
hog::compute_gradients_8UC4(nbins_,

0 commit comments

Comments
 (0)