Skip to content

Commit 27cf6e5

Browse files
khnabaalalek
authored andcommitted
Merge pull request opencv#8367 from khnaba:cuda-calchist-with-mask
Implement cv::cuda::calcHist with mask support (opencv#8367) * Implement cuda::calcHist with mask * Fix documentation build warning * Have their own step sizes for src and mask. Fix review comment.
1 parent a83a1ca commit 27cf6e5

File tree

4 files changed

+131
-1
lines changed

4 files changed

+131
-1
lines changed

modules/cudaimgproc/include/opencv2/cudaimgproc.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,15 @@ CV_EXPORTS void alphaComp(InputArray img1, InputArray img2, OutputArray dst, int
201201
*/
202202
CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stream::Null());
203203

204+
/** @brief Calculates histogram for one channel 8-bit image confined in given mask.
205+
206+
@param src Source image with CV_8UC1 type.
207+
@param hist Destination histogram with one row, 256 columns, and the CV_32SC1 type.
208+
@param mask A mask image same size as src and of type CV_8UC1.
209+
@param stream Stream for the asynchronous version.
210+
*/
211+
CV_EXPORTS void calcHist(InputArray src, InputArray mask, OutputArray hist, Stream& stream = Stream::Null());
212+
204213
/** @brief Equalizes the histogram of a grayscale image.
205214
206215
@param src Source image with CV_8UC1 type.

modules/cudaimgproc/src/cuda/hist.cu

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,72 @@ namespace hist
105105
if (stream == 0)
106106
cudaSafeCall( cudaDeviceSynchronize() );
107107
}
108+
109+
__global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t srcStep, const uchar* mask, size_t maskStep, int* hist)
110+
{
111+
__shared__ int shist[256];
112+
113+
const int y = blockIdx.x * blockDim.y + threadIdx.y;
114+
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
115+
116+
shist[tid] = 0;
117+
__syncthreads();
118+
119+
if (y < rows)
120+
{
121+
const unsigned int* rowPtr = (const unsigned int*) (src + y * srcStep);
122+
const unsigned int* maskRowPtr = (const unsigned int*) (mask + y * maskStep);
123+
124+
const int cols_4 = cols / 4;
125+
for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
126+
{
127+
unsigned int data = rowPtr[x];
128+
unsigned int m = maskRowPtr[x];
129+
130+
if ((m >> 0) & 0xFFU)
131+
Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1);
132+
133+
if ((m >> 8) & 0xFFU)
134+
Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1);
135+
136+
if ((m >> 16) & 0xFFU)
137+
Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
138+
139+
if ((m >> 24) & 0xFFU)
140+
Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
141+
}
142+
143+
if (cols % 4 != 0 && threadIdx.x == 0)
144+
{
145+
for (int x = cols_4 * 4; x < cols; ++x)
146+
{
147+
unsigned int data = ((const uchar*)rowPtr)[x];
148+
unsigned int m = ((const uchar*)maskRowPtr)[x];
149+
150+
if (m)
151+
Emulation::smem::atomicAdd(&shist[data], 1);
152+
}
153+
}
154+
}
155+
156+
__syncthreads();
157+
158+
const int histVal = shist[tid];
159+
if (histVal > 0)
160+
::atomicAdd(hist + tid, histVal);
161+
}
162+
163+
void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream)
164+
{
165+
const dim3 block(32, 8);
166+
const dim3 grid(divUp(src.rows, block.y));
167+
168+
histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, mask.data, mask.step, hist);
169+
cudaSafeCall( cudaGetLastError() );
170+
171+
if (stream == 0)
172+
cudaSafeCall( cudaDeviceSynchronize() );
173+
}
108174
}
109175

110176
/////////////////////////////////////////////////////////////////////////

modules/cudaimgproc/src/histogram.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,20 +69,32 @@ void cv::cuda::histRange(InputArray, GpuMat*, const GpuMat*, Stream&) { throw_no
6969
namespace hist
7070
{
7171
void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
72+
void histogram256(PtrStepSzb src, PtrStepSzb mask, int* hist, cudaStream_t stream);
7273
}
7374

7475
void cv::cuda::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
76+
{
77+
calcHist(_src, cv::cuda::GpuMat(), _hist, stream);
78+
}
79+
80+
void cv::cuda::calcHist(InputArray _src, InputArray _mask, OutputArray _hist, Stream& stream)
7581
{
7682
GpuMat src = _src.getGpuMat();
83+
GpuMat mask = _mask.getGpuMat();
7784

7885
CV_Assert( src.type() == CV_8UC1 );
86+
CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
87+
CV_Assert( mask.empty() || mask.size() == src.size() );
7988

8089
_hist.create(1, 256, CV_32SC1);
8190
GpuMat hist = _hist.getGpuMat();
8291

8392
hist.setTo(Scalar::all(0), stream);
8493

85-
hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
94+
if (mask.empty())
95+
hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
96+
else
97+
hist::histogram256(src, mask, hist.ptr<int>(), StreamAccessor::getStream(stream));
8698
}
8799

88100
////////////////////////////////////////////////////////////////////////

modules/cudaimgproc/test/test_histogram.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,49 @@ INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CalcHist, testing::Combine(
136136
ALL_DEVICES,
137137
DIFFERENT_SIZES));
138138

139+
PARAM_TEST_CASE(CalcHistWithMask, cv::cuda::DeviceInfo, cv::Size)
140+
{
141+
cv::cuda::DeviceInfo devInfo;
142+
143+
cv::Size size;
144+
145+
virtual void SetUp()
146+
{
147+
devInfo = GET_PARAM(0);
148+
size = GET_PARAM(1);
149+
150+
cv::cuda::setDevice(devInfo.deviceID());
151+
}
152+
};
153+
154+
CUDA_TEST_P(CalcHistWithMask, Accuracy)
155+
{
156+
cv::Mat src = randomMat(size, CV_8UC1);
157+
cv::Mat mask = randomMat(size, CV_8UC1);
158+
cv::Mat(mask, cv::Rect(0, 0, size.width / 2, size.height / 2)).setTo(0);
159+
160+
cv::cuda::GpuMat hist;
161+
cv::cuda::calcHist(loadMat(src), loadMat(mask), hist);
162+
163+
cv::Mat hist_gold;
164+
165+
const int hbins = 256;
166+
const float hranges[] = {0.0f, 256.0f};
167+
const int histSize[] = {hbins};
168+
const float* ranges[] = {hranges};
169+
const int channels[] = {0};
170+
171+
cv::calcHist(&src, 1, channels, mask, hist_gold, 1, histSize, ranges);
172+
hist_gold = hist_gold.reshape(1, 1);
173+
hist_gold.convertTo(hist_gold, CV_32S);
174+
175+
EXPECT_MAT_NEAR(hist_gold, hist, 0.0);
176+
}
177+
178+
INSTANTIATE_TEST_CASE_P(CUDA_ImgProc, CalcHistWithMask, testing::Combine(
179+
ALL_DEVICES,
180+
DIFFERENT_SIZES));
181+
139182
///////////////////////////////////////////////////////////////////////////////////////////////////////
140183
// EqualizeHist
141184

0 commit comments

Comments
 (0)