Skip to content

Commit e5dbd2c

Browse files
committed
Merge pull request opencv#8406 from khnaba:dft-as-algorithm
2 parents a57d144 + 00f3ad7 commit e5dbd2c

File tree

5 files changed

+180
-81
lines changed

5 files changed

+180
-81
lines changed

modules/core/include/opencv2/core/base.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,10 @@ enum DftFlags {
239239
into a real array and inverse transformation is executed, the function treats the input as a
240240
packed complex-conjugate symmetrical array, and the output will also be a real array). */
241241
DFT_REAL_OUTPUT = 32,
242+
/** specifies that input is complex input. If this flag is set, the input must have 2 channels.
243+
On the other hand, for backwards compatibility reason, if input has 2 channels, input is
244+
already considered complex. */
245+
DFT_COMPLEX_INPUT = 64,
242246
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
243247
DCT_INVERSE = DFT_INVERSE,
244248
/** performs a forward or inverse transform of every individual row of the input

modules/core/src/dxt.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3342,6 +3342,9 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
33423342

33433343
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
33443344

3345+
// Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
3346+
CV_Assert( !((flags & DFT_COMPLEX_INPUT) && src.channels() != 2) );
3347+
33453348
if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
33463349
_dst.create( src.size(), CV_MAKETYPE(depth, 2) );
33473350
else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )

modules/cudaarithm/include/opencv2/cudaarithm.hpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,7 @@ CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArr
788788
(obtained from dft_size ).
789789
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
790790
cases are always forward and inverse, respectively).
791+
- **DFT_COMPLEX_INPUT** Specifies that input is complex input with 2 channels.
791792
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
792793
real-complex transform, so the destination matrix must be real.
793794
@param stream Stream for the asynchronous version.
@@ -813,6 +814,35 @@ instead of the width.
813814
*/
814815
CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null());
815816

817+
/** @brief Base class for DFT operator as a cv::Algorithm. :
818+
*/
819+
class CV_EXPORTS DFT : public Algorithm
820+
{
821+
public:
822+
/** @brief Computes an FFT of a given image.
823+
824+
@param image Source image. Only CV_32FC1 images are supported for now.
825+
@param result Result image.
826+
@param stream Stream for the asynchronous version.
827+
*/
828+
virtual void compute(InputArray image, OutputArray result, Stream& stream = Stream::Null()) = 0;
829+
};
830+
831+
/** @brief Creates implementation for cuda::DFT.
832+
833+
@param dft_size The image size.
834+
@param flags Optional flags:
835+
- **DFT_ROWS** transforms each individual row of the source matrix.
836+
- **DFT_SCALE** scales the result: divide it by the number of elements in the transform
837+
(obtained from dft_size ).
838+
- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real
839+
cases are always forward and inverse, respectively).
840+
- **DFT_COMPLEX_INPUT** Specifies that inputs will be complex with 2 channels.
841+
- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of
842+
real-complex transform, so the destination matrix must be real.
843+
*/
844+
CV_EXPORTS Ptr<DFT> createDFT(Size dft_size, int flags);
845+
816846
/** @brief Base class for convolution (or cross-correlation) operator. :
817847
*/
818848
class CV_EXPORTS Convolution : public Algorithm

modules/cudaarithm/src/arithm.cpp

Lines changed: 116 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -286,111 +286,146 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
286286
}
287287

288288
//////////////////////////////////////////////////////////////////////////////
289-
// dft
289+
// DFT function
290290

291291
void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
292292
{
293-
#ifndef HAVE_CUFFT
294-
(void) _src;
295-
(void) _dst;
296-
(void) dft_size;
297-
(void) flags;
298-
(void) stream;
299-
throw_no_cuda();
300-
#else
301-
GpuMat src = getInputMat(_src, stream);
293+
if (getInputMat(_src, stream).channels() == 2)
294+
flags |= DFT_COMPLEX_INPUT;
302295

303-
CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );
296+
Ptr<DFT> dft = createDFT(dft_size, flags);
297+
dft->compute(_src, _dst, stream);
298+
}
304299

305-
// We don't support unpacked output (in the case of real input)
306-
CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) );
300+
//////////////////////////////////////////////////////////////////////////////
301+
// DFT algorithm
307302

308-
const bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1);
309-
const bool is_row_dft = (flags & DFT_ROWS) != 0;
310-
const bool is_scaled_dft = (flags & DFT_SCALE) != 0;
311-
const bool is_inverse = (flags & DFT_INVERSE) != 0;
312-
const bool is_complex_input = src.channels() == 2;
313-
const bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
303+
#ifdef HAVE_CUFFT
314304

315-
// We don't support real-to-real transform
316-
CV_Assert( is_complex_input || is_complex_output );
305+
namespace
306+
{
317307

318-
// Make sure here we work with the continuous input,
319-
// as CUFFT can't handle gaps
320-
GpuMat src_cont;
321-
if (src.isContinuous())
308+
class DFTImpl : public DFT
322309
{
323-
src_cont = src;
324-
}
325-
else
326-
{
327-
BufferPool pool(stream);
328-
src_cont.allocator = pool.getAllocator();
329-
createContinuous(src.rows, src.cols, src.type(), src_cont);
330-
src.copyTo(src_cont, stream);
331-
}
310+
Size dft_size, dft_size_opt;
311+
bool is_1d_input, is_row_dft, is_scaled_dft, is_inverse, is_complex_input, is_complex_output;
332312

333-
Size dft_size_opt = dft_size;
334-
if (is_1d_input && !is_row_dft)
335-
{
336-
// If the source matrix is single column handle it as single row
337-
dft_size_opt.width = std::max(dft_size.width, dft_size.height);
338-
dft_size_opt.height = std::min(dft_size.width, dft_size.height);
339-
}
313+
cufftType dft_type;
314+
cufftHandle plan;
340315

341-
CV_Assert( dft_size_opt.width > 1 );
316+
public:
317+
DFTImpl(Size dft_size, int flags)
318+
: dft_size(dft_size),
319+
dft_size_opt(dft_size),
320+
is_1d_input((dft_size.height == 1) || (dft_size.width == 1)),
321+
is_row_dft((flags & DFT_ROWS) != 0),
322+
is_scaled_dft((flags & DFT_SCALE) != 0),
323+
is_inverse((flags & DFT_INVERSE) != 0),
324+
is_complex_input((flags & DFT_COMPLEX_INPUT) != 0),
325+
is_complex_output(!(flags & DFT_REAL_OUTPUT)),
326+
dft_type(!is_complex_input ? CUFFT_R2C : (is_complex_output ? CUFFT_C2C : CUFFT_C2R))
327+
{
328+
// We don't support unpacked output (in the case of real input)
329+
CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) );
342330

343-
cufftType dft_type = CUFFT_R2C;
344-
if (is_complex_input)
345-
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
331+
// We don't support real-to-real transform
332+
CV_Assert( is_complex_input || is_complex_output );
346333

347-
cufftHandle plan;
348-
if (is_1d_input || is_row_dft)
349-
cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) );
350-
else
351-
cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) );
334+
if (is_1d_input && !is_row_dft)
335+
{
336+
// If the source matrix is single column handle it as single row
337+
dft_size_opt.width = std::max(dft_size.width, dft_size.height);
338+
dft_size_opt.height = std::min(dft_size.width, dft_size.height);
339+
}
352340

353-
cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) );
341+
CV_Assert( dft_size_opt.width > 1 );
354342

355-
if (is_complex_input)
356-
{
357-
if (is_complex_output)
358-
{
359-
createContinuous(dft_size, CV_32FC2, _dst);
360-
GpuMat dst = _dst.getGpuMat();
343+
if (is_1d_input || is_row_dft)
344+
cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) );
345+
else
346+
cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) );
347+
}
361348

362-
cufftSafeCall(cufftExecC2C(
363-
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),
364-
is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
349+
~DFTImpl()
350+
{
351+
cufftSafeCall( cufftDestroy(plan) );
365352
}
366-
else
353+
354+
void compute(InputArray _src, OutputArray _dst, Stream& stream)
367355
{
368-
createContinuous(dft_size, CV_32F, _dst);
369-
GpuMat dst = _dst.getGpuMat();
356+
GpuMat src = getInputMat(_src, stream);
370357

371-
cufftSafeCall(cufftExecC2R(
372-
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftReal>()));
373-
}
374-
}
375-
else
376-
{
377-
// We could swap dft_size for efficiency. Here we must reflect it
378-
if (dft_size == dft_size_opt)
379-
createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst);
380-
else
381-
createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst);
358+
CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );
359+
CV_Assert( is_complex_input == (src.channels() == 2) );
382360

383-
GpuMat dst = _dst.getGpuMat();
361+
// Make sure here we work with the continuous input,
362+
// as CUFFT can't handle gaps
363+
GpuMat src_cont;
364+
if (src.isContinuous())
365+
{
366+
src_cont = src;
367+
}
368+
else
369+
{
370+
BufferPool pool(stream);
371+
src_cont.allocator = pool.getAllocator();
372+
createContinuous(src.rows, src.cols, src.type(), src_cont);
373+
src.copyTo(src_cont, stream);
374+
}
384375

385-
cufftSafeCall(cufftExecR2C(
386-
plan, src_cont.ptr<cufftReal>(), dst.ptr<cufftComplex>()));
387-
}
376+
cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) );
388377

389-
cufftSafeCall( cufftDestroy(plan) );
378+
if (is_complex_input)
379+
{
380+
if (is_complex_output)
381+
{
382+
createContinuous(dft_size, CV_32FC2, _dst);
383+
GpuMat dst = _dst.getGpuMat();
384+
385+
cufftSafeCall(cufftExecC2C(
386+
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),
387+
is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
388+
}
389+
else
390+
{
391+
createContinuous(dft_size, CV_32F, _dst);
392+
GpuMat dst = _dst.getGpuMat();
393+
394+
cufftSafeCall(cufftExecC2R(
395+
plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftReal>()));
396+
}
397+
}
398+
else
399+
{
400+
// We could swap dft_size for efficiency. Here we must reflect it
401+
if (dft_size == dft_size_opt)
402+
createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst);
403+
else
404+
createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst);
390405

391-
if (is_scaled_dft)
392-
cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream);
406+
GpuMat dst = _dst.getGpuMat();
393407

408+
cufftSafeCall(cufftExecR2C(
409+
plan, src_cont.ptr<cufftReal>(), dst.ptr<cufftComplex>()));
410+
}
411+
412+
if (is_scaled_dft)
413+
cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream);
414+
}
415+
};
416+
}
417+
418+
#endif
419+
420+
Ptr<DFT> cv::cuda::createDFT(Size dft_size, int flags)
421+
{
422+
#ifndef HAVE_CUFFT
423+
(void) dft_size;
424+
(void) flags;
425+
CV_Error(Error::StsNotImplemented, "The library was build without CUFFT");
426+
return Ptr<DFT>();
427+
#else
428+
return makePtr<DFTImpl>(dft_size, flags);
394429
#endif
395430
}
396431

modules/cudaarithm/test/test_arithm.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,33 @@ CUDA_TEST_P(Dft, C2C)
250250
}
251251
}
252252

253+
CUDA_TEST_P(Dft, Algorithm)
254+
{
255+
int cols = randomInt(2, 100);
256+
int rows = randomInt(2, 100);
257+
258+
int flags = 0;
259+
cv::Ptr<cv::cuda::DFT> dft = cv::cuda::createDFT(cv::Size(cols, rows), flags);
260+
261+
for (int i = 0; i < 5; ++i)
262+
{
263+
SCOPED_TRACE("dft algorithm");
264+
265+
cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
266+
267+
cv::cuda::GpuMat d_b;
268+
cv::cuda::GpuMat d_b_data;
269+
dft->compute(loadMat(a), d_b);
270+
271+
cv::Mat b_gold;
272+
cv::dft(a, b_gold, flags);
273+
274+
ASSERT_EQ(CV_32F, d_b.depth());
275+
ASSERT_EQ(2, d_b.channels());
276+
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
277+
}
278+
}
279+
253280
namespace
254281
{
255282
void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)

0 commit comments

Comments
 (0)