@@ -286,111 +286,146 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
286
286
}
287
287
288
288
// ////////////////////////////////////////////////////////////////////////////
289
- // dft
289
+ // DFT function
290
290
291
291
// One-shot DFT helper: builds a DFT algorithm object for the requested size
// and flags, runs it once on _src and writes the result to _dst.
//
// _src      source array; a two-channel source is treated as complex input
// _dst      destination array, (re)allocated by DFT::compute
// dft_size  size of the transform
// flags     combination of DFT_* flags; DFT_COMPLEX_INPUT is added
//           automatically when _src has two channels
// stream    stream on which the work is enqueued
void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
{
    // Only the channel count is needed here, so read it from the array header
    // rather than going through getInputMat(): compute() obtains the GpuMat
    // itself, and doing it twice would repeat whatever work that helper does.
    if (_src.channels() == 2)
        flags |= DFT_COMPLEX_INPUT;

    // Named so that it does not shadow the enclosing function.
    Ptr<DFT> algorithm = createDFT(dft_size, flags);
    algorithm->compute(_src, _dst, stream);
}
304
299
305
- // We don't support unpacked output (in the case of real input)
306
- CV_Assert ( !(flags & DFT_COMPLEX_OUTPUT) );
300
+ // ////////////////////////////////////////////////////////////////////////////
301
+ // DFT algorithm
307
302
308
- const bool is_1d_input = (dft_size.height == 1 ) || (dft_size.width == 1 );
309
- const bool is_row_dft = (flags & DFT_ROWS) != 0 ;
310
- const bool is_scaled_dft = (flags & DFT_SCALE) != 0 ;
311
- const bool is_inverse = (flags & DFT_INVERSE) != 0 ;
312
- const bool is_complex_input = src.channels () == 2 ;
313
- const bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
303
+ #ifdef HAVE_CUFFT
314
304
315
- // We don't support real-to-real transform
316
- CV_Assert ( is_complex_input || is_complex_output );
305
+ namespace
306
+ {
317
307
318
- // Make sure here we work with the continuous input,
319
- // as CUFFT can't handle gaps
320
- GpuMat src_cont;
321
- if (src.isContinuous ())
308
+ class DFTImpl : public DFT
322
309
{
323
- src_cont = src;
324
- }
325
- else
326
- {
327
- BufferPool pool (stream);
328
- src_cont.allocator = pool.getAllocator ();
329
- createContinuous (src.rows , src.cols , src.type (), src_cont);
330
- src.copyTo (src_cont, stream);
331
- }
310
+ Size dft_size, dft_size_opt;
311
+ bool is_1d_input, is_row_dft, is_scaled_dft, is_inverse, is_complex_input, is_complex_output;
332
312
333
- Size dft_size_opt = dft_size;
334
- if (is_1d_input && !is_row_dft)
335
- {
336
- // If the source matrix is single column handle it as single row
337
- dft_size_opt.width = std::max (dft_size.width , dft_size.height );
338
- dft_size_opt.height = std::min (dft_size.width , dft_size.height );
339
- }
313
+ cufftType dft_type;
314
+ cufftHandle plan;
340
315
341
- CV_Assert ( dft_size_opt.width > 1 );
316
+ public:
317
+ DFTImpl (Size dft_size, int flags)
318
+ : dft_size(dft_size),
319
+ dft_size_opt (dft_size),
320
+ is_1d_input((dft_size.height == 1 ) || (dft_size.width == 1 )),
321
+ is_row_dft((flags & DFT_ROWS) != 0),
322
+ is_scaled_dft((flags & DFT_SCALE) != 0),
323
+ is_inverse((flags & DFT_INVERSE) != 0),
324
+ is_complex_input((flags & DFT_COMPLEX_INPUT) != 0),
325
+ is_complex_output(!(flags & DFT_REAL_OUTPUT)),
326
+ dft_type(!is_complex_input ? CUFFT_R2C : (is_complex_output ? CUFFT_C2C : CUFFT_C2R))
327
+ {
328
+ // We don't support unpacked output (in the case of real input)
329
+ CV_Assert ( !(flags & DFT_COMPLEX_OUTPUT) );
342
330
343
- cufftType dft_type = CUFFT_R2C;
344
- if (is_complex_input)
345
- dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
331
+ // We don't support real-to-real transform
332
+ CV_Assert ( is_complex_input || is_complex_output );
346
333
347
- cufftHandle plan;
348
- if (is_1d_input || is_row_dft)
349
- cufftSafeCall ( cufftPlan1d (&plan, dft_size_opt.width , dft_type, dft_size_opt.height ) );
350
- else
351
- cufftSafeCall ( cufftPlan2d (&plan, dft_size_opt.height , dft_size_opt.width , dft_type) );
334
+ if (is_1d_input && !is_row_dft)
335
+ {
336
+ // If the source matrix is single column handle it as single row
337
+ dft_size_opt.width = std::max (dft_size.width , dft_size.height );
338
+ dft_size_opt.height = std::min (dft_size.width , dft_size.height );
339
+ }
352
340
353
- cufftSafeCall ( cufftSetStream (plan, StreamAccessor::getStream (stream)) );
341
+ CV_Assert ( dft_size_opt. width > 1 );
354
342
355
- if (is_complex_input)
356
- {
357
- if (is_complex_output)
358
- {
359
- createContinuous (dft_size, CV_32FC2, _dst);
360
- GpuMat dst = _dst.getGpuMat ();
343
+ if (is_1d_input || is_row_dft)
344
+ cufftSafeCall ( cufftPlan1d (&plan, dft_size_opt.width , dft_type, dft_size_opt.height ) );
345
+ else
346
+ cufftSafeCall ( cufftPlan2d (&plan, dft_size_opt.height , dft_size_opt.width , dft_type) );
347
+ }
361
348
362
- cufftSafeCall ( cufftExecC2C (
363
- plan, src_cont. ptr <cufftComplex>(), dst. ptr <cufftComplex>(),
364
- is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD) );
349
+ ~DFTImpl ()
350
+ {
351
+ cufftSafeCall ( cufftDestroy (plan) );
365
352
}
366
- else
353
+
354
+ void compute (InputArray _src, OutputArray _dst, Stream& stream)
367
355
{
368
- createContinuous (dft_size, CV_32F, _dst);
369
- GpuMat dst = _dst.getGpuMat ();
356
+ GpuMat src = getInputMat (_src, stream);
370
357
371
- cufftSafeCall (cufftExecC2R (
372
- plan, src_cont.ptr <cufftComplex>(), dst.ptr <cufftReal>()));
373
- }
374
- }
375
- else
376
- {
377
- // We could swap dft_size for efficiency. Here we must reflect it
378
- if (dft_size == dft_size_opt)
379
- createContinuous (Size (dft_size.width / 2 + 1 , dft_size.height ), CV_32FC2, _dst);
380
- else
381
- createContinuous (Size (dft_size.width , dft_size.height / 2 + 1 ), CV_32FC2, _dst);
358
+ CV_Assert ( src.type () == CV_32FC1 || src.type () == CV_32FC2 );
359
+ CV_Assert ( is_complex_input == (src.channels () == 2 ) );
382
360
383
- GpuMat dst = _dst.getGpuMat ();
361
+ // Make sure here we work with the continuous input,
362
+ // as CUFFT can't handle gaps
363
+ GpuMat src_cont;
364
+ if (src.isContinuous ())
365
+ {
366
+ src_cont = src;
367
+ }
368
+ else
369
+ {
370
+ BufferPool pool (stream);
371
+ src_cont.allocator = pool.getAllocator ();
372
+ createContinuous (src.rows , src.cols , src.type (), src_cont);
373
+ src.copyTo (src_cont, stream);
374
+ }
384
375
385
- cufftSafeCall (cufftExecR2C (
386
- plan, src_cont.ptr <cufftReal>(), dst.ptr <cufftComplex>()));
387
- }
376
+ cufftSafeCall ( cufftSetStream (plan, StreamAccessor::getStream (stream)) );
388
377
389
- cufftSafeCall ( cufftDestroy (plan) );
378
+ if (is_complex_input)
379
+ {
380
+ if (is_complex_output)
381
+ {
382
+ createContinuous (dft_size, CV_32FC2, _dst);
383
+ GpuMat dst = _dst.getGpuMat ();
384
+
385
+ cufftSafeCall (cufftExecC2C (
386
+ plan, src_cont.ptr <cufftComplex>(), dst.ptr <cufftComplex>(),
387
+ is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
388
+ }
389
+ else
390
+ {
391
+ createContinuous (dft_size, CV_32F, _dst);
392
+ GpuMat dst = _dst.getGpuMat ();
393
+
394
+ cufftSafeCall (cufftExecC2R (
395
+ plan, src_cont.ptr <cufftComplex>(), dst.ptr <cufftReal>()));
396
+ }
397
+ }
398
+ else
399
+ {
400
+ // We could swap dft_size for efficiency. Here we must reflect it
401
+ if (dft_size == dft_size_opt)
402
+ createContinuous (Size (dft_size.width / 2 + 1 , dft_size.height ), CV_32FC2, _dst);
403
+ else
404
+ createContinuous (Size (dft_size.width , dft_size.height / 2 + 1 ), CV_32FC2, _dst);
390
405
391
- if (is_scaled_dft)
392
- cuda::multiply (_dst, Scalar::all (1 . / dft_size.area ()), _dst, 1 , -1 , stream);
406
+ GpuMat dst = _dst.getGpuMat ();
393
407
408
+ cufftSafeCall (cufftExecR2C (
409
+ plan, src_cont.ptr <cufftReal>(), dst.ptr <cufftComplex>()));
410
+ }
411
+
412
+ if (is_scaled_dft)
413
+ cuda::multiply (_dst, Scalar::all (1 . / dft_size.area ()), _dst, 1 , -1 , stream);
414
+ }
415
+ };
416
+ }
417
+
418
+ #endif
419
+
420
+ Ptr<DFT> cv::cuda::createDFT (Size dft_size, int flags)
421
+ {
422
+ #ifndef HAVE_CUFFT
423
+ (void ) dft_size;
424
+ (void ) flags;
425
+ CV_Error (Error::StsNotImplemented, " The library was build without CUFFT" );
426
+ return Ptr<DFT>();
427
+ #else
428
+ return makePtr<DFTImpl>(dft_size, flags);
394
429
#endif
395
430
}
396
431
0 commit comments