@@ -3412,6 +3412,125 @@ static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
3412
3412
3413
3413
#endif
3414
3414
3415
+ namespace {
3416
+
3417
+ #define VAL (buf, elem ) (((T*)((char *)data ## buf + (step ## buf * (elem))))[0 ])
3418
+ #define MUL_SPECTRUMS_COL (A, B, C ) \
3419
+ VAL (C, 0 ) = VAL(A, 0 ) * VAL (B, 0 ); \
3420
+ for (size_t j = 1 ; j <= rows - 2 ; j += 2 ) \
3421
+ { \
3422
+ double a_re = VAL (A, j), a_im = VAL (A, j + 1 ); \
3423
+ double b_re = VAL (B, j), b_im = VAL (B, j + 1 ); \
3424
+ if (conjB) b_im = -b_im; \
3425
+ double c_re = a_re * b_re - a_im * b_im; \
3426
+ double c_im = a_re * b_im + a_im * b_re; \
3427
+ VAL (C, j) = (T)c_re; VAL (C, j + 1 ) = (T)c_im; \
3428
+ } \
3429
+ if ((rows & 1 ) == 0 ) \
3430
+ VAL (C, rows-1 ) = VAL(A, rows-1 ) * VAL (B, rows-1 )
3431
+
3432
+ template <typename T, bool conjB> static inline
3433
+ void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3434
+ {
3435
+ MUL_SPECTRUMS_COL (A, B, C);
3436
+ }
3437
+
3438
+ template <typename T, bool conjB> static inline
3439
+ void mulSpectrums_processCol_inplaceA (const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
3440
+ {
3441
+ MUL_SPECTRUMS_COL (AC, B, AC);
3442
+ }
3443
+ template <typename T, bool conjB, bool inplaceA> static inline
3444
+ void mulSpectrums_processCol (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3445
+ {
3446
+ if (inplaceA)
3447
+ mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
3448
+ else
3449
+ mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3450
+ }
3451
+ #undef MUL_SPECTRUMS_COL
3452
+ #undef VAL
3453
+
3454
+ template <typename T, bool conjB, bool inplaceA> static inline
3455
+ void mulSpectrums_processCols (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
3456
+ {
3457
+ mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3458
+ if ((cols & 1 ) == 0 )
3459
+ {
3460
+ mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - 1 , dataB + cols - 1 , dataC + cols - 1 , stepA, stepB, stepC, rows);
3461
+ }
3462
+ }
3463
+
3464
+ #define VAL (buf, elem ) (data ## buf[(elem)])
3465
+ #define MUL_SPECTRUMS_ROW (A, B, C ) \
3466
+ for (size_t j = j0; j < j1; j += 2 ) \
3467
+ { \
3468
+ double a_re = VAL (A, j), a_im = VAL (A, j + 1 ); \
3469
+ double b_re = VAL (B, j), b_im = VAL (B, j + 1 ); \
3470
+ if (conjB) b_im = -b_im; \
3471
+ double c_re = a_re * b_re - a_im * b_im; \
3472
+ double c_im = a_re * b_im + a_im * b_re; \
3473
+ VAL (C, j) = (T)c_re; VAL (C, j + 1 ) = (T)c_im; \
3474
+ }
3475
+ template <typename T, bool conjB> static inline
3476
+ void mulSpectrums_processRow_noinplace (const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3477
+ {
3478
+ MUL_SPECTRUMS_ROW (A, B, C);
3479
+ }
3480
+ template <typename T, bool conjB> static inline
3481
+ void mulSpectrums_processRow_inplaceA (const T* dataB, T* dataAC, size_t j0, size_t j1)
3482
+ {
3483
+ MUL_SPECTRUMS_ROW (AC, B, AC);
3484
+ }
3485
+ template <typename T, bool conjB, bool inplaceA> static inline
3486
+ void mulSpectrums_processRow (const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3487
+ {
3488
+ if (inplaceA)
3489
+ mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
3490
+ else
3491
+ mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
3492
+ }
3493
+ #undef MUL_SPECTRUMS_ROW
3494
+ #undef VAL
3495
+
3496
+ template <typename T, bool conjB, bool inplaceA> static inline
3497
+ void mulSpectrums_processRows (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
3498
+ {
3499
+ while (rows-- > 0 )
3500
+ {
3501
+ if (is_1d_CN1)
3502
+ dataC[0 ] = dataA[0 ]*dataB[0 ];
3503
+ mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
3504
+ if (is_1d_CN1 && (cols & 1 ) == 0 )
3505
+ dataC[j1] = dataA[j1]*dataB[j1];
3506
+
3507
+ dataA = (const T*)(((char *)dataA) + stepA);
3508
+ dataB = (const T*)(((char *)dataB) + stepB);
3509
+ dataC = (T*)(((char *)dataC) + stepC);
3510
+ }
3511
+ }
3512
+
3513
+
3514
+ template <typename T, bool conjB, bool inplaceA> static inline
3515
+ void mulSpectrums_Impl_ (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3516
+ {
3517
+ if (!is_1d && isCN1)
3518
+ {
3519
+ mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
3520
+ }
3521
+ mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
3522
+ }
3523
+ template <typename T, bool conjB> static inline
3524
+ void mulSpectrums_Impl (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3525
+ {
3526
+ if (dataA == dataC)
3527
+ mulSpectrums_Impl_<T, conjB, true >(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3528
+ else
3529
+ mulSpectrums_Impl_<T, conjB, false >(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3530
+ }
3531
+
3532
+ } // namespace
3533
+
3415
3534
void cv::mulSpectrums ( InputArray _srcA, InputArray _srcB,
3416
3535
OutputArray _dst, int flags, bool conjB )
3417
3536
{
@@ -3422,158 +3541,50 @@ void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
3422
3541
3423
3542
Mat srcA = _srcA.getMat (), srcB = _srcB.getMat ();
3424
3543
int depth = srcA.depth (), cn = srcA.channels (), type = srcA.type ();
3425
- int rows = srcA.rows , cols = srcA.cols ;
3426
- int j, k;
3544
+ size_t rows = srcA.rows , cols = srcA.cols ;
3427
3545
3428
3546
CV_Assert ( type == srcB.type () && srcA.size () == srcB.size () );
3429
3547
CV_Assert ( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3430
3548
3431
3549
_dst.create ( srcA.rows , srcA.cols , type );
3432
3550
Mat dst = _dst.getMat ();
3433
3551
3434
- bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 &&
3435
- srcA.isContinuous () && srcB.isContinuous () && dst.isContinuous ()));
3552
+ // correct inplace support
3553
+ // Case 'dst.data == srcA.data' is handled by implementation,
3554
+ // because it is used frequently (filter2D, matchTemplate)
3555
+ if (dst.data == srcB.data )
3556
+ srcB = srcB.clone (); // workaround for B only
3557
+
3558
+ bool is_1d = (flags & DFT_ROWS)
3559
+ || (rows == 1 )
3560
+ || (cols == 1 && srcA.isContinuous () && srcB.isContinuous () && dst.isContinuous ());
3436
3561
3437
3562
if ( is_1d && !(flags & DFT_ROWS) )
3438
3563
cols = cols + rows - 1 , rows = 1 ;
3439
3564
3440
- int ncols = cols*cn ;
3441
- int j0 = cn == 1 ;
3442
- int j1 = ncols - (cols % 2 == 0 && cn == 1 );
3565
+ bool isCN1 = cn == 1 ;
3566
+ size_t j0 = isCN1 ? 1 : 0 ;
3567
+ size_t j1 = cols*cn - ((( cols & 1 ) == 0 && cn == 1 ) ? 1 : 0 );
3443
3568
3444
- if ( depth == CV_32F )
3569
+ if ( depth == CV_32F)
3445
3570
{
3446
3571
const float * dataA = srcA.ptr <float >();
3447
3572
const float * dataB = srcB.ptr <float >();
3448
3573
float * dataC = dst.ptr <float >();
3449
-
3450
- size_t stepA = srcA.step /sizeof (dataA[0 ]);
3451
- size_t stepB = srcB.step /sizeof (dataB[0 ]);
3452
- size_t stepC = dst.step /sizeof (dataC[0 ]);
3453
-
3454
- if ( !is_1d && cn == 1 )
3455
- {
3456
- for ( k = 0 ; k < (cols % 2 ? 1 : 2 ); k++ )
3457
- {
3458
- if ( k == 1 )
3459
- dataA += cols - 1 , dataB += cols - 1 , dataC += cols - 1 ;
3460
- dataC[0 ] = dataA[0 ]*dataB[0 ];
3461
- if ( rows % 2 == 0 )
3462
- dataC[(rows-1 )*stepC] = dataA[(rows-1 )*stepA]*dataB[(rows-1 )*stepB];
3463
- if ( !conjB )
3464
- for ( j = 1 ; j <= rows - 2 ; j += 2 )
3465
- {
3466
- double re = (double )dataA[j*stepA]*dataB[j*stepB] -
3467
- (double )dataA[(j+1 )*stepA]*dataB[(j+1 )*stepB];
3468
- double im = (double )dataA[j*stepA]*dataB[(j+1 )*stepB] +
3469
- (double )dataA[(j+1 )*stepA]*dataB[j*stepB];
3470
- dataC[j*stepC] = (float )re; dataC[(j+1 )*stepC] = (float )im;
3471
- }
3472
- else
3473
- for ( j = 1 ; j <= rows - 2 ; j += 2 )
3474
- {
3475
- double re = (double )dataA[j*stepA]*dataB[j*stepB] +
3476
- (double )dataA[(j+1 )*stepA]*dataB[(j+1 )*stepB];
3477
- double im = (double )dataA[(j+1 )*stepA]*dataB[j*stepB] -
3478
- (double )dataA[j*stepA]*dataB[(j+1 )*stepB];
3479
- dataC[j*stepC] = (float )re; dataC[(j+1 )*stepC] = (float )im;
3480
- }
3481
- if ( k == 1 )
3482
- dataA -= cols - 1 , dataB -= cols - 1 , dataC -= cols - 1 ;
3483
- }
3484
- }
3485
-
3486
- for ( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
3487
- {
3488
- if ( is_1d && cn == 1 )
3489
- {
3490
- dataC[0 ] = dataA[0 ]*dataB[0 ];
3491
- if ( cols % 2 == 0 )
3492
- dataC[j1] = dataA[j1]*dataB[j1];
3493
- }
3494
-
3495
- if ( !conjB )
3496
- for ( j = j0; j < j1; j += 2 )
3497
- {
3498
- double re = (double )dataA[j]*dataB[j] - (double )dataA[j+1 ]*dataB[j+1 ];
3499
- double im = (double )dataA[j+1 ]*dataB[j] + (double )dataA[j]*dataB[j+1 ];
3500
- dataC[j] = (float )re; dataC[j+1 ] = (float )im;
3501
- }
3502
- else
3503
- for ( j = j0; j < j1; j += 2 )
3504
- {
3505
- double re = (double )dataA[j]*dataB[j] + (double )dataA[j+1 ]*dataB[j+1 ];
3506
- double im = (double )dataA[j+1 ]*dataB[j] - (double )dataA[j]*dataB[j+1 ];
3507
- dataC[j] = (float )re; dataC[j+1 ] = (float )im;
3508
- }
3509
- }
3574
+ if (!conjB)
3575
+ mulSpectrums_Impl<float , false >(dataA, dataB, dataC, srcA.step , srcB.step , dst.step , rows, cols, j0, j1, is_1d, isCN1);
3576
+ else
3577
+ mulSpectrums_Impl<float , true >(dataA, dataB, dataC, srcA.step , srcB.step , dst.step , rows, cols, j0, j1, is_1d, isCN1);
3510
3578
}
3511
3579
else
3512
3580
{
3513
3581
const double * dataA = srcA.ptr <double >();
3514
3582
const double * dataB = srcB.ptr <double >();
3515
3583
double * dataC = dst.ptr <double >();
3516
-
3517
- size_t stepA = srcA.step /sizeof (dataA[0 ]);
3518
- size_t stepB = srcB.step /sizeof (dataB[0 ]);
3519
- size_t stepC = dst.step /sizeof (dataC[0 ]);
3520
-
3521
- if ( !is_1d && cn == 1 )
3522
- {
3523
- for ( k = 0 ; k < (cols % 2 ? 1 : 2 ); k++ )
3524
- {
3525
- if ( k == 1 )
3526
- dataA += cols - 1 , dataB += cols - 1 , dataC += cols - 1 ;
3527
- dataC[0 ] = dataA[0 ]*dataB[0 ];
3528
- if ( rows % 2 == 0 )
3529
- dataC[(rows-1 )*stepC] = dataA[(rows-1 )*stepA]*dataB[(rows-1 )*stepB];
3530
- if ( !conjB )
3531
- for ( j = 1 ; j <= rows - 2 ; j += 2 )
3532
- {
3533
- double re = dataA[j*stepA]*dataB[j*stepB] -
3534
- dataA[(j+1 )*stepA]*dataB[(j+1 )*stepB];
3535
- double im = dataA[j*stepA]*dataB[(j+1 )*stepB] +
3536
- dataA[(j+1 )*stepA]*dataB[j*stepB];
3537
- dataC[j*stepC] = re; dataC[(j+1 )*stepC] = im;
3538
- }
3539
- else
3540
- for ( j = 1 ; j <= rows - 2 ; j += 2 )
3541
- {
3542
- double re = dataA[j*stepA]*dataB[j*stepB] +
3543
- dataA[(j+1 )*stepA]*dataB[(j+1 )*stepB];
3544
- double im = dataA[(j+1 )*stepA]*dataB[j*stepB] -
3545
- dataA[j*stepA]*dataB[(j+1 )*stepB];
3546
- dataC[j*stepC] = re; dataC[(j+1 )*stepC] = im;
3547
- }
3548
- if ( k == 1 )
3549
- dataA -= cols - 1 , dataB -= cols - 1 , dataC -= cols - 1 ;
3550
- }
3551
- }
3552
-
3553
- for ( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
3554
- {
3555
- if ( is_1d && cn == 1 )
3556
- {
3557
- dataC[0 ] = dataA[0 ]*dataB[0 ];
3558
- if ( cols % 2 == 0 )
3559
- dataC[j1] = dataA[j1]*dataB[j1];
3560
- }
3561
-
3562
- if ( !conjB )
3563
- for ( j = j0; j < j1; j += 2 )
3564
- {
3565
- double re = dataA[j]*dataB[j] - dataA[j+1 ]*dataB[j+1 ];
3566
- double im = dataA[j+1 ]*dataB[j] + dataA[j]*dataB[j+1 ];
3567
- dataC[j] = re; dataC[j+1 ] = im;
3568
- }
3569
- else
3570
- for ( j = j0; j < j1; j += 2 )
3571
- {
3572
- double re = dataA[j]*dataB[j] + dataA[j+1 ]*dataB[j+1 ];
3573
- double im = dataA[j+1 ]*dataB[j] - dataA[j]*dataB[j+1 ];
3574
- dataC[j] = re; dataC[j+1 ] = im;
3575
- }
3576
- }
3584
+ if (!conjB)
3585
+ mulSpectrums_Impl<double , false >(dataA, dataB, dataC, srcA.step , srcB.step , dst.step , rows, cols, j0, j1, is_1d, isCN1);
3586
+ else
3587
+ mulSpectrums_Impl<double , true >(dataA, dataB, dataC, srcA.step , srcB.step , dst.step , rows, cols, j0, j1, is_1d, isCN1);
3577
3588
}
3578
3589
}
3579
3590
0 commit comments