@@ -1891,163 +1891,174 @@ void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
1891
1891
dft ( src, dst, flags | DFT_INVERSE, nonzero_rows );
1892
1892
}
1893
1893
1894
+ namespace {
1895
+
1896
+ #define VAL (buf, elem ) (((T*)((char *)data ## buf + (step ## buf * (elem))))[0 ])
1897
+ #define MUL_SPECTRUMS_COL (A, B, C ) \
1898
+ VAL (C, 0 ) = VAL(A, 0 ) * VAL (B, 0 ); \
1899
+ for (size_t j = 1 ; j <= rows - 2 ; j += 2 ) \
1900
+ { \
1901
+ double a_re = VAL (A, j), a_im = VAL (A, j + 1 ); \
1902
+ double b_re = VAL (B, j), b_im = VAL (B, j + 1 ); \
1903
+ if (conjB) b_im = -b_im; \
1904
+ double c_re = a_re * b_re - a_im * b_im; \
1905
+ double c_im = a_re * b_im + a_im * b_re; \
1906
+ VAL (C, j) = (T)c_re; VAL (C, j + 1 ) = (T)c_im; \
1907
+ } \
1908
+ if ((rows & 1 ) == 0 ) \
1909
+ VAL (C, rows-1 ) = VAL(A, rows-1 ) * VAL (B, rows-1 )
1910
+
1911
+ template <typename T, bool conjB> static inline
1912
+ void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
1913
+ {
1914
+ MUL_SPECTRUMS_COL (A, B, C);
1915
+ }
1916
+
1917
+ template <typename T, bool conjB> static inline
1918
+ void mulSpectrums_processCol_inplaceA (const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
1919
+ {
1920
+ MUL_SPECTRUMS_COL (AC, B, AC);
1921
+ }
1922
+ template <typename T, bool conjB, bool inplaceA> static inline
1923
+ void mulSpectrums_processCol (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
1924
+ {
1925
+ if (inplaceA)
1926
+ mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
1927
+ else
1928
+ mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
1929
+ }
1930
+ #undef MUL_SPECTRUMS_COL
1931
+ #undef VAL
1932
+
1933
+ template <typename T, bool conjB, bool inplaceA> static inline
1934
+ void mulSpectrums_processCols (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
1935
+ {
1936
+ mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
1937
+ if ((cols & 1 ) == 0 )
1938
+ {
1939
+ mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - 1 , dataB + cols - 1 , dataC + cols - 1 , stepA, stepB, stepC, rows);
1940
+ }
1941
+ }
1942
+
1943
+ #define VAL (buf, elem ) (data ## buf[(elem)])
1944
+ #define MUL_SPECTRUMS_ROW (A, B, C ) \
1945
+ for (size_t j = j0; j < j1; j += 2 ) \
1946
+ { \
1947
+ double a_re = VAL (A, j), a_im = VAL (A, j + 1 ); \
1948
+ double b_re = VAL (B, j), b_im = VAL (B, j + 1 ); \
1949
+ if (conjB) b_im = -b_im; \
1950
+ double c_re = a_re * b_re - a_im * b_im; \
1951
+ double c_im = a_re * b_im + a_im * b_re; \
1952
+ VAL (C, j) = (T)c_re; VAL (C, j + 1 ) = (T)c_im; \
1953
+ }
1954
+ template <typename T, bool conjB> static inline
1955
+ void mulSpectrums_processRow_noinplace (const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
1956
+ {
1957
+ MUL_SPECTRUMS_ROW (A, B, C);
1958
+ }
1959
+ template <typename T, bool conjB> static inline
1960
+ void mulSpectrums_processRow_inplaceA (const T* dataB, T* dataAC, size_t j0, size_t j1)
1961
+ {
1962
+ MUL_SPECTRUMS_ROW (AC, B, AC);
1963
+ }
1964
+ template <typename T, bool conjB, bool inplaceA> static inline
1965
+ void mulSpectrums_processRow (const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
1966
+ {
1967
+ if (inplaceA)
1968
+ mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
1969
+ else
1970
+ mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
1971
+ }
1972
+ #undef MUL_SPECTRUMS_ROW
1973
+ #undef VAL
1974
+
1975
+ template <typename T, bool conjB, bool inplaceA> static inline
1976
+ void mulSpectrums_processRows (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
1977
+ {
1978
+ while (rows-- > 0 )
1979
+ {
1980
+ if (is_1d_CN1)
1981
+ dataC[0 ] = dataA[0 ]*dataB[0 ];
1982
+ mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
1983
+ if (is_1d_CN1 && (cols & 1 ) == 0 )
1984
+ dataC[j1] = dataA[j1]*dataB[j1];
1985
+
1986
+ dataA = (const T*)(((char *)dataA) + stepA);
1987
+ dataB = (const T*)(((char *)dataB) + stepB);
1988
+ dataC = (T*)(((char *)dataC) + stepC);
1989
+ }
1990
+ }
1991
+
1992
+
1993
+ template <typename T, bool conjB, bool inplaceA> static inline
1994
+ void mulSpectrums_Impl_ (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
1995
+ {
1996
+ if (!is_1d && isCN1)
1997
+ {
1998
+ mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
1999
+ }
2000
+ mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
2001
+ }
2002
+ template <typename T, bool conjB> static inline
2003
+ void mulSpectrums_Impl (const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
2004
+ {
2005
+ if (dataA == dataC)
2006
+ mulSpectrums_Impl_<T, conjB, true >(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
2007
+ else
2008
+ mulSpectrums_Impl_<T, conjB, false >(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
2009
+ }
2010
+
2011
+ } // namespace
2012
+
1894
2013
void cv::mulSpectrums ( InputArray _srcA, InputArray _srcB,
1895
2014
OutputArray _dst, int flags, bool conjB )
1896
2015
{
1897
2016
Mat srcA = _srcA.getMat (), srcB = _srcB.getMat ();
1898
2017
int depth = srcA.depth (), cn = srcA.channels (), type = srcA.type ();
1899
- int rows = srcA.rows , cols = srcA.cols ;
1900
- int j, k;
2018
+ size_t rows = srcA.rows , cols = srcA.cols ;
1901
2019
1902
2020
CV_Assert ( type == srcB.type () && srcA.size () == srcB.size () );
1903
2021
CV_Assert ( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
1904
2022
1905
2023
_dst.create ( srcA.rows , srcA.cols , type );
1906
2024
Mat dst = _dst.getMat ();
1907
2025
1908
- bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 &&
1909
- srcA.isContinuous () && srcB.isContinuous () && dst.isContinuous ()));
2026
+ // correct inplace support
2027
+ // Case 'dst.data == srcA.data' is handled by implementation,
2028
+ // because it is used frequently (filter2D, matchTemplate)
2029
+ if (dst.data == srcB.data )
2030
+ srcB = srcB.clone (); // workaround for B only
2031
+
2032
+ bool is_1d = (flags & DFT_ROWS)
2033
+ || (rows == 1 )
2034
+ || (cols == 1 && srcA.isContinuous () && srcB.isContinuous () && dst.isContinuous ());
1910
2035
1911
2036
if ( is_1d && !(flags & DFT_ROWS) )
1912
2037
cols = cols + rows - 1 , rows = 1 ;
1913
2038
1914
- int ncols = cols*cn ;
1915
- int j0 = cn == 1 ;
1916
- int j1 = ncols - (cols % 2 == 0 && cn == 1 );
2039
+ bool isCN1 = cn == 1 ;
2040
+ size_t j0 = isCN1 ? 1 : 0 ;
2041
+ size_t j1 = cols*cn - ((( cols & 1 ) == 0 && cn == 1 ) ? 1 : 0 );
1917
2042
1918
- if ( depth == CV_32F )
2043
+ if ( depth == CV_32F)
1919
2044
{
1920
2045
const float * dataA = (const float *)srcA.data ;
1921
2046
const float * dataB = (const float *)srcB.data ;
1922
2047
float * dataC = (float *)dst.data ;
1923
-
1924
- size_t stepA = srcA.step /sizeof (dataA[0 ]);
1925
- size_t stepB = srcB.step /sizeof (dataB[0 ]);
1926
- size_t stepC = dst.step /sizeof (dataC[0 ]);
1927
-
1928
- if ( !is_1d && cn == 1 )
1929
- {
1930
- for ( k = 0 ; k < (cols % 2 ? 1 : 2 ); k++ )
1931
- {
1932
- if ( k == 1 )
1933
- dataA += cols - 1 , dataB += cols - 1 , dataC += cols - 1 ;
1934
- dataC[0 ] = dataA[0 ]*dataB[0 ];
1935
- if ( rows % 2 == 0 )
1936
- dataC[(rows-1 )*stepC] = dataA[(rows-1 )*stepA]*dataB[(rows-1 )*stepB];
1937
- if ( !conjB )
1938
- for ( j = 1 ; j <= rows - 2 ; j += 2 )
1939
- {
1940
- double re = (double )dataA[j*stepA]*dataB[j*stepB] -
1941
- (double )dataA[(j+1 )*stepA]*dataB[(j+1 )*stepB];
1942
- double im = (double )dataA[j*stepA]*dataB[(j+1 )*stepB] +
1943
- (double )dataA[(j+1 )*stepA]*dataB[j*stepB];
1944
- dataC[j*stepC] = (float )re; dataC[(j+1 )*stepC] = (float )im;
1945
- }
1946
- else
1947
- for ( j = 1 ; j <= rows - 2 ; j += 2 )
1948
- {
1949
- double re = (double )dataA[j*stepA]*dataB[j*stepB] +
1950
- (double )dataA[(j+1 )*stepA]*dataB[(j+1 )*stepB];
1951
- double im = (double )dataA[(j+1 )*stepA]*dataB[j*stepB] -
1952
- (double )dataA[j*stepA]*dataB[(j+1 )*stepB];
1953
- dataC[j*stepC] = (float )re; dataC[(j+1 )*stepC] = (float )im;
1954
- }
1955
- if ( k == 1 )
1956
- dataA -= cols - 1 , dataB -= cols - 1 , dataC -= cols - 1 ;
1957
- }
1958
- }
1959
-
1960
- for ( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
1961
- {
1962
- if ( is_1d && cn == 1 )
1963
- {
1964
- dataC[0 ] = dataA[0 ]*dataB[0 ];
1965
- if ( cols % 2 == 0 )
1966
- dataC[j1] = dataA[j1]*dataB[j1];
1967
- }
1968
-
1969
- if ( !conjB )
1970
- for ( j = j0; j < j1; j += 2 )
1971
- {
1972
- double re = (double )dataA[j]*dataB[j] - (double )dataA[j+1 ]*dataB[j+1 ];
1973
- double im = (double )dataA[j+1 ]*dataB[j] + (double )dataA[j]*dataB[j+1 ];
1974
- dataC[j] = (float )re; dataC[j+1 ] = (float )im;
1975
- }
1976
- else
1977
- for ( j = j0; j < j1; j += 2 )
1978
- {
1979
- double re = (double )dataA[j]*dataB[j] + (double )dataA[j+1 ]*dataB[j+1 ];
1980
- double im = (double )dataA[j+1 ]*dataB[j] - (double )dataA[j]*dataB[j+1 ];
1981
- dataC[j] = (float )re; dataC[j+1 ] = (float )im;
1982
- }
1983
- }
2048
+ if (!conjB)
2049
+ mulSpectrums_Impl<float , false >(dataA, dataB, dataC, srcA.step , srcB.step , dst.step , rows, cols, j0, j1, is_1d, isCN1);
2050
+ else
2051
+ mulSpectrums_Impl<float , true >(dataA, dataB, dataC, srcA.step , srcB.step , dst.step , rows, cols, j0, j1, is_1d, isCN1);
1984
2052
}
1985
2053
else
1986
2054
{
1987
2055
const double * dataA = (const double *)srcA.data ;
1988
2056
const double * dataB = (const double *)srcB.data ;
1989
2057
double * dataC = (double *)dst.data ;
1990
-
1991
- size_t stepA = srcA.step /sizeof (dataA[0 ]);
1992
- size_t stepB = srcB.step /sizeof (dataB[0 ]);
1993
- size_t stepC = dst.step /sizeof (dataC[0 ]);
1994
-
1995
- if ( !is_1d && cn == 1 )
1996
- {
1997
- for ( k = 0 ; k < (cols % 2 ? 1 : 2 ); k++ )
1998
- {
1999
- if ( k == 1 )
2000
- dataA += cols - 1 , dataB += cols - 1 , dataC += cols - 1 ;
2001
- dataC[0 ] = dataA[0 ]*dataB[0 ];
2002
- if ( rows % 2 == 0 )
2003
- dataC[(rows-1 )*stepC] = dataA[(rows-1 )*stepA]*dataB[(rows-1 )*stepB];
2004
- if ( !conjB )
2005
- for ( j = 1 ; j <= rows - 2 ; j += 2 )
2006
- {
2007
- double re = dataA[j*stepA]*dataB[j*stepB] -
2008
- dataA[(j+1 )*stepA]*dataB[(j+1 )*stepB];
2009
- double im = dataA[j*stepA]*dataB[(j+1 )*stepB] +
2010
- dataA[(j+1 )*stepA]*dataB[j*stepB];
2011
- dataC[j*stepC] = re; dataC[(j+1 )*stepC] = im;
2012
- }
2013
- else
2014
- for ( j = 1 ; j <= rows - 2 ; j += 2 )
2015
- {
2016
- double re = dataA[j*stepA]*dataB[j*stepB] +
2017
- dataA[(j+1 )*stepA]*dataB[(j+1 )*stepB];
2018
- double im = dataA[(j+1 )*stepA]*dataB[j*stepB] -
2019
- dataA[j*stepA]*dataB[(j+1 )*stepB];
2020
- dataC[j*stepC] = re; dataC[(j+1 )*stepC] = im;
2021
- }
2022
- if ( k == 1 )
2023
- dataA -= cols - 1 , dataB -= cols - 1 , dataC -= cols - 1 ;
2024
- }
2025
- }
2026
-
2027
- for ( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
2028
- {
2029
- if ( is_1d && cn == 1 )
2030
- {
2031
- dataC[0 ] = dataA[0 ]*dataB[0 ];
2032
- if ( cols % 2 == 0 )
2033
- dataC[j1] = dataA[j1]*dataB[j1];
2034
- }
2035
-
2036
- if ( !conjB )
2037
- for ( j = j0; j < j1; j += 2 )
2038
- {
2039
- double re = dataA[j]*dataB[j] - dataA[j+1 ]*dataB[j+1 ];
2040
- double im = dataA[j+1 ]*dataB[j] + dataA[j]*dataB[j+1 ];
2041
- dataC[j] = re; dataC[j+1 ] = im;
2042
- }
2043
- else
2044
- for ( j = j0; j < j1; j += 2 )
2045
- {
2046
- double re = dataA[j]*dataB[j] + dataA[j+1 ]*dataB[j+1 ];
2047
- double im = dataA[j+1 ]*dataB[j] - dataA[j]*dataB[j+1 ];
2048
- dataC[j] = re; dataC[j+1 ] = im;
2049
- }
2050
- }
2058
+ if (!conjB)
2059
+ mulSpectrums_Impl<double , false >(dataA, dataB, dataC, srcA.step , srcB.step , dst.step , rows, cols, j0, j1, is_1d, isCN1);
2060
+ else
2061
+ mulSpectrums_Impl<double , true >(dataA, dataB, dataC, srcA.step , srcB.step , dst.step , rows, cols, j0, j1, is_1d, isCN1);
2051
2062
}
2052
2063
}
2053
2064
0 commit comments