Skip to content

Commit 6946f51

Browse files
committed
mulSpectrums: refactor code
1 parent 62c9ff2 commit 6946f51

File tree

1 file changed

+138
-133
lines changed

1 file changed

+138
-133
lines changed

modules/core/src/dxt.cpp

Lines changed: 138 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,13 +1891,131 @@ void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
18911891
dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
18921892
}
18931893

1894+
namespace {
1895+
1896+
#define VAL(buf, elem) (((T*)((char*)data ## buf + (step ## buf * (elem))))[0])
1897+
#define MUL_SPECTRUMS_COL(A, B, C) \
1898+
VAL(C, 0) = VAL(A, 0) * VAL(B, 0); \
1899+
for (size_t j = 1; j <= rows - 2; j += 2) \
1900+
{ \
1901+
double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
1902+
double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
1903+
if (conjB) b_im = -b_im; \
1904+
double c_re = a_re * b_re - a_im * b_im; \
1905+
double c_im = a_re * b_im + a_im * b_re; \
1906+
VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
1907+
} \
1908+
if ((rows & 1) == 0) \
1909+
VAL(C, rows-1) = VAL(A, rows-1) * VAL(B, rows-1)
1910+
1911+
template <typename T, bool conjB> static inline
1912+
void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
1913+
{
1914+
MUL_SPECTRUMS_COL(A, B, C);
1915+
}
1916+
1917+
template <typename T, bool conjB> static inline
1918+
void mulSpectrums_processCol_inplaceA(const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
1919+
{
1920+
MUL_SPECTRUMS_COL(AC, B, AC);
1921+
}
1922+
template <typename T, bool conjB, bool inplaceA> static inline
1923+
void mulSpectrums_processCol(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
1924+
{
1925+
if (inplaceA)
1926+
mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
1927+
else
1928+
mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
1929+
}
1930+
#undef MUL_SPECTRUMS_COL
1931+
#undef VAL
1932+
1933+
template <typename T, bool conjB, bool inplaceA> static inline
1934+
void mulSpectrums_processCols(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
1935+
{
1936+
mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
1937+
if ((cols & 1) == 0)
1938+
{
1939+
mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - 1, dataB + cols - 1, dataC + cols - 1, stepA, stepB, stepC, rows);
1940+
}
1941+
}
1942+
1943+
#define VAL(buf, elem) (data ## buf[(elem)])
1944+
#define MUL_SPECTRUMS_ROW(A, B, C) \
1945+
for (size_t j = j0; j < j1; j += 2) \
1946+
{ \
1947+
double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
1948+
double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
1949+
if (conjB) b_im = -b_im; \
1950+
double c_re = a_re * b_re - a_im * b_im; \
1951+
double c_im = a_re * b_im + a_im * b_re; \
1952+
VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
1953+
}
1954+
template <typename T, bool conjB> static inline
1955+
void mulSpectrums_processRow_noinplace(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
1956+
{
1957+
MUL_SPECTRUMS_ROW(A, B, C);
1958+
}
1959+
template <typename T, bool conjB> static inline
1960+
void mulSpectrums_processRow_inplaceA(const T* dataB, T* dataAC, size_t j0, size_t j1)
1961+
{
1962+
MUL_SPECTRUMS_ROW(AC, B, AC);
1963+
}
1964+
template <typename T, bool conjB, bool inplaceA> static inline
1965+
void mulSpectrums_processRow(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
1966+
{
1967+
if (inplaceA)
1968+
mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
1969+
else
1970+
mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
1971+
}
1972+
#undef MUL_SPECTRUMS_ROW
1973+
#undef VAL
1974+
1975+
template <typename T, bool conjB, bool inplaceA> static inline
1976+
void mulSpectrums_processRows(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
1977+
{
1978+
while (rows-- > 0)
1979+
{
1980+
if (is_1d_CN1)
1981+
dataC[0] = dataA[0]*dataB[0];
1982+
mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
1983+
if (is_1d_CN1 && (cols & 1) == 0)
1984+
dataC[j1] = dataA[j1]*dataB[j1];
1985+
1986+
dataA = (const T*)(((char*)dataA) + stepA);
1987+
dataB = (const T*)(((char*)dataB) + stepB);
1988+
dataC = (T*)(((char*)dataC) + stepC);
1989+
}
1990+
}
1991+
1992+
1993+
template <typename T, bool conjB, bool inplaceA> static inline
1994+
void mulSpectrums_Impl_(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
1995+
{
1996+
if (!is_1d && isCN1)
1997+
{
1998+
mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
1999+
}
2000+
mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
2001+
}
2002+
template <typename T, bool conjB> static inline
2003+
void mulSpectrums_Impl(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
2004+
{
2005+
if (dataA == dataC)
2006+
mulSpectrums_Impl_<T, conjB, true>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
2007+
else
2008+
mulSpectrums_Impl_<T, conjB, false>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
2009+
}
2010+
2011+
} // namespace
2012+
18942013
void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
18952014
OutputArray _dst, int flags, bool conjB )
18962015
{
18972016
Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
18982017
int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
1899-
int rows = srcA.rows, cols = srcA.cols;
1900-
int j, k;
2018+
size_t rows = srcA.rows, cols = srcA.cols;
19012019

19022020
CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
19032021
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
@@ -1906,154 +2024,41 @@ void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
19062024
Mat dst = _dst.getMat();
19072025

19082026
// correct inplace support
1909-
if (dst.data == srcA.data)
1910-
srcA = srcA.clone();
2027+
// Case 'dst.data == srcA.data' is handled by implementation,
2028+
// because it is used frequently (filter2D, matchTemplate)
19112029
if (dst.data == srcB.data)
1912-
srcB = srcB.clone();
2030+
srcB = srcB.clone(); // workaround for B only
19132031

1914-
bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 &&
1915-
srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous()));
2032+
bool is_1d = (flags & DFT_ROWS)
2033+
|| (rows == 1)
2034+
|| (cols == 1 && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous());
19162035

19172036
if( is_1d && !(flags & DFT_ROWS) )
19182037
cols = cols + rows - 1, rows = 1;
19192038

1920-
int ncols = cols*cn;
1921-
int j0 = cn == 1;
1922-
int j1 = ncols - (cols % 2 == 0 && cn == 1);
2039+
bool isCN1 = cn == 1;
2040+
size_t j0 = isCN1 ? 1 : 0;
2041+
size_t j1 = cols*cn - (((cols & 1) == 0 && cn == 1) ? 1 : 0);
19232042

1924-
if( depth == CV_32F )
2043+
if (depth == CV_32F)
19252044
{
19262045
const float* dataA = (const float*)srcA.data;
19272046
const float* dataB = (const float*)srcB.data;
19282047
float* dataC = (float*)dst.data;
1929-
1930-
size_t stepA = srcA.step/sizeof(dataA[0]);
1931-
size_t stepB = srcB.step/sizeof(dataB[0]);
1932-
size_t stepC = dst.step/sizeof(dataC[0]);
1933-
1934-
if( !is_1d && cn == 1 )
1935-
{
1936-
for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
1937-
{
1938-
if( k == 1 )
1939-
dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
1940-
dataC[0] = dataA[0]*dataB[0];
1941-
if( rows % 2 == 0 )
1942-
dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
1943-
if( !conjB )
1944-
for( j = 1; j <= rows - 2; j += 2 )
1945-
{
1946-
double re = (double)dataA[j*stepA]*dataB[j*stepB] -
1947-
(double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
1948-
double im = (double)dataA[j*stepA]*dataB[(j+1)*stepB] +
1949-
(double)dataA[(j+1)*stepA]*dataB[j*stepB];
1950-
dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
1951-
}
1952-
else
1953-
for( j = 1; j <= rows - 2; j += 2 )
1954-
{
1955-
double re = (double)dataA[j*stepA]*dataB[j*stepB] +
1956-
(double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
1957-
double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] -
1958-
(double)dataA[j*stepA]*dataB[(j+1)*stepB];
1959-
dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
1960-
}
1961-
if( k == 1 )
1962-
dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
1963-
}
1964-
}
1965-
1966-
for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
1967-
{
1968-
if( is_1d && cn == 1 )
1969-
{
1970-
dataC[0] = dataA[0]*dataB[0];
1971-
if( cols % 2 == 0 )
1972-
dataC[j1] = dataA[j1]*dataB[j1];
1973-
}
1974-
1975-
if( !conjB )
1976-
for( j = j0; j < j1; j += 2 )
1977-
{
1978-
double re = (double)dataA[j]*dataB[j] - (double)dataA[j+1]*dataB[j+1];
1979-
double im = (double)dataA[j+1]*dataB[j] + (double)dataA[j]*dataB[j+1];
1980-
dataC[j] = (float)re; dataC[j+1] = (float)im;
1981-
}
1982-
else
1983-
for( j = j0; j < j1; j += 2 )
1984-
{
1985-
double re = (double)dataA[j]*dataB[j] + (double)dataA[j+1]*dataB[j+1];
1986-
double im = (double)dataA[j+1]*dataB[j] - (double)dataA[j]*dataB[j+1];
1987-
dataC[j] = (float)re; dataC[j+1] = (float)im;
1988-
}
1989-
}
2048+
if (!conjB)
2049+
mulSpectrums_Impl<float, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
2050+
else
2051+
mulSpectrums_Impl<float, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
19902052
}
19912053
else
19922054
{
19932055
const double* dataA = (const double*)srcA.data;
19942056
const double* dataB = (const double*)srcB.data;
19952057
double* dataC = (double*)dst.data;
1996-
1997-
size_t stepA = srcA.step/sizeof(dataA[0]);
1998-
size_t stepB = srcB.step/sizeof(dataB[0]);
1999-
size_t stepC = dst.step/sizeof(dataC[0]);
2000-
2001-
if( !is_1d && cn == 1 )
2002-
{
2003-
for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
2004-
{
2005-
if( k == 1 )
2006-
dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
2007-
dataC[0] = dataA[0]*dataB[0];
2008-
if( rows % 2 == 0 )
2009-
dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
2010-
if( !conjB )
2011-
for( j = 1; j <= rows - 2; j += 2 )
2012-
{
2013-
double re = dataA[j*stepA]*dataB[j*stepB] -
2014-
dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
2015-
double im = dataA[j*stepA]*dataB[(j+1)*stepB] +
2016-
dataA[(j+1)*stepA]*dataB[j*stepB];
2017-
dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
2018-
}
2019-
else
2020-
for( j = 1; j <= rows - 2; j += 2 )
2021-
{
2022-
double re = dataA[j*stepA]*dataB[j*stepB] +
2023-
dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
2024-
double im = dataA[(j+1)*stepA]*dataB[j*stepB] -
2025-
dataA[j*stepA]*dataB[(j+1)*stepB];
2026-
dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
2027-
}
2028-
if( k == 1 )
2029-
dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
2030-
}
2031-
}
2032-
2033-
for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
2034-
{
2035-
if( is_1d && cn == 1 )
2036-
{
2037-
dataC[0] = dataA[0]*dataB[0];
2038-
if( cols % 2 == 0 )
2039-
dataC[j1] = dataA[j1]*dataB[j1];
2040-
}
2041-
2042-
if( !conjB )
2043-
for( j = j0; j < j1; j += 2 )
2044-
{
2045-
double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1];
2046-
double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1];
2047-
dataC[j] = re; dataC[j+1] = im;
2048-
}
2049-
else
2050-
for( j = j0; j < j1; j += 2 )
2051-
{
2052-
double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1];
2053-
double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1];
2054-
dataC[j] = re; dataC[j+1] = im;
2055-
}
2056-
}
2058+
if (!conjB)
2059+
mulSpectrums_Impl<double, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
2060+
else
2061+
mulSpectrums_Impl<double, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
20572062
}
20582063
}
20592064

0 commit comments

Comments
 (0)