Skip to content

Commit af0ee30

Browse files
committed
Merge pull request opencv#7819 from alalek:fix_mulSpectrums_inplace_2.4
2 parents ffcf866 + 7bceef7 commit af0ee30

File tree

4 files changed

+179
-165
lines changed

4 files changed

+179
-165
lines changed

cmake/OpenCVCompilerOptions.cmake

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,13 @@ if(MSVC)
341341
string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
342342
string(REPLACE "/W3" "/W4" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
343343

344-
if(NOT ENABLE_NOISY_WARNINGS AND MSVC_VERSION EQUAL 1400)
345-
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4510 /wd4610 /wd4312 /wd4201 /wd4244 /wd4328 /wd4267)
344+
if(NOT ENABLE_NOISY_WARNINGS)
345+
if(MSVC_VERSION EQUAL 1400)
346+
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4510 /wd4610 /wd4312 /wd4201 /wd4244 /wd4328 /wd4267)
347+
endif()
348+
if(MSVC_VERSION LESS 1900) # MSVS2015
349+
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127) # warning C4127: conditional expression is constant
350+
endif()
346351
endif()
347352

348353
# allow extern "C" functions throw exceptions

modules/core/src/dxt.cpp

Lines changed: 141 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,163 +1891,174 @@ void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
18911891
dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
18921892
}
18931893

1894+
namespace {
1895+
1896+
#define VAL(buf, elem) (((T*)((char*)data ## buf + (step ## buf * (elem))))[0])
1897+
#define MUL_SPECTRUMS_COL(A, B, C) \
1898+
VAL(C, 0) = VAL(A, 0) * VAL(B, 0); \
1899+
for (size_t j = 1; j <= rows - 2; j += 2) \
1900+
{ \
1901+
double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
1902+
double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
1903+
if (conjB) b_im = -b_im; \
1904+
double c_re = a_re * b_re - a_im * b_im; \
1905+
double c_im = a_re * b_im + a_im * b_re; \
1906+
VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
1907+
} \
1908+
if ((rows & 1) == 0) \
1909+
VAL(C, rows-1) = VAL(A, rows-1) * VAL(B, rows-1)
1910+
1911+
template <typename T, bool conjB> static inline
1912+
void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
1913+
{
1914+
MUL_SPECTRUMS_COL(A, B, C);
1915+
}
1916+
1917+
template <typename T, bool conjB> static inline
1918+
void mulSpectrums_processCol_inplaceA(const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
1919+
{
1920+
MUL_SPECTRUMS_COL(AC, B, AC);
1921+
}
1922+
template <typename T, bool conjB, bool inplaceA> static inline
1923+
void mulSpectrums_processCol(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
1924+
{
1925+
if (inplaceA)
1926+
mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
1927+
else
1928+
mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
1929+
}
1930+
#undef MUL_SPECTRUMS_COL
1931+
#undef VAL
1932+
1933+
template <typename T, bool conjB, bool inplaceA> static inline
1934+
void mulSpectrums_processCols(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
1935+
{
1936+
mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
1937+
if ((cols & 1) == 0)
1938+
{
1939+
mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - 1, dataB + cols - 1, dataC + cols - 1, stepA, stepB, stepC, rows);
1940+
}
1941+
}
1942+
1943+
#define VAL(buf, elem) (data ## buf[(elem)])
1944+
#define MUL_SPECTRUMS_ROW(A, B, C) \
1945+
for (size_t j = j0; j < j1; j += 2) \
1946+
{ \
1947+
double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
1948+
double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
1949+
if (conjB) b_im = -b_im; \
1950+
double c_re = a_re * b_re - a_im * b_im; \
1951+
double c_im = a_re * b_im + a_im * b_re; \
1952+
VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
1953+
}
1954+
template <typename T, bool conjB> static inline
1955+
void mulSpectrums_processRow_noinplace(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
1956+
{
1957+
MUL_SPECTRUMS_ROW(A, B, C);
1958+
}
1959+
template <typename T, bool conjB> static inline
1960+
void mulSpectrums_processRow_inplaceA(const T* dataB, T* dataAC, size_t j0, size_t j1)
1961+
{
1962+
MUL_SPECTRUMS_ROW(AC, B, AC);
1963+
}
1964+
template <typename T, bool conjB, bool inplaceA> static inline
1965+
void mulSpectrums_processRow(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
1966+
{
1967+
if (inplaceA)
1968+
mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
1969+
else
1970+
mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
1971+
}
1972+
#undef MUL_SPECTRUMS_ROW
1973+
#undef VAL
1974+
1975+
template <typename T, bool conjB, bool inplaceA> static inline
1976+
void mulSpectrums_processRows(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
1977+
{
1978+
while (rows-- > 0)
1979+
{
1980+
if (is_1d_CN1)
1981+
dataC[0] = dataA[0]*dataB[0];
1982+
mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
1983+
if (is_1d_CN1 && (cols & 1) == 0)
1984+
dataC[j1] = dataA[j1]*dataB[j1];
1985+
1986+
dataA = (const T*)(((char*)dataA) + stepA);
1987+
dataB = (const T*)(((char*)dataB) + stepB);
1988+
dataC = (T*)(((char*)dataC) + stepC);
1989+
}
1990+
}
1991+
1992+
1993+
template <typename T, bool conjB, bool inplaceA> static inline
1994+
void mulSpectrums_Impl_(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
1995+
{
1996+
if (!is_1d && isCN1)
1997+
{
1998+
mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
1999+
}
2000+
mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
2001+
}
2002+
template <typename T, bool conjB> static inline
2003+
void mulSpectrums_Impl(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
2004+
{
2005+
if (dataA == dataC)
2006+
mulSpectrums_Impl_<T, conjB, true>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
2007+
else
2008+
mulSpectrums_Impl_<T, conjB, false>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
2009+
}
2010+
2011+
} // namespace
2012+
18942013
void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
18952014
OutputArray _dst, int flags, bool conjB )
18962015
{
18972016
Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
18982017
int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
1899-
int rows = srcA.rows, cols = srcA.cols;
1900-
int j, k;
2018+
size_t rows = srcA.rows, cols = srcA.cols;
19012019

19022020
CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
19032021
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
19042022

19052023
_dst.create( srcA.rows, srcA.cols, type );
19062024
Mat dst = _dst.getMat();
19072025

1908-
bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 &&
1909-
srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous()));
2026+
// correct inplace support
2027+
// Case 'dst.data == srcA.data' is handled by implementation,
2028+
// because it is used frequently (filter2D, matchTemplate)
2029+
if (dst.data == srcB.data)
2030+
srcB = srcB.clone(); // workaround for B only
2031+
2032+
bool is_1d = (flags & DFT_ROWS)
2033+
|| (rows == 1)
2034+
|| (cols == 1 && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous());
19102035

19112036
if( is_1d && !(flags & DFT_ROWS) )
19122037
cols = cols + rows - 1, rows = 1;
19132038

1914-
int ncols = cols*cn;
1915-
int j0 = cn == 1;
1916-
int j1 = ncols - (cols % 2 == 0 && cn == 1);
2039+
bool isCN1 = cn == 1;
2040+
size_t j0 = isCN1 ? 1 : 0;
2041+
size_t j1 = cols*cn - (((cols & 1) == 0 && cn == 1) ? 1 : 0);
19172042

1918-
if( depth == CV_32F )
2043+
if (depth == CV_32F)
19192044
{
19202045
const float* dataA = (const float*)srcA.data;
19212046
const float* dataB = (const float*)srcB.data;
19222047
float* dataC = (float*)dst.data;
1923-
1924-
size_t stepA = srcA.step/sizeof(dataA[0]);
1925-
size_t stepB = srcB.step/sizeof(dataB[0]);
1926-
size_t stepC = dst.step/sizeof(dataC[0]);
1927-
1928-
if( !is_1d && cn == 1 )
1929-
{
1930-
for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
1931-
{
1932-
if( k == 1 )
1933-
dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
1934-
dataC[0] = dataA[0]*dataB[0];
1935-
if( rows % 2 == 0 )
1936-
dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
1937-
if( !conjB )
1938-
for( j = 1; j <= rows - 2; j += 2 )
1939-
{
1940-
double re = (double)dataA[j*stepA]*dataB[j*stepB] -
1941-
(double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
1942-
double im = (double)dataA[j*stepA]*dataB[(j+1)*stepB] +
1943-
(double)dataA[(j+1)*stepA]*dataB[j*stepB];
1944-
dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
1945-
}
1946-
else
1947-
for( j = 1; j <= rows - 2; j += 2 )
1948-
{
1949-
double re = (double)dataA[j*stepA]*dataB[j*stepB] +
1950-
(double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
1951-
double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] -
1952-
(double)dataA[j*stepA]*dataB[(j+1)*stepB];
1953-
dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
1954-
}
1955-
if( k == 1 )
1956-
dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
1957-
}
1958-
}
1959-
1960-
for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
1961-
{
1962-
if( is_1d && cn == 1 )
1963-
{
1964-
dataC[0] = dataA[0]*dataB[0];
1965-
if( cols % 2 == 0 )
1966-
dataC[j1] = dataA[j1]*dataB[j1];
1967-
}
1968-
1969-
if( !conjB )
1970-
for( j = j0; j < j1; j += 2 )
1971-
{
1972-
double re = (double)dataA[j]*dataB[j] - (double)dataA[j+1]*dataB[j+1];
1973-
double im = (double)dataA[j+1]*dataB[j] + (double)dataA[j]*dataB[j+1];
1974-
dataC[j] = (float)re; dataC[j+1] = (float)im;
1975-
}
1976-
else
1977-
for( j = j0; j < j1; j += 2 )
1978-
{
1979-
double re = (double)dataA[j]*dataB[j] + (double)dataA[j+1]*dataB[j+1];
1980-
double im = (double)dataA[j+1]*dataB[j] - (double)dataA[j]*dataB[j+1];
1981-
dataC[j] = (float)re; dataC[j+1] = (float)im;
1982-
}
1983-
}
2048+
if (!conjB)
2049+
mulSpectrums_Impl<float, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
2050+
else
2051+
mulSpectrums_Impl<float, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
19842052
}
19852053
else
19862054
{
19872055
const double* dataA = (const double*)srcA.data;
19882056
const double* dataB = (const double*)srcB.data;
19892057
double* dataC = (double*)dst.data;
1990-
1991-
size_t stepA = srcA.step/sizeof(dataA[0]);
1992-
size_t stepB = srcB.step/sizeof(dataB[0]);
1993-
size_t stepC = dst.step/sizeof(dataC[0]);
1994-
1995-
if( !is_1d && cn == 1 )
1996-
{
1997-
for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
1998-
{
1999-
if( k == 1 )
2000-
dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
2001-
dataC[0] = dataA[0]*dataB[0];
2002-
if( rows % 2 == 0 )
2003-
dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
2004-
if( !conjB )
2005-
for( j = 1; j <= rows - 2; j += 2 )
2006-
{
2007-
double re = dataA[j*stepA]*dataB[j*stepB] -
2008-
dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
2009-
double im = dataA[j*stepA]*dataB[(j+1)*stepB] +
2010-
dataA[(j+1)*stepA]*dataB[j*stepB];
2011-
dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
2012-
}
2013-
else
2014-
for( j = 1; j <= rows - 2; j += 2 )
2015-
{
2016-
double re = dataA[j*stepA]*dataB[j*stepB] +
2017-
dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
2018-
double im = dataA[(j+1)*stepA]*dataB[j*stepB] -
2019-
dataA[j*stepA]*dataB[(j+1)*stepB];
2020-
dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
2021-
}
2022-
if( k == 1 )
2023-
dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
2024-
}
2025-
}
2026-
2027-
for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
2028-
{
2029-
if( is_1d && cn == 1 )
2030-
{
2031-
dataC[0] = dataA[0]*dataB[0];
2032-
if( cols % 2 == 0 )
2033-
dataC[j1] = dataA[j1]*dataB[j1];
2034-
}
2035-
2036-
if( !conjB )
2037-
for( j = j0; j < j1; j += 2 )
2038-
{
2039-
double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1];
2040-
double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1];
2041-
dataC[j] = re; dataC[j+1] = im;
2042-
}
2043-
else
2044-
for( j = j0; j < j1; j += 2 )
2045-
{
2046-
double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1];
2047-
double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1];
2048-
dataC[j] = re; dataC[j+1] = im;
2049-
}
2050-
}
2058+
if (!conjB)
2059+
mulSpectrums_Impl<double, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
2060+
else
2061+
mulSpectrums_Impl<double, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
20512062
}
20522063
}
20532064

0 commit comments

Comments
 (0)