Skip to content

Commit 1e6ce1d

Browse files
committed
core(mathfuncs_core): cpu optimization dispatched code
1 parent 17e5e4c commit 1e6ce1d

File tree

3 files changed

+252
-157
lines changed

3 files changed

+252
-157
lines changed

modules/core/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
set(the_description "The Core Functionality")
2+
3+
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
4+
25
ocv_add_module(core
36
"${OPENCV_HAL_LINKER_LIBS}"
47
OPTIONAL opencv_cudev
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// This file is part of OpenCV project.
2+
// It is subject to the license terms in the LICENSE file found in the top-level directory
3+
// of this distribution and at http://opencv.org/license.html.
4+
5+
#include "precomp.hpp"
6+
7+
#include "mathfuncs_core.simd.hpp"
8+
#include "mathfuncs_core.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
9+
10+
namespace cv { namespace hal {
11+
12+
///////////////////////////////////// ATAN2 ////////////////////////////////////
13+
14+
void fastAtan32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
15+
{
16+
CV_INSTRUMENT_REGION()
17+
18+
CALL_HAL(fastAtan32f, cv_hal_fastAtan32f, Y, X, angle, len, angleInDegrees);
19+
20+
CV_CPU_DISPATCH(fastAtan32f, (Y, X, angle, len, angleInDegrees),
21+
CV_CPU_DISPATCH_MODES_ALL);
22+
}
23+
24+
void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool angleInDegrees)
25+
{
26+
CV_INSTRUMENT_REGION()
27+
28+
CALL_HAL(fastAtan64f, cv_hal_fastAtan64f, Y, X, angle, len, angleInDegrees);
29+
30+
CV_CPU_DISPATCH(fastAtan64f, (Y, X, angle, len, angleInDegrees),
31+
CV_CPU_DISPATCH_MODES_ALL);
32+
}
33+
34+
// deprecated
35+
void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
36+
{
37+
CV_INSTRUMENT_REGION()
38+
39+
fastAtan32f(Y, X, angle, len, angleInDegrees);
40+
}
41+
42+
void magnitude32f(const float* x, const float* y, float* mag, int len)
43+
{
44+
CV_INSTRUMENT_REGION()
45+
46+
CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
47+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);
48+
49+
CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
50+
CV_CPU_DISPATCH_MODES_ALL);
51+
}
52+
53+
void magnitude64f(const double* x, const double* y, double* mag, int len)
54+
{
55+
CV_INSTRUMENT_REGION()
56+
57+
CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
58+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);
59+
60+
CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
61+
CV_CPU_DISPATCH_MODES_ALL);
62+
}
63+
64+
65+
void invSqrt32f(const float* src, float* dst, int len)
66+
{
67+
CV_INSTRUMENT_REGION()
68+
69+
CALL_HAL(invSqrt32f, cv_hal_invSqrt32f, src, dst, len);
70+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_32f_A21, src, dst, len) >= 0);
71+
72+
CV_CPU_DISPATCH(invSqrt32f, (src, dst, len),
73+
CV_CPU_DISPATCH_MODES_ALL);
74+
}
75+
76+
77+
void invSqrt64f(const double* src, double* dst, int len)
78+
{
79+
CV_INSTRUMENT_REGION()
80+
81+
CALL_HAL(invSqrt64f, cv_hal_invSqrt64f, src, dst, len);
82+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_64f_A50, src, dst, len) >= 0);
83+
84+
CV_CPU_DISPATCH(invSqrt64f, (src, dst, len),
85+
CV_CPU_DISPATCH_MODES_ALL);
86+
}
87+
88+
89+
void sqrt32f(const float* src, float* dst, int len)
90+
{
91+
CV_INSTRUMENT_REGION()
92+
93+
CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
94+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);
95+
96+
CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
97+
CV_CPU_DISPATCH_MODES_ALL);
98+
}
99+
100+
101+
void sqrt64f(const double* src, double* dst, int len)
102+
{
103+
CV_INSTRUMENT_REGION()
104+
105+
CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
106+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);
107+
108+
CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
109+
CV_CPU_DISPATCH_MODES_ALL);
110+
}
111+
112+
void exp32f(const float *src, float *dst, int n)
113+
{
114+
CV_INSTRUMENT_REGION()
115+
116+
CALL_HAL(exp32f, cv_hal_exp32f, src, dst, n);
117+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_32f_A21, src, dst, n) >= 0);
118+
119+
CV_CPU_DISPATCH(exp32f, (src, dst, n),
120+
CV_CPU_DISPATCH_MODES_ALL);
121+
}
122+
123+
void exp64f(const double *src, double *dst, int n)
124+
{
125+
CV_INSTRUMENT_REGION()
126+
127+
CALL_HAL(exp64f, cv_hal_exp64f, src, dst, n);
128+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_64f_A50, src, dst, n) >= 0);
129+
130+
CV_CPU_DISPATCH(exp64f, (src, dst, n),
131+
CV_CPU_DISPATCH_MODES_ALL);
132+
}
133+
134+
void log32f(const float *src, float *dst, int n)
135+
{
136+
CV_INSTRUMENT_REGION()
137+
138+
CALL_HAL(log32f, cv_hal_log32f, src, dst, n);
139+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_32f_A21, src, dst, n) >= 0);
140+
141+
CV_CPU_DISPATCH(log32f, (src, dst, n),
142+
CV_CPU_DISPATCH_MODES_ALL);
143+
}
144+
145+
void log64f(const double *src, double *dst, int n)
146+
{
147+
CV_INSTRUMENT_REGION()
148+
149+
CALL_HAL(log64f, cv_hal_log64f, src, dst, n);
150+
CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_64f_A50, src, dst, n) >= 0);
151+
152+
CV_CPU_DISPATCH(log64f, (src, dst, n),
153+
CV_CPU_DISPATCH_MODES_ALL);
154+
}
155+
156+
//=============================================================================
157+
// for compatibility with 3.0
158+
159+
void exp(const float* src, float* dst, int n)
160+
{
161+
exp32f(src, dst, n);
162+
}
163+
164+
void exp(const double* src, double* dst, int n)
165+
{
166+
exp64f(src, dst, n);
167+
}
168+
169+
void log(const float* src, float* dst, int n)
170+
{
171+
log32f(src, dst, n);
172+
}
173+
174+
void log(const double* src, double* dst, int n)
175+
{
176+
log64f(src, dst, n);
177+
}
178+
179+
void magnitude(const float* x, const float* y, float* dst, int n)
180+
{
181+
magnitude32f(x, y, dst, n);
182+
}
183+
184+
void magnitude(const double* x, const double* y, double* dst, int n)
185+
{
186+
magnitude64f(x, y, dst, n);
187+
}
188+
189+
void sqrt(const float* src, float* dst, int len)
190+
{
191+
sqrt32f(src, dst, len);
192+
}
193+
194+
void sqrt(const double* src, double* dst, int len)
195+
{
196+
sqrt64f(src, dst, len);
197+
}
198+
199+
void invSqrt(const float* src, float* dst, int len)
200+
{
201+
invSqrt32f(src, dst, len);
202+
}
203+
204+
void invSqrt(const double* src, double* dst, int len)
205+
{
206+
invSqrt64f(src, dst, len);
207+
}
208+
209+
}} // namespace cv::hal::
210+
211+
float cv::fastAtan2( float y, float x )
212+
{
213+
using namespace cv::hal;
214+
CV_CPU_CALL_BASELINE(fastAtan2, (y, x));
215+
}

0 commit comments

Comments
 (0)