|
5 | 5 | #ifndef OPENCV_HAL_VSX_UTILS_HPP
|
6 | 6 | #define OPENCV_HAL_VSX_UTILS_HPP
|
7 | 7 |
|
// CV_VSX_HAS_FLOAT64_CONVERT is a compile-time switch: it is defined to 0 when
// both __PPC64__ and __power9__ are defined, and to 1 in every other case.
// NOTE(review): __power9__ does not look like a compiler-predefined macro
// (GCC and Clang document _ARCH_PWR9 for POWER9 targets) - confirm it is set
// by the build system, otherwise this condition never holds and the value is
// always 1.
// NOTE(review): the polarity turns the feature OFF specifically for POWER9 -
// confirm that this is the intended direction.
#if defined(__PPC64__) && defined(__power9__)
#define CV_VSX_HAS_FLOAT64_CONVERT 0
#else
#define CV_VSX_HAS_FLOAT64_CONVERT 1
#endif
| 13 | + |
8 | 14 | #include "opencv2/core/cvdef.h"
|
9 | 15 |
|
10 | 16 | #ifndef SKIP_INCLUDES
|
@@ -257,8 +263,27 @@ VSX_IMPL_1VRG(vec_udword2, vec_udword2, vpopcntd, vec_popcntu)
|
257 | 263 | VSX_IMPL_1VRG(vec_udword2, vec_dword2, vpopcntd, vec_popcntu)
|
258 | 264 |
|
259 | 265 | // converts between single and double-precision
|
260 |
| -VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, vec_floate) |
261 |
| -VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, vec_doubleo) |
| 266 | + |
| 267 | +#if CV_VSX_HAS_FLOAT64_CONVERT |
| 268 | +// Use VSX double<->float conversion instructions (if supported by the architecture) |
| 269 | + VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, vec_floate) |
| 270 | + VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, vec_doubleo) |
| 271 | +#else |
| 272 | +// Fallback: implement vec_cvfo using scalar operations (to ensure successful linking) |
| 273 | + static inline vec_float4 vec_cvfo(const vec_double2& a) |
| 274 | + { |
| 275 | + float r0 = static_cast<float>(reinterpret_cast<const double*>(&a)[0]); |
| 276 | + float r1 = static_cast<float>(reinterpret_cast<const double*>(&a)[1]); |
| 277 | + return (vec_float4){r0, r1, 0.f, 0.f}; |
| 278 | + } |
| 279 | + |
| 280 | + static inline vec_double2 vec_cvfo(const vec_float4& a) |
| 281 | + { |
| 282 | + double r0 = static_cast<double>(reinterpret_cast<const float*>(&a)[0]); |
| 283 | + double r1 = static_cast<double>(reinterpret_cast<const float*>(&a)[1]); |
| 284 | + return (vec_double2){r0, r1}; |
| 285 | + } |
| 286 | +#endif |
262 | 287 |
|
263 | 288 | // converts word and doubleword to double-precision
|
264 | 289 | #undef vec_ctd
|
|
0 commit comments