Skip to content

Commit 7d55c09

Browse files
committed
Merge pull request opencv#9763 from seiko2plus:addVsxCore
2 parents 0be1f4a + 4b968d1 commit 7d55c09

15 files changed

+1974
-3
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ OCV_OPTION(ENABLE_PROFILING "Enable profiling in the GCC compiler (Add
298298
OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" OFF IF CMAKE_COMPILER_IS_GNUCXX )
299299
OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) )
300300
OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) )
301+
OCV_OPTION(ENABLE_VSX "Enable POWER8 and above VSX (64-bit little-endian)" ON IF (CMAKE_COMPILER_IS_GNUCXX AND PPC64LE) )
301302
OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
302303
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" (NEON OR ANDROID_ARM_NEON OR AARCH64) IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )
303304
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) )

cmake/OpenCVCompilerOptimizations.cmake

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512
3030
list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16)
31+
list(APPEND CPU_ALL_OPTIMIZATIONS VSX)
3132
list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
3233

3334
ocv_update(CPU_VFPV3_FEATURE_ALIAS "")
@@ -79,6 +80,7 @@ ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON)
7980
ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF)
8081
ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF)
8182

83+
ocv_optimization_process_obsolete_option(ENABLE_VSX VSX OFF)
8284

8385
macro(ocv_is_optimization_in_list resultvar check_opt)
8486
set(__checked "")
@@ -266,6 +268,15 @@ elseif(ARM OR AARCH64)
266268
ocv_update(CPU_FP16_IMPLIES "NEON")
267269
set(CPU_BASELINE "NEON;FP16" CACHE STRING "${HELP_CPU_BASELINE}")
268270
endif()
271+
elseif(PPC64LE)
272+
ocv_update(CPU_KNOWN_OPTIMIZATIONS "VSX")
273+
ocv_update(CPU_VSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_vsx.cpp")
274+
275+
# Pick the compiler switch that turns VSX on. Clang gets the explicit
# -mvsx/-maltivec pair unless the compiler path matches "xlc" (presumably
# IBM XL); every other compiler gets -mcpu=power8.
# The compiler path is quoted so that an empty or list-valued
# CMAKE_CXX_COMPILER cannot break the if() argument list.
if(CMAKE_COMPILER_IS_CLANGCXX AND (NOT "${CMAKE_CXX_COMPILER}" MATCHES "xlc"))
276+
ocv_update(CPU_VSX_FLAGS_ON "-mvsx -maltivec")
277+
else()
278+
ocv_update(CPU_VSX_FLAGS_ON "-mcpu=power8")
279+
endif()
269280
endif()
270281

271282
# Helper values for cmake-gui

cmake/OpenCVDetectCXXCompiler.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
7272
set(ARM 1)
7373
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
7474
set(AARCH64 1)
75+
# The alternatives are grouped so that "^" anchors both spellings, in line
# with the arm/aarch64 patterns above.
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)")
76+
set(PPC64LE 1)
7577
endif()
7678

7779
# Workaround for 32-bit operating systems on 64-bit x86_64 processor

cmake/OpenCVPackaging.cmake

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ elseif(ARM)
3131
elseif(AARCH64)
3232
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "arm64")
3333
set(CPACK_RPM_PACKAGE_ARCHITECTURE "aarch64")
34+
elseif(PPC64LE)
35+
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "ppc64el")
36+
set(CPACK_RPM_PACKAGE_ARCHITECTURE "ppc64le")
3437
else()
3538
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
3639
set(CPACK_RPM_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
@@ -164,4 +167,4 @@ endif(NOT OPENCV_CUSTOM_PACKAGE_INFO)
164167

165168
include(CPack)
166169

167-
ENDif(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
170+
ENDif(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")

cmake/checks/cpu_vsx.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Configure-time probe for VSX: compilation must fail unless the compiler
// targets VSX on 64-bit little-endian PowerPC, the same condition under which
// cv_cpu_dispatch.h defines CV_VSX.
# if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
2+
# include <altivec.h>
3+
# else
4+
# error "VSX is not supported"
5+
# endif
6+
7+
// Exercises one splat and one multiply-add on a float vector; the result is
// never inspected - presumably only successful compilation matters to the
// configure check (TODO confirm how CPU_VSX_TEST_FILE is consumed).
int main()
8+
{
9+
__vector float testF = vec_splats(0.f);
10+
testF = vec_madd(testF, testF, testF);
11+
return 0;
12+
}

modules/core/include/opencv2/core/base.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,5 +740,6 @@ CV_EXPORTS_W void setUseIPP_NE(bool flag);
740740
} // cv
741741

742742
#include "opencv2/core/neon_utils.hpp"
743+
#include "opencv2/core/vsx_utils.hpp"
743744

744745
#endif //OPENCV_CORE_BASE_HPP

modules/core/include/opencv2/core/cv_cpu_dispatch.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,14 @@
9999
# include <arm_neon.h>
100100
#endif
101101

102+
// Enable VSX only when the compiler targets VSX on 64-bit little-endian PowerPC.
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
103+
# include <altivec.h>
104+
// Drop the vector/pixel/bool macros straight after the include - presumably
// altivec.h defines them and they would clash with ordinary C++ identifiers
// (NOTE(review): confirm against the compiler's altivec.h).
# undef vector
105+
# undef pixel
106+
# undef bool
107+
# define CV_VSX 1
108+
#endif
109+
102110
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
103111

104112
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
@@ -135,6 +143,12 @@ struct VZeroUpperGuard {
135143
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
136144
# include <arm_neon.h>
137145
# define CV_NEON 1
146+
#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
147+
# include <altivec.h>
148+
# undef vector
149+
# undef pixel
150+
# undef bool
151+
# define CV_VSX 1
138152
#endif
139153

140154
#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
@@ -208,3 +222,7 @@ struct VZeroUpperGuard {
208222
#ifndef CV_NEON
209223
# define CV_NEON 0
210224
#endif
225+
226+
// Fallback: if nothing above enabled VSX, define CV_VSX as 0 so the macro
// always expands to a value (0 or 1).
#ifndef CV_VSX
227+
# define CV_VSX 0
228+
#endif

modules/core/include/opencv2/core/cv_cpu_helper.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,5 +180,20 @@
180180
#endif
181181
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
182182

183+
// VSX dispatch helpers. Three configurations are distinguished:
//  1. CV_CPU_COMPILE_VSX defined: support is the constant 1 and
//     CV_CPU_CALL_VSX returns the opt_VSX:: result unconditionally.
//  2. CV_CPU_DISPATCH_COMPILE_VSX defined: support is queried through
//     cv::checkHardwareSupport(CV_CPU_VSX) and the call returns only if it holds.
//  3. Otherwise: CV_TRY_VSX and CV_CPU_HAS_SUPPORT_VSX are 0 and
//     CV_CPU_CALL_VSX expands to nothing.
// Cases 1 and 2 also require CV_ENABLE_INTRINSICS and no CV_DISABLE_OPTIMIZATION.
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
184+
# define CV_TRY_VSX 1
185+
# define CV_CPU_HAS_SUPPORT_VSX 1
186+
# define CV_CPU_CALL_VSX(fn, args) return (opt_VSX::fn args)
187+
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
188+
# define CV_TRY_VSX 1
189+
# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
190+
# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
191+
#else
192+
# define CV_TRY_VSX 0
193+
# define CV_CPU_HAS_SUPPORT_VSX 0
194+
# define CV_CPU_CALL_VSX(fn, args)
195+
#endif
196+
// Chain link: attempt the VSX call first, then expand the link named by `mode`
// with the remaining arguments.
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
197+
183198
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
184199
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */

modules/core/include/opencv2/core/cvdef.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ namespace cv { namespace debug_build_guard { } using namespace debug_build_guard
153153

154154
#define CV_CPU_NEON 100
155155

156+
#define CV_CPU_VSX 200
157+
156158
// when adding to this list remember to update the following enum
157159
#define CV_HARDWARE_MAX_FEATURE 255
158160

@@ -182,7 +184,9 @@ enum CpuFeatures {
182184
CPU_AVX_512VBMI = 20,
183185
CPU_AVX_512VL = 21,
184186

185-
CPU_NEON = 100
187+
CPU_NEON = 100,
188+
189+
CPU_VSX = 200
186190
};
187191

188192

modules/core/include/opencv2/core/hal/intrin.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
308308
#ifdef CV_DOXYGEN
309309
# undef CV_SSE2
310310
# undef CV_NEON
311+
# undef CV_VSX
311312
#endif
312313

313314
#if CV_SSE2
@@ -318,6 +319,10 @@ CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
318319

319320
#include "opencv2/core/hal/intrin_neon.hpp"
320321

322+
#elif CV_VSX
323+
324+
#include "opencv2/core/hal/intrin_vsx.hpp"
325+
321326
#else
322327

323328
#include "opencv2/core/hal/intrin_cpp.hpp"

0 commit comments

Comments
 (0)