Skip to content

Commit 297ba85

Browse files
committed
Merge pull request opencv#8441 from alalek:dispatch_mathfuncs_core
2 parents 36e8017 + 1e6ce1d commit 297ba85

16 files changed

+617
-207
lines changed

cmake/OpenCVCompilerOptimizations.cmake

Lines changed: 92 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,11 @@ set(CPU_BASELINE_FLAGS "")
275275
set(CPU_BASELINE_FINAL "")
276276
set(CPU_DISPATCH_FINAL "")
277277

278+
if(CV_DISABLE_OPTIMIZATION)
279+
set(CPU_DISPATCH "")
280+
set(CPU_DISPATCH_REQUIRE "")
281+
endif()
282+
278283
macro(ocv_check_compiler_optimization OPT)
279284
if(NOT DEFINED CPU_${OPT}_SUPPORTED)
280285
if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE)
@@ -319,7 +324,7 @@ macro(ocv_check_compiler_optimization OPT)
319324
endmacro()
320325

321326
foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS})
322-
set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE)
327+
set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "")
323328
if(NOT DEFINED CPU_${OPT}_FORCE)
324329
set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}")
325330
endif()
@@ -515,15 +520,27 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
515520
endforeach()
516521
foreach(fname ${${SOURCES_VAR_NAME}})
517522
string(TOLOWER "${fname}" fname_LOWER)
518-
if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$")
519-
if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
520-
message(STATUS "Excluding from source files list: ${fname}")
523+
if(fname_LOWER MATCHES "\\.(.*)\\.cpp$")
524+
string(TOUPPER "${CMAKE_MATCH_1}" OPT_)
525+
if(OPT_ MATCHES "(CUDA.*|DISPATCH.*|OCL)") # don't touch files like filename.cuda.cpp
526+
list(APPEND __result "${fname}")
527+
#continue()
528+
elseif(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
529+
message(STATUS "Excluding from source files list (optimization is disabled): ${fname}")
521530
#continue()
522531
else()
532+
get_source_file_property(__definitions "${fname}" COMPILE_DEFINITIONS)
533+
if(__definitions)
534+
list(APPEND __definitions "CV_CPU_DISPATCH_MODE=${OPT_}")
535+
else()
536+
set(__definitions "CV_CPU_DISPATCH_MODE=${OPT_}")
537+
endif()
538+
set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${__definitions}")
539+
523540
set(__opt_found 0)
524541
foreach(OPT ${CPU_BASELINE_FINAL})
525542
string(TOLOWER "${OPT}" OPT_LOWER)
526-
if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$")
543+
if(fname_LOWER MATCHES "\\.${OPT_LOWER}\\.cpp$")
527544
#message("${fname} BASELINE-${OPT}")
528545
set(__opt_found 1)
529546
list(APPEND __result "${fname}")
@@ -533,11 +550,11 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
533550
foreach(OPT ${CPU_DISPATCH_FINAL})
534551
foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED})
535552
string(TOLOWER "${OPT2}" OPT2_LOWER)
536-
if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$")
553+
if(fname_LOWER MATCHES "\\.${OPT2_LOWER}\\.cpp$")
537554
list(APPEND __result_${OPT} "${fname}")
538555
math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1")
539556
set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE)
540-
#message("${fname} ${OPT}")
557+
#message("(${CPU_${OPT}_USAGE_COUNT})${fname} ${OPT}")
541558
#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
542559
#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
543560
#message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
@@ -573,7 +590,13 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
573590
list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>")
574591
else()
575592
foreach(fname ${__result_${OPT}})
576-
set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
593+
get_source_file_property(__definitions "${fname}" COMPILE_DEFINITIONS)
594+
if(__definitions)
595+
list(APPEND __definitions "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
596+
else()
597+
set(__definitions "${CPU_DISPATCH_DEFINITIONS_${OPT}}")
598+
endif()
599+
set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${__definitions}")
577600
set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}")
578601
endforeach()
579602
list(APPEND __result ${__result_${OPT}})
@@ -620,18 +643,25 @@ macro(ocv_compiler_optimization_fill_cpu_config)
620643
set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
621644
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT}
622645
# define CV_CPU_HAS_SUPPORT_${OPT} 1
623-
# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__
646+
# define CV_CPU_CALL_${OPT}(fn, args) return (opt_${OPT}::fn args)
624647
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT}
625648
# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT}))
626-
# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__
649+
# define CV_CPU_CALL_${OPT}(fn, args) if (CV_CPU_HAS_SUPPORT_${OPT}) return (opt_${OPT}::fn args)
627650
#else
628651
# define CV_CPU_HAS_SUPPORT_${OPT} 0
629-
# define CV_CPU_CALL_${OPT}(...)
652+
# define CV_CPU_CALL_${OPT}(fn, args)
630653
#endif
654+
#define __CV_CPU_DISPATCH_CHAIN_${OPT}(fn, args, mode, ...) CV_CPU_CALL_${OPT}(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
631655
")
632656
endif()
633657
endforeach()
634658

659+
set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}
660+
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
661+
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
662+
")
663+
664+
635665
set(__file "${CMAKE_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h")
636666
if(EXISTS "${__file}")
637667
file(READ "${__file}" __content)
@@ -644,6 +674,57 @@ macro(ocv_compiler_optimization_fill_cpu_config)
644674
endif()
645675
endmacro()
646676

677+
# ocv_add_dispatched_file(<filename> [<OPT>...])
#
# Generates the helper files for a CPU-dispatched implementation that lives in
# "<filename>.simd.hpp". Example: ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
#
# For every requested optimization <OPT> this macro:
#   * writes "${CMAKE_CURRENT_BINARY_DIR}/<filename>.<opt>.cpp" (a stub that
#     includes "precomp.hpp" and "<filename>.simd.hpp");
#   * appends that stub to OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED
#     (the_module must be set by the caller).
# It also writes "${CMAKE_CURRENT_BINARY_DIR}/<filename>.simd_declarations.hpp",
# which defines CV_CPU_SIMD_FILENAME, re-includes the declarations header once
# per mode with CV_CPU_DISPATCH_MODE set, and defines CV_CPU_DISPATCH_MODES_ALL
# ("<OPTn>, ..., <OPT1>, BASELINE").
#
# Nothing is done while OPENCV_INITIAL_PASS is true. The optimization list is
# emptied when CV_DISABLE_OPTIMIZATION is set or CV_ENABLE_INTRINSICS is off, so
# only the BASELINE mode is declared in that case.
#
# Files are rewritten only when their content differs (presumably to keep
# timestamps stable and avoid needless rebuilds).
#
# This stays a macro because it appends to a list variable in the caller's scope.
macro(ocv_add_dispatched_file filename)
  if(NOT OPENCV_INITIAL_PASS)
    set(__codestr "
#include \"precomp.hpp\"
#include \"${filename}.simd.hpp\"
")

    set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${filename}.simd.hpp\"")
    set(__dispatch_modes "BASELINE")

    set(__optimizations "${ARGN}")
    if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
      set(__optimizations "")
    endif()

    foreach(OPT ${__optimizations})
      string(TOLOWER "${OPT}" OPT_LOWER)
      set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${OPT_LOWER}.cpp")
      # Always start from a known value: without the reset a missing file would be
      # compared against content read for a previous file and could be wrongly
      # treated as up-to-date (and therefore never generated).
      set(__content "")
      if(EXISTS "${__file}")
        file(READ "${__file}" __content)
      endif()
      if(NOT "${__content}" STREQUAL "${__codestr}")
        file(WRITE "${__file}" "${__codestr}")
      endif()
      list(APPEND OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED "${__file}")

      set(__declarations_str "${__declarations_str}
#define CV_CPU_DISPATCH_MODE ${OPT}
#include \"opencv2/core/private/cv_cpu_include_simd_declarations.hpp\"
")
      # Later optimizations are prepended, BASELINE always stays last.
      set(__dispatch_modes "${OPT}, ${__dispatch_modes}")
    endforeach()

    set(__declarations_str "${__declarations_str}
#define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes}
")

    set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.simd_declarations.hpp")
    # Same reset as above: never reuse content read for another file.
    set(__content "")
    if(EXISTS "${__file}")
      file(READ "${__file}" __content)
    endif()
    if(NOT "${__content}" STREQUAL "${__declarations_str}")
      file(WRITE "${__file}" "${__declarations_str}")
    endif()
  endif()
endmacro()
727+
647728
if(CV_DISABLE_OPTIMIZATION OR CV_ICC)
648729
ocv_update(CV_ENABLE_UNROLLED 0)
649730
else()

cmake/OpenCVModule.cmake

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ macro(ocv_glob_modules)
314314
set(OPENCV_INITIAL_PASS OFF)
315315
if(${BUILD_opencv_world})
316316
foreach(m ${OPENCV_MODULES_BUILD})
317+
set(the_module "${m}")
317318
if("${m}" STREQUAL opencv_world)
318319
add_subdirectory("${OPENCV_MODULE_opencv_world_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/world")
319320
elseif(NOT OPENCV_MODULE_${m}_IS_PART_OF_WORLD AND NOT ${m} STREQUAL opencv_world)
@@ -329,6 +330,7 @@ macro(ocv_glob_modules)
329330
endforeach()
330331
else()
331332
foreach(m ${OPENCV_MODULES_BUILD})
333+
set(the_module "${m}")
332334
if(m MATCHES "^opencv_")
333335
string(REGEX REPLACE "^opencv_" "" __shortname "${m}")
334336
add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/${__shortname}")
@@ -646,11 +648,13 @@ macro(ocv_set_module_sources)
646648
ocv_get_module_external_sources()
647649
endif()
648650

651+
if(OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED)
652+
list(APPEND OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED})
653+
endif()
654+
649655
# use full paths for module to be independent from the module location
650656
ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS)
651657

652-
ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
653-
654658
set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
655659
set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
656660
endmacro()
@@ -766,6 +770,11 @@ macro(ocv_create_module)
766770
endmacro()
767771

768772
macro(_ocv_create_module)
773+
774+
ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module})
775+
set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}")
776+
set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}")
777+
769778
# The condition we ought to be testing here is whether ocv_add_precompiled_headers will
770779
# be called at some point in the future. We can't look into the future, though,
771780
# so this will have to do.

cmake/OpenCVPCHSupport.cmake

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -288,11 +288,12 @@ MACRO(ADD_PRECOMPILED_HEADER _targetName _input)
288288
foreach(src ${_sources})
289289
if(NOT "${src}" MATCHES "\\.mm$")
290290
get_source_file_property(oldProps "${src}" COMPILE_FLAGS)
291-
if(NOT oldProps)
291+
get_source_file_property(oldProps2 "${src}" COMPILE_DEFINITIONS)
292+
if(NOT oldProps AND NOT oldProps2)
292293
set(newProperties "-include \"${CMAKE_CURRENT_BINARY_DIR}/${_name}\"")
293294
set_source_files_properties("${src}" PROPERTIES COMPILE_FLAGS "${newProperties}")
294295
else()
295-
ocv_debug_message("Skip PCH, flags: ${oldProps} , file: ${src}")
296+
ocv_debug_message("Skip PCH, flags: ${oldProps} defines: ${oldProps2}, file: ${src}")
296297
endif()
297298
endif()
298299
endforeach()
@@ -339,11 +340,12 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
339340
AND NOT "${src}" MATCHES "^\$" # CMake generator expressions
340341
)
341342
get_source_file_property(oldProps "${src}" COMPILE_FLAGS)
342-
if(NOT oldProps)
343+
get_source_file_property(oldProps2 "${src}" COMPILE_DEFINITIONS)
344+
if(NOT oldProps AND NOT oldProps2)
343345
set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"")
344346
set_source_files_properties("${src}" PROPERTIES COMPILE_FLAGS "${newProperties}")
345347
else()
346-
ocv_debug_message("Skip PCH, flags: ${oldProps} , file: ${src}")
348+
ocv_debug_message("Skip PCH, flags: ${oldProps} defines: ${oldProps2}, file: ${src}")
347349
endif()
348350
endif()
349351
endforeach()

modules/core/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
set(the_description "The Core Functionality")
2+
3+
ocv_add_dispatched_file(mathfuncs_core SSE2 AVX AVX2)
4+
25
ocv_add_module(core
36
"${OPENCV_HAL_LINKER_LIBS}"
47
OPTIONAL opencv_cudev

modules/core/include/opencv2/core/cv_cpu_dispatch.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,23 @@
77
#include "cv_cpu_config.h"
88
#include "cv_cpu_helper.h"
99

10+
// Namespace that wraps the code of the dispatch mode being compiled:
// opt_<MODE> when CV_CPU_DISPATCH_MODE is defined, cpu_baseline otherwise.
#if defined(CV_CPU_DISPATCH_MODE)
#  define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
#else
#  define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
#endif
// Open / close the namespace selected above.
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace CV_CPU_OPTIMIZATION_NAMESPACE {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }


// CV_CPU_DISPATCH(fn, args, <mode>, ...) walks the list of modes: the first mode
// selects __CV_CPU_DISPATCH_CHAIN_<mode>, which receives the remaining modes.
// An END marker is appended to the list; its chain element expands to nothing.
// The per-mode chain macros themselves are not defined in this section.
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)  /* done */
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
25+
26+
1027
#if defined CV_ENABLE_INTRINSICS \
1128
&& !defined CV_DISABLE_OPTIMIZATION \
1229
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \
@@ -76,6 +93,16 @@
7693

7794
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
7895

96+
#if defined(CV_CPU_COMPILE_AVX) && !defined(CV_CPU_BASELINE_COMPILE_AVX)
// Helper object whose destructor executes _mm256_zeroupper(), so the upper
// halves of the YMM registers are cleared when the object goes out of scope.
// Only available when AVX is compiled in but is not part of the baseline
// (presumably to avoid AVX<->SSE transition penalties in dispatched code).
struct VZeroUpperGuard
{
#if defined(__GNUC__)
    __attribute__((always_inline))
#endif
    inline ~VZeroUpperGuard()
    {
        _mm256_zeroupper();
    }
};
// Declares a guard object named __vzeroupper_guard in the enclosing scope.
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard;
#endif
105+
79106
#endif // __OPENCV_BUILD
80107

81108

0 commit comments

Comments
 (0)