Skip to content

Commit c48807c

Browse files
borisfomalalek
authored and committed
Merge pull request opencv#9418 from borisfom:cuda9
CUDA9 build fixed, added detection (opencv#9418) * CUDA9 build fixed, added detection * Replacing deprecated __shfl_xxx with __shfl_sync, fixing bogus CUDA9 warnings
1 parent d0509f6 commit c48807c

File tree

7 files changed

+51
-11
lines changed

7 files changed

+51
-11
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,8 @@ OCV_OPTION(WITH_CPUFEATURES "Use cpufeatures Android library" ON
195195
OCV_OPTION(WITH_VTK "Include VTK library support (and build opencv_viz module eiher)" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT AND NOT CMAKE_CROSSCOMPILING) )
196196
OCV_OPTION(WITH_CUDA "Include NVidia Cuda Runtime support" ON IF (NOT IOS AND NOT WINRT) )
197197
OCV_OPTION(WITH_CUFFT "Include NVidia Cuda Fast Fourier Transform (FFT) library support" ON IF (NOT IOS AND NOT WINRT) )
198-
OCV_OPTION(WITH_CUBLAS "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support" OFF IF (NOT IOS AND NOT WINRT) )
199-
OCV_OPTION(WITH_NVCUVID "Include NVidia Video Decoding library support" OFF IF (NOT IOS AND NOT APPLE) )
198+
OCV_OPTION(WITH_CUBLAS "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support" ON IF (NOT IOS AND NOT WINRT) )
199+
OCV_OPTION(WITH_NVCUVID "Include NVidia Video Decoding library support" ON IF (NOT IOS AND NOT APPLE) )
200200
OCV_OPTION(WITH_EIGEN "Include Eigen2/Eigen3 support" ON IF (NOT WINRT) )
201201
OCV_OPTION(WITH_VFW "Include Video for Windows support" ON IF WIN32 )
202202
OCV_OPTION(WITH_FFMPEG "Include FFMPEG support" ON IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )

cmake/FindCUDA.cmake

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -790,8 +790,18 @@ endif()
790790
if(CUDA_VERSION VERSION_GREATER "5.0")
791791
# In CUDA 5.5 NPP was split into 3 separate libraries.
792792
find_cuda_helper_libs(nppc)
793-
find_cuda_helper_libs(nppi)
793+
find_cuda_helper_libs(nppial)
794+
find_cuda_helper_libs(nppicc)
795+
find_cuda_helper_libs(nppicom)
796+
find_cuda_helper_libs(nppidei)
797+
find_cuda_helper_libs(nppif)
798+
find_cuda_helper_libs(nppig)
799+
find_cuda_helper_libs(nppim)
800+
find_cuda_helper_libs(nppist)
801+
find_cuda_helper_libs(nppisu)
802+
find_cuda_helper_libs(nppitc)
794803
find_cuda_helper_libs(npps)
804+
set(CUDA_nppi_LIBRARY "${CUDA_nppial_LIBRARY};${CUDA_nppicc_LIBRARY};${CUDA_nppicom_LIBRARY};${CUDA_nppidei_LIBRARY};${CUDA_nppif_LIBRARY};${CUDA_nppig_LIBRARY};${CUDA_nppim_LIBRARY};${CUDA_nppist_LIBRARY};${CUDA_nppisu_LIBRARY};${CUDA_nppitc_LIBRARY}")
795805
set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}")
796806
elseif(NOT CUDA_VERSION VERSION_LESS "4.0")
797807
find_cuda_helper_libs(npp)

cmake/OpenCVDetectCUDA.cmake

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ if(CUDA_FOUND)
4343

4444
message(STATUS "CUDA detected: " ${CUDA_VERSION})
4545

46-
set(_generations "Fermi" "Kepler" "Maxwell" "Pascal")
46+
set(_generations "Fermi" "Kepler" "Maxwell" "Pascal" "Volta")
4747
if(NOT CMAKE_CROSSCOMPILING)
4848
list(APPEND _generations "Auto")
4949
endif()
@@ -70,6 +70,8 @@ if(CUDA_FOUND)
7070
set(__cuda_arch_bin "5.0 5.2")
7171
elseif(CUDA_GENERATION STREQUAL "Pascal")
7272
set(__cuda_arch_bin "6.0 6.1")
73+
elseif(CUDA_GENERATION STREQUAL "Volta")
74+
set(__cuda_arch_bin "7.0")
7375
elseif(CUDA_GENERATION STREQUAL "Auto")
7476
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
7577
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
@@ -94,17 +96,17 @@ if(CUDA_FOUND)
9496
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
9597
if(NOT _nvcc_res EQUAL 0)
9698
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
97-
set(__cuda_arch_bin "5.3 6.2")
99+
set(__cuda_arch_bin "5.3 6.2 7.0")
98100
else()
99101
set(__cuda_arch_bin "${_nvcc_out}")
100102
string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
101103
endif()
102104
set(__cuda_arch_ptx "")
103105
else()
104-
if(${CUDA_VERSION} VERSION_LESS "8.0")
105-
set(__cuda_arch_bin "2.0 3.0 3.5 3.7 5.0 5.2")
106-
else()
106+
if(${CUDA_VERSION} VERSION_LESS "9.0")
107107
set(__cuda_arch_bin "2.0 3.0 3.5 3.7 5.0 5.2 6.0 6.1")
108+
else()
109+
set(__cuda_arch_bin "3.0 3.5 3.7 5.0 5.2 6.0 6.1 7.0")
108110
endif()
109111
endif()
110112
endif()

modules/core/include/opencv2/core/private.cuda.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,14 @@
5858
#ifdef HAVE_CUDA
5959
# include <cuda.h>
6060
# include <cuda_runtime.h>
61+
# if defined (__GNUC__)
62+
# pragma GCC diagnostic push
63+
# pragma GCC diagnostic ignored "-Wstrict-aliasing"
64+
# include <cuda_fp16.h>
65+
# pragma GCC diagnostic pop
66+
# else
67+
# include <cuda_fp16.h>
68+
# endif /* defined (__GNUC__) */
6169
# include <npp.h>
6270
# include "opencv2/core/cuda_stream_accessor.hpp"
6371
# include "opencv2/core/cuda/common.hpp"

modules/cudacodec/src/precomp.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
#include "opencv2/core/private.cuda.hpp"
5757

5858
#ifdef HAVE_NVCUVID
59-
#include <nvcuvid.h>
59+
#include <dynlink_nvcuvid.h>
6060

6161
#ifdef _WIN32
6262
#define NOMINMAX

modules/cudev/include/opencv2/cudev/util/saturate_cast.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#define OPENCV_CUDEV_UTIL_SATURATE_CAST_HPP
4848

4949
#include "../common.hpp"
50+
#include "opencv2/core/private.cuda.hpp"
5051

5152
namespace cv { namespace cudev {
5253

@@ -274,12 +275,21 @@ template <typename T, typename D> __device__ __forceinline__ D cast_fp16(T v);
274275

275276
template <> __device__ __forceinline__ float cast_fp16<short, float>(short v) // specialization: 16-bit value held in a short -> float
276277
{
278+
#if __CUDACC_VER_MAJOR__ >= 9 // path taken when the CUDA compiler major version is 9 or newer
279+
return float(*(__half*)&v); // reinterpret the storage of v as a __half, then convert that __half to float
280+
#else // older CUDA compilers
277281
return __half2float(v); // pass the short directly to the __half2float intrinsic
282+
#endif
278283
}
279284

280285
template <> __device__ __forceinline__ short cast_fp16<float, short>(float v) // specialization: float -> 16-bit half value returned in a short
281286
{
282-
return (short)__float2half_rn(v);
287+
#if __CUDACC_VER_MAJOR__ >= 9 // path taken when the CUDA compiler major version is 9 or newer
288+
__half h(v); // convert the float argument to a __half
289+
return *(short*)&h; // return the bits of the converted half h (not the leading bytes of the float argument v)
290+
#else // older CUDA compilers
291+
return (short)__float2half_rn(v); // cast the result of the round-to-nearest intrinsic to short
292+
#endif
283293
}
284294
//! @}
285295

modules/cudev/include/opencv2/cudev/warp/shuffle.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,14 @@ namespace cv { namespace cudev {
5656

5757
#if CV_CUDEV_ARCH >= 300
5858

59-
// shfl
59+
#if __CUDACC_VER_MAJOR__ >= 9
60+
# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
61+
# define __shfl_xor(x, y, z) __shfl_xor_sync(0xFFFFFFFFU, x, y, z)
62+
# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
63+
# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
64+
#endif
6065

66+
// shfl
6167
__device__ __forceinline__ uchar shfl(uchar val, int srcLane, int width = warpSize)
6268
{
6369
return (uchar) __shfl((int) val, srcLane, width);
@@ -419,6 +425,10 @@ CV_CUDEV_SHFL_XOR_VEC_INST(float)
419425
CV_CUDEV_SHFL_XOR_VEC_INST(double)
420426

421427
#undef CV_CUDEV_SHFL_XOR_VEC_INST
428+
#undef __shfl
429+
#undef __shfl_xor
430+
#undef __shfl_up
431+
#undef __shfl_down
422432

423433
#endif // CV_CUDEV_ARCH >= 300
424434

0 commit comments

Comments
 (0)