Skip to content

Commit 739f87d

Browse files
committed
Merge pull request opencv#8518 from alalek:fix_fp16
2 parents bd5a6f8 + e5d9b60 commit 739f87d

File tree

8 files changed

+17
-53
lines changed

8 files changed

+17
-53
lines changed

cmake/OpenCVCompilerOptions.cmake

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -273,35 +273,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
273273
add_extra_compiler_option(-fvisibility-inlines-hidden)
274274
endif()
275275

276-
# TODO !!!!!
277-
if(NOT OPENCV_FP16_DISABLE AND NOT IOS)
278-
if(ARM AND ENABLE_NEON)
279-
set(FP16_OPTION "-mfpu=neon-fp16")
280-
elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX)
281-
set(FP16_OPTION "-mf16c")
282-
endif()
283-
try_compile(__VALID_FP16
284-
"${OpenCV_BINARY_DIR}"
285-
"${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp"
286-
COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
287-
OUTPUT_VARIABLE TRY_OUT
288-
)
289-
if(NOT __VALID_FP16)
290-
if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX)
291-
# GCC enables AVX when mf16c is passed
292-
message(STATUS "FP16: Feature disabled")
293-
else()
294-
message(STATUS "FP16: Compiler support is not available")
295-
endif()
296-
else()
297-
message(STATUS "FP16: Compiler support is available")
298-
set(HAVE_FP16 1)
299-
if(NOT ${FP16_OPTION} STREQUAL "")
300-
add_extra_compiler_option(${FP16_OPTION})
301-
endif()
302-
endif()
303-
endif()
304-
305276
#combine all "extra" options
306277
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
307278
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")

cmake/templates/cvconfig.h.in

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,6 @@
207207
/* Lapack */
208208
#cmakedefine HAVE_LAPACK
209209

210-
/* FP16 */
211-
#cmakedefine HAVE_FP16
212-
213210
/* Library was compiled with functions instrumentation */
214211
#cmakedefine ENABLE_INSTRUMENTATION
215212

modules/core/include/opencv2/core/cv_cpu_dispatch.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@
7070
# include <immintrin.h>
7171
# define CV_AVX 1
7272
#endif
73+
#ifdef CV_CPU_COMPILE_FP16
74+
# include <immintrin.h>
75+
# define CV_FP16 1
76+
#endif
7377
#ifdef CV_CPU_COMPILE_AVX2
7478
# include <immintrin.h>
7579
# define CV_AVX2 1
@@ -154,6 +158,9 @@ struct VZeroUpperGuard {
154158
#ifndef CV_AVX
155159
# define CV_AVX 0
156160
#endif
161+
#ifndef CV_FP16
162+
# define CV_FP16 0
163+
#endif
157164
#ifndef CV_AVX2
158165
# define CV_AVX2 0
159166
#endif

modules/core/include/opencv2/core/hal/intrin_cpp.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp,
721721
{
722722
typedef typename V_TypeTraits<_Tp>::abs_type rtype;
723723
v_reg<rtype, n> c;
724-
const rtype mask = std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0;
724+
const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0);
725725
for( int i = 0; i < n; i++ )
726726
{
727727
rtype ua = a.s[i] ^ mask;

modules/core/include/opencv2/core/hal/intrin_neon.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ struct v_float64x2
278278
};
279279
#endif
280280

281-
#if defined (HAVE_FP16)
281+
#if CV_FP16
282282
// Workaround for old compilers
283283
template <typename T> static inline int16x4_t vreinterpret_s16_f16(T a)
284284
{ return (int16x4_t)a; }
@@ -775,7 +775,7 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
775775
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
776776
#endif
777777

778-
#if defined (HAVE_FP16)
778+
#if CV_FP16
779779
// Workaround for old compilers
780780
inline v_float16x4 v_load_f16(const short* ptr)
781781
{ return v_float16x4(vld1_f16(ptr)); }
@@ -1223,7 +1223,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
12231223
}
12241224
#endif
12251225

1226-
#if defined (HAVE_FP16)
1226+
#if CV_FP16
12271227
inline v_float32x4 v_cvt_f32(const v_float16x4& a)
12281228
{
12291229
return v_float32x4(vcvt_f32_f16(a.val));

modules/core/include/opencv2/core/hal/intrin_sse.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ struct v_float64x2
255255
__m128d val;
256256
};
257257

258-
#if defined(HAVE_FP16)
258+
#if CV_FP16
259259
struct v_float16x4
260260
{
261261
typedef short lane_type;
@@ -1056,7 +1056,7 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
10561056
OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps)
10571057
OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd)
10581058

1059-
#if defined(HAVE_FP16)
1059+
#if CV_FP16
10601060
inline v_float16x4 v_load_f16(const short* ptr)
10611061
{ return v_float16x4(_mm_loadl_epi64((const __m128i*)ptr)); }
10621062
inline void v_store_f16(short* ptr, v_float16x4& a)
@@ -1776,7 +1776,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
17761776
return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8))));
17771777
}
17781778

1779-
#if defined(HAVE_FP16)
1779+
#if CV_FP16
17801780
inline v_float32x4 v_cvt_f32(const v_float16x4& a)
17811781
{
17821782
return v_float32x4(_mm_cvtph_ps(a.val));

modules/core/include/opencv2/core/private.hpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,6 @@
6666
# undef max
6767
#endif
6868

69-
#if defined HAVE_FP16 && (defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700))
70-
# include <immintrin.h>
71-
# define CV_FP16 1
72-
#elif defined HAVE_FP16 && defined __GNUC__
73-
# define CV_FP16 1
74-
#endif
75-
76-
#ifndef CV_FP16
77-
# define CV_FP16 0
78-
#endif
79-
8069
//! @cond IGNORED
8170

8271
namespace cv

modules/core/test/test_intrin.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ template<typename R> struct TheTest
743743

744744
TheTest & test_loadstore_fp16()
745745
{
746-
#if CV_FP16
746+
#if CV_FP16 && CV_SIMD128
747747
AlignedData<R> data;
748748
AlignedData<R> out;
749749

@@ -775,7 +775,7 @@ template<typename R> struct TheTest
775775

776776
TheTest & test_float_cvt_fp16()
777777
{
778-
#if CV_FP16
778+
#if CV_FP16 && CV_SIMD128
779779
AlignedData<v_float32x4> data;
780780

781781
if(checkHardwareSupport(CV_CPU_FP16))
@@ -1008,7 +1008,7 @@ TEST(hal_intrin, float64x2) {
10081008
}
10091009
#endif
10101010

1011-
#if CV_FP16
1011+
#if CV_FP16 && CV_SIMD128
10121012
TEST(hal_intrin, float16x4) {
10131013
TheTest<v_float16x4>()
10141014
.test_loadstore_fp16()

0 commit comments

Comments
 (0)