9#ifndef __CLANG_HIP_MATH_H__
10#define __CLANG_HIP_MATH_H__
12#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13#error "This file is for HIP and OpenMP AMDGCN device compilation only."
18#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
20#if !defined(__HIPCC_RTC__)
23#ifdef __OPENMP_AMDGCN__
28#pragma push_macro("__DEVICE__")
30#ifdef __OPENMP_AMDGCN__
31#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
33#define __DEVICE__ static __device__ inline __attribute__((always_inline))
36#pragma push_macro("__PRIVATE_AS")
38#define __PRIVATE_AS __attribute__((opencl_private))
43#pragma push_macro("__FAST_OR_SLOW")
44#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
45#define __FAST_OR_SLOW(fast, slow) fast
47#define __FAST_OR_SLOW(fast, slow) slow
51#pragma push_macro("__RETURN_TYPE")
52#ifdef __OPENMP_AMDGCN__
53#define __RETURN_TYPE int
55#if defined(__cplusplus)
56#define __RETURN_TYPE bool
58#define __RETURN_TYPE int
62#if defined (__cplusplus) && __cplusplus < 201103L
65struct __compare_result{};
67struct __compare_result<
true> {
72void __suppress_unused_warning(
bool b){};
73template <
unsigned int S,
unsigned int T>
75 __suppress_unused_warning(__compare_result<S == T>::valid);
78#define __static_assert_type_size_equal(A, B) \
79 __static_assert_equal_size<A,B>()
82#define __static_assert_type_size_equal(A,B) \
83 static_assert((A) == (B), "")
90 while (*__tagp !=
'\0') {
93 if (__tmp >=
'0' && __tmp <=
'7')
94 __r = (__r * 8u) + __tmp -
'0';
107 while (*__tagp !=
'\0') {
108 char __tmp = *__tagp;
110 if (__tmp >=
'0' && __tmp <=
'9')
111 __r = (__r * 10u) + __tmp -
'0';
124 while (*__tagp !=
'\0') {
125 char __tmp = *__tagp;
127 if (__tmp >=
'0' && __tmp <=
'9')
128 __r = (__r * 16u) + __tmp -
'0';
129 else if (__tmp >=
'a' && __tmp <=
'f')
130 __r = (__r * 16u) + __tmp -
'a' + 10;
131 else if (__tmp >=
'A' && __tmp <=
'F')
132 __r = (__r * 16u) + __tmp -
'A' + 10;
144 if (*__tagp ==
'0') {
147 if (*__tagp ==
'x' || *__tagp ==
'X')
165 const float __log2_10 = 0x1.a934f0p+1f;
166 return __builtin_amdgcn_exp2f(__log2_10 * __x);
171 const float __log2_e = 0x1.715476p+0;
172 return __builtin_amdgcn_exp2f(__log2_e * __x);
175#if defined OCML_BASIC_ROUNDED_OPERATIONS
177float __fadd_rd(
float __x,
float __y) {
return __ocml_add_rtn_f32(__x,
__y); }
179float __fadd_rn(
float __x,
float __y) {
return __ocml_add_rte_f32(__x,
__y); }
181float __fadd_ru(
float __x,
float __y) {
return __ocml_add_rtp_f32(__x,
__y); }
183float __fadd_rz(
float __x,
float __y) {
return __ocml_add_rtz_f32(__x,
__y); }
189#if defined OCML_BASIC_ROUNDED_OPERATIONS
191float __fdiv_rd(
float __x,
float __y) {
return __ocml_div_rtn_f32(__x,
__y); }
193float __fdiv_rn(
float __x,
float __y) {
return __ocml_div_rte_f32(__x,
__y); }
195float __fdiv_ru(
float __x,
float __y) {
return __ocml_div_rtp_f32(__x,
__y); }
197float __fdiv_rz(
float __x,
float __y) {
return __ocml_div_rtz_f32(__x,
__y); }
206#if defined OCML_BASIC_ROUNDED_OPERATIONS
209 return __ocml_fma_rtn_f32(__x,
__y, __z);
213 return __ocml_fma_rte_f32(__x,
__y, __z);
217 return __ocml_fma_rtp_f32(__x,
__y, __z);
221 return __ocml_fma_rtz_f32(__x,
__y, __z);
226 return __builtin_fmaf(__x,
__y, __z);
230#if defined OCML_BASIC_ROUNDED_OPERATIONS
232float __fmul_rd(
float __x,
float __y) {
return __ocml_mul_rtn_f32(__x,
__y); }
234float __fmul_rn(
float __x,
float __y) {
return __ocml_mul_rte_f32(__x,
__y); }
236float __fmul_ru(
float __x,
float __y) {
return __ocml_mul_rtp_f32(__x,
__y); }
238float __fmul_rz(
float __x,
float __y) {
return __ocml_mul_rtz_f32(__x,
__y); }
244#if defined OCML_BASIC_ROUNDED_OPERATIONS
246float __frcp_rd(
float __x) {
return __ocml_div_rtn_f32(1.0f, __x); }
248float __frcp_rn(
float __x) {
return __ocml_div_rte_f32(1.0f, __x); }
250float __frcp_ru(
float __x) {
return __ocml_div_rtp_f32(1.0f, __x); }
252float __frcp_rz(
float __x) {
return __ocml_div_rtz_f32(1.0f, __x); }
259float __frsqrt_rn(
float __x) {
return __builtin_amdgcn_rsqf(__x); }
261#if defined OCML_BASIC_ROUNDED_OPERATIONS
263float __fsqrt_rd(
float __x) {
return __ocml_sqrt_rtn_f32(__x); }
265float __fsqrt_rn(
float __x) {
return __ocml_sqrt_rte_f32(__x); }
267float __fsqrt_ru(
float __x) {
return __ocml_sqrt_rtp_f32(__x); }
269float __fsqrt_rz(
float __x) {
return __ocml_sqrt_rtz_f32(__x); }
272float __fsqrt_rn(
float __x) {
return __ocml_native_sqrt_f32(__x); }
275#if defined OCML_BASIC_ROUNDED_OPERATIONS
277float __fsub_rd(
float __x,
float __y) {
return __ocml_sub_rtn_f32(__x,
__y); }
279float __fsub_rn(
float __x,
float __y) {
return __ocml_sub_rte_f32(__x,
__y); }
281float __fsub_ru(
float __x,
float __y) {
return __ocml_sub_rtp_f32(__x,
__y); }
283float __fsub_rz(
float __x,
float __y) {
return __ocml_sub_rtz_f32(__x,
__y); }
290float __log10f(
float __x) {
return __builtin_log10f(__x); }
293float __log2f(
float __x) {
return __builtin_amdgcn_logf(__x); }
296float __logf(
float __x) {
return __builtin_logf(__x); }
302float __saturatef(
float __x) {
return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
305void __sincosf(
float __x,
float *__sinptr,
float *__cosptr) {
315 return __sinf(__x) * __builtin_amdgcn_rcpf(
__cosf(__x));
319#if defined(__cplusplus)
322 return __builtin_abs(__x);
326 return __builtin_labs(__x);
329long long llabs(
long long __x) {
330 return __builtin_llabs(__x);
335float acosf(
float __x) {
return __ocml_acos_f32(__x); }
338float acoshf(
float __x) {
return __ocml_acosh_f32(__x); }
341float asinf(
float __x) {
return __ocml_asin_f32(__x); }
344float asinhf(
float __x) {
return __ocml_asinh_f32(__x); }
347float atan2f(
float __x,
float __y) {
return __ocml_atan2_f32(__x,
__y); }
350float atanf(
float __x) {
return __ocml_atan_f32(__x); }
353float atanhf(
float __x) {
return __ocml_atanh_f32(__x); }
356float cbrtf(
float __x) {
return __ocml_cbrt_f32(__x); }
359float ceilf(
float __x) {
return __builtin_ceilf(__x); }
368float coshf(
float __x) {
return __ocml_cosh_f32(__x); }
380float erfcf(
float __x) {
return __ocml_erfc_f32(__x); }
383float erfcinvf(
float __x) {
return __ocml_erfcinv_f32(__x); }
386float erfcxf(
float __x) {
return __ocml_erfcx_f32(__x); }
389float erff(
float __x) {
return __ocml_erf_f32(__x); }
392float erfinvf(
float __x) {
return __ocml_erfinv_f32(__x); }
395float exp10f(
float __x) {
return __builtin_exp10f(__x); }
398float exp2f(
float __x) {
return __builtin_exp2f(__x); }
401float expf(
float __x) {
return __builtin_expf(__x); }
404float expm1f(
float __x) {
return __ocml_expm1_f32(__x); }
407float fabsf(
float __x) {
return __builtin_fabsf(__x); }
410float fdimf(
float __x,
float __y) {
return __ocml_fdim_f32(__x,
__y); }
416float floorf(
float __x) {
return __builtin_floorf(__x); }
420 return __builtin_fmaf(__x,
__y, __z);
424float fmaxf(
float __x,
float __y) {
return __builtin_fmaxf(__x,
__y); }
427float fminf(
float __x,
float __y) {
return __builtin_fminf(__x,
__y); }
430float fmodf(
float __x,
float __y) {
return __ocml_fmod_f32(__x,
__y); }
434 return __builtin_frexpf(__x, __nptr);
438float hypotf(
float __x,
float __y) {
return __ocml_hypot_f32(__x,
__y); }
441int ilogbf(
float __x) {
return __ocml_ilogb_f32(__x); }
459float jnf(
int __n,
float __x) {
468 float __x0 =
j0f(__x);
469 float __x1 =
j1f(__x);
470 for (
int __i = 1; __i < __n; ++__i) {
471 float __x2 = (2 * __i) / __x * __x1 - __x0;
480float ldexpf(
float __x,
int __e) {
return __builtin_amdgcn_ldexpf(__x, __e); }
486long long int llrintf(
float __x) {
return __builtin_rintf(__x); }
489long long int llroundf(
float __x) {
return __builtin_roundf(__x); }
492float log10f(
float __x) {
return __builtin_log10f(__x); }
495float log1pf(
float __x) {
return __ocml_log1p_f32(__x); }
501float logbf(
float __x) {
return __ocml_logb_f32(__x); }
507long int lrintf(
float __x) {
return __builtin_rintf(__x); }
510long int lroundf(
float __x) {
return __builtin_roundf(__x); }
513float modff(
float __x,
float *__iptr) {
515#ifdef __OPENMP_AMDGCN__
516#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
528 unsigned int mantissa : 22;
529 unsigned int quiet : 1;
530 unsigned int exponent : 8;
531 unsigned int sign : 1;
536 __tmp.bits.sign = 0u;
537 __tmp.bits.exponent = ~0u;
538 __tmp.bits.quiet = 1u;
545float nearbyintf(
float __x) {
return __builtin_nearbyintf(__x); }
549 return __ocml_nextafter_f32(__x,
__y);
554 return __ocml_len3_f32(__x,
__y, __z);
559 return __ocml_len4_f32(__x,
__y, __z, __w);
563float normcdff(
float __x) {
return __ocml_ncdf_f32(__x); }
577 return __builtin_sqrtf(__r);
581float powf(
float __x,
float __y) {
return __ocml_pow_f32(__x,
__y); }
584float powif(
float __x,
int __y) {
return __ocml_pown_f32(__x,
__y); }
587float rcbrtf(
float __x) {
return __ocml_rcbrt_f32(__x); }
591 return __ocml_remainder_f32(__x,
__y);
597#ifdef __OPENMP_AMDGCN__
598#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
610float rintf(
float __x) {
return __builtin_rintf(__x); }
614 return __ocml_rlen3_f32(__x,
__y, __z);
619 return __ocml_rlen4_f32(__x,
__y, __z, __w);
631 return __ocml_rsqrt_f32(__r);
635float roundf(
float __x) {
return __builtin_roundf(__x); }
638float rsqrtf(
float __x) {
return __ocml_rsqrt_f32(__x); }
646 return __builtin_ldexpf(__x, (
int)__n);
650float scalbnf(
float __x,
int __n) {
return __builtin_amdgcn_ldexpf(__x, __n); }
656void sincosf(
float __x,
float *__sinptr,
float *__cosptr) {
658#ifdef __OPENMP_AMDGCN__
659#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
661#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
670void sincospif(
float __x,
float *__sinptr,
float *__cosptr) {
672#ifdef __OPENMP_AMDGCN__
673#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
683float sinhf(
float __x) {
return __ocml_sinh_f32(__x); }
689float sqrtf(
float __x) {
return __builtin_sqrtf(__x); }
695float tanhf(
float __x) {
return __ocml_tanh_f32(__x); }
701float truncf(
float __x) {
return __builtin_truncf(__x); }
710float ynf(
int __n,
float __x) {
720 float __x0 =
y0f(__x);
721 float __x1 =
y1f(__x);
722 for (
int __i = 1; __i < __n; ++__i) {
723 float __x2 = (2 * __i) / __x * __x1 - __x0;
736double acos(
double __x) {
return __ocml_acos_f64(__x); }
739double acosh(
double __x) {
return __ocml_acosh_f64(__x); }
742double asin(
double __x) {
return __ocml_asin_f64(__x); }
745double asinh(
double __x) {
return __ocml_asinh_f64(__x); }
748double atan(
double __x) {
return __ocml_atan_f64(__x); }
751double atan2(
double __x,
double __y) {
return __ocml_atan2_f64(__x,
__y); }
754double atanh(
double __x) {
return __ocml_atanh_f64(__x); }
757double cbrt(
double __x) {
return __ocml_cbrt_f64(__x); }
760double ceil(
double __x) {
return __builtin_ceil(__x); }
764 return __builtin_copysign(__x,
__y);
771double cosh(
double __x) {
return __ocml_cosh_f64(__x); }
783double erf(
double __x) {
return __ocml_erf_f64(__x); }
786double erfc(
double __x) {
return __ocml_erfc_f64(__x); }
789double erfcinv(
double __x) {
return __ocml_erfcinv_f64(__x); }
792double erfcx(
double __x) {
return __ocml_erfcx_f64(__x); }
795double erfinv(
double __x) {
return __ocml_erfinv_f64(__x); }
798double exp(
double __x) {
return __ocml_exp_f64(__x); }
801double exp10(
double __x) {
return __ocml_exp10_f64(__x); }
804double exp2(
double __x) {
return __ocml_exp2_f64(__x); }
807double expm1(
double __x) {
return __ocml_expm1_f64(__x); }
810double fabs(
double __x) {
return __builtin_fabs(__x); }
813double fdim(
double __x,
double __y) {
return __ocml_fdim_f64(__x,
__y); }
816double floor(
double __x) {
return __builtin_floor(__x); }
819double fma(
double __x,
double __y,
double __z) {
820 return __builtin_fma(__x,
__y, __z);
824double fmax(
double __x,
double __y) {
return __builtin_fmax(__x,
__y); }
827double fmin(
double __x,
double __y) {
return __builtin_fmin(__x,
__y); }
830double fmod(
double __x,
double __y) {
return __ocml_fmod_f64(__x,
__y); }
833double frexp(
double __x,
int *__nptr) {
834 return __builtin_frexp(__x, __nptr);
838double hypot(
double __x,
double __y) {
return __ocml_hypot_f64(__x,
__y); }
841int ilogb(
double __x) {
return __ocml_ilogb_f64(__x); }
859double jn(
int __n,
double __x) {
869 double __x0 =
j0(__x);
870 double __x1 =
j1(__x);
871 for (
int __i = 1; __i < __n; ++__i) {
872 double __x2 = (2 * __i) / __x * __x1 - __x0;
880double ldexp(
double __x,
int __e) {
return __builtin_amdgcn_ldexp(__x, __e); }
886long long int llrint(
double __x) {
return __builtin_rint(__x); }
889long long int llround(
double __x) {
return __builtin_round(__x); }
892double log(
double __x) {
return __ocml_log_f64(__x); }
895double log10(
double __x) {
return __ocml_log10_f64(__x); }
898double log1p(
double __x) {
return __ocml_log1p_f64(__x); }
901double log2(
double __x) {
return __ocml_log2_f64(__x); }
904double logb(
double __x) {
return __ocml_logb_f64(__x); }
907long int lrint(
double __x) {
return __builtin_rint(__x); }
910long int lround(
double __x) {
return __builtin_round(__x); }
913double modf(
double __x,
double *__iptr) {
915#ifdef __OPENMP_AMDGCN__
916#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
925double nan(
const char *__tagp) {
930 uint64_t mantissa : 51;
932 uint32_t exponent : 11;
938 __tmp.bits.sign = 0u;
939 __tmp.bits.exponent = ~0u;
940 __tmp.bits.quiet = 1u;
947 __val |= 0xFFF << 51;
948 return *
reinterpret_cast<double *
>(&__val);
953double nearbyint(
double __x) {
return __builtin_nearbyint(__x); }
957 return __ocml_nextafter_f64(__x,
__y);
969 return __builtin_sqrt(__r);
974 return __ocml_len3_f64(__x,
__y, __z);
978double norm4d(
double __x,
double __y,
double __z,
double __w) {
979 return __ocml_len4_f64(__x,
__y, __z, __w);
983double normcdf(
double __x) {
return __ocml_ncdf_f64(__x); }
986double normcdfinv(
double __x) {
return __ocml_ncdfinv_f64(__x); }
989double pow(
double __x,
double __y) {
return __ocml_pow_f64(__x,
__y); }
992double powi(
double __x,
int __y) {
return __ocml_pown_f64(__x,
__y); }
995double rcbrt(
double __x) {
return __ocml_rcbrt_f64(__x); }
999 return __ocml_remainder_f64(__x,
__y);
1005#ifdef __OPENMP_AMDGCN__
1006#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1015double rhypot(
double __x,
double __y) {
return __ocml_rhypot_f64(__x,
__y); }
1018double rint(
double __x) {
return __builtin_rint(__x); }
1022 const double *
__a) {
1029 return __ocml_rsqrt_f64(__r);
1034 return __ocml_rlen3_f64(__x,
__y, __z);
1039 return __ocml_rlen4_f64(__x,
__y, __z, __w);
1043double round(
double __x) {
return __builtin_round(__x); }
1046double rsqrt(
double __x) {
return __ocml_rsqrt_f64(__x); }
1054 return __builtin_ldexp(__x, (
int)__n);
1057double scalbn(
double __x,
int __n) {
return __builtin_amdgcn_ldexp(__x, __n); }
1066void sincos(
double __x,
double *__sinptr,
double *__cosptr) {
1068#ifdef __OPENMP_AMDGCN__
1069#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1076void sincospi(
double __x,
double *__sinptr,
double *__cosptr) {
1078#ifdef __OPENMP_AMDGCN__
1079#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1086double sinh(
double __x) {
return __ocml_sinh_f64(__x); }
1092double sqrt(
double __x) {
return __builtin_sqrt(__x); }
1098double tanh(
double __x) {
return __ocml_tanh_f64(__x); }
1104double trunc(
double __x) {
return __builtin_trunc(__x); }
1113double yn(
int __n,
double __x) {
1123 double __x0 =
y0(__x);
1124 double __x1 =
y1(__x);
1125 for (
int __i = 1; __i < __n; ++__i) {
1126 double __x2 = (2 * __i) / __x * __x1 - __x0;
1135#if defined OCML_BASIC_ROUNDED_OPERATIONS
1138 return __ocml_add_rtn_f64(__x,
__y);
1142 return __ocml_add_rte_f64(__x,
__y);
1146 return __ocml_add_rtp_f64(__x,
__y);
1150 return __ocml_add_rtz_f64(__x,
__y);
1157#if defined OCML_BASIC_ROUNDED_OPERATIONS
1160 return __ocml_div_rtn_f64(__x,
__y);
1164 return __ocml_div_rte_f64(__x,
__y);
1168 return __ocml_div_rtp_f64(__x,
__y);
1172 return __ocml_div_rtz_f64(__x,
__y);
1179#if defined OCML_BASIC_ROUNDED_OPERATIONS
1182 return __ocml_mul_rtn_f64(__x,
__y);
1186 return __ocml_mul_rte_f64(__x,
__y);
1190 return __ocml_mul_rtp_f64(__x,
__y);
1194 return __ocml_mul_rtz_f64(__x,
__y);
1201#if defined OCML_BASIC_ROUNDED_OPERATIONS
1203double __drcp_rd(
double __x) {
return __ocml_div_rtn_f64(1.0, __x); }
1205double __drcp_rn(
double __x) {
return __ocml_div_rte_f64(1.0, __x); }
1207double __drcp_ru(
double __x) {
return __ocml_div_rtp_f64(1.0, __x); }
1209double __drcp_rz(
double __x) {
return __ocml_div_rtz_f64(1.0, __x); }
1215#if defined OCML_BASIC_ROUNDED_OPERATIONS
1217double __dsqrt_rd(
double __x) {
return __ocml_sqrt_rtn_f64(__x); }
1219double __dsqrt_rn(
double __x) {
return __ocml_sqrt_rte_f64(__x); }
1221double __dsqrt_ru(
double __x) {
return __ocml_sqrt_rtp_f64(__x); }
1223double __dsqrt_rz(
double __x) {
return __ocml_sqrt_rtz_f64(__x); }
1229#if defined OCML_BASIC_ROUNDED_OPERATIONS
1232 return __ocml_sub_rtn_f64(__x,
__y);
1236 return __ocml_sub_rte_f64(__x,
__y);
1240 return __ocml_sub_rtp_f64(__x,
__y);
1244 return __ocml_sub_rtz_f64(__x,
__y);
1251#if defined OCML_BASIC_ROUNDED_OPERATIONS
1253double __fma_rd(
double __x,
double __y,
double __z) {
1254 return __ocml_fma_rtn_f64(__x,
__y, __z);
1257double __fma_rn(
double __x,
double __y,
double __z) {
1258 return __ocml_fma_rte_f64(__x,
__y, __z);
1261double __fma_ru(
double __x,
double __y,
double __z) {
1262 return __ocml_fma_rtp_f64(__x,
__y, __z);
1265double __fma_rz(
double __x,
double __y,
double __z) {
1266 return __ocml_fma_rtz_f64(__x,
__y, __z);
1271 return __builtin_fma(__x,
__y, __z);
1278#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1279#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1280#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1281#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1282#define signbit(__x) \
1283 _Generic((__x), float : __signbitf, double : __signbit)(__x)
1286#if defined(__cplusplus)
1288 return (__arg1 < __arg2) ? __arg1 : __arg2;
1292 return (__arg1 > __arg2) ? __arg1 : __arg2;
1296 return (__arg1 < __arg2) ? __arg1 : __arg2;
1299 return (__arg1 > __arg2) ? __arg1 : __arg2;
1303float max(
float __x,
float __y) {
return __builtin_fmaxf(__x,
__y); }
1306double max(
double __x,
double __y) {
return __builtin_fmax(__x,
__y); }
1309float min(
float __x,
float __y) {
return __builtin_fminf(__x,
__y); }
1312double min(
double __x,
double __y) {
return __builtin_fmin(__x,
__y); }
1315#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) && \
1316 !defined(__HIP_NO_HOST_MIN_MAX_IN_GLOBAL_NAMESPACE__)
1319#ifndef __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__
1320#define __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__ 0
1323#ifndef __HIP_DEFINE_MIXED_HOST_MIN_MAX__
1324#define __HIP_DEFINE_MIXED_HOST_MIN_MAX__ 0
1327#pragma push_macro("DEFINE_MIN_MAX_FUNCTIONS")
1328#pragma push_macro("DEFINE_MIN_MAX_FUNCTIONS")
1329#define DEFINE_MIN_MAX_FUNCTIONS(ret_type, type1, type2) \
1330 inline ret_type min(const type1 __a, const type2 __b) { \
1331 return (__a < __b) ? __a : __b; \
1333 inline ret_type max(const type1 __a, const type2 __b) { \
1334 return (__a > __b) ? __a : __b; \
1338DEFINE_MIN_MAX_FUNCTIONS(
int,
int,
int)
1340#if __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__
1341DEFINE_MIN_MAX_FUNCTIONS(
unsigned int,
unsigned int,
unsigned int)
1342DEFINE_MIN_MAX_FUNCTIONS(
long,
long,
long)
1343DEFINE_MIN_MAX_FUNCTIONS(
unsigned long,
unsigned long,
unsigned long)
1344DEFINE_MIN_MAX_FUNCTIONS(
long long,
long long,
long long)
1345DEFINE_MIN_MAX_FUNCTIONS(
unsigned long long,
unsigned long long,
1355#if __HIP_DEFINE_MIXED_HOST_MIN_MAX__
1356DEFINE_MIN_MAX_FUNCTIONS(
unsigned int,
int,
unsigned int)
1357DEFINE_MIN_MAX_FUNCTIONS(
unsigned int,
unsigned int,
int)
1358DEFINE_MIN_MAX_FUNCTIONS(
unsigned long,
long,
unsigned long)
1359DEFINE_MIN_MAX_FUNCTIONS(
unsigned long,
unsigned long,
long)
1360DEFINE_MIN_MAX_FUNCTIONS(
unsigned long long,
long long,
unsigned long long)
1361DEFINE_MIN_MAX_FUNCTIONS(
unsigned long long,
unsigned long long,
long long)
1365#if __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__
1366inline float min(
float const __a,
float const __b) {
1367 return __builtin_fminf(
__a,
__b);
1369inline double min(
double const __a,
double const __b) {
1370 return __builtin_fmin(
__a,
__b);
1372inline double min(
float const __a,
double const __b) {
1373 return __builtin_fmin(
__a,
__b);
1375inline double min(
double const __a,
float const __b) {
1376 return __builtin_fmin(
__a,
__b);
1379inline float max(
float const __a,
float const __b) {
1380 return __builtin_fmaxf(
__a,
__b);
1382inline double max(
double const __a,
double const __b) {
1383 return __builtin_fmax(
__a,
__b);
1385inline double max(
float const __a,
double const __b) {
1386 return __builtin_fmax(
__a,
__b);
1388inline double max(
double const __a,
float const __b) {
1389 return __builtin_fmax(
__a,
__b);
1393#pragma pop_macro("DEFINE_MIN_MAX_FUNCTIONS")
1399#pragma pop_macro("__DEVICE__")
1400#pragma pop_macro("__PRIVATE_AS")
1401#pragma pop_macro("__RETURN_TYPE")
1402#pragma pop_macro("__FAST_OR_SLOW")
__DEVICE__ long long abs(long long __n)
__DEVICE__ float __fsqrt_rd(float __a)
__DEVICE__ float __fdiv_rd(float __a, float __b)
__DEVICE__ double __dsub_ru(double __a, double __b)
__DEVICE__ double __drcp_ru(double __a)
__DEVICE__ float __frcp_rz(float __a)
__DEVICE__ float __fmul_ru(float __a, float __b)
__DEVICE__ double __dsub_rd(double __a, double __b)
__DEVICE__ float __frcp_ru(float __a)
__DEVICE__ float __frcp_rd(float __a)
__DEVICE__ double __dmul_ru(double __a, double __b)
__DEVICE__ float __fmaf_ru(float __a, float __b, float __c)
__DEVICE__ double __fma_rz(double __a, double __b, double __c)
__DEVICE__ double __fma_rd(double __a, double __b, double __c)
__DEVICE__ double __dmul_rd(double __a, double __b)
__DEVICE__ double __ddiv_ru(double __a, double __b)
__DEVICE__ double __ddiv_rd(double __a, double __b)
__DEVICE__ double __dadd_ru(double __a, double __b)
__DEVICE__ float __fmul_rd(float __a, float __b)
__DEVICE__ float __fsub_rd(float __a, float __b)
__DEVICE__ float __fsub_rz(float __a, float __b)
__DEVICE__ double __fma_ru(double __a, double __b, double __c)
__DEVICE__ double __dsqrt_ru(double __a)
__DEVICE__ float __fsqrt_rz(float __a)
__DEVICE__ double __dsub_rz(double __a, double __b)
__DEVICE__ float __fadd_rd(float __a, float __b)
__DEVICE__ float __fmul_rz(float __a, float __b)
__DEVICE__ float __fadd_rz(float __a, float __b)
__DEVICE__ double __dsqrt_rd(double __a)
__DEVICE__ float __fmaf_rd(float __a, float __b, float __c)
__DEVICE__ double __dadd_rd(double __a, double __b)
__DEVICE__ double __dsqrt_rz(double __a)
__DEVICE__ double __drcp_rd(double __a)
__DEVICE__ float __fdiv_rz(float __a, float __b)
__DEVICE__ float __fmaf_rz(float __a, float __b, float __c)
__DEVICE__ double __drcp_rz(double __a)
__DEVICE__ float __fsub_ru(float __a, float __b)
__DEVICE__ double __dmul_rz(double __a, double __b)
__DEVICE__ float __fsqrt_ru(float __a)
__DEVICE__ float __fadd_ru(float __a, float __b)
__DEVICE__ float __fdiv_ru(float __a, float __b)
__DEVICE__ double __ddiv_rz(double __a, double __b)
__DEVICE__ double __dadd_rz(double __a, double __b)
__DEVICE__ long labs(long __a)
__DEVICE__ long long llabs(long long __a)
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
__device__ double __ocml_sincos_f64(double, __PRIVATE_AS double *)
__device__ double __ocml_remquo_f64(double, double, __PRIVATE_AS int *)
__device__ double __ocml_i0_f64(double)
__device__ double __ocml_sincospi_f64(double, __PRIVATE_AS double *)
__device__ float __ocml_j1_f32(float)
__device__ double __ocml_modf_f64(double, __PRIVATE_AS double *)
__device__ float __ocml_cospi_f32(float)
__device__ float __ocml_i0_f32(float)
__device__ double __ocml_lgamma_f64(double)
__device__ float __ocml_sincos_f32(float, __PRIVATE_AS float *)
__device__ float __ocml_remquo_f32(float, float, __PRIVATE_AS int *)
__device__ float __ocml_y0_f32(float)
__device__ float __ocml_i1_f32(float)
__device__ float __ocml_sincospi_f32(float, __PRIVATE_AS float *)
__device__ float __ocml_lgamma_f32(float)
__device__ double __ocml_sinpi_f64(double)
__device__ double __ocml_cospi_f64(double)
__device__ double __ocml_tgamma_f64(double)
__device__ float __ocml_tan_f32(float)
__device__ float __ocml_tgamma_f32(float)
__device__ float __ocml_sinpi_f32(float)
__device__ double __ocml_j1_f64(double)
__device__ double __ocml_y1_f64(double)
__device__ double __ocml_j0_f64(double)
__device__ float __ocml_cos_f32(float)
__device__ float __ocml_y1_f32(float)
__device__ float __ocml_j0_f32(float)
__device__ double __ocml_cos_f64(double)
__device__ double __ocml_i1_f64(double)
__device__ double __ocml_sin_f64(double)
__device__ float __ocml_sin_f32(float)
__device__ float __ocml_modf_f32(float, __PRIVATE_AS float *)
__device__ float __ocml_native_sin_f32(float)
__device__ float __ocml_native_cos_f32(float)
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
__device__ double __ocml_y0_f64(double)
__device__ double __ocml_tan_f64(double)
__DEVICE__ double __dsub_rn(double __x, double __y)
__DEVICE__ __RETURN_TYPE __isinff(float __x)
__DEVICE__ float sinpif(float __x)
__DEVICE__ float tanf(float __x)
__DEVICE__ float log2f(float __x)
__DEVICE__ float y0f(float __x)
__DEVICE__ float tanhf(float __x)
__DEVICE__ float coshf(float __x)
__DEVICE__ float log10f(float __x)
__DEVICE__ float j1f(float __x)
__DEVICE__ __RETURN_TYPE __finitef(float __x)
__DEVICE__ float ldexpf(float __x, int __e)
__DEVICE__ long long int llroundf(float __x)
__DEVICE__ double rhypot(double __x, double __y)
__DEVICE__ double normcdfinv(double __x)
__DEVICE__ double norm3d(double __x, double __y, double __z)
__DEVICE__ float truncf(float __x)
__DEVICE__ float remainderf(float __x, float __y)
__DEVICE__ float fabsf(float __x)
__DEVICE__ float __fdiv_rn(float __x, float __y)
__DEVICE__ float scalbnf(float __x, int __n)
__DEVICE__ float cyl_bessel_i0f(float __x)
__DEVICE__ float nanf(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float lgammaf(float __x)
__DEVICE__ float cospif(float __x)
__DEVICE__ __RETURN_TYPE __signbitf(float __x)
__DEVICE__ double __dsqrt_rn(double __x)
__DEVICE__ float frexpf(float __x, int *__nptr)
__DEVICE__ float tgammaf(float __x)
__DEVICE__ float __sinf(float __x)
__DEVICE__ float erfinvf(float __x)
__DEVICE__ float modff(float __x, float *__iptr)
__DEVICE__ double erfinv(double __x)
__DEVICE__ float expm1f(float __x)
__DEVICE__ float sinhf(float __x)
__DEVICE__ double j0(double __x)
__DEVICE__ float y1f(float __x)
__DEVICE__ float acosf(float __x)
__DEVICE__ float fmaf(float __x, float __y, float __z)
__DEVICE__ float cyl_bessel_i1f(float __x)
__DEVICE__ float fmodf(float __x, float __y)
__DEVICE__ float log1pf(float __x)
__DEVICE__ float atan2f(float __x, float __y)
__DEVICE__ float copysignf(float __x, float __y)
__DEVICE__ double j1(double __x)
__DEVICE__ __RETURN_TYPE __isnan(double __x)
__DEVICE__ float rnormf(int __dim, const float *__a)
__DEVICE__ float rnorm4df(float __x, float __y, float __z, float __w)
__DEVICE__ float __cosf(float __x)
__DEVICE__ float erff(float __x)
__DEVICE__ float atanf(float __x)
__DEVICE__ float rnorm3df(float __x, float __y, float __z)
__DEVICE__ double norm(int __dim, const double *__a)
__DEVICE__ float erfcxf(float __x)
__DEVICE__ float erfcinvf(float __x)
__DEVICE__ float asinf(float __x)
__DEVICE__ long int lroundf(float __x)
__DEVICE__ float __fdividef(float __x, float __y)
__DEVICE__ float __frsqrt_rn(float __x)
__DEVICE__ float __log2f(float __x)
__DEVICE__ float norm4df(float __x, float __y, float __z, float __w)
__DEVICE__ __RETURN_TYPE __isnanf(float __x)
__DEVICE__ uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull)))
__DEVICE__ double jn(int __n, double __x)
__DEVICE__ float __exp10f(float __x)
__DEVICE__ float __frcp_rn(float __x)
__DEVICE__ float ynf(int __n, float __x)
__DEVICE__ float powf(float __x, float __y)
__DEVICE__ float __fsub_rn(float __x, float __y)
__DEVICE__ double __dadd_rn(double __x, double __y)
__DEVICE__ float sinf(float __x)
__DEVICE__ float __tanf(float __x)
__DEVICE__ float remquof(float __x, float __y, int *__quo)
__DEVICE__ double normcdf(double __x)
__DEVICE__ float __fsqrt_rn(float __x)
__DEVICE__ float hypotf(float __x, float __y)
__DEVICE__ float __fmaf_rn(float __x, float __y, float __z)
__DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float exp10f(float __x)
__DEVICE__ double y1(double __x)
#define __FAST_OR_SLOW(fast, slow)
__DEVICE__ float fmaxf(float __x, float __y)
__DEVICE__ float fminf(float __x, float __y)
__DEVICE__ double erfcinv(double __x)
__DEVICE__ double powi(double __x, int __y)
__DEVICE__ float logf(float __x)
__DEVICE__ float __fadd_rn(float __x, float __y)
__DEVICE__ double cospi(double __x)
__DEVICE__ double rsqrt(double __x)
__DEVICE__ float erfcf(float __x)
__DEVICE__ float atanhf(float __x)
__DEVICE__ float asinhf(float __x)
__DEVICE__ float __expf(float __x)
__DEVICE__ double norm4d(double __x, double __y, double __z, double __w)
__DEVICE__ float __logf(float __x)
__DEVICE__ double __fma_rn(double __x, double __y, double __z)
__DEVICE__ double nan(const char *__tagp)
__DEVICE__ double rnorm(int __dim, const double *__a)
__DEVICE__ float j0f(float __x)
__DEVICE__ float rsqrtf(float __x)
__DEVICE__ float jnf(int __n, float __x)
__DEVICE__ double sinpi(double __x)
__DEVICE__ float logbf(float __x)
__DEVICE__ double y0(double __x)
__DEVICE__ __RETURN_TYPE __finite(double __x)
__DEVICE__ void __sincosf(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ double yn(int __n, double __x)
__DEVICE__ float rhypotf(float __x, float __y)
__DEVICE__ float exp2f(float __x)
__DEVICE__ double cyl_bessel_i0(double __x)
__DEVICE__ float powif(float __x, int __y)
__DEVICE__ double __ddiv_rn(double __x, double __y)
__DEVICE__ double cyl_bessel_i1(double __x)
__DEVICE__ float ceilf(float __x)
__DEVICE__ double rcbrt(double __x)
__DEVICE__ double rnorm3d(double __x, double __y, double __z)
__DEVICE__ float normcdfinvf(float __x)
__DEVICE__ float norm3df(float __x, float __y, float __z)
__DEVICE__ void sincos(double __x, double *__sinptr, double *__cosptr)
#define __static_assert_type_size_equal(A, B)
__DEVICE__ __RETURN_TYPE __signbit(double __x)
__DEVICE__ float fdimf(float __x, float __y)
__DEVICE__ double __dmul_rn(double __x, double __y)
__DEVICE__ float normf(int __dim, const float *__a)
__DEVICE__ float nearbyintf(float __x)
__DEVICE__ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull)))
__DEVICE__ int ilogbf(float __x)
__DEVICE__ float floorf(float __x)
__DEVICE__ float sqrtf(float __x)
__DEVICE__ float roundf(float __x)
__DEVICE__ void sincospif(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ double __drcp_rn(double __x)
__DEVICE__ long int lrintf(float __x)
__DEVICE__ float acoshf(float __x)
__DEVICE__ double modf(double __x, double *__iptr)
__DEVICE__ float cosf(float __x)
__DEVICE__ float expf(float __x)
__DEVICE__ float nextafterf(float __x, float __y)
__DEVICE__ double rnorm4d(double __x, double __y, double __z, double __w)
__DEVICE__ long long int llrintf(float __x)
__DEVICE__ double erfcx(double __x)
__DEVICE__ float fdividef(float __x, float __y)
__DEVICE__ float rcbrtf(float __x)
__DEVICE__ double exp10(double __x)
__DEVICE__ float __log10f(float __x)
__DEVICE__ float cbrtf(float __x)
__DEVICE__ float __fmul_rn(float __x, float __y)
__DEVICE__ void sincospi(double __x, double *__sinptr, double *__cosptr)
__DEVICE__ float scalblnf(float __x, long int __n)
__DEVICE__ __RETURN_TYPE __isinf(double __x)
__DEVICE__ float rintf(float __x)
__DEVICE__ float normcdff(float __x)
__DEVICE__ uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float __saturatef(float __x)
__DEVICE__ float __powf(float __x, float __y)
static __inline__ vector float vector float __b
static __inline__ uint32_t uint32_t __y
static __inline__ void int __a
const FunctionProtoType * T
float __ovld __cnfn sign(float)
Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x = +0.0, or -1.0 if x < 0.
#define scalbln(__x, __y)
#define copysign(__x, __y)
#define remquo(__x, __y, __z)
#define nextafter(__x, __y)
#define remainder(__x, __y)
#define fma(__x, __y, __z)