clang 22.0.0git
__clang_hip_math.h
Go to the documentation of this file.
1/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __CLANG_HIP_MATH_H__
10#define __CLANG_HIP_MATH_H__
11
12#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13#error "This file is for HIP and OpenMP AMDGCN device compilation only."
14#endif
15
16// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard
17// libcalls reach the link step instead of being eagerly replaced.
18#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
19
20#if !defined(__HIPCC_RTC__)
21#include <limits.h>
22#include <stdint.h>
23#ifdef __OPENMP_AMDGCN__
24#include <omp.h>
25#endif
26#endif // !defined(__HIPCC_RTC__)
27
28#pragma push_macro("__DEVICE__")
29
30#ifdef __OPENMP_AMDGCN__
31#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
32#else
33#define __DEVICE__ static __device__ inline __attribute__((always_inline))
34#endif
35
36#pragma push_macro("__PRIVATE_AS")
37
38#define __PRIVATE_AS __attribute__((opencl_private))
// The device library provides fast, low-precision and slow, full-precision
// implementations for some functions. Which one gets selected depends on
// __CLANG_GPU_APPROX_TRANSCENDENTALS__, which clang defines when
// -ffast-math or -fgpu-approx-transcendentals is in effect.
43#pragma push_macro("__FAST_OR_SLOW")
44#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
45#define __FAST_OR_SLOW(fast, slow) fast
46#else
47#define __FAST_OR_SLOW(fast, slow) slow
48#endif
49
50// A few functions return bool type starting only in C++11.
51#pragma push_macro("__RETURN_TYPE")
// __RETURN_TYPE is bool only for C++ compilation that is not OpenMP AMDGCN;
// OpenMP AMDGCN and plain C compilation both get int.
#if defined(__cplusplus) && !defined(__OPENMP_AMDGCN__)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
61
62#if defined (__cplusplus) && __cplusplus < 201103L
63// emulate static_assert on type sizes
64template<bool>
65struct __compare_result{};
66template<>
67struct __compare_result<true> {
68 static const __device__ bool valid;
69};
70
// Sink that accepts a bool and does nothing with it. It exists so that
// __static_assert_equal_size can "use" __compare_result<...>::valid.
// The parameter is explicitly discarded, and the stray ';' that followed the
// function body (an empty declaration) has been removed.
void __suppress_unused_warning(bool b) { (void)b; }
73template <unsigned int S, unsigned int T>
74__DEVICE__ void __static_assert_equal_size() {
75 __suppress_unused_warning(__compare_result<S == T>::valid);
76}
77
78#define __static_assert_type_size_equal(A, B) \
79 __static_assert_equal_size<A,B>()
80
81#else
82#define __static_assert_type_size_equal(A,B) \
83 static_assert((A) == (B), "")
84
85#endif
86
// Parses __tagp as a string of octal digits.
// Returns the accumulated value, or 0 as soon as a character outside
// '0'..'7' is encountered. An empty string yields 0.
uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  for (; *__tagp != '\0'; ++__tagp) {
    const char __c = *__tagp;
    if (__c < '0' || __c > '7')
      return 0;
    __r = (__r * 8u) + __c - '0';
  }
  return __r;
}

// Parses __tagp as a string of decimal digits.
// Returns the accumulated value, or 0 as soon as a non-digit is encountered.
// An empty string yields 0.
uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  for (; *__tagp != '\0'; ++__tagp) {
    const char __c = *__tagp;
    if (__c < '0' || __c > '9')
      return 0;
    __r = (__r * 10u) + __c - '0';
  }
  return __r;
}

// Parses __tagp as a string of hexadecimal digits (either letter case, no
// "0x" prefix). Returns the accumulated value, or 0 as soon as a character
// that is not a hex digit is encountered. An empty string yields 0.
uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  for (; *__tagp != '\0'; ++__tagp) {
    const char __c = *__tagp;
    if (__c >= '0' && __c <= '9')
      __r = (__r * 16u) + __c - '0';
    else if (__c >= 'a' && __c <= 'f')
      __r = (__r * 16u) + __c - 'a' + 10;
    else if (__c >= 'A' && __c <= 'F')
      __r = (__r * 16u) + __c - 'A' + 10;
    else
      return 0;
  }
  return __r;
}

// Converts the tag string to an integer, choosing the radix from its prefix:
//   "0x..." / "0X..."  -> hexadecimal
//   "0..."             -> octal
//   anything else      -> decimal
// Malformed input yields 0 (see the per-radix helpers above).
uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
  if (*__tagp != '0')
    return __make_mantissa_base10(__tagp);

  ++__tagp; // step over the leading '0'

  if (*__tagp == 'x' || *__tagp == 'X') {
    // Step over the 'x'/'X' marker as well: the hex parser rejects any
    // non-hex character, so handing it the marker made every "0x..." tag
    // parse to 0.
    return __make_mantissa_base16(__tagp + 1);
  }

  return __make_mantissa_base8(__tagp);
}
155
156// BEGIN FLOAT
157
158// BEGIN INTRINSICS
159
// Cosine via the OCML native routine; cosf selects this as its "fast" variant.
float __cosf(float __x) {
  return __ocml_native_cos_f32(__x);
}

// 10^x computed as exp2(x * log2(10)) with the AMDGCN exp2 builtin.
float __exp10f(float __x) {
  const float __log2_10 = 0x1.a934f0p+1f;
  const float __scaled = __log2_10 * __x;
  return __builtin_amdgcn_exp2f(__scaled);
}

// e^x computed as exp2(x * log2(e)) with the AMDGCN exp2 builtin.
float __expf(float __x) {
  const float __log2_e = 0x1.715476p+0;
  const float __scaled = __log2_e * __x;
  return __builtin_amdgcn_exp2f(__scaled);
}
174
// Single-precision addition with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the _rd/_rn/_ru/_rz variants forward to
// the OCML rtn/rte/rtp/rtz routines; otherwise only _rn exists and it is a
// plain '+'.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
float __fadd_rd(float __x, float __y) {
  return __ocml_add_rtn_f32(__x, __y);
}
float __fadd_rn(float __x, float __y) {
  return __ocml_add_rte_f32(__x, __y);
}
float __fadd_ru(float __x, float __y) {
  return __ocml_add_rtp_f32(__x, __y);
}
float __fadd_rz(float __x, float __y) {
  return __ocml_add_rtz_f32(__x, __y);
}
#else
float __fadd_rn(float __x, float __y) {
  return __x + __y;
}
#endif
188
// Single-precision division with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the variants forward to the OCML
// rtn/rte/rtp/rtz routines; otherwise only _rn exists and it is a plain '/'.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
float __fdiv_rd(float __x, float __y) {
  return __ocml_div_rtn_f32(__x, __y);
}
float __fdiv_rn(float __x, float __y) {
  return __ocml_div_rte_f32(__x, __y);
}
float __fdiv_ru(float __x, float __y) {
  return __ocml_div_rtp_f32(__x, __y);
}
float __fdiv_rz(float __x, float __y) {
  return __ocml_div_rtz_f32(__x, __y);
}
#else
float __fdiv_rn(float __x, float __y) {
  return __x / __y;
}
#endif
202
// Returns the quotient __x / __y using the ordinary division operator.
float __fdividef(float __x, float __y) {
  return __x / __y;
}
205
206#if defined OCML_BASIC_ROUNDED_OPERATIONS
208float __fmaf_rd(float __x, float __y, float __z) {
209 return __ocml_fma_rtn_f32(__x, __y, __z);
210}
212float __fmaf_rn(float __x, float __y, float __z) {
213 return __ocml_fma_rte_f32(__x, __y, __z);
214}
216float __fmaf_ru(float __x, float __y, float __z) {
217 return __ocml_fma_rtp_f32(__x, __y, __z);
218}
220float __fmaf_rz(float __x, float __y, float __z) {
221 return __ocml_fma_rtz_f32(__x, __y, __z);
222}
223#else
225float __fmaf_rn(float __x, float __y, float __z) {
226 return __builtin_fmaf(__x, __y, __z);
227}
228#endif
229
// Single-precision multiplication with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the variants forward to the OCML
// rtn/rte/rtp/rtz routines; otherwise only _rn exists and it is a plain '*'.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
float __fmul_rd(float __x, float __y) {
  return __ocml_mul_rtn_f32(__x, __y);
}
float __fmul_rn(float __x, float __y) {
  return __ocml_mul_rte_f32(__x, __y);
}
float __fmul_ru(float __x, float __y) {
  return __ocml_mul_rtp_f32(__x, __y);
}
float __fmul_rz(float __x, float __y) {
  return __ocml_mul_rtz_f32(__x, __y);
}
#else
float __fmul_rn(float __x, float __y) {
  return __x * __y;
}
#endif
243
// Single-precision reciprocal (1 / __x) with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the variants call the OCML rounded
// division routines with a numerator of 1.0f; otherwise only _rn exists and
// it is a plain division.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
float __frcp_rd(float __x) {
  return __ocml_div_rtn_f32(1.0f, __x);
}
float __frcp_rn(float __x) {
  return __ocml_div_rte_f32(1.0f, __x);
}
float __frcp_ru(float __x) {
  return __ocml_div_rtp_f32(1.0f, __x);
}
float __frcp_rz(float __x) {
  return __ocml_div_rtz_f32(1.0f, __x);
}
#else
float __frcp_rn(float __x) {
  return 1.0f / __x;
}
#endif
257
// Reciprocal square root through the AMDGCN rsq builtin.
float __frsqrt_rn(float __x) {
  return __builtin_amdgcn_rsqf(__x);
}

// Single-precision square root with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the variants forward to the OCML
// rtn/rte/rtp/rtz routines; otherwise only _rn exists and it forwards to the
// OCML native square root.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
float __fsqrt_rd(float __x) {
  return __ocml_sqrt_rtn_f32(__x);
}
float __fsqrt_rn(float __x) {
  return __ocml_sqrt_rte_f32(__x);
}
float __fsqrt_ru(float __x) {
  return __ocml_sqrt_rtp_f32(__x);
}
float __fsqrt_rz(float __x) {
  return __ocml_sqrt_rtz_f32(__x);
}
#else
float __fsqrt_rn(float __x) {
  return __ocml_native_sqrt_f32(__x);
}
#endif
274
// Single-precision subtraction with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the variants forward to the OCML
// rtn/rte/rtp/rtz routines; otherwise only _rn exists and it is a plain '-'.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
float __fsub_rd(float __x, float __y) {
  return __ocml_sub_rtn_f32(__x, __y);
}
float __fsub_rn(float __x, float __y) {
  return __ocml_sub_rte_f32(__x, __y);
}
float __fsub_ru(float __x, float __y) {
  return __ocml_sub_rtp_f32(__x, __y);
}
float __fsub_rz(float __x, float __y) {
  return __ocml_sub_rtz_f32(__x, __y);
}
#else
float __fsub_rn(float __x, float __y) {
  return __x - __y;
}
#endif
288
// Base-10 logarithm via the compiler builtin.
float __log10f(float __x) {
  return __builtin_log10f(__x);
}

// Base-2 logarithm via the AMDGCN log builtin; log2f selects this as its
// "fast" variant.
float __log2f(float __x) {
  return __builtin_amdgcn_logf(__x);
}

// Natural logarithm via the compiler builtin.
float __logf(float __x) {
  return __builtin_logf(__x);
}

// Power function, forwarded to OCML.
float __powf(float __x, float __y) {
  return __ocml_pow_f32(__x, __y);
}
300
// Clamps __x to the range [0, 1]: values below 0 become 0, values above 1
// become 1, and everything else (including any value for which both
// comparisons are false) is returned unchanged.
float __saturatef(float __x) {
  if (__x < 0)
    return 0;
  if (__x > 1)
    return 1;
  return __x;
}
303
// Stores the OCML native sine of __x in *__sinptr and the OCML native cosine
// in *__cosptr (sine is written first).
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
  const float __s = __ocml_native_sin_f32(__x);
  *__sinptr = __s;
  const float __c = __ocml_native_cos_f32(__x);
  *__cosptr = __c;
}

// Sine via the OCML native routine; sinf selects this as its "fast" variant.
float __sinf(float __x) {
  return __ocml_native_sin_f32(__x);
}

// Tangent formed as __sinf(x) multiplied by the AMDGCN reciprocal of
// __cosf(x).
float __tanf(float __x) {
  const float __inv_cos = __builtin_amdgcn_rcpf(__cosf(__x));
  return __sinf(__x) * __inv_cos;
}
317// END INTRINSICS
318
319#if defined(__cplusplus)
// Integer absolute-value functions for int, long and long long, each
// forwarding to the compiler builtin of matching width.
int abs(int __x)
{
  return __builtin_abs(__x);
}

long labs(long __x)
{
  return __builtin_labs(__x);
}

long long llabs(long long __x)
{
  return __builtin_llabs(__x);
}
332#endif
333
335float acosf(float __x) { return __ocml_acos_f32(__x); }
336
338float acoshf(float __x) { return __ocml_acosh_f32(__x); }
339
341float asinf(float __x) { return __ocml_asin_f32(__x); }
342
344float asinhf(float __x) { return __ocml_asinh_f32(__x); }
345
347float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
348
350float atanf(float __x) { return __ocml_atan_f32(__x); }
351
353float atanhf(float __x) { return __ocml_atanh_f32(__x); }
354
356float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
357
359float ceilf(float __x) { return __builtin_ceilf(__x); }
360
362float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }
363
365float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); }
366
368float coshf(float __x) { return __ocml_cosh_f32(__x); }
369
371float cospif(float __x) { return __ocml_cospi_f32(__x); }
372
374float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
375
377float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
378
380float erfcf(float __x) { return __ocml_erfc_f32(__x); }
381
383float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
384
386float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
387
389float erff(float __x) { return __ocml_erf_f32(__x); }
390
392float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
393
// Single-precision math entry points, exp10f through ilogbf. Each one
// forwards its argument(s) unchanged to the OCML routine or compiler builtin
// named in its body; fdividef is a plain division.

float exp10f(float __x) {
  return __builtin_exp10f(__x);
}

float exp2f(float __x) {
  return __builtin_exp2f(__x);
}

float expf(float __x) {
  return __builtin_expf(__x);
}

float expm1f(float __x) {
  return __ocml_expm1_f32(__x);
}

float fabsf(float __x) {
  return __builtin_fabsf(__x);
}

float fdimf(float __x, float __y) {
  return __ocml_fdim_f32(__x, __y);
}

float fdividef(float __x, float __y) {
  return __x / __y;
}

float floorf(float __x) {
  return __builtin_floorf(__x);
}

float fmaf(float __x, float __y, float __z)
{
  return __builtin_fmaf(__x, __y, __z);
}

float fmaxf(float __x, float __y) {
  return __builtin_fmaxf(__x, __y);
}

float fminf(float __x, float __y) {
  return __builtin_fminf(__x, __y);
}

float fmodf(float __x, float __y) {
  return __ocml_fmod_f32(__x, __y);
}

// The exponent is written through __nptr by the builtin.
float frexpf(float __x, int *__nptr)
{
  return __builtin_frexpf(__x, __nptr);
}

float hypotf(float __x, float __y) {
  return __ocml_hypot_f32(__x, __y);
}

int ilogbf(float __x) {
  return __ocml_ilogb_f32(__x);
}
442
444__RETURN_TYPE __finitef(float __x) { return __builtin_isfinite(__x); }
445
447__RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); }
448
450__RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); }
451
453float j0f(float __x) { return __ocml_j0_f32(__x); }
454
456float j1f(float __x) { return __ocml_j1_f32(__x); }
457
// Computes the order-__n value from j0f and j1f with the three-term
// recurrence  next = (2 * k / x) * curr - prev,  applied for k = 1 .. __n-1.
// Orders 0 and 1 are answered directly by j0f / j1f.
// NOTE(review): for __n < 0 the loop body never runs and j1f(__x) is
// returned; confirm whether negative orders are expected by callers.
// TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if it'd be
// beneficial in this case.
float jnf(int __n, float __x) {
  switch (__n) {
  case 0:
    return j0f(__x);
  case 1:
    return j1f(__x);
  default:
    break;
  }

  float __prev = j0f(__x);
  float __curr = j1f(__x);
  int __k = 1;
  while (__k < __n) {
    const float __next = (2 * __k) / __x * __curr - __prev;
    __prev = __curr;
    __curr = __next;
    ++__k;
  }

  return __curr;
}
478
480float ldexpf(float __x, int __e) { return __builtin_amdgcn_ldexpf(__x, __e); }
481
483float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
484
486long long int llrintf(float __x) { return __builtin_rintf(__x); }
487
489long long int llroundf(float __x) { return __builtin_roundf(__x); }
490
492float log10f(float __x) { return __builtin_log10f(__x); }
493
495float log1pf(float __x) { return __ocml_log1p_f32(__x); }
496
498float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __builtin_log2f)(__x); }
499
501float logbf(float __x) { return __ocml_logb_f32(__x); }
502
504float logf(float __x) { return __FAST_OR_SLOW(__logf, __builtin_logf)(__x); }
505
507long int lrintf(float __x) { return __builtin_rintf(__x); }
508
510long int lroundf(float __x) { return __builtin_roundf(__x); }
511
513float modff(float __x, float *__iptr) {
514 float __tmp;
515#ifdef __OPENMP_AMDGCN__
516#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
517#endif
518 float __r = __ocml_modf_f32(__x, (__PRIVATE_AS float *)&__tmp);
519 *__iptr = __tmp;
520 return __r;
521}
522
// Builds a float by filling in bit-fields directly: sign 0, all exponent
// bits set, the "quiet" bit set, and the low 22 bits taken from
// __make_mantissa(__tagp) (the bit-field assignment keeps only 22 bits).
float nanf(const char *__tagp __attribute__((nonnull))) {
  union {
    float val;
    struct ieee_float {
      unsigned int mantissa : 22;
      unsigned int quiet : 1;
      unsigned int exponent : 8;
      unsigned int sign : 1;
    } bits;
  } __pun;
  // The bit-field view must cover exactly the same bytes as the float.
  __static_assert_type_size_equal(sizeof(__pun.val), sizeof(__pun.bits));

  __pun.bits.sign = 0u;
  __pun.bits.exponent = ~0u;
  __pun.bits.quiet = 1u;
  __pun.bits.mantissa = __make_mantissa(__tagp);

  return __pun.val;
}
543
// Single-precision math entry points, nearbyintf through normcdfinvf. Each
// one forwards its argument(s) unchanged to the OCML routine or compiler
// builtin named in its body.

float nearbyintf(float __x) {
  return __builtin_nearbyintf(__x);
}

float nextafterf(float __x, float __y)
{
  return __ocml_nextafter_f32(__x, __y);
}

float norm3df(float __x, float __y, float __z)
{
  return __ocml_len3_f32(__x, __y, __z);
}

float norm4df(float __x, float __y, float __z, float __w)
{
  return __ocml_len4_f32(__x, __y, __z, __w);
}

float normcdff(float __x) {
  return __ocml_ncdf_f32(__x);
}

float normcdfinvf(float __x) {
  return __ocml_ncdfinv_f32(__x);
}
567
569float normf(int __dim,
570 const float *__a) { // TODO: placeholder until OCML adds support.
571 float __r = 0;
572 while (__dim--) {
573 __r += __a[0] * __a[0];
574 ++__a;
575 }
576
577 return __builtin_sqrtf(__r);
578}
579
// Single-precision math entry points, powf through remainderf, each
// forwarded unchanged to the OCML routine named in its body.

float powf(float __x, float __y) {
  return __ocml_pow_f32(__x, __y);
}

// Integer-exponent power, forwarded to OCML's pown.
float powif(float __x, int __y) {
  return __ocml_pown_f32(__x, __y);
}

float rcbrtf(float __x) {
  return __ocml_rcbrt_f32(__x);
}

float remainderf(float __x, float __y)
{
  return __ocml_remainder_f32(__x, __y);
}
593
595float remquof(float __x, float __y, int *__quo) {
596 int __tmp;
597#ifdef __OPENMP_AMDGCN__
598#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
599#endif
600 float __r = __ocml_remquo_f32(__x, __y, (__PRIVATE_AS int *)&__tmp);
601 *__quo = __tmp;
602
603 return __r;
604}
605
// Single-precision math entry points, rhypotf through rnorm4df. Each one
// forwards its argument(s) unchanged to the OCML routine or compiler builtin
// named in its body.

float rhypotf(float __x, float __y) {
  return __ocml_rhypot_f32(__x, __y);
}

float rintf(float __x) {
  return __builtin_rintf(__x);
}

float rnorm3df(float __x, float __y, float __z)
{
  return __ocml_rlen3_f32(__x, __y, __z);
}

float rnorm4df(float __x, float __y, float __z, float __w)
{
  return __ocml_rlen4_f32(__x, __y, __z, __w);
}
621
// Passes the sum of squares of the first __dim elements of __a (accumulated
// in index order) to __ocml_rsqrt_f32 and returns the result.
//   __dim  number of elements to read; zero or negative reads nothing, so
//          OCML receives 0.
//   __a    array with at least __dim readable elements.
// The loop is bounded by `__i < __dim` rather than counting __dim down to
// zero, so a negative __dim can no longer walk off the array.
// TODO: placeholder until OCML adds support.
float rnormf(int __dim, const float *__a) {
  float __r = 0;
  for (int __i = 0; __i < __dim; ++__i)
    __r += __a[__i] * __a[__i];

  return __ocml_rsqrt_f32(__r);
}
633
// Rounding via the compiler builtin.
float roundf(float __x) {
  return __builtin_roundf(__x);
}

// Reciprocal square root, forwarded to OCML.
float rsqrtf(float __x) {
  return __ocml_rsqrt_f32(__x);
}
639
// Scales __x by 2^__n through __builtin_ldexpf. The long exponent is first
// clamped to [INT_MIN, INT_MAX] so the narrowing cast to int cannot change
// its value.
float scalblnf(float __x, long int __n) {
  const long int __clamped =
      (__n > INT_MAX) ? (long int)INT_MAX
                      : ((__n < INT_MIN) ? (long int)INT_MIN : __n);
  return __builtin_ldexpf(__x, (int)__clamped);
}
648
650float scalbnf(float __x, int __n) { return __builtin_amdgcn_ldexpf(__x, __n); }
651
653__RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); }
654
656void sincosf(float __x, float *__sinptr, float *__cosptr) {
657 float __tmp;
658#ifdef __OPENMP_AMDGCN__
659#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
660#endif
661#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
662 __sincosf(__x, __sinptr, __cosptr);
663#else
664 *__sinptr = __ocml_sincos_f32(__x, (__PRIVATE_AS float *)&__tmp);
665 *__cosptr = __tmp;
666#endif
667}
668
670void sincospif(float __x, float *__sinptr, float *__cosptr) {
671 float __tmp;
672#ifdef __OPENMP_AMDGCN__
673#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
674#endif
675 *__sinptr = __ocml_sincospi_f32(__x, (__PRIVATE_AS float *)&__tmp);
676 *__cosptr = __tmp;
677}
678
680float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); }
681
683float sinhf(float __x) { return __ocml_sinh_f32(__x); }
684
686float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
687
689float sqrtf(float __x) { return __builtin_sqrtf(__x); }
690
692float tanf(float __x) { return __ocml_tan_f32(__x); }
693
695float tanhf(float __x) { return __ocml_tanh_f32(__x); }
696
698float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
699
701float truncf(float __x) { return __builtin_truncf(__x); }
702
704float y0f(float __x) { return __ocml_y0_f32(__x); }
705
707float y1f(float __x) { return __ocml_y1_f32(__x); }
708
// Computes the order-__n value from y0f and y1f with the three-term
// recurrence  next = (2 * k / x) * curr - prev,  applied for k = 1 .. __n-1.
// Orders 0 and 1 are answered directly by y0f / y1f.
// NOTE(review): for __n < 0 the loop body never runs and y1f(__x) is
// returned; confirm whether negative orders are expected by callers.
// TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if it'd be
// beneficial in this case. Placeholder until OCML adds support.
float ynf(int __n, float __x) {
  switch (__n) {
  case 0:
    return y0f(__x);
  case 1:
    return y1f(__x);
  default:
    break;
  }

  float __prev = y0f(__x);
  float __curr = y1f(__x);
  int __k = 1;
  while (__k < __n) {
    const float __next = (2 * __k) / __x * __curr - __prev;
    __prev = __curr;
    __curr = __next;
    ++__k;
  }

  return __curr;
}
730
731
732// END FLOAT
733
734// BEGIN DOUBLE
// Double-precision math entry points, acos through erfinv. Each one forwards
// its argument(s) unchanged to the OCML routine or compiler builtin named in
// its body.

double acos(double __x) {
  return __ocml_acos_f64(__x);
}

double acosh(double __x) {
  return __ocml_acosh_f64(__x);
}

double asin(double __x) {
  return __ocml_asin_f64(__x);
}

double asinh(double __x) {
  return __ocml_asinh_f64(__x);
}

double atan(double __x) {
  return __ocml_atan_f64(__x);
}

// Arguments are forwarded positionally: the first parameter is OCML's first
// argument, regardless of the parameter names used here.
double atan2(double __x, double __y) {
  return __ocml_atan2_f64(__x, __y);
}

double atanh(double __x) {
  return __ocml_atanh_f64(__x);
}

double cbrt(double __x) {
  return __ocml_cbrt_f64(__x);
}

double ceil(double __x) {
  return __builtin_ceil(__x);
}

double copysign(double __x, double __y)
{
  return __builtin_copysign(__x, __y);
}

double cos(double __x) {
  return __ocml_cos_f64(__x);
}

double cosh(double __x) {
  return __ocml_cosh_f64(__x);
}

double cospi(double __x) {
  return __ocml_cospi_f64(__x);
}

double cyl_bessel_i0(double __x) {
  return __ocml_i0_f64(__x);
}

double cyl_bessel_i1(double __x) {
  return __ocml_i1_f64(__x);
}

double erf(double __x) {
  return __ocml_erf_f64(__x);
}

double erfc(double __x) {
  return __ocml_erfc_f64(__x);
}

double erfcinv(double __x) {
  return __ocml_erfcinv_f64(__x);
}

double erfcx(double __x) {
  return __ocml_erfcx_f64(__x);
}

double erfinv(double __x) {
  return __ocml_erfinv_f64(__x);
}
796
// Double-precision math entry points, exp through ilogb. Each one forwards
// its argument(s) unchanged to the OCML routine or compiler builtin named in
// its body.

double exp(double __x) {
  return __ocml_exp_f64(__x);
}

double exp10(double __x) {
  return __ocml_exp10_f64(__x);
}

double exp2(double __x) {
  return __ocml_exp2_f64(__x);
}

double expm1(double __x) {
  return __ocml_expm1_f64(__x);
}

double fabs(double __x) {
  return __builtin_fabs(__x);
}

double fdim(double __x, double __y) {
  return __ocml_fdim_f64(__x, __y);
}

double floor(double __x) {
  return __builtin_floor(__x);
}

double fma(double __x, double __y, double __z)
{
  return __builtin_fma(__x, __y, __z);
}

double fmax(double __x, double __y) {
  return __builtin_fmax(__x, __y);
}

double fmin(double __x, double __y) {
  return __builtin_fmin(__x, __y);
}

double fmod(double __x, double __y) {
  return __ocml_fmod_f64(__x, __y);
}

// The exponent is written through __nptr by the builtin.
double frexp(double __x, int *__nptr)
{
  return __builtin_frexp(__x, __nptr);
}

double hypot(double __x, double __y) {
  return __ocml_hypot_f64(__x, __y);
}

int ilogb(double __x) {
  return __ocml_ilogb_f64(__x);
}
842
844__RETURN_TYPE __finite(double __x) { return __builtin_isfinite(__x); }
845
847__RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); }
848
850__RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); }
851
853double j0(double __x) { return __ocml_j0_f64(__x); }
854
856double j1(double __x) { return __ocml_j1_f64(__x); }
857
// Computes the order-__n value from j0 and j1 with the three-term
// recurrence  next = (2 * k / x) * curr - prev,  applied for k = 1 .. __n-1.
// Orders 0 and 1 are answered directly by j0 / j1.
// NOTE(review): for __n < 0 the loop body never runs and j1(__x) is
// returned; confirm whether negative orders are expected by callers.
// TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if it'd be
// beneficial in this case. Placeholder until OCML adds support.
double jn(int __n, double __x) {
  switch (__n) {
  case 0:
    return j0(__x);
  case 1:
    return j1(__x);
  default:
    break;
  }

  double __prev = j0(__x);
  double __curr = j1(__x);
  int __k = 1;
  while (__k < __n) {
    const double __next = (2 * __k) / __x * __curr - __prev;
    __prev = __curr;
    __curr = __next;
    ++__k;
  }
  return __curr;
}
878
// Double-precision math entry points, ldexp through lround. Each one
// forwards its argument(s) unchanged to the OCML routine or compiler builtin
// named in its body.

double ldexp(double __x, int __e) {
  return __builtin_amdgcn_ldexp(__x, __e);
}

double lgamma(double __x) {
  return __ocml_lgamma_f64(__x);
}

// The double result of __builtin_rint is converted to long long on return.
long long int llrint(double __x) {
  return __builtin_rint(__x);
}

// The double result of __builtin_round is converted to long long on return.
long long int llround(double __x) {
  return __builtin_round(__x);
}

double log(double __x) {
  return __ocml_log_f64(__x);
}

double log10(double __x) {
  return __ocml_log10_f64(__x);
}

double log1p(double __x) {
  return __ocml_log1p_f64(__x);
}

double log2(double __x) {
  return __ocml_log2_f64(__x);
}

double logb(double __x) {
  return __ocml_logb_f64(__x);
}

// The double result of __builtin_rint is converted to long on return.
long int lrint(double __x) {
  return __builtin_rint(__x);
}

// The double result of __builtin_round is converted to long on return.
long int lround(double __x) {
  return __builtin_round(__x);
}
911
913double modf(double __x, double *__iptr) {
914 double __tmp;
915#ifdef __OPENMP_AMDGCN__
916#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
917#endif
918 double __r = __ocml_modf_f64(__x, (__PRIVATE_AS double *)&__tmp);
919 *__iptr = __tmp;
920
921 return __r;
922}
923
// Builds a double with sign 0, all exponent bits set, the "quiet" bit
// (bit 51) set, and the low 51 bits taken from __make_mantissa(__tagp).
//
// Two implementations are selected by _WIN32:
//  * non-Windows: the fields are written through a bit-field overlay;
//  * Windows: the same bit pattern is assembled in a uint64_t.
//    NOTE(review): presumably needed because mixed-width bit-fields are laid
//    out differently there; confirm.
double nan(const char *__tagp) {
#if !_WIN32
  union {
    double val;
    struct ieee_double {
      uint64_t mantissa : 51;
      uint32_t quiet : 1;
      uint32_t exponent : 11;
      uint32_t sign : 1;
    } bits;
  } __tmp;
  // The bit-field view must cover exactly the same bytes as the double.
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
#else
  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
  // Keep only the 51 payload bits, matching the truncation the bit-field
  // assignment performs on the other path (and leaving the sign bit clear).
  uint64_t __val = __make_mantissa(__tagp) & ((1ull << 51) - 1ull);
  // Set the quiet bit (51) and the 11 exponent bits (52..62). The constant
  // must be 64 bits wide: shifting a plain int by 51 is undefined.
  __val |= 0xFFFull << 51;
  // Copy the bits instead of dereferencing a cast pointer: no aliasing
  // violation, and valid in both C and C++.
  double __res;
  __builtin_memcpy(&__res, &__val, sizeof(__res));
  return __res;
#endif
}
951
// Rounding to an integral value via the compiler builtin.
double nearbyint(double __x) {
  return __builtin_nearbyint(__x);
}

// Forwarded unchanged to OCML's nextafter.
double nextafter(double __x, double __y)
{
  return __ocml_nextafter_f64(__x, __y);
}
959
961double norm(int __dim,
962 const double *__a) { // TODO: placeholder until OCML adds support.
963 double __r = 0;
964 while (__dim--) {
965 __r += __a[0] * __a[0];
966 ++__a;
967 }
968
969 return __builtin_sqrt(__r);
970}
971
// Double-precision math entry points, norm3d through remainder, each
// forwarded unchanged to the OCML routine named in its body.

double norm3d(double __x, double __y, double __z)
{
  return __ocml_len3_f64(__x, __y, __z);
}

double norm4d(double __x, double __y, double __z, double __w)
{
  return __ocml_len4_f64(__x, __y, __z, __w);
}

double normcdf(double __x) {
  return __ocml_ncdf_f64(__x);
}

double normcdfinv(double __x) {
  return __ocml_ncdfinv_f64(__x);
}

double pow(double __x, double __y) {
  return __ocml_pow_f64(__x, __y);
}

// Integer-exponent power, forwarded to OCML's pown.
double powi(double __x, int __y) {
  return __ocml_pown_f64(__x, __y);
}

double rcbrt(double __x) {
  return __ocml_rcbrt_f64(__x);
}

double remainder(double __x, double __y)
{
  return __ocml_remainder_f64(__x, __y);
}
1001
1003double remquo(double __x, double __y, int *__quo) {
1004 int __tmp;
1005#ifdef __OPENMP_AMDGCN__
1006#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1007#endif
1008 double __r = __ocml_remquo_f64(__x, __y, (__PRIVATE_AS int *)&__tmp);
1009 *__quo = __tmp;
1010
1011 return __r;
1012}
1013
// Forwarded unchanged to OCML's rhypot.
double rhypot(double __x, double __y) {
  return __ocml_rhypot_f64(__x, __y);
}

// Rounding to an integral value via the compiler builtin.
double rint(double __x) {
  return __builtin_rint(__x);
}
1019
// Passes the sum of squares of the first __dim elements of __a (accumulated
// in index order) to __ocml_rsqrt_f64 and returns the result.
//   __dim  number of elements to read; zero or negative reads nothing, so
//          OCML receives 0.
//   __a    array with at least __dim readable elements.
// The loop is bounded by `__i < __dim` rather than counting __dim down to
// zero, so a negative __dim can no longer walk off the array.
// TODO: placeholder until OCML adds support.
double rnorm(int __dim, const double *__a) {
  double __r = 0;
  for (int __i = 0; __i < __dim; ++__i)
    __r += __a[__i] * __a[__i];

  return __ocml_rsqrt_f64(__r);
}
1031
// Double-precision math entry points, rnorm3d through rsqrt. Each one
// forwards its argument(s) unchanged to the OCML routine or compiler builtin
// named in its body.

double rnorm3d(double __x, double __y, double __z)
{
  return __ocml_rlen3_f64(__x, __y, __z);
}

double rnorm4d(double __x, double __y, double __z, double __w)
{
  return __ocml_rlen4_f64(__x, __y, __z, __w);
}

double round(double __x) {
  return __builtin_round(__x);
}

double rsqrt(double __x) {
  return __ocml_rsqrt_f64(__x);
}
1047
// Scales __x by 2^__n through __builtin_ldexp. The long exponent is first
// clamped to [INT_MIN, INT_MAX] so the narrowing cast to int cannot change
// its value.
double scalbln(double __x, long int __n) {
  const long int __clamped =
      (__n > INT_MAX) ? (long int)INT_MAX
                      : ((__n < INT_MIN) ? (long int)INT_MIN : __n);
  return __builtin_ldexp(__x, (int)__clamped);
}
1057double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); }
1058
1060__RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); }
1061
1063double sin(double __x) { return __ocml_sin_f64(__x); }
1064
1066void sincos(double __x, double *__sinptr, double *__cosptr) {
1067 double __tmp;
1068#ifdef __OPENMP_AMDGCN__
1069#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1070#endif
1071 *__sinptr = __ocml_sincos_f64(__x, (__PRIVATE_AS double *)&__tmp);
1072 *__cosptr = __tmp;
1073}
1074
1076void sincospi(double __x, double *__sinptr, double *__cosptr) {
1077 double __tmp;
1078#ifdef __OPENMP_AMDGCN__
1079#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1080#endif
1081 *__sinptr = __ocml_sincospi_f64(__x, (__PRIVATE_AS double *)&__tmp);
1082 *__cosptr = __tmp;
1083}
1084
// Double-precision math entry points, sinh through y1. Each one forwards its
// argument unchanged to the OCML routine or compiler builtin named in its
// body.

double sinh(double __x) {
  return __ocml_sinh_f64(__x);
}

double sinpi(double __x) {
  return __ocml_sinpi_f64(__x);
}

double sqrt(double __x) {
  return __builtin_sqrt(__x);
}

double tan(double __x) {
  return __ocml_tan_f64(__x);
}

double tanh(double __x) {
  return __ocml_tanh_f64(__x);
}

double tgamma(double __x) {
  return __ocml_tgamma_f64(__x);
}

double trunc(double __x) {
  return __builtin_trunc(__x);
}

double y0(double __x) {
  return __ocml_y0_f64(__x);
}

double y1(double __x) {
  return __ocml_y1_f64(__x);
}
1111
// Computes the order-__n value from y0 and y1 with the three-term
// recurrence  next = (2 * k / x) * curr - prev,  applied for k = 1 .. __n-1.
// Orders 0 and 1 are answered directly by y0 / y1.
// NOTE(review): for __n < 0 the loop body never runs and y1(__x) is
// returned; confirm whether negative orders are expected by callers.
// TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if it'd be
// beneficial in this case. Placeholder until OCML adds support.
double yn(int __n, double __x) {
  switch (__n) {
  case 0:
    return y0(__x);
  case 1:
    return y1(__x);
  default:
    break;
  }

  double __prev = y0(__x);
  double __curr = y1(__x);
  int __k = 1;
  while (__k < __n) {
    const double __next = (2 * __k) / __x * __curr - __prev;
    __prev = __curr;
    __curr = __next;
    ++__k;
  }

  return __curr;
}
1133
1134// BEGIN INTRINSICS
// Double-precision addition with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the _rd/_rn/_ru/_rz variants forward to
// the OCML rtn/rte/rtp/rtz routines; otherwise only _rn exists and it is a
// plain '+'.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
double __dadd_rd(double __x, double __y) { return __ocml_add_rtn_f64(__x, __y); }
double __dadd_rn(double __x, double __y) { return __ocml_add_rte_f64(__x, __y); }
double __dadd_ru(double __x, double __y) { return __ocml_add_rtp_f64(__x, __y); }
double __dadd_rz(double __x, double __y) { return __ocml_add_rtz_f64(__x, __y); }
#else
double __dadd_rn(double __x, double __y) {
  return __x + __y;
}
#endif
1156
// Double-precision division with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the variants forward to the OCML
// rtn/rte/rtp/rtz routines; otherwise only _rn exists and it is a plain '/'.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
double __ddiv_rd(double __x, double __y) { return __ocml_div_rtn_f64(__x, __y); }
double __ddiv_rn(double __x, double __y) { return __ocml_div_rte_f64(__x, __y); }
double __ddiv_ru(double __x, double __y) { return __ocml_div_rtp_f64(__x, __y); }
double __ddiv_rz(double __x, double __y) { return __ocml_div_rtz_f64(__x, __y); }
#else
double __ddiv_rn(double __x, double __y) {
  return __x / __y;
}
#endif
1178
// Double-precision multiplication with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the variants forward to the OCML
// rtn/rte/rtp/rtz routines; otherwise only _rn exists and it is a plain '*'.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
double __dmul_rd(double __x, double __y) { return __ocml_mul_rtn_f64(__x, __y); }
double __dmul_rn(double __x, double __y) { return __ocml_mul_rte_f64(__x, __y); }
double __dmul_ru(double __x, double __y) { return __ocml_mul_rtp_f64(__x, __y); }
double __dmul_rz(double __x, double __y) { return __ocml_mul_rtz_f64(__x, __y); }
#else
double __dmul_rn(double __x, double __y) {
  return __x * __y;
}
#endif
1200
// Double-precision reciprocal (1 / __x) with an explicit rounding-mode
// suffix. With OCML_BASIC_ROUNDED_OPERATIONS the variants call the OCML
// rounded division routines with a numerator of 1.0; otherwise only _rn
// exists and it is a plain division.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
double __drcp_rd(double __x) {
  return __ocml_div_rtn_f64(1.0, __x);
}
double __drcp_rn(double __x) {
  return __ocml_div_rte_f64(1.0, __x);
}
double __drcp_ru(double __x) {
  return __ocml_div_rtp_f64(1.0, __x);
}
double __drcp_rz(double __x) {
  return __ocml_div_rtz_f64(1.0, __x);
}
#else
double __drcp_rn(double __x) {
  return 1.0 / __x;
}
#endif
1214
1215#if defined OCML_BASIC_ROUNDED_OPERATIONS
1217double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
1219double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
1221double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
1223double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
1224#else
1226double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); }
1227#endif
1228
// Double-precision subtraction with an explicit rounding-mode suffix.
// With OCML_BASIC_ROUNDED_OPERATIONS the variants forward to the OCML
// rtn/rte/rtp/rtz routines; otherwise only _rn exists and it is a plain '-'.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
double __dsub_rd(double __x, double __y) { return __ocml_sub_rtn_f64(__x, __y); }
double __dsub_rn(double __x, double __y) { return __ocml_sub_rte_f64(__x, __y); }
double __dsub_ru(double __x, double __y) { return __ocml_sub_rtp_f64(__x, __y); }
double __dsub_rz(double __x, double __y) { return __ocml_sub_rtz_f64(__x, __y); }
#else
double __dsub_rn(double __x, double __y) {
  return __x - __y;
}
#endif
1250
1251#if defined OCML_BASIC_ROUNDED_OPERATIONS
1253double __fma_rd(double __x, double __y, double __z) {
1254 return __ocml_fma_rtn_f64(__x, __y, __z);
1255}
1257double __fma_rn(double __x, double __y, double __z) {
1258 return __ocml_fma_rte_f64(__x, __y, __z);
1259}
1261double __fma_ru(double __x, double __y, double __z) {
1262 return __ocml_fma_rtp_f64(__x, __y, __z);
1263}
1265double __fma_rz(double __x, double __y, double __z) {
1266 return __ocml_fma_rtz_f64(__x, __y, __z);
1267}
1268#else
1270double __fma_rn(double __x, double __y, double __z) {
1271 return __builtin_fma(__x, __y, __z);
1272}
1273#endif
1274// END INTRINSICS
1275// END DOUBLE
1276
1277// C only macros
1278#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1279#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1280#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1281#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1282#define signbit(__x) \
1283 _Generic((__x), float : __signbitf, double : __signbit)(__x)
1284#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1285
1286#if defined(__cplusplus)
1287template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
1288 return (__arg1 < __arg2) ? __arg1 : __arg2;
1289}
1290
1291template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
1292 return (__arg1 > __arg2) ? __arg1 : __arg2;
1293}
1294
1295__DEVICE__ int min(int __arg1, int __arg2) {
1296 return (__arg1 < __arg2) ? __arg1 : __arg2;
1297}
1298__DEVICE__ int max(int __arg1, int __arg2) {
1299 return (__arg1 > __arg2) ? __arg1 : __arg2;
1300}
1301
1303float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); }
1304
1306double max(double __x, double __y) { return __builtin_fmax(__x, __y); }
1307
1309float min(float __x, float __y) { return __builtin_fminf(__x, __y); }
1310
1312double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
1313
// Define host min/max functions.
#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) &&                  \
    !defined(__HIP_NO_HOST_MIN_MAX_IN_GLOBAL_NAMESPACE__)

// TODO: make this default to 1 once existing HIP apps have adopted this change.
#ifndef __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__
#define __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__ 0
#endif

#ifndef __HIP_DEFINE_MIXED_HOST_MIN_MAX__
#define __HIP_DEFINE_MIXED_HOST_MIN_MAX__ 0
#endif

// Save any user definition of DEFINE_MIN_MAX_FUNCTIONS. It is pushed exactly
// once so that the single pop_macro at the end of this section leaves the
// macro stack balanced.
#pragma push_macro("DEFINE_MIN_MAX_FUNCTIONS")
// Emits a host-side min/max overload pair taking (type1, type2) and returning
// ret_type. The comparison is done on the unconverted arguments' common type;
// the selected argument is then converted to ret_type on return.
#define DEFINE_MIN_MAX_FUNCTIONS(ret_type, type1, type2)                       \
  inline ret_type min(const type1 __a, const type2 __b) {                      \
    return (__a < __b) ? __a : __b;                                            \
  }                                                                            \
  inline ret_type max(const type1 __a, const type2 __b) {                      \
    return (__a > __b) ? __a : __b;                                            \
  }

// Define min and max functions for same type comparisons
DEFINE_MIN_MAX_FUNCTIONS(int, int, int)

#if __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__
DEFINE_MIN_MAX_FUNCTIONS(unsigned int, unsigned int, unsigned int)
DEFINE_MIN_MAX_FUNCTIONS(long, long, long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long, unsigned long, unsigned long)
DEFINE_MIN_MAX_FUNCTIONS(long long, long long, long long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, unsigned long long,
                         unsigned long long)
#endif // if __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__

// The host min/max functions below accept mixed signed/unsigned integer
// parameters and perform unsigned comparisons, which may produce unexpected
// results if a signed integer was passed unintentionally. To avoid this
// happening silently, these overloaded functions are not defined by default.
// However, for compatibility with CUDA, they will be defined if users define
// __HIP_DEFINE_MIXED_HOST_MIN_MAX__.
#if __HIP_DEFINE_MIXED_HOST_MIN_MAX__
DEFINE_MIN_MAX_FUNCTIONS(unsigned int, int, unsigned int)
DEFINE_MIN_MAX_FUNCTIONS(unsigned int, unsigned int, int)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long, long, unsigned long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long, unsigned long, long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, long long, unsigned long long)
DEFINE_MIN_MAX_FUNCTIONS(unsigned long long, unsigned long long, long long)
#endif // if __HIP_DEFINE_MIXED_HOST_MIN_MAX__

// Floating-point comparisons using built-in functions
#if __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__
inline float min(float const __a, float const __b) {
  return __builtin_fminf(__a, __b);
}
inline double min(double const __a, double const __b) {
  return __builtin_fmin(__a, __b);
}
inline double min(float const __a, double const __b) {
  return __builtin_fmin(__a, __b);
}
inline double min(double const __a, float const __b) {
  return __builtin_fmin(__a, __b);
}

inline float max(float const __a, float const __b) {
  return __builtin_fmaxf(__a, __b);
}
inline double max(double const __a, double const __b) {
  return __builtin_fmax(__a, __b);
}
inline double max(float const __a, double const __b) {
  return __builtin_fmax(__a, __b);
}
inline double max(double const __a, float const __b) {
  return __builtin_fmax(__a, __b);
}
#endif // if __HIP_DEFINE_EXTENDED_HOST_MIN_MAX__

// Restore whatever DEFINE_MIN_MAX_FUNCTIONS was before this section.
#pragma pop_macro("DEFINE_MIN_MAX_FUNCTIONS")

#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) &&
       // !defined(__HIP_NO_HOST_MIN_MAX_IN_GLOBAL_NAMESPACE__)
1397#endif
1398
1399#pragma pop_macro("__DEVICE__")
1400#pragma pop_macro("__PRIVATE_AS")
1401#pragma pop_macro("__RETURN_TYPE")
1402#pragma pop_macro("__FAST_OR_SLOW")
1403
1404#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS
1405#endif // __CLANG_HIP_MATH_H__
__DEVICE__ long long abs(long long __n)
__DEVICE__ float __fsqrt_rd(float __a)
__DEVICE__ float __fdiv_rd(float __a, float __b)
__DEVICE__ double __dsub_ru(double __a, double __b)
__DEVICE__ double __drcp_ru(double __a)
__DEVICE__ float __frcp_rz(float __a)
__DEVICE__ float __fmul_ru(float __a, float __b)
__DEVICE__ double __dsub_rd(double __a, double __b)
__DEVICE__ float __frcp_ru(float __a)
__DEVICE__ float __frcp_rd(float __a)
__DEVICE__ double __dmul_ru(double __a, double __b)
__DEVICE__ float __fmaf_ru(float __a, float __b, float __c)
__DEVICE__ double __fma_rz(double __a, double __b, double __c)
__DEVICE__ double __fma_rd(double __a, double __b, double __c)
__DEVICE__ double __dmul_rd(double __a, double __b)
__DEVICE__ double __ddiv_ru(double __a, double __b)
__DEVICE__ double __ddiv_rd(double __a, double __b)
__DEVICE__ double __dadd_ru(double __a, double __b)
__DEVICE__ float __fmul_rd(float __a, float __b)
__DEVICE__ float __fsub_rd(float __a, float __b)
__DEVICE__ float __fsub_rz(float __a, float __b)
__DEVICE__ double __fma_ru(double __a, double __b, double __c)
__DEVICE__ double __dsqrt_ru(double __a)
__DEVICE__ float __fsqrt_rz(float __a)
__DEVICE__ double __dsub_rz(double __a, double __b)
__DEVICE__ float __fadd_rd(float __a, float __b)
__DEVICE__ float __fmul_rz(float __a, float __b)
__DEVICE__ float __fadd_rz(float __a, float __b)
__DEVICE__ double __dsqrt_rd(double __a)
__DEVICE__ float __fmaf_rd(float __a, float __b, float __c)
__DEVICE__ double __dadd_rd(double __a, double __b)
__DEVICE__ double __dsqrt_rz(double __a)
__DEVICE__ double __drcp_rd(double __a)
__DEVICE__ float __fdiv_rz(float __a, float __b)
__DEVICE__ float __fmaf_rz(float __a, float __b, float __c)
__DEVICE__ double __drcp_rz(double __a)
__DEVICE__ float __fsub_ru(float __a, float __b)
__DEVICE__ double __dmul_rz(double __a, double __b)
__DEVICE__ float __fsqrt_ru(float __a)
__DEVICE__ float __fadd_ru(float __a, float __b)
__DEVICE__ float __fdiv_ru(float __a, float __b)
__DEVICE__ double __ddiv_rz(double __a, double __b)
__DEVICE__ double __dadd_rz(double __a, double __b)
__DEVICE__ long labs(long __a)
__DEVICE__ long long llabs(long long __a)
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
__device__ double __ocml_sincos_f64(double, __PRIVATE_AS double *)
__device__ double __ocml_remquo_f64(double, double, __PRIVATE_AS int *)
__device__ double __ocml_i0_f64(double)
__device__ double __ocml_sincospi_f64(double, __PRIVATE_AS double *)
__device__ float __ocml_j1_f32(float)
__device__ double __ocml_modf_f64(double, __PRIVATE_AS double *)
__device__ float __ocml_cospi_f32(float)
__device__ float __ocml_i0_f32(float)
__device__ double __ocml_lgamma_f64(double)
__device__ __2f16 b
__device__ float __ocml_sincos_f32(float, __PRIVATE_AS float *)
__device__ float __ocml_remquo_f32(float, float, __PRIVATE_AS int *)
__device__ float __ocml_y0_f32(float)
__device__ float __ocml_i1_f32(float)
__device__ float __ocml_sincospi_f32(float, __PRIVATE_AS float *)
__device__ float __ocml_lgamma_f32(float)
__device__ double __ocml_sinpi_f64(double)
__device__ double __ocml_cospi_f64(double)
__device__ double __ocml_tgamma_f64(double)
__device__ float __ocml_tan_f32(float)
__device__ float __ocml_tgamma_f32(float)
__device__ float __ocml_sinpi_f32(float)
__device__ double __ocml_j1_f64(double)
__device__ double __ocml_y1_f64(double)
__device__ double __ocml_j0_f64(double)
__device__ float __ocml_cos_f32(float)
__device__ float __ocml_y1_f32(float)
__device__ float __ocml_j0_f32(float)
__device__ double __ocml_cos_f64(double)
__device__ double __ocml_i1_f64(double)
__device__ double __ocml_sin_f64(double)
__device__ float __ocml_sin_f32(float)
__device__ float __ocml_modf_f32(float, __PRIVATE_AS float *)
__device__ float __ocml_native_sin_f32(float)
__device__ float __ocml_native_cos_f32(float)
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
__device__ double __ocml_y0_f64(double)
__device__ double __ocml_tan_f64(double)
__DEVICE__ double __dsub_rn(double __x, double __y)
__DEVICE__ __RETURN_TYPE __isinff(float __x)
__DEVICE__ float sinpif(float __x)
__DEVICE__ float tanf(float __x)
__DEVICE__ float log2f(float __x)
__DEVICE__ float y0f(float __x)
__DEVICE__ float tanhf(float __x)
__DEVICE__ float coshf(float __x)
__DEVICE__ float log10f(float __x)
__DEVICE__ float j1f(float __x)
__DEVICE__ __RETURN_TYPE __finitef(float __x)
__DEVICE__ float ldexpf(float __x, int __e)
__DEVICE__ long long int llroundf(float __x)
__DEVICE__ double rhypot(double __x, double __y)
__DEVICE__ double normcdfinv(double __x)
__DEVICE__ double norm3d(double __x, double __y, double __z)
__DEVICE__ float truncf(float __x)
__DEVICE__ float remainderf(float __x, float __y)
__DEVICE__ float fabsf(float __x)
__DEVICE__ float __fdiv_rn(float __x, float __y)
__DEVICE__ float scalbnf(float __x, int __n)
__DEVICE__ float cyl_bessel_i0f(float __x)
__DEVICE__ float nanf(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float lgammaf(float __x)
__DEVICE__ float cospif(float __x)
__DEVICE__ __RETURN_TYPE __signbitf(float __x)
__DEVICE__ double __dsqrt_rn(double __x)
__DEVICE__ float frexpf(float __x, int *__nptr)
__DEVICE__ float tgammaf(float __x)
__DEVICE__ float __sinf(float __x)
__DEVICE__ float erfinvf(float __x)
__DEVICE__ float modff(float __x, float *__iptr)
__DEVICE__ double erfinv(double __x)
__DEVICE__ float expm1f(float __x)
__DEVICE__ float sinhf(float __x)
__DEVICE__ double j0(double __x)
__DEVICE__ float y1f(float __x)
__DEVICE__ float acosf(float __x)
__DEVICE__ float fmaf(float __x, float __y, float __z)
__DEVICE__ float cyl_bessel_i1f(float __x)
__DEVICE__ float fmodf(float __x, float __y)
__DEVICE__ float log1pf(float __x)
__DEVICE__ float atan2f(float __x, float __y)
__DEVICE__ float copysignf(float __x, float __y)
__DEVICE__ double j1(double __x)
__DEVICE__ __RETURN_TYPE __isnan(double __x)
__DEVICE__ float rnormf(int __dim, const float *__a)
__DEVICE__ float rnorm4df(float __x, float __y, float __z, float __w)
__DEVICE__ float __cosf(float __x)
__DEVICE__ float erff(float __x)
__DEVICE__ float atanf(float __x)
__DEVICE__ float rnorm3df(float __x, float __y, float __z)
__DEVICE__ double norm(int __dim, const double *__a)
__DEVICE__ float erfcxf(float __x)
__DEVICE__ float erfcinvf(float __x)
__DEVICE__ float asinf(float __x)
__DEVICE__ long int lroundf(float __x)
__DEVICE__ float __fdividef(float __x, float __y)
__DEVICE__ float __frsqrt_rn(float __x)
__DEVICE__ float __log2f(float __x)
__DEVICE__ float norm4df(float __x, float __y, float __z, float __w)
__DEVICE__ __RETURN_TYPE __isnanf(float __x)
__DEVICE__ uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull)))
__DEVICE__ double jn(int __n, double __x)
__DEVICE__ float __exp10f(float __x)
__DEVICE__ float __frcp_rn(float __x)
#define __DEVICE__
__DEVICE__ float ynf(int __n, float __x)
__DEVICE__ float powf(float __x, float __y)
__DEVICE__ float __fsub_rn(float __x, float __y)
__DEVICE__ double __dadd_rn(double __x, double __y)
__DEVICE__ float sinf(float __x)
__DEVICE__ float __tanf(float __x)
__DEVICE__ float remquof(float __x, float __y, int *__quo)
__DEVICE__ double normcdf(double __x)
__DEVICE__ float __fsqrt_rn(float __x)
__DEVICE__ float hypotf(float __x, float __y)
__DEVICE__ float __fmaf_rn(float __x, float __y, float __z)
__DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float exp10f(float __x)
__DEVICE__ double y1(double __x)
#define __FAST_OR_SLOW(fast, slow)
__DEVICE__ float fmaxf(float __x, float __y)
__DEVICE__ float fminf(float __x, float __y)
__DEVICE__ double erfcinv(double __x)
__DEVICE__ double powi(double __x, int __y)
__DEVICE__ float logf(float __x)
__DEVICE__ float __fadd_rn(float __x, float __y)
__DEVICE__ double cospi(double __x)
__DEVICE__ double rsqrt(double __x)
__DEVICE__ float erfcf(float __x)
__DEVICE__ float atanhf(float __x)
#define __PRIVATE_AS
__DEVICE__ float asinhf(float __x)
__DEVICE__ float __expf(float __x)
__DEVICE__ double norm4d(double __x, double __y, double __z, double __w)
#define __RETURN_TYPE
__DEVICE__ float __logf(float __x)
__DEVICE__ double __fma_rn(double __x, double __y, double __z)
__DEVICE__ double nan(const char *__tagp)
__DEVICE__ double rnorm(int __dim, const double *__a)
__DEVICE__ float j0f(float __x)
__DEVICE__ float rsqrtf(float __x)
__DEVICE__ float jnf(int __n, float __x)
__DEVICE__ double sinpi(double __x)
__DEVICE__ float logbf(float __x)
__DEVICE__ double y0(double __x)
__DEVICE__ __RETURN_TYPE __finite(double __x)
__DEVICE__ void __sincosf(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ double yn(int __n, double __x)
__DEVICE__ float rhypotf(float __x, float __y)
__DEVICE__ float exp2f(float __x)
__DEVICE__ double cyl_bessel_i0(double __x)
__DEVICE__ float powif(float __x, int __y)
__DEVICE__ double __ddiv_rn(double __x, double __y)
__DEVICE__ double cyl_bessel_i1(double __x)
__DEVICE__ float ceilf(float __x)
__DEVICE__ double rcbrt(double __x)
__DEVICE__ double rnorm3d(double __x, double __y, double __z)
__DEVICE__ float normcdfinvf(float __x)
__DEVICE__ float norm3df(float __x, float __y, float __z)
__DEVICE__ void sincos(double __x, double *__sinptr, double *__cosptr)
#define __static_assert_type_size_equal(A, B)
__DEVICE__ __RETURN_TYPE __signbit(double __x)
__DEVICE__ float fdimf(float __x, float __y)
__DEVICE__ double __dmul_rn(double __x, double __y)
__DEVICE__ float normf(int __dim, const float *__a)
__DEVICE__ float nearbyintf(float __x)
__DEVICE__ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull)))
__DEVICE__ int ilogbf(float __x)
__DEVICE__ float floorf(float __x)
__DEVICE__ float sqrtf(float __x)
__DEVICE__ float roundf(float __x)
__DEVICE__ void sincospif(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ double __drcp_rn(double __x)
__DEVICE__ long int lrintf(float __x)
__DEVICE__ float acoshf(float __x)
__DEVICE__ double modf(double __x, double *__iptr)
__DEVICE__ float cosf(float __x)
__DEVICE__ float expf(float __x)
__DEVICE__ float nextafterf(float __x, float __y)
__DEVICE__ double rnorm4d(double __x, double __y, double __z, double __w)
__DEVICE__ long long int llrintf(float __x)
__DEVICE__ double erfcx(double __x)
__DEVICE__ float fdividef(float __x, float __y)
__DEVICE__ float rcbrtf(float __x)
__DEVICE__ double exp10(double __x)
__DEVICE__ float __log10f(float __x)
__DEVICE__ float cbrtf(float __x)
__DEVICE__ float __fmul_rn(float __x, float __y)
__DEVICE__ void sincospi(double __x, double *__sinptr, double *__cosptr)
__DEVICE__ float scalblnf(float __x, long int __n)
__DEVICE__ __RETURN_TYPE __isinf(double __x)
__DEVICE__ float rintf(float __x)
__DEVICE__ float normcdff(float __x)
__DEVICE__ uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float __saturatef(float __x)
__DEVICE__ float __powf(float __x, float __y)
#define __device__
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ uint32_t uint32_t __y
Definition: arm_acle.h:125
static __inline__ void int __a
Definition: emmintrin.h:4084
#define INT_MIN
Definition: limits.h:55
#define INT_MAX
Definition: limits.h:50
const FunctionProtoType * T
float __ovld __cnfn sign(float)
Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x = +0.0, or -1.0 if x < 0.
#define true
Definition: stdbool.h:25
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742