From 0d8b5147e37e53e98019eea8e0d0a3aed0e875c5 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Wed, 19 Feb 2025 12:09:17 -0600 Subject: [PATCH 1/2] Enable gcc-c-torture suite for hexagon ; Add HVX test for hexagon (#205) (#208) * Add HVX intrinsic test for Hexagon * Enable gcc torture suite for hexagon --- SingleSource/Regression/C/CMakeLists.txt | 2 +- .../C/gcc-c-torture/execute/CMakeLists.txt | 14 + .../gcc-c-torture/execute/ieee/CMakeLists.txt | 13 + SingleSource/UnitTests/Vector/CMakeLists.txt | 3 + .../UnitTests/Vector/HVX/CMakeLists.txt | 3 + SingleSource/UnitTests/Vector/HVX/QFloat.c | 325 ++++++++++++++++++ .../Vector/HVX/QFloat.reference_output | 45 +++ 7 files changed, 404 insertions(+), 1 deletion(-) create mode 100644 SingleSource/UnitTests/Vector/HVX/CMakeLists.txt create mode 100644 SingleSource/UnitTests/Vector/HVX/QFloat.c create mode 100644 SingleSource/UnitTests/Vector/HVX/QFloat.reference_output diff --git a/SingleSource/Regression/C/CMakeLists.txt b/SingleSource/Regression/C/CMakeLists.txt index 3f224e2768..6288d2c64d 100644 --- a/SingleSource/Regression/C/CMakeLists.txt +++ b/SingleSource/Regression/C/CMakeLists.txt @@ -1,7 +1,7 @@ if(ARCH MATCHES "x86" OR ARCH MATCHES "riscv" OR ARCH MATCHES "SystemZ" OR ARCH MATCHES "Mips" OR ARCH MATCHES "AArch64" OR ARCH MATCHES "ARM" OR - ARCH MATCHES "LoongArch") + ARCH MATCHES "LoongArch" OR ARCH MATCHES "Hexagon") add_subdirectory(gcc-c-torture) endif() diff --git a/SingleSource/Regression/C/gcc-c-torture/execute/CMakeLists.txt b/SingleSource/Regression/C/gcc-c-torture/execute/CMakeLists.txt index 034258119a..a2a25927cb 100644 --- a/SingleSource/Regression/C/gcc-c-torture/execute/CMakeLists.txt +++ b/SingleSource/Regression/C/gcc-c-torture/execute/CMakeLists.txt @@ -337,6 +337,20 @@ if(ARCH MATCHES "ARM") list(APPEND TestsToSkip ${ARMTestsToSkip}) endif() +if(ARCH MATCHES "Hexagon") + file(GLOB HexagonTestsToSkip CONFIGURE_DEPENDS + 990127-1.c + alloca-1.c + va-arg-22.c + # No support for __int128 on 
Hexagon + pr84748.c + built-in-setjmp.c + pr84521.c + ) + + list(APPEND TestsToSkip ${HexagonTestsToSkip}) +endif() + # Darwin Test Blacklist if(TARGET_OS STREQUAL "Darwin") file(GLOB DarwinTestsToSkip CONFIGURE_DEPENDS diff --git a/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeLists.txt b/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeLists.txt index e4ba9009e5..21f1081c81 100644 --- a/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeLists.txt +++ b/SingleSource/Regression/C/gcc-c-torture/execute/ieee/CMakeLists.txt @@ -19,6 +19,19 @@ file(GLOB UnsupportedTests ) list(APPEND TestsToSkip ${UnsupportedTests}) +if(ARCH MATCHES "Hexagon") + file(GLOB HexagonTestsToSkip + CONFIGURE_DEPENDS + fp-cmp-8.c + fp-cmp-8f.c + fp-cmp-8l.c + pr38016.c + pr50310.c + ) + list(APPEND TestsToSkip ${HexagonTestsToSkip}) +endif() + + ## ## Tests that require extra CFLAGS in Clang ## diff --git a/SingleSource/UnitTests/Vector/CMakeLists.txt b/SingleSource/UnitTests/Vector/CMakeLists.txt index e39c33a68c..3926d46076 100644 --- a/SingleSource/UnitTests/Vector/CMakeLists.txt +++ b/SingleSource/UnitTests/Vector/CMakeLists.txt @@ -53,4 +53,7 @@ if(CMAKE_C_COMPILER_ID STREQUAL "Clang") endif() endif() endif() +if(ARCH STREQUAL "Hexagon") + add_subdirectory(HVX) +endif() llvm_singlesource(PREFIX "Vector-") diff --git a/SingleSource/UnitTests/Vector/HVX/CMakeLists.txt b/SingleSource/UnitTests/Vector/HVX/CMakeLists.txt new file mode 100644 index 0000000000..0e5f574683 --- /dev/null +++ b/SingleSource/UnitTests/Vector/HVX/CMakeLists.txt @@ -0,0 +1,3 @@ +list(APPEND CFLAGS -mhvx) +list(APPEND CFLAGS -mv69) +llvm_singlesource(PREFIX "Vector-HVX-") diff --git a/SingleSource/UnitTests/Vector/HVX/QFloat.c b/SingleSource/UnitTests/Vector/HVX/QFloat.c new file mode 100644 index 0000000000..e11ba64eca --- /dev/null +++ b/SingleSource/UnitTests/Vector/HVX/QFloat.c @@ -0,0 +1,325 @@ + +#include +#include + +#include +#if !defined(__linux__) +#include +#endif + +union ui32f 
{ + int32_t i; + float f; +}; +union ui16f16 { + int16_t i; + __fp16 f16; +}; + +// 128 byte vectors +#define VSIZE_BYTES 128 +#define VSIZE_WORDS (VSIZE_BYTES / 4) + +void print_vector_words(HVX_Vector x) { + for (int i = 0; i < VSIZE_WORDS; i++) { + if (!(i % 8)) + printf("\n"); + printf("0x%08lx ", x[i]); + } + + printf("\n"); +} + +// +// Create vectors +// + +// create a vector of floats from a float +static __attribute__((always_inline)) HVX_Vector +create_sfv_from_sf(float value) { + union ui32f cvt; + cvt.f = value; + HVX_Vector tmp = Q6_V_vsplat_R(cvt.i); + return tmp; +} + +// create a vector of half floats from a float +static __attribute__((always_inline)) HVX_Vector +create_hfv_from_sf(float value) { + __fp16 hf = value; + union ui16f16 cvt; + cvt.f16 = hf; + HVX_Vector tmp = Q6_Vh_vsplat_R(cvt.i); + return tmp; +} + +// create a vector of qf32's from a float +static __attribute__((always_inline)) HVX_Vector +create_qf32v_from_sf(float value) { + HVX_Vector tmp = + Q6_Vqf32_vadd_Vqf32Vsf(Q6_V_vsplat_R(0), create_sfv_from_sf(value)); + return tmp; +} + +// create a vector of qf16's from a float +static __attribute__((always_inline)) HVX_Vector +create_qf16v_from_sf(float value) { + // create qf16 vector from hf + HVX_Vector tmp = + Q6_Vqf16_vadd_Vqf16Vhf(Q6_V_vsplat_R(0), create_hfv_from_sf(value)); + return tmp; +} + +// +// Conversion vectors +// + +// convert qf32 vector to float vector +static __attribute__((always_inline)) HVX_Vector +convert_qf32v_to_fltv(HVX_Vector vect) { + HVX_Vector tmp = Q6_Vsf_equals_Vqf32(vect); + return tmp; +} + +// convert qf16 vector to half float vector +static __attribute__((always_inline)) HVX_Vector +convert_qf16v_to_hfv(HVX_Vector vect) { + HVX_Vector tmp = Q6_Vhf_equals_Vqf16(vect); + return tmp; +} + +// +// Extraction routines +// + +// get lowest float from a vector of floats +static __attribute__((always_inline)) float +get_flt0_from_fltv(HVX_Vector vect) { + union ui32f cvt; + cvt.i = vect[0]; + return cvt.f; 
+} + +// get lowest float from a vector of qf32's +static __attribute__((always_inline)) float +get_flt0_from_qf32v(HVX_Vector vect) { + union ui32f cvt; + HVX_Vector tmp = convert_qf32v_to_fltv(vect); + cvt.i = tmp[0]; + return cvt.f; +} + +// get lowest float from a vector of halfs +static __attribute__((always_inline)) float +get_flt0_from_halfv(HVX_Vector vect) { + union ui16f16 cvt; + cvt.i = (vect[0] & 0xffff); + return (float)cvt.f16; +} + +// get lowest float from a vector of qf16's +static __attribute__((always_inline)) float +get_flt0_from_qf16v(HVX_Vector vect) { + return get_flt0_from_halfv(convert_qf16v_to_hfv(vect)); +} + +// get lowest float from a vector pair of qf32's +static __attribute__((always_inline)) float +get_flt0_from_qf32vp(HVX_VectorPair vect) { + union ui32f cvt; + HVX_Vector tmp = convert_qf32v_to_fltv(HEXAGON_HVX_GET_V0(vect)); + cvt.i = tmp[0]; + return cvt.f; +} + +int main(int argc, char **argv) { +#if !defined(__linux__) + SIM_ACQUIRE_HVX; + SIM_SET_HVX_DOUBLE_MODE; +#endif + + // create 2 sf vectors in IEEE-754 format + HVX_Vector v1 = create_sfv_from_sf(0.5); + HVX_Vector v2 = create_sfv_from_sf(0.25); + + // create 2 vectors in the qf32 format + HVX_Vector v3 = create_qf32v_from_sf(0.5); + HVX_Vector v4 = create_qf32v_from_sf(0.25); + + printf("\nAdd intrinsics with a qf32 result\n"); + + // add the IEEE vectors into a qf32 vector + HVX_Vector result1 = Q6_Vqf32_vadd_VsfVsf(v1, v2); + printf("The sum of flt %f and flt %f is %f\n", get_flt0_from_fltv(v1), + get_flt0_from_fltv(v2), get_flt0_from_qf32v(result1)); + + // add the qf32 vectors into a qf32 vector + HVX_Vector result2 = Q6_Vqf32_vadd_Vqf32Vqf32(v3, v4); + printf("The sum of qf32 %f and qf32 %f is %f\n", get_flt0_from_qf32v(v3), + get_flt0_from_qf32v(v4), get_flt0_from_qf32v(result2)); + + // add a qf32 vector and an IEEE vector into a qf32 vector + HVX_Vector result3 = Q6_Vqf32_vadd_Vqf32Vsf(v3, v2); + printf("The sum of qf32 %f and flt %f is %f\n", 
get_flt0_from_qf32v(v3), + get_flt0_from_fltv(v2), get_flt0_from_qf32v(result3)); + + printf("\nSubtract intrinsics with a qf32 result\n"); + + // subtract the IEEE vectors into a qf32 vector + HVX_Vector result4 = Q6_Vqf32_vsub_VsfVsf(v1, v2); + printf("The sum of flt %f and flt -%f is %f\n", get_flt0_from_fltv(v1), + get_flt0_from_fltv(v2), get_flt0_from_qf32v(result4)); + + // subtract the qf32 vectors into a qf32 vector + HVX_Vector result5 = Q6_Vqf32_vsub_Vqf32Vqf32(v3, v4); + printf("The sum of qf32 %f and qf32 -%f is %f\n", get_flt0_from_qf32v(v3), + get_flt0_from_qf32v(v4), get_flt0_from_qf32v(result5)); + + // subtract an IEEE vector from a qf32 vector into a qf32 vector + HVX_Vector result6 = Q6_Vqf32_vsub_Vqf32Vsf(v3, v2); + printf("The sum of qf32 %f and flt -%f is %f\n", get_flt0_from_qf32v(v3), + get_flt0_from_fltv(v2), get_flt0_from_qf32v(result6)); + + printf("\nMultiply intrinsics with a qf32 result\n"); + + // multiply the IEEE vectors into a qf32 vector + HVX_Vector result7 = Q6_Vqf32_vmpy_VsfVsf(v1, v2); + printf("The result of flt %f * flt %f is %f\n", get_flt0_from_fltv(v1), + get_flt0_from_fltv(v2), get_flt0_from_qf32v(result7)); + + // multiply the qf32 vectors into a qf32 vector + HVX_Vector result8 = Q6_Vqf32_vmpy_Vqf32Vqf32(v3, v4); + printf("The result of qf32 %f * qf32 %f is %f\n", get_flt0_from_qf32v(v3), + get_flt0_from_qf32v(v4), get_flt0_from_qf32v(result8)); + + // create 2 half vectors in the IEEE-754 format + HVX_Vector v5 = create_hfv_from_sf(0.5); + HVX_Vector v6 = create_hfv_from_sf(0.25); + + // create 2 vectors in the qf16 format + HVX_Vector v7 = create_qf16v_from_sf(0.5); + HVX_Vector v8 = create_qf16v_from_sf(0.25); + + printf("\nAdd intrinsics with a qf16 result\n"); + + // add the IEEE hf vectors into a qf16 vector + HVX_Vector result9 = Q6_Vqf16_vadd_VhfVhf(v5, v6); + printf("The sum of hf %.3f and hf %.3f is %.3f\n", + get_flt0_from_halfv(v5), get_flt0_from_halfv(v6), + get_flt0_from_qf16v(result9)); + + // add the qf16 vectors
into a qf16 vector + HVX_Vector result10 = Q6_Vqf16_vadd_Vqf16Vqf16(v7, v8); + printf("The sum of qf16 %.3f and qf16 %.3f is %.3f\n", + get_flt0_from_qf16v(v7), get_flt0_from_qf16v(v8), + get_flt0_from_qf16v(result10)); + + // add a qf16 vector and an IEEE hf vector into a qf16 vector + HVX_Vector result11 = Q6_Vqf16_vadd_Vqf16Vhf(v7, v6); + printf("The sum of qf16 %.3f and hf %.3f is %.3f\n", + get_flt0_from_qf16v(v7), get_flt0_from_halfv(v6), + get_flt0_from_qf16v(result11)); + + printf("\nSubtract intrinsics with a qf16 result\n"); + + // subtract the IEEE hf vectors into a qf16 vector + HVX_Vector result12 = Q6_Vqf16_vsub_VhfVhf(v5, v6); + printf("The sum of hf %.3f and hf -%.3f is %.3f\n", + get_flt0_from_halfv(v5), get_flt0_from_halfv(v6), + get_flt0_from_qf16v(result12)); + + // subtract the qf16 vectors into a qf16 vector + HVX_Vector result13 = Q6_Vqf16_vsub_Vqf16Vqf16(v7, v8); + printf("The sum of qf16 %.3f and qf16 -%.3f is %.3f\n", + get_flt0_from_qf16v(v7), get_flt0_from_qf16v(v8), + get_flt0_from_qf16v(result13)); + + // subtract an IEEE hf vector from a qf16 vector into a qf16 vector + HVX_Vector result14 = Q6_Vqf16_vsub_Vqf16Vhf(v7, v6); + printf("The sum of qf16 %.3f and hf -%.3f is %.3f\n", + get_flt0_from_qf16v(v7), get_flt0_from_halfv(v6), + get_flt0_from_qf16v(result14)); + + printf("\nMultiply intrinsics with a qf16 result\n"); + + // multiply the IEEE hf vectors into a qf16 vector + HVX_Vector result15 = Q6_Vqf16_vmpy_VhfVhf(v5, v6); + printf("The result of hf %.3f * hf %.3f is %.3f\n", + get_flt0_from_halfv(v5), get_flt0_from_halfv(v6), + get_flt0_from_qf16v(result15)); + + // multiply the qf16 vectors into a qf16 vector + HVX_Vector result16 = Q6_Vqf16_vmpy_Vqf16Vqf16(v7, v8); + printf("The result of qf16 %.3f * qf16 %.3f is %.3f\n", + get_flt0_from_qf16v(v7), get_flt0_from_qf16v(v8), + get_flt0_from_qf16v(result16)); + + // multiply the qf16 vector with a hf vector into a qf16 vector + HVX_Vector result17 = Q6_Vqf16_vmpy_Vqf16Vhf(v7, v6); + printf("The
result of qf16 %.3f * hf %.3f is %.3f\n", + get_flt0_from_qf16v(v7), get_flt0_from_halfv(v6), + get_flt0_from_qf16v(result17)); + + // multiply with pair results + + printf("\nMultiply hf/qf16 intrinsics with a qf32 result\n"); + + // multiply the IEEE hf vectors into a qf32 vector pair + HVX_VectorPair result18 = Q6_Wqf32_vmpy_VhfVhf(v5, v6); + printf("The result of hf %.3f * hf %.3f is %.3f\n", + get_flt0_from_halfv(v5), get_flt0_from_halfv(v6), + get_flt0_from_qf32vp(result18)); + + // multiply the qf16 vectors into a qf32 vector pair + HVX_VectorPair result19 = Q6_Wqf32_vmpy_Vqf16Vqf16(v7, v8); + printf("The result of qf16 %.3f * qf16 %.3f is %.3f\n", + get_flt0_from_qf16v(v7), get_flt0_from_qf16v(v8), + get_flt0_from_qf32vp(result19)); + + // multiply the qf16 vector with a hf vector into a qf32 vector pair + HVX_VectorPair result20 = Q6_Wqf32_vmpy_Vqf16Vhf(v7, v6); + printf("The result of qf16 %.3f * hf %.3f is %.3f\n", + get_flt0_from_qf16v(v7), get_flt0_from_halfv(v6), + get_flt0_from_qf32vp(result20)); + + // create_qf32v_from_qf16v(HVX_Vector qf16) + + printf("\nCompare instrinsics\n"); + + // compare 2 single float vectors + HVX_VectorPred Pred = Q6_Q_vcmp_gt_VsfVsf(v1, v2); + + // sum the bits + HVX_Vector sum = Q6_Vw_prefixsum_Q(Pred); + printf("The sum of the predicate bits from the sf compare is %ld\n", sum[31]); + + // compare 2 half float vectors + Pred = Q6_Q_vcmp_gt_VhfVhf(v5, v6); + + // sum the bits + sum = Q6_Vh_prefixsum_Q(Pred); + printf("The sum of the predicate bits from the hf compare is %ld\n", + sum[31] >> 16); + + printf("\nMin/Max instrinsics\n"); + + // get a vector that is the max of 2 sf vectors + HVX_Vector sfmax = Q6_Vsf_vmax_VsfVsf(v1, v2); + printf("The max value of sf v1 and sf v2 is %f\n", get_flt0_from_fltv(sfmax)); + + // get a vector that is the min of 2 sf vectors + HVX_Vector sfmin = Q6_Vsf_vmin_VsfVsf(v1, v2); + printf("The min value of sf v1 and sf v2 is %f\n", get_flt0_from_fltv(sfmin)); + + // get a vector that is the max
of 2 hf vectors + HVX_Vector hfmax = Q6_Vhf_vmax_VhfVhf(v5, v6); + printf("The max value of hf v5 and sf v6 is %f\n", + get_flt0_from_halfv(hfmax)); + + // get a vector that is the min of 2 hf vectors + HVX_Vector hfmin = Q6_Vhf_vmin_VhfVhf(v5, v6); + printf("The min value of hf v5 and sf v6 is %f\n", + get_flt0_from_halfv(hfmin)); + + return 0; +} diff --git a/SingleSource/UnitTests/Vector/HVX/QFloat.reference_output b/SingleSource/UnitTests/Vector/HVX/QFloat.reference_output new file mode 100644 index 0000000000..ca4e88fcd8 --- /dev/null +++ b/SingleSource/UnitTests/Vector/HVX/QFloat.reference_output @@ -0,0 +1,45 @@ + +Add intrinsics with a qf32 result +The sum of flt 0.500000 and flt 0.250000 is 0.750000 +The sum of qf32 0.500000 and qf32 0.250000 is 0.750000 +The sum of qf32 0.500000 and flt 0.250000 is 0.750000 + +Subtract intrinsics with a qf32 result +The sum of flt 0.500000 and flt -0.250000 is 0.250000 +The sum of qf32 0.500000 and qf32 -0.250000 is 0.250000 +The sum of qf32 0.500000 and flt -0.250000 is 0.250000 + +Multiply intrinsics with a qf32 result +The result of flt 0.500000 * flt 0.250000 is 0.125000 +The result of qf32 0.500000 * qf32 0.250000 is 0.125000 + +Add intrinsics with a qf16 result +The sum of hf 0.500 and hf 0.250 is 0.750 +The sum of qf16 0.500 and qf16 0.250 is 0.750 +The sum of qf16 0.500 and hf 0.250 is 0.750 + +Subtract intrinsics with a qf16 result +The sum of hf 0.500 and hf -0.250 is 0.250 +The sum of qf16 0.500 and qf16 -0.250 is 0.250 +The sum of qf16 0.500 and hf -0.250 is 0.250 + +Multiply intrinsics with a qf16 result +The result of hf 0.500 * hf 0.250 is 0.125 +The result of qf16 0.500 * qf16 0.250 is 0.125 +The result of qf16 0.500 * hf 0.250 is 0.125 + +Multiply hf/qf16 intrinsics with a qf32 result +The result of hf 0.500 * hf 0.250 is 0.125 +The result of qf16 0.500 * qf16 0.250 is 0.125 +The result of qf16 0.500 * hf 0.250 is 0.125 + +Compare instrinsics +The sum of the predicate bits from the sf compare is 128 +The 
sum of the predicate bits from the hf compare is 128 + +Min/Max instrinsics +The max value of sf v1 and sf v2 is 0.500000 +The min value of sf v1 and sf v2 is 0.250000 +The max value of hf v5 and sf v6 is 0.500000 +The min value of hf v5 and sf v6 is 0.250000 +exit 0 From acf9e8d53639848de444adc4e5f06bb3cc7dbcdd Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Mon, 24 Feb 2025 15:48:48 -0600 Subject: [PATCH 2/2] [Hexagon] Add v79 HVX cmake cache file (#210) --- cmake/caches/target-hexagon-v79-O2.cmake | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 cmake/caches/target-hexagon-v79-O2.cmake diff --git a/cmake/caches/target-hexagon-v79-O2.cmake b/cmake/caches/target-hexagon-v79-O2.cmake new file mode 100644 index 0000000000..cc8d0b4289 --- /dev/null +++ b/cmake/caches/target-hexagon-v79-O2.cmake @@ -0,0 +1,6 @@ +set(OPTFLAGS "${OPTFLAGS} -mv79 -mhvx -mhvx-ieee-fp") +set(OPTFLAGS "${OPTFLAGS} -O2") + +set(CMAKE_C_FLAGS_RELEASE "${OPTFLAGS}" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELEASE "${OPTFLAGS}" CACHE STRING "") +set(CMAKE_BUILD_TYPE "Release" CACHE STRING "")