
Conversation

mrkajetanp
Contributor

Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16.
Avoids issues with incorrect saturation that arise when using .i32.f16 formats for the same conversions.
Fixes #154343.

Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16.
Avoids issues with incorrect saturation that arise when using .i32.f16
formats for the same conversions.

Signed-off-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
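
To illustrate the bug being fixed: with the old .i32.f16 lowering, the conversion happened at 32 bits and the result was then truncated, so out-of-range values wrapped instead of saturating. Below is a minimal reproducer in the spirit of #154343 (illustrative, not taken verbatim from the report; assumes arm_fp16.h and a target built with -march=armv8.2-a+fp16):

#include <arm_fp16.h>
#include <stdio.h>

int main(void) {
    // 65504.0 is the largest finite value representable in __fp16,
    // well outside the int16_t range [-32768, 32767].
    float16_t big = (float16_t)65504.0f;

    // FCVTZS (half -> 16-bit) saturates, so the expected result is 32767.
    // With the old .i32.f16 lowering, the value converted cleanly to the
    // i32 65504 and the subsequent trunc wrapped it to -32.
    int16_t r = vcvth_s16_f16(big);
    printf("vcvth_s16_f16(65504.0) = %d\n", r); // expected: 32767

    return 0;
}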
llvmbot added the clang (Clang issues not falling into any other category) and clang:codegen (IR generation bugs: mangling, exceptions, etc.) labels on Aug 29, 2025
@llvmbot
Member

llvmbot commented Aug 29, 2025

@llvm/pr-subscribers-clang

Author: Kajetan Puchalski (mrkajetanp)

Changes

Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16.
Avoids issues with incorrect saturation that arise when using .i32.f16 formats for the same conversions.
Fixes #154343.


Full diff: https://github.com/llvm/llvm-project/pull/156029.diff

3 Files Affected:

  • (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+2-3)
  • (modified) clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c (+4-6)
  • (modified) clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c (+20-30)
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 60413e7b18e85..f4baf8c7f0dde 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5847,7 +5847,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vcvtph_s16_f16:
   case NEON::BI__builtin_neon_vcvth_s16_f16: {
     unsigned Int;
-    llvm::Type* InTy = Int32Ty;
+    llvm::Type *InTy = Int16Ty;
     llvm::Type* FTy  = HalfTy;
     llvm::Type *Tys[2] = {InTy, FTy};
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -5874,8 +5874,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     case NEON::BI__builtin_neon_vcvth_s16_f16:
       Int = Intrinsic::aarch64_neon_fcvtzs; break;
     }
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
-    return Builder.CreateTrunc(Ops[0], Int16Ty);
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
   }
   case NEON::BI__builtin_neon_vcaleh_f16:
   case NEON::BI__builtin_neon_vcalth_f16:
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
index 9109626cea9ca..4c19d75df96e2 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
@@ -105,9 +105,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
 }
 
 // COMMON-LABEL: test_vcvth_s16_f16
-// COMMONIR:       [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// COMMONIR:       [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR:       ret i16 [[TRUNC]]
+// COMMONIR:       [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// COMMONIR:       ret i16 [[VCVT]]
 int16_t test_vcvth_s16_f16 (float16_t a) {
   return vcvth_s16_f16(a);
 }
@@ -127,9 +126,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
 }
 
 // COMMON-LABEL: test_vcvth_u16_f16
-// COMMONIR:       [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// COMMONIR:       [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR:       ret i16 [[TRUNC]]
+// COMMONIR:       [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// COMMONIR:       ret i16 [[VCVT]]
 uint16_t test_vcvth_u16_f16 (float16_t a) {
   return vcvth_u16_f16(a);
 }
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index 90ee74e459ebd..c3bbd5a43398a 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -97,9 +97,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
 }
 
 // CHECK-LABEL: test_vcvth_s16_f16
-// CHECK:  [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// CHECK:  [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK:  ret i16 [[TRUNC]]
+// CHECK:  [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// CHECK:  ret i16 [[VCVT]]
 int16_t test_vcvth_s16_f16 (float16_t a) {
   return vcvth_s16_f16(a);
 }
@@ -119,9 +118,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvth_u16_f16
-// CHECK:  [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// CHECK:  [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK:  ret i16 [[TRUNC]]
+// CHECK:  [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// CHECK:  ret i16 [[VCVT]]
 uint16_t test_vcvth_u16_f16 (float16_t a) {
   return vcvth_u16_f16(a);
 }
@@ -141,9 +139,8 @@ uint64_t test_vcvth_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtah_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtah_s16_f16 (float16_t a) {
   return vcvtah_s16_f16(a);
 }
@@ -163,9 +160,8 @@ int64_t test_vcvtah_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtah_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtau.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtah_u16_f16 (float16_t a) {
   return vcvtah_u16_f16(a);
 }
@@ -185,9 +181,8 @@ uint64_t test_vcvtah_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtmh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtmh_s16_f16 (float16_t a) {
   return vcvtmh_s16_f16(a);
 }
@@ -207,9 +202,8 @@ int64_t test_vcvtmh_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtmh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtmh_u16_f16 (float16_t a) {
   return vcvtmh_u16_f16(a);
 }
@@ -229,9 +223,8 @@ uint64_t test_vcvtmh_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtnh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtnh_s16_f16 (float16_t a) {
   return vcvtnh_s16_f16(a);
 }
@@ -251,9 +244,8 @@ int64_t test_vcvtnh_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtnh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtnh_u16_f16 (float16_t a) {
   return vcvtnh_u16_f16(a);
 }
@@ -273,9 +265,8 @@ uint64_t test_vcvtnh_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtph_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtph_s16_f16 (float16_t a) {
   return vcvtph_s16_f16(a);
 }
@@ -295,9 +286,8 @@ int64_t test_vcvtph_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtph_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtph_u16_f16 (float16_t a) {
   return vcvtph_u16_f16(a);
 }

@llvmbot
Member

llvmbot commented Aug 29, 2025

@llvm/pr-subscribers-clang-codegen

Author: Kajetan Puchalski (mrkajetanp)
@mrkajetanp
Contributor Author

Posting this ahead of time; it depends on #155851 being merged first.

@mrkajetanp
Contributor Author

This can now move ahead. I think the only remaining question is whether vcvth_s16_f16 should keep generating the llvm.aarch64.neon.fcvtzs.i16.f16 intrinsic or be changed to generate llvm.fptosi.sat.i16.f16 instead; both end up producing the same assembly. The same applies to fcvtzu/fptoui.
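
Whichever form is chosen, the source-level behaviour is the same; a sketch of the saturating semantics these intrinsics are expected to provide (values picked for illustration from the half-precision range, not taken from the PR itself):

#include <arm_fp16.h>
#include <assert.h>

void check_saturating_semantics(void) {
    // Both candidate lowerings must produce the same observable results:
    assert(vcvth_s16_f16((float16_t)65504.0f)  == 32767);  // clamps high
    assert(vcvth_s16_f16((float16_t)-65504.0f) == -32768); // clamps low
    assert(vcvth_u16_f16((float16_t)-1.0f)     == 0);      // unsigned clamps at 0
}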

Collaborator

@davemgreen left a comment


LGTM

(I don't think you need to auto-upgrade anything either.)

@tblah
Contributor

tblah commented Sep 2, 2025

I'm pressing merge because @mrkajetanp is away.

tblah merged commit b96fa9f into llvm:main on Sep 2, 2025
9 checks passed
Successfully merging this pull request may close these issues:

[AArch64] Incorrect overflow behaviour for f16 -> s16 scalar NEON conversion intrinsics