-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[clang][AArch64] Use .i16.f16 intrinsic formats for vcvth*_[s|u]16_f16 #156029
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16. Avoids issues with incorrect saturation that arise when using .i32.f16 formats for the same conversions. Signed-off-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
@llvm/pr-subscribers-clang Author: Kajetan Puchalski (mrkajetanp) Changes: Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16. Full diff: https://github.com/llvm/llvm-project/pull/156029.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 60413e7b18e85..f4baf8c7f0dde 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5847,7 +5847,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvtph_s16_f16:
case NEON::BI__builtin_neon_vcvth_s16_f16: {
unsigned Int;
- llvm::Type* InTy = Int32Ty;
+ llvm::Type *InTy = Int16Ty;
llvm::Type* FTy = HalfTy;
llvm::Type *Tys[2] = {InTy, FTy};
Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -5874,8 +5874,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvth_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtzs; break;
}
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
}
case NEON::BI__builtin_neon_vcaleh_f16:
case NEON::BI__builtin_neon_vcalth_f16:
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
index 9109626cea9ca..4c19d75df96e2 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
@@ -105,9 +105,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
}
// COMMON-LABEL: test_vcvth_s16_f16
-// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR: ret i16 [[TRUNC]]
+// COMMONIR: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// COMMONIR: ret i16 [[VCVT]]
int16_t test_vcvth_s16_f16 (float16_t a) {
return vcvth_s16_f16(a);
}
@@ -127,9 +126,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
}
// COMMON-LABEL: test_vcvth_u16_f16
-// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR: ret i16 [[TRUNC]]
+// COMMONIR: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// COMMONIR: ret i16 [[VCVT]]
uint16_t test_vcvth_u16_f16 (float16_t a) {
return vcvth_u16_f16(a);
}
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index 90ee74e459ebd..c3bbd5a43398a 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -97,9 +97,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
}
// CHECK-LABEL: test_vcvth_s16_f16
-// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK: ret i16 [[TRUNC]]
+// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// CHECK: ret i16 [[VCVT]]
int16_t test_vcvth_s16_f16 (float16_t a) {
return vcvth_s16_f16(a);
}
@@ -119,9 +118,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvth_u16_f16
-// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK: ret i16 [[TRUNC]]
+// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// CHECK: ret i16 [[VCVT]]
uint16_t test_vcvth_u16_f16 (float16_t a) {
return vcvth_u16_f16(a);
}
@@ -141,9 +139,8 @@ uint64_t test_vcvth_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtah_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
int16_t test_vcvtah_s16_f16 (float16_t a) {
return vcvtah_s16_f16(a);
}
@@ -163,9 +160,8 @@ int64_t test_vcvtah_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtah_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtau.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
uint16_t test_vcvtah_u16_f16 (float16_t a) {
return vcvtah_u16_f16(a);
}
@@ -185,9 +181,8 @@ uint64_t test_vcvtah_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtmh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
int16_t test_vcvtmh_s16_f16 (float16_t a) {
return vcvtmh_s16_f16(a);
}
@@ -207,9 +202,8 @@ int64_t test_vcvtmh_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtmh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
uint16_t test_vcvtmh_u16_f16 (float16_t a) {
return vcvtmh_u16_f16(a);
}
@@ -229,9 +223,8 @@ uint64_t test_vcvtmh_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtnh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
int16_t test_vcvtnh_s16_f16 (float16_t a) {
return vcvtnh_s16_f16(a);
}
@@ -251,9 +244,8 @@ int64_t test_vcvtnh_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtnh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
uint16_t test_vcvtnh_u16_f16 (float16_t a) {
return vcvtnh_u16_f16(a);
}
@@ -273,9 +265,8 @@ uint64_t test_vcvtnh_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtph_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
int16_t test_vcvtph_s16_f16 (float16_t a) {
return vcvtph_s16_f16(a);
}
@@ -295,9 +286,8 @@ int64_t test_vcvtph_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtph_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
uint16_t test_vcvtph_u16_f16 (float16_t a) {
return vcvtph_u16_f16(a);
}
|
@llvm/pr-subscribers-clang-codegen Author: Kajetan Puchalski (mrkajetanp) Changes: Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16. Full diff: https://github.com/llvm/llvm-project/pull/156029.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 60413e7b18e85..f4baf8c7f0dde 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5847,7 +5847,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvtph_s16_f16:
case NEON::BI__builtin_neon_vcvth_s16_f16: {
unsigned Int;
- llvm::Type* InTy = Int32Ty;
+ llvm::Type *InTy = Int16Ty;
llvm::Type* FTy = HalfTy;
llvm::Type *Tys[2] = {InTy, FTy};
Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -5874,8 +5874,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvth_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtzs; break;
}
- Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
- return Builder.CreateTrunc(Ops[0], Int16Ty);
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
}
case NEON::BI__builtin_neon_vcaleh_f16:
case NEON::BI__builtin_neon_vcalth_f16:
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
index 9109626cea9ca..4c19d75df96e2 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
@@ -105,9 +105,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
}
// COMMON-LABEL: test_vcvth_s16_f16
-// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR: ret i16 [[TRUNC]]
+// COMMONIR: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// COMMONIR: ret i16 [[VCVT]]
int16_t test_vcvth_s16_f16 (float16_t a) {
return vcvth_s16_f16(a);
}
@@ -127,9 +126,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
}
// COMMON-LABEL: test_vcvth_u16_f16
-// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR: ret i16 [[TRUNC]]
+// COMMONIR: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// COMMONIR: ret i16 [[VCVT]]
uint16_t test_vcvth_u16_f16 (float16_t a) {
return vcvth_u16_f16(a);
}
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index 90ee74e459ebd..c3bbd5a43398a 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -97,9 +97,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
}
// CHECK-LABEL: test_vcvth_s16_f16
-// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK: ret i16 [[TRUNC]]
+// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// CHECK: ret i16 [[VCVT]]
int16_t test_vcvth_s16_f16 (float16_t a) {
return vcvth_s16_f16(a);
}
@@ -119,9 +118,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvth_u16_f16
-// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK: ret i16 [[TRUNC]]
+// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// CHECK: ret i16 [[VCVT]]
uint16_t test_vcvth_u16_f16 (float16_t a) {
return vcvth_u16_f16(a);
}
@@ -141,9 +139,8 @@ uint64_t test_vcvth_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtah_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
int16_t test_vcvtah_s16_f16 (float16_t a) {
return vcvtah_s16_f16(a);
}
@@ -163,9 +160,8 @@ int64_t test_vcvtah_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtah_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtau.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
uint16_t test_vcvtah_u16_f16 (float16_t a) {
return vcvtah_u16_f16(a);
}
@@ -185,9 +181,8 @@ uint64_t test_vcvtah_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtmh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
int16_t test_vcvtmh_s16_f16 (float16_t a) {
return vcvtmh_s16_f16(a);
}
@@ -207,9 +202,8 @@ int64_t test_vcvtmh_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtmh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
uint16_t test_vcvtmh_u16_f16 (float16_t a) {
return vcvtmh_u16_f16(a);
}
@@ -229,9 +223,8 @@ uint64_t test_vcvtmh_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtnh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
int16_t test_vcvtnh_s16_f16 (float16_t a) {
return vcvtnh_s16_f16(a);
}
@@ -251,9 +244,8 @@ int64_t test_vcvtnh_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtnh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
uint16_t test_vcvtnh_u16_f16 (float16_t a) {
return vcvtnh_u16_f16(a);
}
@@ -273,9 +265,8 @@ uint64_t test_vcvtnh_u64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtph_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
int16_t test_vcvtph_s16_f16 (float16_t a) {
return vcvtph_s16_f16(a);
}
@@ -295,9 +286,8 @@ int64_t test_vcvtph_s64_f16 (float16_t a) {
}
// CHECK-LABEL: test_vcvtph_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
uint16_t test_vcvtph_u16_f16 (float16_t a) {
return vcvtph_u16_f16(a);
}
|
Posting this ahead of time, it depends on #155851 being merged first. |
This can now move ahead. I think the only question is whether to keep |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
(I don't think you need to auto-upgrade anything either.)
I'm pressing merge because @mrkajetanp is away |
Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16.
Avoids issues with incorrect saturation that arise when using .i32.f16 formats for the same conversions.
Fixes #154343.