Skip to content

Conversation

davemgreen
Copy link
Collaborator

Otherwise we can end up with more instructions, needing to emit both fcvtzu w0, s0 and fcvtzu s0, s0.

Otherwise we can end up with more instructions, needing to emit both
`fcvtzu w0, s0` and `fcvtzu s0, s0`.
@llvmbot
Copy link
Member

llvmbot commented Sep 2, 2025

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

Otherwise we can end up with more instructions, needing to emit both fcvtzu w0, s0 and fcvtzu s0, s0.


Full diff: https://github.com/llvm/llvm-project/pull/156407.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+10-6)
  • (modified) llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll (+6-12)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ce40e202f30f5..62b26b5239365 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6706,20 +6706,24 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
 // Some float -> int -> float conversion patterns for which we want to keep the
 // int values in FP registers using the corresponding NEON instructions to
 // avoid more costly int <-> fp register transfers.
+let HasOneUse = 1 in {
+def any_fp_to_sint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_sint $src0)>;
+def any_fp_to_uint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_uint $src0)>;
+}
 let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
-def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
+def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint_oneuse f64:$Rn)))),
           (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
-def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
+def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f32:$Rn)))),
           (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
-def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
+def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint_oneuse f64:$Rn)))),
           (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
-def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
+def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f32:$Rn)))),
           (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
 
 let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
-def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
+def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f16:$Rn)))),
           (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
-def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
+def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f16:$Rn)))),
           (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
index d4caf64294f45..1207de746894b 100644
--- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
@@ -122,9 +122,8 @@ entry:
 define i64 @testu_f64_multiuse(double %x) {
 ; CHECK-LABEL: testu_f64_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu d1, d0
 ; CHECK-NEXT:    fcvtzu x8, d0
-; CHECK-NEXT:    ucvtf d1, d1
+; CHECK-NEXT:    ucvtf d1, x8
 ; CHECK-NEXT:    fcmp d0, d1
 ; CHECK-NEXT:    csel x0, x8, xzr, eq
 ; CHECK-NEXT:    ret
@@ -139,9 +138,8 @@ entry:
 define i32 @testu_f32_multiuse(float %x) {
 ; CHECK-LABEL: testu_f32_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu s1, s0
 ; CHECK-NEXT:    fcvtzu w8, s0
-; CHECK-NEXT:    ucvtf s1, s1
+; CHECK-NEXT:    ucvtf s1, w8
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret
@@ -156,9 +154,8 @@ entry:
 define i32 @testu_f16_multiuse(half %x) {
 ; CHECK-LABEL: testu_f16_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu h1, h0
 ; CHECK-NEXT:    fcvtzu w8, h0
-; CHECK-NEXT:    ucvtf h1, h1
+; CHECK-NEXT:    ucvtf h1, w8
 ; CHECK-NEXT:    fcmp h0, h1
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret
@@ -173,9 +170,8 @@ entry:
 define i64 @tests_f64_multiuse(double %x) {
 ; CHECK-LABEL: tests_f64_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs d1, d0
 ; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    scvtf d1, d1
+; CHECK-NEXT:    scvtf d1, x8
 ; CHECK-NEXT:    fcmp d0, d1
 ; CHECK-NEXT:    csel x0, x8, xzr, eq
 ; CHECK-NEXT:    ret
@@ -190,9 +186,8 @@ entry:
 define i32 @tests_f32_multiuse(float %x) {
 ; CHECK-LABEL: tests_f32_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs s1, s0
 ; CHECK-NEXT:    fcvtzs w8, s0
-; CHECK-NEXT:    scvtf s1, s1
+; CHECK-NEXT:    scvtf s1, w8
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret
@@ -207,9 +202,8 @@ entry:
 define i32 @tests_f16_multiuse(half %x) {
 ; CHECK-LABEL: tests_f16_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs h1, h0
 ; CHECK-NEXT:    fcvtzs w8, h0
-; CHECK-NEXT:    scvtf h1, h1
+; CHECK-NEXT:    scvtf h1, w8
 ; CHECK-NEXT:    fcmp h0, h1
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret

@davemgreen davemgreen merged commit fba17cd into llvm:main Sep 3, 2025
11 checks passed
@davemgreen davemgreen deleted the gh-a64-fpcvtoneuse branch September 3, 2025 07:21
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants