[AArch64] Add patterns for sub from add negative immediates #156024
Conversation
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
Changes
sub 3 will be canonicalized in LLVM to add -3. This adds some tablegen patterns for add from a negative immediate so that we can still generate sub imm SVE instructions. The alternative is to add an isel combine, which seemed to work but created problems for mad and index patterns. This version does still need to add a lower-than-default Complexity to the ComplexPatterns to ensure that index was selected over sub-imm + index, as the default Complexity on ComplexPatterns is quite high.
Fixes #155928
Full diff: https://github.com/llvm/llvm-project/pull/156024.diff
8 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index bc786f415b554..42d1c1dd594f1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -246,9 +246,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
- template<MVT::SimpleValueType VT>
+ template<MVT::SimpleValueType VT, bool Negate>
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
- return SelectSVEAddSubImm(N, VT, Imm, Shift);
+ return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
}
template <MVT::SimpleValueType VT, bool Negate>
@@ -489,7 +489,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool SelectCMP_SWAP(SDNode *N);
- bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
+ bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift, bool Negate);
bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
bool Negate);
bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
@@ -4227,35 +4227,36 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
}
bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
- SDValue &Shift) {
+ SDValue &Shift, bool Negate) {
if (!isa<ConstantSDNode>(N))
return false;
SDLoc DL(N);
- uint64_t Val = cast<ConstantSDNode>(N)
- ->getAPIntValue()
- .trunc(VT.getFixedSizeInBits())
- .getZExtValue();
+ APInt Val =
+ cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
+
+ if (Negate)
+ Val = -Val;
switch (VT.SimpleTy) {
case MVT::i8:
// All immediates are supported.
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
return true;
case MVT::i16:
case MVT::i32:
case MVT::i64:
// Support 8bit unsigned immediates.
- if (Val <= 255) {
+ if ((Val & ~0xff) == 0) {
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
return true;
}
// Support 16bit unsigned immediates that are a multiple of 256.
- if (Val <= 65280 && Val % 256 == 0) {
+ if ((Val & ~0xff00) == 0) {
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
- Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
+ Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
return true;
}
break;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index eeb47b4d99750..58d8dcff4f34e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -657,6 +657,12 @@ let Predicates = [HasSVE_or_SME] in {
defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat, saddsat>;
defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
+ // Extra patterns for add(x, splat(-ve)) -> sub(x, +ve). There is no i8
+ // pattern as all i8 constants can be handled by an add.
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, add, ZPR16, i32, SVEAddSubNegImm16Pat, SUB_ZI_H>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, add, ZPR32, i32, SVEAddSubNegImm32Pat, SUB_ZI_S>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, add, ZPR64, i64, SVEAddSubNegImm64Pat, SUB_ZI_D>;
+
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", AArch64msb_m1, "MLS_ZPmZZ", /*isReverseInstr*/ 1>;
defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", AArch64mla_m1, "MLA_ZPZZZ", "MAD_ZPmZZ">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a3a7d0f74e1bc..7389f90457234 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -315,10 +315,16 @@ def addsub_imm8_opt_lsl_i16 : imm8_opt_lsl<16, "uint16_t", SVEAddSubImmOperand16
def addsub_imm8_opt_lsl_i32 : imm8_opt_lsl<32, "uint32_t", SVEAddSubImmOperand32>;
def addsub_imm8_opt_lsl_i64 : imm8_opt_lsl<64, "uint64_t", SVEAddSubImmOperand64>;
-def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8>", []>;
-def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", []>;
-def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
-def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
+let Complexity = 1 in {
+def SVEAddSubImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8, false>", []>;
+def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16, false>", []>;
+def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32, false>", []>;
+def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64, false>", []>;
+
+def SVEAddSubNegImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i8, true>", []>;
+def SVEAddSubNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16, true>", []>;
+def SVEAddSubNegImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32, true>", []>;
+def SVEAddSubNegImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64, true>", []>;
def SVEAddSubSSatNegImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, true>", []>;
def SVEAddSubSSatNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, true>", []>;
@@ -329,6 +335,7 @@ def SVEAddSubSSatPosImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MV
def SVEAddSubSSatPosImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, false>", []>;
def SVEAddSubSSatPosImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, false>", []>;
def SVEAddSubSSatPosImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, false>", []>;
+} // Complexity = 1
def SVECpyDupImm8Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
index fb494afa11de2..258e399018ba8 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
@@ -13,15 +13,15 @@ define void @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_
; CHECK-LABEL: fixed_bitselect_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x2]
-; CHECK-NEXT: add z1.s, z0.s, z1.s
-; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
-; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
-; CHECK-NEXT: orr z0.d, z1.d, z0.d
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
+; CHECK-NEXT: and z0.d, z0.d, z3.d
+; CHECK-NEXT: and z1.d, z2.d, z1.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x3]
; CHECK-NEXT: ret
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr
diff --git a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
index 433ddbd4a261b..cf2ae02c14b18 100644
--- a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
@@ -94,8 +94,8 @@ define <4 x i32> @v4i32_neg_immediates() #0 {
define <4 x i32> @v4i32_out_range_start() #0 {
; CHECK-LABEL: v4i32_out_range_start:
; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.s, #0, #1
-; CHECK-NEXT: add z0.s, z0.s, #16 // =0x10
+; CHECK-NEXT: mov w8, #16 // =0x10
+; CHECK-NEXT: index z0.s, w8, #1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
ret <4 x i32> <i32 16, i32 17, i32 18, i32 19>
diff --git a/llvm/test/CodeGen/AArch64/sve-int-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-imm.ll
index e34f4840f517c..985b7b9597705 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-imm.ll
@@ -229,8 +229,7 @@ define <vscale x 16 x i8> @addnve_i8_low(<vscale x 16 x i8> %a) {
define <vscale x 8 x i16> @addnve_i16_low(<vscale x 8 x i16> %a) {
; CHECK-LABEL: addnve_i16_low:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.h, #-30 // =0xffffffffffffffe2
-; CHECK-NEXT: add z0.h, z0.h, z1.h
+; CHECK-NEXT: sub z0.h, z0.h, #30 // =0x1e
; CHECK-NEXT: ret
%res = add <vscale x 8 x i16> %a, splat(i16 -30)
ret <vscale x 8 x i16> %res
@@ -248,8 +247,7 @@ define <vscale x 8 x i16> @addnve_i16_high(<vscale x 8 x i16> %a) {
define <vscale x 4 x i32> @addnve_i32_low(<vscale x 4 x i32> %a) {
; CHECK-LABEL: addnve_i32_low:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.s, #-30 // =0xffffffffffffffe2
-; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: sub z0.s, z0.s, #30 // =0x1e
; CHECK-NEXT: ret
%res = add <vscale x 4 x i32> %a, splat(i32 -30)
ret <vscale x 4 x i32> %res
@@ -258,8 +256,7 @@ define <vscale x 4 x i32> @addnve_i32_low(<vscale x 4 x i32> %a) {
define <vscale x 4 x i32> @addnve_i32_high(<vscale x 4 x i32> %a) {
; CHECK-LABEL: addnve_i32_high:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.s, #-1024 // =0xfffffffffffffc00
-; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: sub z0.s, z0.s, #1024 // =0x400
; CHECK-NEXT: ret
%res = add <vscale x 4 x i32> %a, splat(i32 -1024)
ret <vscale x 4 x i32> %res
@@ -268,8 +265,7 @@ define <vscale x 4 x i32> @addnve_i32_high(<vscale x 4 x i32> %a) {
define <vscale x 2 x i64> @addnve_i64_low(<vscale x 2 x i64> %a) {
; CHECK-LABEL: addnve_i64_low:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, #-30 // =0xffffffffffffffe2
-; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: sub z0.d, z0.d, #30 // =0x1e
; CHECK-NEXT: ret
%res = add <vscale x 2 x i64> %a, splat(i64 -30)
ret <vscale x 2 x i64> %res
@@ -278,8 +274,7 @@ define <vscale x 2 x i64> @addnve_i64_low(<vscale x 2 x i64> %a) {
define <vscale x 2 x i64> @addnve_i64_high(<vscale x 2 x i64> %a) {
; CHECK-LABEL: addnve_i64_high:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, #-1024 // =0xfffffffffffffc00
-; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: sub z0.d, z0.d, #1024 // =0x400
; CHECK-NEXT: ret
%res = add <vscale x 2 x i64> %a, splat(i64 -1024)
ret <vscale x 2 x i64> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index d29e43509dfe9..71396da004002 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -14,20 +14,21 @@ target triple = "aarch64"
define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_ptr) {
; CHECK-LABEL: fixed_bitselect_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldp q2, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ldp q5, q4, [x1]
; CHECK-NEXT: ldp q6, q7, [x2]
-; CHECK-NEXT: add z3.s, z1.s, z0.s
-; CHECK-NEXT: subr z1.s, z1.s, #0 // =0x0
-; CHECK-NEXT: add z0.s, z2.s, z0.s
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: mov z3.d, z1.d
+; CHECK-NEXT: sub z1.s, z1.s, #1 // =0x1
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
-; CHECK-NEXT: and z1.d, z1.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z7.d
-; CHECK-NEXT: and z0.d, z0.d, z6.d
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: orr z1.d, z3.d, z1.d
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
+; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
+; CHECK-NEXT: and z2.d, z2.d, z4.d
+; CHECK-NEXT: and z3.d, z3.d, z5.d
+; CHECK-NEXT: and z4.d, z0.d, z7.d
+; CHECK-NEXT: and z0.d, z1.d, z6.d
+; CHECK-NEXT: orr z1.d, z4.d, z2.d
+; CHECK-NEXT: orr z0.d, z0.d, z3.d
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index 3a6445dd1d99b..d226fc89c3381 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -69,9 +69,9 @@ define void @build_vector_0_dec3_v8i32(ptr %a) {
; CHECK-LABEL: build_vector_0_dec3_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #-3
-; CHECK-NEXT: mov z1.s, #-12 // =0xfffffffffffffff4
-; CHECK-NEXT: add z1.s, z0.s, z1.s
-; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: sub z0.s, z0.s, #12 // =0xc
+; CHECK-NEXT: str q0, [x0, #16]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8i32:
@@ -91,11 +91,10 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) {
; CHECK-LABEL: build_vector_minus2_dec32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-32 // =0xffffffffffffffe0
-; CHECK-NEXT: mov z1.d, #-66 // =0xffffffffffffffbe
-; CHECK-NEXT: mov z2.d, #-2 // =0xfffffffffffffffe
; CHECK-NEXT: index z0.d, #0, x8
-; CHECK-NEXT: add z1.d, z0.d, z1.d
-; CHECK-NEXT: add z0.d, z0.d, z2.d
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
+; CHECK-NEXT: sub z1.d, z1.d, #66 // =0x42
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed 4a11da9 to f3bddf8
FYI: I have some WIP work to improve handling of INDEX, specifically to allow finer control for emitting a specific variant.
def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, add, ZPR16, i32, SVEAddSubNegImm16Pat, SUB_ZI_H>;
def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, add, ZPR32, i32, SVEAddSubNegImm32Pat, SUB_ZI_S>;
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, add, ZPR64, i64, SVEAddSubNegImm64Pat, SUB_ZI_D>;
Any objections to moving these into sve_int_arith_imm0, similar to sve_int_arith_imm0_ssat, albeit this time with inv_op defaulting to null_frag?
Force-pushed f3bddf8 to 289ae65
sub 3 will be canonicalized in LLVM to add -3. This adds some tablegen patterns for add from a negative immediate so that we can still generate sub imm SVE instructions.
The alternative is to add an isel combine, which seemed to work but created problems for mad and index patterns. This version does still need to add a lower-than-default Complexity to the ComplexPatterns to ensure that index was selected over sub-imm + index, as the default Complexity on ComplexPatterns is quite high.
Fixes #155928
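For illustration, a minimal IR example of the resulting codegen change, mirroring the updated sve-int-imm.ll test above (the before/after assembly in the comments is taken from that test diff; compile with something like llc -mtriple=aarch64 -mattr=+sve):

define <vscale x 4 x i32> @addnve_i32_low(<vscale x 4 x i32> %a) {
  ; Before this patch: mov z1.s, #-30
  ;                    add z0.s, z0.s, z1.s
  ; With this patch:   sub z0.s, z0.s, #30
  %res = add <vscale x 4 x i32> %a, splat(i32 -30)
  ret <vscale x 4 x i32> %res
}

The lowered Complexity on the ComplexPatterns is what keeps index-based selection preferred where it applies: in the updated sve-index-const-step-vector.ll test, <i32 16, i32 17, i32 18, i32 19> is now selected as a single index z0.s, w8, #1 with the start value in a register, rather than an index followed by a separate add or sub of an immediate.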