Skip to content

Commit ed7bcb2

Browse files
amehsan authored and
amehsan committed
[AArch64][SVE] Add patterns for some integer vector instructions
Add pattern matching for SVE vector instructions:
-- add, sub, and, or, xor instructions
-- sqadd, uqadd, sqsub, uqsub target-independent intrinsics
-- bic intrinsics
-- predicated add, sub, subr intrinsics

Patch Review: https://reviews.llvm.org/D69128
Patch authored by: dancgr (Danilo Carvalho Grael)
1 parent 2dad729 commit ed7bcb2

File tree

7 files changed

+595
-40
lines changed

7 files changed

+595
-40
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -815,10 +815,23 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
815815

816816
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
817817

818+
819+
class AdvSIMD_Pred2VectorArg_Intrinsic
820+
: Intrinsic<[llvm_anyvector_ty],
821+
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>],
822+
[IntrNoMem]>;
823+
824+
818825
//
819826
// Integer arithmetic
820827
//
821828

829+
def int_aarch64_sve_add : AdvSIMD_Pred2VectorArg_Intrinsic;
830+
def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic;
831+
def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic;
832+
833+
def int_aarch64_sve_bic : AdvSIMD_2VectorArg_Intrinsic;
834+
822835
def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
823836
def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
824837

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -183,6 +183,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
183183
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
184184
addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass);
185185
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
186+
187+
for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
188+
setOperationAction(ISD::SADDSAT, VT, Legal);
189+
setOperationAction(ISD::UADDSAT, VT, Legal);
190+
setOperationAction(ISD::SSUBSAT, VT, Legal);
191+
setOperationAction(ISD::USUBSAT, VT, Legal);
192+
}
186193
}
187194

188195
// Compute derived properties from the register classes

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -18,26 +18,26 @@ let Predicates = [HasSVE] in {
1818
def SETFFR : sve_int_setffr<"setffr">;
1919
def WRFFR : sve_int_wrffr<"wrffr">;
2020

21-
defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">;
22-
defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">;
23-
defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd">;
24-
defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd">;
25-
defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub">;
26-
defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub">;
27-
28-
defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and">;
29-
defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr">;
30-
defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor">;
31-
defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic">;
32-
33-
defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add">;
34-
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub">;
35-
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr">;
36-
37-
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr">;
38-
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor">;
39-
defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and">;
40-
defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic">;
21+
defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add>;
22+
defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>;
23+
defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>;
24+
defm UQADD_ZZZ : sve_int_bin_cons_arit_0<0b101, "uqadd", uaddsat>;
25+
defm SQSUB_ZZZ : sve_int_bin_cons_arit_0<0b110, "sqsub", ssubsat>;
26+
defm UQSUB_ZZZ : sve_int_bin_cons_arit_0<0b111, "uqsub", usubsat>;
27+
28+
defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>;
29+
defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>;
30+
defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>;
31+
defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", int_aarch64_sve_bic>;
32+
33+
defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", int_aarch64_sve_add>;
34+
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", int_aarch64_sve_sub>;
35+
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", int_aarch64_sve_subr>;
36+
37+
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", null_frag>;
38+
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", null_frag>;
39+
defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", null_frag>;
40+
defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", null_frag>;
4141

4242
defm ADD_ZI : sve_int_arith_imm0<0b000, "add">;
4343
defm SUB_ZI : sve_int_arith_imm0<0b001, "sub">;
@@ -73,14 +73,14 @@ let Predicates = [HasSVE] in {
7373
defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>;
7474

7575
defm MUL_ZI : sve_int_arith_imm2<"mul">;
76-
defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul">;
77-
defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh">;
78-
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh">;
76+
defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", null_frag>;
77+
defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", null_frag>;
78+
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", null_frag>;
7979

80-
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv">;
81-
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv">;
82-
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr">;
83-
defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr">;
80+
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", null_frag>;
81+
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", null_frag>;
82+
defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", null_frag>;
83+
defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", null_frag>;
8484

8585
defm SDOT_ZZZ : sve_intx_dot<0b0, "sdot", int_aarch64_sve_sdot>;
8686
defm UDOT_ZZZ : sve_intx_dot<0b1, "udot", int_aarch64_sve_udot>;
@@ -105,12 +105,12 @@ let Predicates = [HasSVE] in {
105105
defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
106106
defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;
107107

108-
defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax">;
109-
defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax">;
110-
defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin">;
111-
defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin">;
112-
defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd">;
113-
defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd">;
108+
defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", null_frag>;
109+
defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", null_frag>;
110+
defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", null_frag>;
111+
defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", null_frag>;
112+
defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", null_frag>;
113+
defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", null_frag>;
114114

115115
defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe">;
116116
defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 45 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -288,6 +288,11 @@ class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
288288
: Pat<(vtd (op vt1:$Op1)),
289289
(inst $Op1)>;
290290

291+
class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
292+
ValueType vt2, Instruction inst>
293+
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
294+
(inst $Op1, $Op2)>;
295+
291296
class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
292297
ValueType vt2, ValueType vt3, Instruction inst>
293298
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
@@ -1122,11 +1127,16 @@ class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
11221127
let Inst{4-0} = Zd;
11231128
}
11241129

1125-
multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm> {
1130+
multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
11261131
def _B : sve_int_bin_cons_arit_0<0b00, opc, asm, ZPR8>;
11271132
def _H : sve_int_bin_cons_arit_0<0b01, opc, asm, ZPR16>;
11281133
def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>;
11291134
def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>;
1135+
1136+
def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
1137+
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
1138+
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
1139+
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
11301140
}
11311141

11321142
//===----------------------------------------------------------------------===//
@@ -1801,38 +1811,61 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
18011811
let ElementSize = zprty.ElementSize;
18021812
}
18031813

1804-
multiclass sve_int_bin_pred_log<bits<3> opc, string asm> {
1814+
multiclass sve_int_bin_pred_log<bits<3> opc, string asm, SDPatternOperator op> {
18051815
def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>;
18061816
def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>;
18071817
def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>;
18081818
def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>;
1819+
1820+
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
1821+
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
1822+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
1823+
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
18091824
}
18101825

1811-
multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm> {
1826+
multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
18121827
def _B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>;
18131828
def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>;
18141829
def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>;
18151830
def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>;
1831+
1832+
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
1833+
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
1834+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
1835+
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
18161836
}
18171837

1818-
multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm> {
1838+
multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, SDPatternOperator op> {
18191839
def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>;
18201840
def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>;
18211841
def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>;
18221842
def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>;
1843+
1844+
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
1845+
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
1846+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
1847+
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
18231848
}
18241849

1825-
multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm> {
1850+
multiclass sve_int_bin_pred_arit_2<bits<3> opc, string asm, SDPatternOperator op> {
18261851
def _B : sve_int_bin_pred_arit_log<0b00, 0b10, opc, asm, ZPR8>;
18271852
def _H : sve_int_bin_pred_arit_log<0b01, 0b10, opc, asm, ZPR16>;
18281853
def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
18291854
def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
1855+
1856+
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
1857+
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
1858+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
1859+
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
18301860
}
18311861

18321862
// Special case for divides which are not defined for 8b/16b elements.
1833-
multiclass sve_int_bin_pred_arit_2_div<bits<3> opc, string asm> {
1863+
multiclass sve_int_bin_pred_arit_2_div<bits<3> opc, string asm, SDPatternOperator op> {
18341864
def _S : sve_int_bin_pred_arit_log<0b10, 0b10, opc, asm, ZPR32>;
18351865
def _D : sve_int_bin_pred_arit_log<0b11, 0b10, opc, asm, ZPR64>;
1866+
1867+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
1868+
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
18361869
}
18371870

18381871
//===----------------------------------------------------------------------===//
@@ -3086,9 +3119,14 @@ class sve_int_bin_cons_log<bits<2> opc, string asm>
30863119
let Inst{4-0} = Zd;
30873120
}
30883121

3089-
multiclass sve_int_bin_cons_log<bits<2> opc, string asm> {
3122+
multiclass sve_int_bin_cons_log<bits<2> opc, string asm, SDPatternOperator op> {
30903123
def NAME : sve_int_bin_cons_log<opc, asm>;
30913124

3125+
def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME)>;
3126+
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
3127+
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME)>;
3128+
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME)>;
3129+
30923130
def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
30933131
(!cast<Instruction>(NAME) ZPR8:$Zd, ZPR8:$Zn, ZPR8:$Zm), 1>;
30943132
def : InstAlias<asm # "\t$Zd, $Zn, $Zm",
Lines changed: 143 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,143 @@
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
2+
3+
define <vscale x 16 x i8> @add_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
4+
; CHECK-LABEL: add_i8:
5+
; CHECK: add z0.b, p0/m, z0.b, z1.b
6+
; CHECK-NEXT: ret
7+
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
8+
<vscale x 16 x i8> %a,
9+
<vscale x 16 x i8> %b)
10+
ret <vscale x 16 x i8> %out
11+
}
12+
13+
define <vscale x 8 x i16> @add_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
14+
; CHECK-LABEL: add_i16:
15+
; CHECK: add z0.h, p0/m, z0.h, z1.h
16+
; CHECK-NEXT: ret
17+
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
18+
<vscale x 8 x i16> %a,
19+
<vscale x 8 x i16> %b)
20+
ret <vscale x 8 x i16> %out
21+
}
22+
23+
define <vscale x 4 x i32> @add_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
24+
; CHECK-LABEL: add_i32:
25+
; CHECK: add z0.s, p0/m, z0.s, z1.s
26+
; CHECK-NEXT: ret
27+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
28+
<vscale x 4 x i32> %a,
29+
<vscale x 4 x i32> %b)
30+
ret <vscale x 4 x i32> %out
31+
}
32+
33+
define <vscale x 2 x i64> @add_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
34+
; CHECK-LABEL: add_i64:
35+
; CHECK: add z0.d, p0/m, z0.d, z1.d
36+
; CHECK-NEXT: ret
37+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
38+
<vscale x 2 x i64> %a,
39+
<vscale x 2 x i64> %b)
40+
ret <vscale x 2 x i64> %out
41+
}
42+
43+
44+
45+
46+
define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
47+
; CHECK-LABEL: sub_i8:
48+
; CHECK: sub z0.b, p0/m, z0.b, z1.b
49+
; CHECK-NEXT: ret
50+
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg,
51+
<vscale x 16 x i8> %a,
52+
<vscale x 16 x i8> %b)
53+
ret <vscale x 16 x i8> %out
54+
}
55+
56+
define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
57+
; CHECK-LABEL: sub_i16:
58+
; CHECK: sub z0.h, p0/m, z0.h, z1.h
59+
; CHECK-NEXT: ret
60+
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg,
61+
<vscale x 8 x i16> %a,
62+
<vscale x 8 x i16> %b)
63+
ret <vscale x 8 x i16> %out
64+
}
65+
66+
define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
67+
; CHECK-LABEL: sub_i32:
68+
; CHECK: sub z0.s, p0/m, z0.s, z1.s
69+
; CHECK-NEXT: ret
70+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg,
71+
<vscale x 4 x i32> %a,
72+
<vscale x 4 x i32> %b)
73+
ret <vscale x 4 x i32> %out
74+
}
75+
76+
define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
77+
; CHECK-LABEL: sub_i64:
78+
; CHECK: sub z0.d, p0/m, z0.d, z1.d
79+
; CHECK-NEXT: ret
80+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg,
81+
<vscale x 2 x i64> %a,
82+
<vscale x 2 x i64> %b)
83+
ret <vscale x 2 x i64> %out
84+
}
85+
86+
87+
88+
define <vscale x 16 x i8> @subr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
89+
; CHECK-LABEL: subr_i8:
90+
; CHECK: subr z0.b, p0/m, z0.b, z1.b
91+
; CHECK-NEXT: ret
92+
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> %pg,
93+
<vscale x 16 x i8> %a,
94+
<vscale x 16 x i8> %b)
95+
ret <vscale x 16 x i8> %out
96+
}
97+
98+
define <vscale x 8 x i16> @subr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
99+
; CHECK-LABEL: subr_i16:
100+
; CHECK: subr z0.h, p0/m, z0.h, z1.h
101+
; CHECK-NEXT: ret
102+
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
103+
<vscale x 8 x i16> %a,
104+
<vscale x 8 x i16> %b)
105+
ret <vscale x 8 x i16> %out
106+
}
107+
108+
define <vscale x 4 x i32> @subr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
109+
; CHECK-LABEL: subr_i32:
110+
; CHECK: subr z0.s, p0/m, z0.s, z1.s
111+
; CHECK-NEXT: ret
112+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
113+
<vscale x 4 x i32> %a,
114+
<vscale x 4 x i32> %b)
115+
ret <vscale x 4 x i32> %out
116+
}
117+
118+
define <vscale x 2 x i64> @subr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
119+
; CHECK-LABEL: subr_i64:
120+
; CHECK: subr z0.d, p0/m, z0.d, z1.d
121+
; CHECK-NEXT: ret
122+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
123+
<vscale x 2 x i64> %a,
124+
<vscale x 2 x i64> %b)
125+
ret <vscale x 2 x i64> %out
126+
}
127+
128+
129+
130+
declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
131+
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
132+
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
133+
declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
134+
135+
declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
136+
declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
137+
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
138+
declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
139+
140+
declare <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
141+
declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
142+
declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
143+
declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

0 commit comments

Comments
 (0)