Skip to content

Commit 91cb13f

Browse files
committed
[SveEmitter] Add builtins for svqadd, svqsub and svdot
This patch adds builtins for the saturating add/sub instructions (svqadd, svqadd_n, svqsub, svqsub_n) and builtins for the dot product instructions (svdot, svdot_lane).
1 parent 447efdb commit 91cb13f

File tree

5 files changed

+386
-5
lines changed

5 files changed

+386
-5
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@
6767
// s: scalar of element type
6868
// a: scalar of element type (splat to vector type)
6969
// R: scalar of 1/2 width element type (splat to vector type)
70+
// r: scalar of 1/4 width element type (splat to vector type)
7071
// e: 1/2 width unsigned elements, 2x element count
7172
// h: 1/2 width elements, 2x element count
7273
// q: 1/4 width elements, 4x element count
@@ -675,6 +676,25 @@ defm SVMLA : SInstZPZZZ<"svmla", "csilUcUsUiUl", "aarch64_sve_mla">;
675676
defm SVMLS : SInstZPZZZ<"svmls", "csilUcUsUiUl", "aarch64_sve_mls">;
676677
defm SVMSB : SInstZPZZZ<"svmsb", "csilUcUsUiUl", "aarch64_sve_msb">;
677678

679+
//------------------------------------------------------------------------------
680+
681+
def SVDOT_S : SInst<"svdot[_{0}]", "ddqq", "il", MergeNone, "aarch64_sve_sdot">;
682+
def SVDOT_U : SInst<"svdot[_{0}]", "ddqq", "UiUl", MergeNone, "aarch64_sve_udot">;
683+
def SVQADD_S : SInst<"svqadd[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqadd_x">;
684+
def SVQADD_U : SInst<"svqadd[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x">;
685+
def SVQSUB_S : SInst<"svqsub[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqsub_x">;
686+
def SVQSUB_U : SInst<"svqsub[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x">;
687+
688+
def SVDOT_N_S : SInst<"svdot[_n_{0}]", "ddqr", "il", MergeNone, "aarch64_sve_sdot">;
689+
def SVDOT_N_U : SInst<"svdot[_n_{0}]", "ddqr", "UiUl", MergeNone, "aarch64_sve_udot">;
690+
def SVQADD_N_S : SInst<"svqadd[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqadd_x">;
691+
def SVQADD_N_U : SInst<"svqadd[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x">;
692+
def SVQSUB_N_S : SInst<"svqsub[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqsub_x">;
693+
def SVQSUB_N_U : SInst<"svqsub[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x">;
694+
695+
def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
696+
def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]", "ddqqi", "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
697+
678698
////////////////////////////////////////////////////////////////////////////////
679699
// Logical operations
680700

@@ -1180,11 +1200,6 @@ defm SVQINCW_U : SInst_SAT2<"svqincw", "aarch64_sve_uqincw", UnsignedWord>;
11801200
defm SVQINCD_S : SInst_SAT2<"svqincd", "aarch64_sve_sqincd", SignedDoubleWord>;
11811201
defm SVQINCD_U : SInst_SAT2<"svqincd", "aarch64_sve_uqincd", UnsignedDoubleWord>;
11821202

1183-
////////////////////////////////////////////////////////////////////////////////
1184-
// Integer arithmetic
1185-
def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
1186-
def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]", "ddqqi", "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
1187-
11881203
////////////////////////////////////////////////////////////////////////////////
11891204
// SVE2 WhileGE/GT
11901205
let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {

clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,74 @@
1010
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
1111
#endif
1212

13+
svint32_t test_svdot_s32(svint32_t op1, svint8_t op2, svint8_t op3)
14+
{
15+
// CHECK-LABEL: test_svdot_s32
16+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3)
17+
// CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
18+
return SVE_ACLE_FUNC(svdot,_s32,,)(op1, op2, op3);
19+
}
20+
21+
svint64_t test_svdot_s64(svint64_t op1, svint16_t op2, svint16_t op3)
22+
{
23+
// CHECK-LABEL: test_svdot_s64
24+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3)
25+
// CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
26+
return SVE_ACLE_FUNC(svdot,_s64,,)(op1, op2, op3);
27+
}
28+
29+
svuint32_t test_svdot_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3)
30+
{
31+
// CHECK-LABEL: test_svdot_u32
32+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op3)
33+
// CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
34+
return SVE_ACLE_FUNC(svdot,_u32,,)(op1, op2, op3);
35+
}
36+
37+
svuint64_t test_svdot_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3)
38+
{
39+
// CHECK-LABEL: test_svdot_u64
40+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %op3)
41+
// CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
42+
return SVE_ACLE_FUNC(svdot,_u64,,)(op1, op2, op3);
43+
}
44+
45+
svint32_t test_svdot_n_s32(svint32_t op1, svint8_t op2, int8_t op3)
46+
{
47+
// CHECK-LABEL: test_svdot_n_s32
48+
// CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3)
49+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %[[DUP]])
50+
// CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
51+
return SVE_ACLE_FUNC(svdot,_n_s32,,)(op1, op2, op3);
52+
}
53+
54+
svint64_t test_svdot_n_s64(svint64_t op1, svint16_t op2, int16_t op3)
55+
{
56+
// CHECK-LABEL: test_svdot_n_s64
57+
// CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3)
58+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %[[DUP]])
59+
// CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
60+
return SVE_ACLE_FUNC(svdot,_n_s64,,)(op1, op2, op3);
61+
}
62+
63+
svuint32_t test_svdot_n_u32(svuint32_t op1, svuint8_t op2, uint8_t op3)
64+
{
65+
// CHECK-LABEL: test_svdot_n_u32
66+
// CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op3)
67+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %[[DUP]])
68+
// CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
69+
return SVE_ACLE_FUNC(svdot,_n_u32,,)(op1, op2, op3);
70+
}
71+
72+
svuint64_t test_svdot_n_u64(svuint64_t op1, svuint16_t op2, uint16_t op3)
73+
{
74+
// CHECK-LABEL: test_svdot_n_u64
75+
// CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op3)
76+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 8 x i16> %op2, <vscale x 8 x i16> %[[DUP]])
77+
// CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
78+
return SVE_ACLE_FUNC(svdot,_n_u64,,)(op1, op2, op3);
79+
}
80+
1381
svint32_t test_svdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3)
1482
{
1583
// CHECK-LABEL: test_svdot_lane_s32
Lines changed: 147 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,147 @@
1+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
2+
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
4+
#include <arm_sve.h>
5+
6+
#ifdef SVE_OVERLOADED_FORMS
7+
// A simple used,unused... macro, long enough to represent any SVE builtin.
8+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
9+
#else
10+
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
11+
#endif
12+
13+
svint8_t test_svqadd_s8(svint8_t op1, svint8_t op2)
14+
{
15+
// CHECK-LABEL: test_svqadd_s8
16+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
17+
// CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
18+
return SVE_ACLE_FUNC(svqadd,_s8,,)(op1, op2);
19+
}
20+
21+
svint16_t test_svqadd_s16(svint16_t op1, svint16_t op2)
22+
{
23+
// CHECK-LABEL: test_svqadd_s16
24+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2)
25+
// CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
26+
return SVE_ACLE_FUNC(svqadd,_s16,,)(op1, op2);
27+
}
28+
29+
svint32_t test_svqadd_s32(svint32_t op1, svint32_t op2)
30+
{
31+
// CHECK-LABEL: test_svqadd_s32
32+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
33+
// CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
34+
return SVE_ACLE_FUNC(svqadd,_s32,,)(op1, op2);
35+
}
36+
37+
svint64_t test_svqadd_s64(svint64_t op1, svint64_t op2)
38+
{
39+
// CHECK-LABEL: test_svqadd_s64
40+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2)
41+
// CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
42+
return SVE_ACLE_FUNC(svqadd,_s64,,)(op1, op2);
43+
}
44+
45+
svuint8_t test_svqadd_u8(svuint8_t op1, svuint8_t op2)
46+
{
47+
// CHECK-LABEL: test_svqadd_u8
48+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
49+
// CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
50+
return SVE_ACLE_FUNC(svqadd,_u8,,)(op1, op2);
51+
}
52+
53+
svuint16_t test_svqadd_u16(svuint16_t op1, svuint16_t op2)
54+
{
55+
// CHECK-LABEL: test_svqadd_u16
56+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %op2)
57+
// CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
58+
return SVE_ACLE_FUNC(svqadd,_u16,,)(op1, op2);
59+
}
60+
61+
svuint32_t test_svqadd_u32(svuint32_t op1, svuint32_t op2)
62+
{
63+
// CHECK-LABEL: test_svqadd_u32
64+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %op2)
65+
// CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
66+
return SVE_ACLE_FUNC(svqadd,_u32,,)(op1, op2);
67+
}
68+
69+
svuint64_t test_svqadd_u64(svuint64_t op1, svuint64_t op2)
70+
{
71+
// CHECK-LABEL: test_svqadd_u64
72+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %op2)
73+
// CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
74+
return SVE_ACLE_FUNC(svqadd,_u64,,)(op1, op2);
75+
}
76+
77+
svint8_t test_svqadd_n_s8(svint8_t op1, int8_t op2)
78+
{
79+
// CHECK-LABEL: test_svqadd_n_s8
80+
// CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2)
81+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %[[DUP]])
82+
// CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
83+
return SVE_ACLE_FUNC(svqadd,_n_s8,,)(op1, op2);
84+
}
85+
86+
svint16_t test_svqadd_n_s16(svint16_t op1, int16_t op2)
87+
{
88+
// CHECK-LABEL: test_svqadd_n_s16
89+
// CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2)
90+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %[[DUP]])
91+
// CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
92+
return SVE_ACLE_FUNC(svqadd,_n_s16,,)(op1, op2);
93+
}
94+
95+
svint32_t test_svqadd_n_s32(svint32_t op1, int32_t op2)
96+
{
97+
// CHECK-LABEL: test_svqadd_n_s32
98+
// CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2)
99+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %[[DUP]])
100+
// CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
101+
return SVE_ACLE_FUNC(svqadd,_n_s32,,)(op1, op2);
102+
}
103+
104+
svint64_t test_svqadd_n_s64(svint64_t op1, int64_t op2)
105+
{
106+
// CHECK-LABEL: test_svqadd_n_s64
107+
// CHECK: %[[DUP:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2)
108+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %[[DUP]])
109+
// CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
110+
return SVE_ACLE_FUNC(svqadd,_n_s64,,)(op1, op2);
111+
}
112+
113+
svuint8_t test_svqadd_n_u8(svuint8_t op1, uint8_t op2)
114+
{
115+
// CHECK-LABEL: test_svqadd_n_u8
116+
// CHECK: %[[DUP:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %op2)
117+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> %op1, <vscale x 16 x i8> %[[DUP]])
118+
// CHECK: ret <vscale x 16 x i8> %[[INTRINSIC]]
119+
return SVE_ACLE_FUNC(svqadd,_n_u8,,)(op1, op2);
120+
}
121+
122+
svuint16_t test_svqadd_n_u16(svuint16_t op1, uint16_t op2)
123+
{
124+
// CHECK-LABEL: test_svqadd_n_u16
125+
// CHECK: %[[DUP:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %op2)
126+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %op1, <vscale x 8 x i16> %[[DUP]])
127+
// CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
128+
return SVE_ACLE_FUNC(svqadd,_n_u16,,)(op1, op2);
129+
}
130+
131+
svuint32_t test_svqadd_n_u32(svuint32_t op1, uint32_t op2)
132+
{
133+
// CHECK-LABEL: test_svqadd_n_u32
134+
// CHECK: %[[DUP:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %op2)
135+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %op1, <vscale x 4 x i32> %[[DUP]])
136+
// CHECK: ret <vscale x 4 x i32> %[[INTRINSIC]]
137+
return SVE_ACLE_FUNC(svqadd,_n_u32,,)(op1, op2);
138+
}
139+
140+
svuint64_t test_svqadd_n_u64(svuint64_t op1, uint64_t op2)
141+
{
142+
// CHECK-LABEL: test_svqadd_n_u64
143+
// CHECK: %[[DUP:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %op2)
144+
// CHECK: %[[INTRINSIC:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %op1, <vscale x 2 x i64> %[[DUP]])
145+
// CHECK: ret <vscale x 2 x i64> %[[INTRINSIC]]
146+
return SVE_ACLE_FUNC(svqadd,_n_u64,,)(op1, op2);
147+
}

0 commit comments

Comments (0)