Commit 254b4f2
[ARM,MVE] Add intrinsics for scalar shifts.
This fills in the small family of MVE intrinsics that have nothing to
do with vectors: they implement bit-shift operations on 32- or 64-bit
values held in one or two general-purpose registers. Most of these
shift operations saturate if shifting left, and round to nearest if
shifting right, although LSLL and ASRL behave like ordinary shifts.

When these instructions take a variable shift count in a register,
they pay attention to its sign, so that (for example) LSLL or UQRSHLL
will shift left if given a positive number but right if given a
negative one. That makes even LSLL and ASRL different enough from
standard LLVM IR shift semantics that I couldn't see any better
alternative than to simply model the whole family as a set of
MVE-specific IR intrinsics.

(The immediate forms of LSLL and ASRL, on the other hand, do behave
exactly like a standard IR shift of a 64-bit value. In fact, those
forms don't have ACLE intrinsics defined at all, because you can just
write an ordinary C shift operation if you want one of those.)

The 64-bit shifts have to be instruction-selected in C++, because
they deliver two output values. But the 32-bit ones are simple enough
that I could write a DAG isel pattern directly into each Instruction
record.

Reviewers: ostannard, MarkMurrayARM, dmgreen

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D70319
1 parent 33d93c3 commit 254b4f2
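
For orientation, here is a small usage sketch (mine, not part of the patch; it relies only on the ACLE intrinsic names exercised by the new test file below) of the sign-sensitive register-count behaviour the message describes:

#include <arm_mve.h>

// LSLL with a count in a register: a positive count shifts left, a
// negative count shifts right by the magnitude of the count.
uint64_t scale(uint64_t value, int32_t count)
{
    return lsll(value, count);   // e.g. count == -3 acts as a right shift by 3
}

// UQRSHLL follows the same sign convention, but saturates on left
// shifts and rounds to nearest on right shifts.
uint64_t scale_saturating(uint64_t value, int32_t count)
{
    return uqrshll(value, count);
}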

File tree: 7 files changed (+606, -23 lines)

clang/include/clang/Basic/arm_mve.td

Lines changed: 50 additions & 7 deletions
@@ -388,13 +388,56 @@ defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
 defm vstrwq: scatter_offset_both<T.All32, u32, 2>;
 defm vstrdq: scatter_offset_both<T.Int64, u64, 3>;
 
-let params = [Void], pnt = PNT_None in
-def urshrl: Intrinsic<u64, (args u64:$value, imm_1to32:$shift),
-    (seq (u32 (lshr $value, (u64 32))):$hi,
-         (u32 $value):$lo,
-         (IRInt<"urshrl"> $lo, $hi, $shift):$pair,
-         (or (shl (u64 (xval $pair, 1)), (u64 32)),
-             (u64 (xval $pair, 0))))>;
+// Base class for the scalar shift intrinsics.
+class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
+  Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {
+  let params = [Void];
+  let pnt = PNT_None;
+}
+
+// Subclass that includes the machinery to take a 64-bit input apart
+// into halves, retrieve the two halves of a shifted output as a pair,
+// and glue the pieces of the pair back into an i64 for output.
+class LongScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
+  ScalarShift<argtype, shiftCountArg,
+              (seq (u32 (lshr $value, (argtype 32))):$hi,
+                   (u32 $value):$lo,
+                   shiftCodeGen:$pair,
+                   (or (shl (u64 (xval $pair, 1)), (u64 32)),
+                       (u64 (xval $pair, 0))))>;
+
+// The family of saturating/rounding scalar shifts that take an
+// immediate shift count. They come in matched 32- and 64-bit pairs.
+multiclass ScalarSaturatingShiftImm<Type arg32, Type arg64> {
+  def "": ScalarShift<arg32, (args imm_1to32:$sh),
+                      (IRInt<NAME> $value, $sh)>;
+  def l: LongScalarShift<arg64, (args imm_1to32:$sh),
+                         (IRInt<NAME # "l"> $lo, $hi, $sh)>;
+}
+defm uqshl: ScalarSaturatingShiftImm<u32, u64>;
+defm urshr: ScalarSaturatingShiftImm<u32, u64>;
+defm sqshl: ScalarSaturatingShiftImm<s32, s64>;
+defm srshr: ScalarSaturatingShiftImm<s32, s64>;
+
+// The family of saturating/rounding scalar shifts that take a
+// register shift count. They also have 32- and 64-bit forms, but the
+// 64-bit form also has a version that saturates to 48 bits, so the IR
+// intrinsic takes an extra saturation-type operand.
+multiclass ScalarSaturatingShiftReg<Type arg32, Type arg64> {
+  def "": ScalarShift<arg32, (args s32:$sh),
+                      (IRInt<NAME> $value, $sh)>;
+  def l: LongScalarShift<arg64, (args s32:$sh),
+                         (IRInt<NAME # "l"> $lo, $hi, $sh, 64)>;
+  def l_sat48: LongScalarShift<arg64, (args s32:$sh),
+                               (IRInt<NAME # "l"> $lo, $hi, $sh, 48)>;
+}
+defm uqrshl: ScalarSaturatingShiftReg<u32, u64>;
+defm sqrshr: ScalarSaturatingShiftReg<s32, s64>;
+
+// The intrinsics for LSLL and ASRL come in 64-bit versions only, with
+// no saturation count.
+def lsll: LongScalarShift<u64, (args s32:$sh), (IRInt<"lsll"> $lo, $hi, $sh)>;
+def asrl: LongScalarShift<s64, (args s32:$sh), (IRInt<"asrl"> $lo, $hi, $sh)>;
 
 let params = T.Int32 in {
 def vadcq: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<uint>:$carry),
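
To make the LongScalarShift machinery above easier to follow, here is a rough C rendering (my own sketch; do_shift_pair stands in for the generated @llvm.arm.mve.* intrinsic call) of the split/shift/reassemble sequence that the seq dag expands to:

#include <stdint.h>

// Stand-in for the two-result IR intrinsic: it returns the low and
// high halves of the shifted 64-bit value as a pair of i32s.
typedef struct { uint32_t lo, hi; } pair32;

static uint64_t long_scalar_shift(uint64_t value, int32_t shift,
                                  pair32 (*do_shift_pair)(uint32_t, uint32_t, int32_t))
{
    uint32_t hi = (uint32_t)(value >> 32);       // (u32 (lshr $value, 32)):$hi
    uint32_t lo = (uint32_t)value;               // (u32 $value):$lo
    pair32 pair = do_shift_pair(lo, hi, shift);  // the IRInt call, bound to $pair
    return ((uint64_t)pair.hi << 32) | pair.lo;  // (or (shl (xval $pair, 1), 32),
                                                 //     (xval $pair, 0))
}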

clang/include/clang/Basic/arm_mve_defs.td

Lines changed: 1 addition & 1 deletion
@@ -312,7 +312,7 @@ def imm_lane : Immediate<sint, IB_LaneIndex>;
 
 // imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift
 // intrinsics)
-def imm_1to32 : Immediate<u32, IB_ConstRange<1, 32>>;
+def imm_1to32 : Immediate<sint, IB_ConstRange<1, 32>>;
 
 // imm_1248 can be 1, 2, 4 or 8. (e.g. vidupq)
 def imm_1248 : Immediate<u32, IB_ConstRange<1, 8>> {
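
As a hedged illustration of what the IB_ConstRange<1, 32> bound means at the C level (the function names here are mine, not from the patch): the immediate-form intrinsics only accept a compile-time constant from 1 to 32, and a variable count has to go through one of the register forms.

#include <arm_mve.h>

uint32_t imm_form(uint32_t v)            { return urshr(v, 22); }  // constant in [1,32]: accepted
uint32_t reg_form(uint32_t v, int32_t n) { return uqrshl(v, n); }  // variable count: register form
// uint32_t bad(uint32_t v, int n)       { return urshr(v, n); }   // rejected: count not a constant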

clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c

Lines changed: 231 additions & 0 deletions
@@ -3,6 +3,237 @@
 
 #include <arm_mve.h>
 
+// CHECK-LABEL: @test_asrl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.asrl(i32 [[TMP2]], i32 [[TMP1]], i32 [[SHIFT:%.*]])
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+int64_t test_asrl(int64_t value, int32_t shift)
+{
+    return asrl(value, shift);
+}
+
+// CHECK-LABEL: @test_lsll(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.lsll(i32 [[TMP2]], i32 [[TMP1]], i32 [[SHIFT:%.*]])
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+uint64_t test_lsll(uint64_t value, int32_t shift)
+{
+    return lsll(value, shift);
+}
+
+// CHECK-LABEL: @test_sqrshr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.mve.sqrshr(i32 [[VALUE:%.*]], i32 [[SHIFT:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int32_t test_sqrshr(int32_t value, int32_t shift)
+{
+    return sqrshr(value, shift);
+}
+
+// CHECK-LABEL: @test_sqrshrl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 [[TMP2]], i32 [[TMP1]], i32 [[SHIFT:%.*]], i32 64)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+int64_t test_sqrshrl(int64_t value, int32_t shift)
+{
+    return sqrshrl(value, shift);
+}
+
+// CHECK-LABEL: @test_sqrshrl_sat48(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.sqrshrl(i32 [[TMP2]], i32 [[TMP1]], i32 [[SHIFT:%.*]], i32 48)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+int64_t test_sqrshrl_sat48(int64_t value, int32_t shift)
+{
+    return sqrshrl_sat48(value, shift);
+}
+
+// CHECK-LABEL: @test_sqshl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.mve.sqshl(i32 [[VALUE:%.*]], i32 2)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int32_t test_sqshl(int32_t value)
+{
+    return sqshl(value, 2);
+}
+
+// CHECK-LABEL: @test_sqshll(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.sqshll(i32 [[TMP2]], i32 [[TMP1]], i32 17)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+int64_t test_sqshll(int64_t value)
+{
+    return sqshll(value, 17);
+}
+
+// CHECK-LABEL: @test_srshr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.mve.srshr(i32 [[VALUE:%.*]], i32 6)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int32_t test_srshr(int32_t value)
+{
+    return srshr(value, 6);
+}
+
+// CHECK-LABEL: @test_srshrl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.srshrl(i32 [[TMP2]], i32 [[TMP1]], i32 26)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+int64_t test_srshrl(int64_t value)
+{
+    return srshrl(value, 26);
+}
+
+// CHECK-LABEL: @test_uqrshl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.mve.uqrshl(i32 [[VALUE:%.*]], i32 [[SHIFT:%.*]])
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t test_uqrshl(uint32_t value, int32_t shift)
+{
+    return uqrshl(value, shift);
+}
+
+// CHECK-LABEL: @test_uqrshll(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.uqrshll(i32 [[TMP2]], i32 [[TMP1]], i32 [[SHIFT:%.*]], i32 64)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+uint64_t test_uqrshll(uint64_t value, int32_t shift)
+{
+    return uqrshll(value, shift);
+}
+
+// CHECK-LABEL: @test_uqrshll_sat48(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.uqrshll(i32 [[TMP2]], i32 [[TMP1]], i32 [[SHIFT:%.*]], i32 48)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+uint64_t test_uqrshll_sat48(uint64_t value, int32_t shift)
+{
+    return uqrshll_sat48(value, shift);
+}
+
+// CHECK-LABEL: @test_uqshl(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.mve.uqshl(i32 [[VALUE:%.*]], i32 21)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t test_uqshl(uint32_t value)
+{
+    return uqshl(value, 21);
+}
+
+// CHECK-LABEL: @test_uqshll(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.uqshll(i32 [[TMP2]], i32 [[TMP1]], i32 16)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
+// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
+// CHECK-NEXT:    ret i64 [[TMP9]]
+//
+uint64_t test_uqshll(uint64_t value)
+{
+    return uqshll(value, 16);
+}
+
+// CHECK-LABEL: @test_urshr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.arm.mve.urshr(i32 [[VALUE:%.*]], i32 22)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t test_urshr(uint32_t value)
+{
+    return urshr(value, 22);
+}
+
 // CHECK-LABEL: @test_urshrl(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32

llvm/include/llvm/IR/IntrinsicsARM.td

Lines changed: 19 additions & 3 deletions
@@ -850,9 +850,25 @@ defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
   [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
    llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>;
 
-def int_arm_mve_urshrl: Intrinsic<
-  [llvm_i32_ty, llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem]>;
+// MVE scalar shifts.
+class ARM_MVE_qrshift_single<list<LLVMType> value,
+                             list<LLVMType> saturate = []> :
+  Intrinsic<value, value # [llvm_i32_ty] # saturate, [IntrNoMem]>;
+multiclass ARM_MVE_qrshift<list<LLVMType> saturate = []> {
+  // Most of these shifts come in 32- and 64-bit versions. But only
+  // the 64-bit ones have the extra saturation argument (if any).
+  def "": ARM_MVE_qrshift_single<[llvm_i32_ty]>;
+  def l: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty], saturate>;
+}
+defm int_arm_mve_urshr: ARM_MVE_qrshift;
+defm int_arm_mve_uqshl: ARM_MVE_qrshift;
+defm int_arm_mve_srshr: ARM_MVE_qrshift;
+defm int_arm_mve_sqshl: ARM_MVE_qrshift;
+defm int_arm_mve_uqrshl: ARM_MVE_qrshift<[llvm_i32_ty]>;
+defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>;
+// LSLL and ASRL only have 64-bit versions, not 32.
+def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
+def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>;
 
 def int_arm_mve_vadc: Intrinsic<
   [llvm_anyvector_ty, llvm_i32_ty],
