Skip to content

Commit 32893f0

Browse files
committed
[ARM] Lower sadd_sat to qadd8 and qadd16
Lower the target-independent signed saturating intrinsics to qadd8 and qadd16. This custom-lowers them from a sadd_sat, catching the node early before it is promoted. It also adds QADD8b and QADD16b nodes to mean the bottom "lane" of a qadd8/qadd16, so that we can run demanded-bits simplification on them to show that they do not use the upper bits. Also handles QSUB8 and QSUB16. Differential Revision: https://reviews.llvm.org/D68974 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375402 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 7f7fc30 commit 32893f0

File tree

8 files changed

+330
-165
lines changed

8 files changed

+330
-165
lines changed

lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1021,6 +1021,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
10211021

10221022
setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
10231023
setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
1024+
if (Subtarget->hasDSP()) {
1025+
setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
1026+
setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
1027+
setOperationAction(ISD::SADDSAT, MVT::i16, Custom);
1028+
setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
1029+
}
10241030

10251031
// i64 operation support.
10261032
setOperationAction(ISD::MUL, MVT::i64, Expand);
@@ -1622,6 +1628,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
16221628
case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
16231629
case ARMISD::SMMLAR: return "ARMISD::SMMLAR";
16241630
case ARMISD::SMMLSR: return "ARMISD::SMMLSR";
1631+
case ARMISD::QADD16b: return "ARMISD::QADD16b";
1632+
case ARMISD::QSUB16b: return "ARMISD::QSUB16b";
1633+
case ARMISD::QADD8b: return "ARMISD::QADD8b";
1634+
case ARMISD::QSUB8b: return "ARMISD::QSUB8b";
16251635
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
16261636
case ARMISD::BFI: return "ARMISD::BFI";
16271637
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
@@ -4445,6 +4455,35 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
44454455
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
44464456
}
44474457

4458+
/// Custom-lower an i8 or i16 ISD::SADDSAT / ISD::SSUBSAT node to the matching
/// bottom-lane ARMISD::QADD8b / QSUB8b / QADD16b / QSUB16b node.
///
/// Both operands are widened to i32, the ARMISD node is built with an i32
/// result, and that result is truncated back to the original value type.
///
/// \param Op        The saturating add/subtract node to lower.
/// \param DAG       The SelectionDAG used to create the replacement nodes.
/// \param Subtarget Queried for DSP support.
/// \returns The truncated replacement value, or an empty SDValue when the
///          subtarget has no DSP, the value type is not simple, or the value
///          type is neither i8 nor i16.
static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
4459+
const ARMSubtarget *Subtarget) {
4460+
EVT VT = Op.getValueType();
4461+
// Bail out (empty SDValue) when the DSP feature is not available.
if (!Subtarget->hasDSP())
4462+
return SDValue();
4463+
// getSimpleVT() below is only called on simple value types.
if (!VT.isSimple())
4464+
return SDValue();
4465+
4466+
unsigned NewOpcode;
4467+
// Any opcode other than SADDSAT is treated as the subtract form.
bool IsAdd = Op->getOpcode() == ISD::SADDSAT;
4468+
// Pick the 8-bit or 16-bit bottom-lane opcode; other widths are not handled.
switch (VT.getSimpleVT().SimpleTy) {
4469+
default:
4470+
return SDValue();
4471+
case MVT::i8:
4472+
NewOpcode = IsAdd ? ARMISD::QADD8b : ARMISD::QSUB8b;
4473+
break;
4474+
case MVT::i16:
4475+
NewOpcode = IsAdd ? ARMISD::QADD16b : ARMISD::QSUB16b;
4476+
break;
4477+
}
4478+
4479+
SDLoc dl(Op);
4480+
// Build the i32 ARMISD node on operands converted to i32 via getSExtOrTrunc.
SDValue Add =
4481+
DAG.getNode(NewOpcode, dl, MVT::i32,
4482+
DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
4483+
DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
4484+
// Narrow the i32 result back to the original i8/i16 type.
return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
4485+
}
4486+
44484487
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
44494488
SDValue Cond = Op.getOperand(0);
44504489
SDValue SelectTrue = Op.getOperand(1);
@@ -9121,6 +9160,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
91219160
case ISD::UADDO:
91229161
case ISD::USUBO:
91239162
return LowerUnsignedALUO(Op, DAG);
9163+
case ISD::SADDSAT:
9164+
case ISD::SSUBSAT:
9165+
return LowerSADDSUBSAT(Op, DAG, Subtarget);
91249166
case ISD::LOAD:
91259167
return LowerPredicateLoad(Op, DAG);
91269168
case ISD::STORE:
@@ -9205,6 +9247,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
92059247
Results.push_back(Res.getValue(0));
92069248
Results.push_back(Res.getValue(1));
92079249
return;
9250+
case ISD::SADDSAT:
9251+
case ISD::SSUBSAT:
9252+
Res = LowerSADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
9253+
break;
92089254
case ISD::READCYCLECOUNTER:
92099255
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
92109256
return;
@@ -14382,7 +14428,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
1438214428
return SDValue();
1438314429
break;
1438414430
}
14385-
case ARMISD::SMLALBB: {
14431+
case ARMISD::SMLALBB:
14432+
case ARMISD::QADD16b:
14433+
case ARMISD::QSUB16b: {
1438614434
unsigned BitWidth = N->getValueType(0).getSizeInBits();
1438714435
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
1438814436
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
@@ -14418,6 +14466,15 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
1441814466
return SDValue();
1441914467
break;
1442014468
}
14469+
case ARMISD::QADD8b:
14470+
case ARMISD::QSUB8b: {
14471+
unsigned BitWidth = N->getValueType(0).getSizeInBits();
14472+
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
14473+
if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
14474+
(SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
14475+
return SDValue();
14476+
break;
14477+
}
1442114478
case ISD::INTRINSIC_VOID:
1442214479
case ISD::INTRINSIC_W_CHAIN:
1442314480
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {

lib/Target/ARM/ARMISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,12 @@ class VectorType;
219219
SMMLAR, // Signed multiply long, round and add
220220
SMMLSR, // Signed multiply long, subtract and round
221221

222+
// Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b stands for.
223+
QADD8b,
224+
QSUB8b,
225+
QADD16b,
226+
QSUB16b,
227+
222228
// Operands of the standard BUILD_VECTOR node are not legalized, which
223229
// is fine if BUILD_VECTORs are always lowered to shuffles or other
224230
// operations, but for ARM some BUILD_VECTORs are legal as-is and their

lib/Target/ARM/ARMInstrInfo.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,11 @@ def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
238238
def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
239239
def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
240240

241+
// SelectionDAG nodes for the bottom-lane saturating add/subtract opcodes
// (ARMISD::QADD8b, QSUB8b, QADD16b, QSUB16b). Each uses the SDT_ARMAnd type
// profile and declares no node properties.
def ARMqadd8b : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>;
242+
def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
243+
def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
244+
def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>;
245+
241246
// Vector operations shared between NEON and MVE
242247

243248
def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
@@ -3750,6 +3755,15 @@ let DecoderMethod = "DecodeQADDInstruction" in
37503755
[(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>;
37513756
}
37523757

3758+
// Select the bottom-lane saturating nodes to the QADD8 / QSUB8 / QADD16 /
// QSUB16 instructions, passing both register operands through unchanged.
def : ARMV6Pat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
3759+
(QADD8 rGPR:$Rm, rGPR:$Rn)>;
3760+
def : ARMV6Pat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
3761+
(QSUB8 rGPR:$Rm, rGPR:$Rn)>;
3762+
def : ARMV6Pat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn),
3763+
(QADD16 rGPR:$Rm, rGPR:$Rn)>;
3764+
def : ARMV6Pat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn),
3765+
(QSUB16 rGPR:$Rm, rGPR:$Rn)>;
3766+
37533767
def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>;
37543768
def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>;
37553769
def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>;

lib/Target/ARM/ARMInstrThumb2.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2395,6 +2395,15 @@ def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn),
23952395
def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
23962396
(t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
23972397

2398+
// Select the bottom-lane saturating nodes to the t2QADD8 / t2QSUB8 /
// t2QADD16 / t2QSUB16 instructions, passing both register operands through
// unchanged.
def : Thumb2DSPPat<(ARMqadd8b rGPR:$Rm, rGPR:$Rn),
2399+
(t2QADD8 rGPR:$Rm, rGPR:$Rn)>;
2400+
def : Thumb2DSPPat<(ARMqsub8b rGPR:$Rm, rGPR:$Rn),
2401+
(t2QSUB8 rGPR:$Rm, rGPR:$Rn)>;
2402+
def : Thumb2DSPPat<(ARMqadd16b rGPR:$Rm, rGPR:$Rn),
2403+
(t2QADD16 rGPR:$Rm, rGPR:$Rn)>;
2404+
def : Thumb2DSPPat<(ARMqsub16b rGPR:$Rm, rGPR:$Rn),
2405+
(t2QSUB16 rGPR:$Rm, rGPR:$Rn)>;
2406+
23982407
// Signed/Unsigned add/subtract
23992408

24002409
def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>;

test/CodeGen/ARM/sadd_sat.ll

Lines changed: 95 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -233,35 +233,63 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
233233
; CHECK-T1-NEXT: .LCPI2_1:
234234
; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000
235235
;
236-
; CHECK-T2-LABEL: func16:
237-
; CHECK-T2: @ %bb.0:
238-
; CHECK-T2-NEXT: add r0, r1
239-
; CHECK-T2-NEXT: movw r1, #32767
240-
; CHECK-T2-NEXT: cmp r0, r1
241-
; CHECK-T2-NEXT: it lt
242-
; CHECK-T2-NEXT: movlt r1, r0
243-
; CHECK-T2-NEXT: movw r0, #32768
244-
; CHECK-T2-NEXT: cmn.w r1, #32768
245-
; CHECK-T2-NEXT: movt r0, #65535
246-
; CHECK-T2-NEXT: it gt
247-
; CHECK-T2-NEXT: movgt r0, r1
248-
; CHECK-T2-NEXT: bx lr
236+
; CHECK-T2NODSP-LABEL: func16:
237+
; CHECK-T2NODSP: @ %bb.0:
238+
; CHECK-T2NODSP-NEXT: add r0, r1
239+
; CHECK-T2NODSP-NEXT: movw r1, #32767
240+
; CHECK-T2NODSP-NEXT: cmp r0, r1
241+
; CHECK-T2NODSP-NEXT: it lt
242+
; CHECK-T2NODSP-NEXT: movlt r1, r0
243+
; CHECK-T2NODSP-NEXT: movw r0, #32768
244+
; CHECK-T2NODSP-NEXT: cmn.w r1, #32768
245+
; CHECK-T2NODSP-NEXT: movt r0, #65535
246+
; CHECK-T2NODSP-NEXT: it gt
247+
; CHECK-T2NODSP-NEXT: movgt r0, r1
248+
; CHECK-T2NODSP-NEXT: bx lr
249249
;
250-
; CHECK-ARM-LABEL: func16:
251-
; CHECK-ARM: @ %bb.0:
252-
; CHECK-ARM-NEXT: add r0, r0, r1
253-
; CHECK-ARM-NEXT: mov r1, #255
254-
; CHECK-ARM-NEXT: orr r1, r1, #32512
255-
; CHECK-ARM-NEXT: cmp r0, r1
256-
; CHECK-ARM-NEXT: movlt r1, r0
257-
; CHECK-ARM-NEXT: ldr r0, .LCPI2_0
258-
; CHECK-ARM-NEXT: cmn r1, #32768
259-
; CHECK-ARM-NEXT: movgt r0, r1
260-
; CHECK-ARM-NEXT: bx lr
261-
; CHECK-ARM-NEXT: .p2align 2
262-
; CHECK-ARM-NEXT: @ %bb.1:
263-
; CHECK-ARM-NEXT: .LCPI2_0:
264-
; CHECK-ARM-NEXT: .long 4294934528 @ 0xffff8000
250+
; CHECK-T2DSP-LABEL: func16:
251+
; CHECK-T2DSP: @ %bb.0:
252+
; CHECK-T2DSP-NEXT: qadd16 r0, r0, r1
253+
; CHECK-T2DSP-NEXT: sxth r0, r0
254+
; CHECK-T2DSP-NEXT: bx lr
255+
;
256+
; CHECK-ARMNODPS-LABEL: func16:
257+
; CHECK-ARMNODPS: @ %bb.0:
258+
; CHECK-ARMNODPS-NEXT: add r0, r0, r1
259+
; CHECK-ARMNODPS-NEXT: mov r1, #255
260+
; CHECK-ARMNODPS-NEXT: orr r1, r1, #32512
261+
; CHECK-ARMNODPS-NEXT: cmp r0, r1
262+
; CHECK-ARMNODPS-NEXT: movlt r1, r0
263+
; CHECK-ARMNODPS-NEXT: ldr r0, .LCPI2_0
264+
; CHECK-ARMNODPS-NEXT: cmn r1, #32768
265+
; CHECK-ARMNODPS-NEXT: movgt r0, r1
266+
; CHECK-ARMNODPS-NEXT: bx lr
267+
; CHECK-ARMNODPS-NEXT: .p2align 2
268+
; CHECK-ARMNODPS-NEXT: @ %bb.1:
269+
; CHECK-ARMNODPS-NEXT: .LCPI2_0:
270+
; CHECK-ARMNODPS-NEXT: .long 4294934528 @ 0xffff8000
271+
;
272+
; CHECK-ARMBASEDSP-LABEL: func16:
273+
; CHECK-ARMBASEDSP: @ %bb.0:
274+
; CHECK-ARMBASEDSP-NEXT: add r0, r0, r1
275+
; CHECK-ARMBASEDSP-NEXT: mov r1, #255
276+
; CHECK-ARMBASEDSP-NEXT: orr r1, r1, #32512
277+
; CHECK-ARMBASEDSP-NEXT: cmp r0, r1
278+
; CHECK-ARMBASEDSP-NEXT: movlt r1, r0
279+
; CHECK-ARMBASEDSP-NEXT: ldr r0, .LCPI2_0
280+
; CHECK-ARMBASEDSP-NEXT: cmn r1, #32768
281+
; CHECK-ARMBASEDSP-NEXT: movgt r0, r1
282+
; CHECK-ARMBASEDSP-NEXT: bx lr
283+
; CHECK-ARMBASEDSP-NEXT: .p2align 2
284+
; CHECK-ARMBASEDSP-NEXT: @ %bb.1:
285+
; CHECK-ARMBASEDSP-NEXT: .LCPI2_0:
286+
; CHECK-ARMBASEDSP-NEXT: .long 4294934528 @ 0xffff8000
287+
;
288+
; CHECK-ARMDSP-LABEL: func16:
289+
; CHECK-ARMDSP: @ %bb.0:
290+
; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r1
291+
; CHECK-ARMDSP-NEXT: sxth r0, r0
292+
; CHECK-ARMDSP-NEXT: bx lr
265293
%tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y)
266294
ret i16 %tmp
267295
}
@@ -284,25 +312,46 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
284312
; CHECK-T1-NEXT: .LBB3_4:
285313
; CHECK-T1-NEXT: bx lr
286314
;
287-
; CHECK-T2-LABEL: func8:
288-
; CHECK-T2: @ %bb.0:
289-
; CHECK-T2-NEXT: add r0, r1
290-
; CHECK-T2-NEXT: cmp r0, #127
291-
; CHECK-T2-NEXT: it ge
292-
; CHECK-T2-NEXT: movge r0, #127
293-
; CHECK-T2-NEXT: cmn.w r0, #128
294-
; CHECK-T2-NEXT: it le
295-
; CHECK-T2-NEXT: mvnle r0, #127
296-
; CHECK-T2-NEXT: bx lr
315+
; CHECK-T2NODSP-LABEL: func8:
316+
; CHECK-T2NODSP: @ %bb.0:
317+
; CHECK-T2NODSP-NEXT: add r0, r1
318+
; CHECK-T2NODSP-NEXT: cmp r0, #127
319+
; CHECK-T2NODSP-NEXT: it ge
320+
; CHECK-T2NODSP-NEXT: movge r0, #127
321+
; CHECK-T2NODSP-NEXT: cmn.w r0, #128
322+
; CHECK-T2NODSP-NEXT: it le
323+
; CHECK-T2NODSP-NEXT: mvnle r0, #127
324+
; CHECK-T2NODSP-NEXT: bx lr
297325
;
298-
; CHECK-ARM-LABEL: func8:
299-
; CHECK-ARM: @ %bb.0:
300-
; CHECK-ARM-NEXT: add r0, r0, r1
301-
; CHECK-ARM-NEXT: cmp r0, #127
302-
; CHECK-ARM-NEXT: movge r0, #127
303-
; CHECK-ARM-NEXT: cmn r0, #128
304-
; CHECK-ARM-NEXT: mvnle r0, #127
305-
; CHECK-ARM-NEXT: bx lr
326+
; CHECK-T2DSP-LABEL: func8:
327+
; CHECK-T2DSP: @ %bb.0:
328+
; CHECK-T2DSP-NEXT: qadd8 r0, r0, r1
329+
; CHECK-T2DSP-NEXT: sxtb r0, r0
330+
; CHECK-T2DSP-NEXT: bx lr
331+
;
332+
; CHECK-ARMNODPS-LABEL: func8:
333+
; CHECK-ARMNODPS: @ %bb.0:
334+
; CHECK-ARMNODPS-NEXT: add r0, r0, r1
335+
; CHECK-ARMNODPS-NEXT: cmp r0, #127
336+
; CHECK-ARMNODPS-NEXT: movge r0, #127
337+
; CHECK-ARMNODPS-NEXT: cmn r0, #128
338+
; CHECK-ARMNODPS-NEXT: mvnle r0, #127
339+
; CHECK-ARMNODPS-NEXT: bx lr
340+
;
341+
; CHECK-ARMBASEDSP-LABEL: func8:
342+
; CHECK-ARMBASEDSP: @ %bb.0:
343+
; CHECK-ARMBASEDSP-NEXT: add r0, r0, r1
344+
; CHECK-ARMBASEDSP-NEXT: cmp r0, #127
345+
; CHECK-ARMBASEDSP-NEXT: movge r0, #127
346+
; CHECK-ARMBASEDSP-NEXT: cmn r0, #128
347+
; CHECK-ARMBASEDSP-NEXT: mvnle r0, #127
348+
; CHECK-ARMBASEDSP-NEXT: bx lr
349+
;
350+
; CHECK-ARMDSP-LABEL: func8:
351+
; CHECK-ARMDSP: @ %bb.0:
352+
; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r1
353+
; CHECK-ARMDSP-NEXT: sxtb r0, r0
354+
; CHECK-ARMDSP-NEXT: bx lr
306355
%tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y)
307356
ret i8 %tmp
308357
}

test/CodeGen/ARM/sadd_sat_plus.ll

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -258,29 +258,15 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
258258
; CHECK-T2DSP-LABEL: func16:
259259
; CHECK-T2DSP: @ %bb.0:
260260
; CHECK-T2DSP-NEXT: muls r1, r2, r1
261-
; CHECK-T2DSP-NEXT: sxtah r0, r0, r1
262-
; CHECK-T2DSP-NEXT: movw r1, #32767
263-
; CHECK-T2DSP-NEXT: cmp r0, r1
264-
; CHECK-T2DSP-NEXT: it lt
265-
; CHECK-T2DSP-NEXT: movlt r1, r0
266-
; CHECK-T2DSP-NEXT: movw r0, #32768
267-
; CHECK-T2DSP-NEXT: cmn.w r1, #32768
268-
; CHECK-T2DSP-NEXT: movt r0, #65535
269-
; CHECK-T2DSP-NEXT: it gt
270-
; CHECK-T2DSP-NEXT: movgt r0, r1
261+
; CHECK-T2DSP-NEXT: qadd16 r0, r0, r1
262+
; CHECK-T2DSP-NEXT: sxth r0, r0
271263
; CHECK-T2DSP-NEXT: bx lr
272264
;
273265
; CHECK-ARM-LABEL: func16:
274266
; CHECK-ARM: @ %bb.0:
275267
; CHECK-ARM-NEXT: smulbb r1, r1, r2
276-
; CHECK-ARM-NEXT: sxtah r0, r0, r1
277-
; CHECK-ARM-NEXT: movw r1, #32767
278-
; CHECK-ARM-NEXT: cmp r0, r1
279-
; CHECK-ARM-NEXT: movlt r1, r0
280-
; CHECK-ARM-NEXT: movw r0, #32768
281-
; CHECK-ARM-NEXT: movt r0, #65535
282-
; CHECK-ARM-NEXT: cmn r1, #32768
283-
; CHECK-ARM-NEXT: movgt r0, r1
268+
; CHECK-ARM-NEXT: qadd16 r0, r0, r1
269+
; CHECK-ARM-NEXT: sxth r0, r0
284270
; CHECK-ARM-NEXT: bx lr
285271
%a = mul i16 %y, %z
286272
%tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
@@ -323,23 +309,15 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
323309
; CHECK-T2DSP-LABEL: func8:
324310
; CHECK-T2DSP: @ %bb.0:
325311
; CHECK-T2DSP-NEXT: muls r1, r2, r1
326-
; CHECK-T2DSP-NEXT: sxtab r0, r0, r1
327-
; CHECK-T2DSP-NEXT: cmp r0, #127
328-
; CHECK-T2DSP-NEXT: it ge
329-
; CHECK-T2DSP-NEXT: movge r0, #127
330-
; CHECK-T2DSP-NEXT: cmn.w r0, #128
331-
; CHECK-T2DSP-NEXT: it le
332-
; CHECK-T2DSP-NEXT: mvnle r0, #127
312+
; CHECK-T2DSP-NEXT: qadd8 r0, r0, r1
313+
; CHECK-T2DSP-NEXT: sxtb r0, r0
333314
; CHECK-T2DSP-NEXT: bx lr
334315
;
335316
; CHECK-ARM-LABEL: func8:
336317
; CHECK-ARM: @ %bb.0:
337318
; CHECK-ARM-NEXT: smulbb r1, r1, r2
338-
; CHECK-ARM-NEXT: sxtab r0, r0, r1
339-
; CHECK-ARM-NEXT: cmp r0, #127
340-
; CHECK-ARM-NEXT: movge r0, #127
341-
; CHECK-ARM-NEXT: cmn r0, #128
342-
; CHECK-ARM-NEXT: mvnle r0, #127
319+
; CHECK-ARM-NEXT: qadd8 r0, r0, r1
320+
; CHECK-ARM-NEXT: sxtb r0, r0
343321
; CHECK-ARM-NEXT: bx lr
344322
%a = mul i8 %y, %z
345323
%tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %a)

0 commit comments

Comments
 (0)