Skip to content

Commit 73f702f

Browse files
committed
[InstCombine] Non-canonical clamp-like pattern handling
Summary:
Given a pattern like:
```
%old_cmp1 = icmp slt i32 %x, C2
%old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high
%old_x_offseted = add i32 %x, C1
%old_cmp0 = icmp ult i32 %old_x_offseted, C0
%r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
```
it can be rewritten as a more canonical pattern:
```
%new_cmp1 = icmp slt i32 %x, -C1
%new_cmp2 = icmp sge i32 %x, C0-C1
%new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x
%r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low
```
Iff `-C1 s<= C2 s<= C0-C1`

Also, `ULT` predicate can also be `UGE`; or `UGT` iff `C0 != -1` (+invert result)
Also, `SLT` predicate can also be `SGE`; or `SGT` iff `C2 != INT_MAX` (+invert result)

If `C1 == 0`, then all 3 instructions must be one-use; else at most either `%old_cmp1` or `%old_x_offseted` can have extra uses.
NOTE: if we could reuse `%old_cmp1` as one of the comparisons we'll have to build, this could be less limiting.

So there are two icmp's, each one with 3 predicate variants, so there are 9 fold variants:

|     | ULT | UGE | UGT |
| SLT | https://rise4fun.com/Alive/yIJ | https://rise4fun.com/Alive/5BfN | https://rise4fun.com/Alive/INH |
| SGE | https://rise4fun.com/Alive/hd8 | https://rise4fun.com/Alive/Abk | https://rise4fun.com/Alive/PlzS |
| SGT | https://rise4fun.com/Alive/VYG | https://rise4fun.com/Alive/oMY | https://rise4fun.com/Alive/KrzC |

{F9730206}

This fold was brought up in https://reviews.llvm.org/D65148#1603922 by @dmgreen, and is needed to unblock that patch.
This patch requires D65530.

Reviewers: spatel, nikic, xbolva00, dmgreen
Reviewed By: spatel
Subscribers: hiraditya, llvm-commits, dmgreen
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65765
llvm-svn: 368687
1 parent 0410489 commit 73f702f

4 files changed

+300
-155
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,149 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
11181118
return nullptr;
11191119
}
11201120

1121+
// See if this is a pattern like:
1122+
// %old_cmp1 = icmp slt i32 %x, C2
1123+
// %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high
1124+
// %old_x_offseted = add i32 %x, C1
1125+
// %old_cmp0 = icmp ult i32 %old_x_offseted, C0
1126+
// %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
1127+
// This can be rewritten as a more canonical pattern:
1128+
// %new_cmp1 = icmp slt i32 %x, -C1
1129+
// %new_cmp2 = icmp sge i32 %x, C0-C1
1130+
// %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x
1131+
// %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low
1132+
// Iff -C1 s<= C2 s<= C0-C1
1133+
// Also, the ULT predicate can instead be UGT iff C0 != -1 (+invert result)
1134+
// and the SLT predicate can instead be SGE, or SGT iff C2 != INT_MAX (+invert res.)
//
// Sel0 is the outermost select and Cmp0 is the icmp matched as its condition;
// the true/false arms of Sel0 are read as %x and the inner select.
// ULE, UGE (on Cmp0) and SLE (on the inner compare) are rejected outright.
// Returns the new outermost select, or nullptr if the pattern does not match,
// a one-use requirement is not met, or either threshold precondition fails.
1135+
static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
1136+
InstCombiner::BuilderTy &Builder) {
1137+
Value *X = Sel0.getTrueValue();
1138+
Value *Sel1 = Sel0.getFalseValue();
1139+
1140+
// First match the condition of the outermost select.
1141+
// Said condition must be one-use.
1142+
if (!Cmp0.hasOneUse())
1143+
return nullptr;
1144+
Value *Cmp00 = Cmp0.getOperand(0);
1145+
Constant *C0;
1146+
if (!match(Cmp0.getOperand(1),
1147+
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
1148+
return nullptr;
1149+
// Canonicalize Cmp0 into the form we expect.
1150+
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
1151+
switch (Cmp0.getPredicate()) {
1152+
case ICmpInst::Predicate::ICMP_ULT:
1153+
break; // Great!
1154+
case ICmpInst::Predicate::ICMP_ULE:
1155+
// We'd have to increment C0 by one, and for that it must not have all-ones
1156+
// element, but then it would have been canonicalized to 'ult' before
1157+
// we get here. So we can't do anything useful with 'ule'.
1158+
return nullptr;
1159+
case ICmpInst::Predicate::ICMP_UGT:
1160+
// We want to canonicalize it to 'ult', so we'll need to increment C0,
1161+
// which again means it must not have any all-ones elements.
1162+
if (!match(C0,
1163+
m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
1164+
APInt::getAllOnesValue(
1165+
C0->getType()->getScalarSizeInBits()))))
1166+
return nullptr; // Can't do, have all-ones element[s].
1167+
C0 = AddOne(C0);
1168+
std::swap(X, Sel1); // Inverted predicate: the select arms trade roles.
1169+
break;
1170+
case ICmpInst::Predicate::ICMP_UGE:
1171+
// The only way we'd get this predicate is if this `icmp` has extra uses,
1172+
// but then we won't be able to do this fold.
1173+
return nullptr;
1174+
default:
1175+
return nullptr; // Unknown predicate.
1176+
}
1177+
1178+
// Now that we've canonicalized the ICmp, we know the X we expect;
1179+
// the select, on the other hand, should be one-use.
1180+
if (!Sel1->hasOneUse())
1181+
return nullptr;
1182+
1183+
// We now can finish matching the condition of the outermost select:
1184+
// it should either be the X itself, or an addition of some constant to X.
1185+
Constant *C1;
1186+
if (Cmp00 == X)
1187+
C1 = ConstantInt::getNullValue(Sel0.getType());
1188+
else if (!match(Cmp00,
1189+
m_Add(m_Specific(X),
1190+
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
1191+
return nullptr;
1192+
1193+
Value *Cmp1;
1194+
ICmpInst::Predicate Pred1;
1195+
Constant *C2;
1196+
Value *ReplacementLow, *ReplacementHigh;
1197+
if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
1198+
m_Value(ReplacementHigh))) ||
1199+
!match(Cmp1,
1200+
m_ICmp(Pred1, m_Specific(X),
1201+
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2)))))
1202+
return nullptr;
1203+
1204+
if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse()))
1205+
return nullptr; // Not enough one-use instructions for the fold.
1206+
// FIXME: this restriction could be relaxed if Cmp1 can be reused as one of
1207+
// two comparisons we'll need to build.
1208+
1209+
// Canonicalize Cmp1 into the form we expect.
1210+
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
1211+
switch (Pred1) {
1212+
case ICmpInst::Predicate::ICMP_SLT:
1213+
break;
1214+
case ICmpInst::Predicate::ICMP_SLE:
1215+
// We'd have to increment C2 by one, and for that it must not have signed
1216+
// max element, but then it would have been canonicalized to 'slt' before
1217+
// we get here. So we can't do anything useful with 'sle'.
1218+
return nullptr;
1219+
case ICmpInst::Predicate::ICMP_SGT:
1220+
// We want to canonicalize it to 'slt', so we'll need to increment C2,
1221+
// which again means it must not have any signed max elements.
1222+
if (!match(C2,
1223+
m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
1224+
APInt::getSignedMaxValue(
1225+
C2->getType()->getScalarSizeInBits()))))
1226+
return nullptr; // Can't do, have signed max element[s].
1227+
C2 = AddOne(C2);
1228+
LLVM_FALLTHROUGH;
1229+
case ICmpInst::Predicate::ICMP_SGE:
1230+
// Also non-canonical, but here we don't need to change C2,
1231+
// so we don't have any restrictions on C2, so we can just handle it.
1232+
std::swap(ReplacementLow, ReplacementHigh); // Inverted predicate: low/high arms trade places.
1233+
break;
1234+
default:
1235+
return nullptr; // Unknown predicate.
1236+
}
1237+
1238+
// The thresholds of this clamp-like pattern.
1239+
auto *ThresholdLowIncl = ConstantExpr::getNeg(C1);
1240+
auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1);
1241+
1242+
// The fold has a precondition 1: C2 s>= ThresholdLow
1243+
auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2,
1244+
ThresholdLowIncl);
1245+
if (!match(Precond1, m_One()))
1246+
return nullptr;
1247+
// The fold has a precondition 2: C2 s<= ThresholdHigh
1248+
auto *Precond2 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SLE, C2,
1249+
ThresholdHighExcl);
1250+
if (!match(Precond2, m_One()))
1251+
return nullptr;
1252+
1253+
// All good, finally emit the new pattern. The two compares and the inner select go through Builder; the outermost select is created directly and returned.
1254+
Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl);
1255+
Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl);
1256+
Value *MaybeReplacedLow =
1257+
Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X);
1258+
Instruction *MaybeReplacedHigh =
1259+
SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);
1260+
1261+
return MaybeReplacedHigh;
1262+
}
1263+
11211264
/// Visit a SelectInst that has an ICmpInst as its first operand.
11221265
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
11231266
ICmpInst *ICI) {
@@ -1130,6 +1273,9 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
11301273
if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, Builder))
11311274
return NewAbs;
11321275

1276+
if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder))
1277+
return NewAbs;
1278+
11331279
bool Changed = adjustMinMax(SI, *ICI);
11341280

11351281
if (Value *V = foldSelectICmpAnd(SI, ICI, Builder))

llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll

Lines changed: 53 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,10 @@
2525

2626
define i32 @t0_ult_slt_128(i32 %x, i32 %replacement_low, i32 %replacement_high) {
2727
; CHECK-LABEL: @t0_ult_slt_128(
28-
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], 128
29-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[REPLACEMENT_HIGH:%.*]]
30-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
31-
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
32-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[X]], i32 [[T1]]
28+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
29+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
30+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
31+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
3332
; CHECK-NEXT: ret i32 [[R]]
3433
;
3534
%t0 = icmp slt i32 %x, 128
@@ -41,11 +40,10 @@ define i32 @t0_ult_slt_128(i32 %x, i32 %replacement_low, i32 %replacement_high)
4140
}
4241
define i32 @t1_ult_slt_0(i32 %x, i32 %replacement_low, i32 %replacement_high) {
4342
; CHECK-LABEL: @t1_ult_slt_0(
44-
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], -16
45-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[REPLACEMENT_HIGH:%.*]]
46-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
47-
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
48-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[X]], i32 [[T1]]
43+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
44+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
45+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
46+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
4947
; CHECK-NEXT: ret i32 [[R]]
5048
;
5149
%t0 = icmp slt i32 %x, -16
@@ -58,11 +56,10 @@ define i32 @t1_ult_slt_0(i32 %x, i32 %replacement_low, i32 %replacement_high) {
5856

5957
define i32 @t2_ult_sgt_128(i32 %x, i32 %replacement_low, i32 %replacement_high) {
6058
; CHECK-LABEL: @t2_ult_sgt_128(
61-
; CHECK-NEXT: [[T0:%.*]] = icmp sgt i32 [[X:%.*]], 127
62-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_HIGH:%.*]], i32 [[REPLACEMENT_LOW:%.*]]
63-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
64-
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
65-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[X]], i32 [[T1]]
59+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
60+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
61+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
62+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
6663
; CHECK-NEXT: ret i32 [[R]]
6764
;
6865
%t0 = icmp sgt i32 %x, 127
@@ -74,11 +71,10 @@ define i32 @t2_ult_sgt_128(i32 %x, i32 %replacement_low, i32 %replacement_high)
7471
}
7572
define i32 @t3_ult_sgt_neg1(i32 %x, i32 %replacement_low, i32 %replacement_high) {
7673
; CHECK-LABEL: @t3_ult_sgt_neg1(
77-
; CHECK-NEXT: [[T0:%.*]] = icmp sgt i32 [[X:%.*]], -17
78-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_HIGH:%.*]], i32 [[REPLACEMENT_LOW:%.*]]
79-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
80-
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
81-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[X]], i32 [[T1]]
74+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
75+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
76+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
77+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
8278
; CHECK-NEXT: ret i32 [[R]]
8379
;
8480
%t0 = icmp sgt i32 %x, -17
@@ -91,11 +87,10 @@ define i32 @t3_ult_sgt_neg1(i32 %x, i32 %replacement_low, i32 %replacement_high)
9187

9288
define i32 @t4_ugt_slt_128(i32 %x, i32 %replacement_low, i32 %replacement_high) {
9389
; CHECK-LABEL: @t4_ugt_slt_128(
94-
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], 128
95-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[REPLACEMENT_HIGH:%.*]]
96-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
97-
; CHECK-NEXT: [[T3:%.*]] = icmp ugt i32 [[T2]], 143
98-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[T1]], i32 [[X]]
90+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
91+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
92+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
93+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
9994
; CHECK-NEXT: ret i32 [[R]]
10095
;
10196
%t0 = icmp slt i32 %x, 128
@@ -107,11 +102,10 @@ define i32 @t4_ugt_slt_128(i32 %x, i32 %replacement_low, i32 %replacement_high)
107102
}
108103
define i32 @t5_ugt_slt_0(i32 %x, i32 %replacement_low, i32 %replacement_high) {
109104
; CHECK-LABEL: @t5_ugt_slt_0(
110-
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], -16
111-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[REPLACEMENT_HIGH:%.*]]
112-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
113-
; CHECK-NEXT: [[T3:%.*]] = icmp ugt i32 [[T2]], 143
114-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[T1]], i32 [[X]]
105+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
106+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
107+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
108+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
115109
; CHECK-NEXT: ret i32 [[R]]
116110
;
117111
%t0 = icmp slt i32 %x, -16
@@ -124,11 +118,10 @@ define i32 @t5_ugt_slt_0(i32 %x, i32 %replacement_low, i32 %replacement_high) {
124118

125119
define i32 @t6_ugt_sgt_128(i32 %x, i32 %replacement_low, i32 %replacement_high) {
126120
; CHECK-LABEL: @t6_ugt_sgt_128(
127-
; CHECK-NEXT: [[T0:%.*]] = icmp sgt i32 [[X:%.*]], 127
128-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_HIGH:%.*]], i32 [[REPLACEMENT_LOW:%.*]]
129-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
130-
; CHECK-NEXT: [[T3:%.*]] = icmp ugt i32 [[T2]], 143
131-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[T1]], i32 [[X]]
121+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
122+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
123+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
124+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
132125
; CHECK-NEXT: ret i32 [[R]]
133126
;
134127
%t0 = icmp sgt i32 %x, 127
@@ -140,11 +133,10 @@ define i32 @t6_ugt_sgt_128(i32 %x, i32 %replacement_low, i32 %replacement_high)
140133
}
141134
define i32 @t7_ugt_sgt_neg1(i32 %x, i32 %replacement_low, i32 %replacement_high) {
142135
; CHECK-LABEL: @t7_ugt_sgt_neg1(
143-
; CHECK-NEXT: [[T0:%.*]] = icmp sgt i32 [[X:%.*]], -17
144-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_HIGH:%.*]], i32 [[REPLACEMENT_LOW:%.*]]
145-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
146-
; CHECK-NEXT: [[T3:%.*]] = icmp ugt i32 [[T2]], 143
147-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[T1]], i32 [[X]]
136+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], -16
137+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
138+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
139+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
148140
; CHECK-NEXT: ret i32 [[R]]
149141
;
150142
%t0 = icmp sgt i32 %x, -17
@@ -205,10 +197,10 @@ define i32 @t10_oneuse0(i32 %x, i32 %replacement_low, i32 %replacement_high) {
205197
; CHECK-LABEL: @t10_oneuse0(
206198
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], 64
207199
; CHECK-NEXT: call void @use1(i1 [[T0]])
208-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[REPLACEMENT_HIGH:%.*]]
209-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
210-
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
211-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[X]], i32 [[T1]]
200+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], -16
201+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
202+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
203+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
212204
; CHECK-NEXT: ret i32 [[R]]
213205
;
214206
%t0 = icmp slt i32 %x, 64
@@ -241,12 +233,12 @@ define i32 @n11_oneuse1(i32 %x, i32 %replacement_low, i32 %replacement_high) {
241233
; This one is ok.
242234
define i32 @t12_oneuse2(i32 %x, i32 %replacement_low, i32 %replacement_high) {
243235
; CHECK-LABEL: @t12_oneuse2(
244-
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 [[X:%.*]], 64
245-
; CHECK-NEXT: [[T1:%.*]] = select i1 [[T0]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[REPLACEMENT_HIGH:%.*]]
246-
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X]], 16
236+
; CHECK-NEXT: [[T2:%.*]] = add i32 [[X:%.*]], 16
247237
; CHECK-NEXT: call void @use32(i32 [[T2]])
248-
; CHECK-NEXT: [[T3:%.*]] = icmp ult i32 [[T2]], 144
249-
; CHECK-NEXT: [[R:%.*]] = select i1 [[T3]], i32 [[X]], i32 [[T1]]
238+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], -16
239+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[X]], 128
240+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i32 [[REPLACEMENT_LOW:%.*]], i32 [[X]]
241+
; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i32 [[TMP3]], i32 [[REPLACEMENT_HIGH:%.*]]
250242
; CHECK-NEXT: ret i32 [[R]]
251243
;
252244
%t0 = icmp slt i32 %x, 64
@@ -414,11 +406,10 @@ define i32 @n19_oneuse9(i32 %x, i32 %replacement_low, i32 %replacement_high) {
414406

415407
define <2 x i32> @t20_ult_slt_vec_splat(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) {
416408
; CHECK-LABEL: @t20_ult_slt_vec_splat(
417-
; CHECK-NEXT: [[T0:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 128, i32 128>
418-
; CHECK-NEXT: [[T1:%.*]] = select <2 x i1> [[T0]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[REPLACEMENT_HIGH:%.*]]
419-
; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[X]], <i32 16, i32 16>
420-
; CHECK-NEXT: [[T3:%.*]] = icmp ult <2 x i32> [[T2]], <i32 144, i32 144>
421-
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[X]], <2 x i32> [[T1]]
409+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 -16, i32 -16>
410+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[X]], <i32 128, i32 128>
411+
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]]
412+
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[REPLACEMENT_HIGH:%.*]]
422413
; CHECK-NEXT: ret <2 x i32> [[R]]
423414
;
424415
%t0 = icmp slt <2 x i32> %x, <i32 128, i32 128>
@@ -430,11 +421,10 @@ define <2 x i32> @t20_ult_slt_vec_splat(<2 x i32> %x, <2 x i32> %replacement_low
430421
}
431422
define <2 x i32> @t21_ult_slt_vec_nonsplat(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) {
432423
; CHECK-LABEL: @t21_ult_slt_vec_nonsplat(
433-
; CHECK-NEXT: [[T0:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 128, i32 64>
434-
; CHECK-NEXT: [[T1:%.*]] = select <2 x i1> [[T0]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[REPLACEMENT_HIGH:%.*]]
435-
; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[X]], <i32 16, i32 8>
436-
; CHECK-NEXT: [[T3:%.*]] = icmp ult <2 x i32> [[T2]], <i32 144, i32 264>
437-
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[X]], <2 x i32> [[T1]]
424+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 -16, i32 -8>
425+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[X]], <i32 128, i32 256>
426+
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]]
427+
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[REPLACEMENT_HIGH:%.*]]
438428
; CHECK-NEXT: ret <2 x i32> [[R]]
439429
;
440430
%t0 = icmp slt <2 x i32> %x, <i32 128, i32 64>
@@ -473,10 +463,10 @@ define <2 x i32> @t23_ult_sge(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32
473463
; CHECK-LABEL: @t23_ult_sge(
474464
; CHECK-NEXT: [[T0:%.*]] = icmp sge <2 x i32> [[X:%.*]], <i32 128, i32 -2147483648>
475465
; CHECK-NEXT: call void @use2xi1(<2 x i1> [[T0]])
476-
; CHECK-NEXT: [[T1:%.*]] = select <2 x i1> [[T0]], <2 x i32> [[REPLACEMENT_HIGH:%.*]], <2 x i32> [[REPLACEMENT_LOW:%.*]]
477-
; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[X]], <i32 16, i32 -2147483648>
478-
; CHECK-NEXT: [[T3:%.*]] = icmp ult <2 x i32> [[T2]], <i32 144, i32 -1>
479-
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[X]], <2 x i32> [[T1]]
466+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X]], <i32 -16, i32 -2147483648>
467+
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[X]], <i32 128, i32 2147483647>
468+
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]]
469+
; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[REPLACEMENT_HIGH:%.*]]
480470
; CHECK-NEXT: ret <2 x i32> [[R]]
481471
;
482472
%t0 = icmp sge <2 x i32> %x, <i32 128, i32 -2147483648>

0 commit comments

Comments
 (0)