diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 3d7ccd55ee042..c6af880794bfa 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -405,13 +405,13 @@ class IRTranslator : public MachineFunctionPass { BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough, bool FallthroughUnreachable); - bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond, - MachineBasicBlock *Fallthrough, - bool FallthroughUnreachable, - BranchProbability UnhandledProbs, - MachineBasicBlock *CurMBB, - MachineIRBuilder &MIB, - MachineBasicBlock *SwitchMBB); + bool lowerSwitchAndOrRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond, + MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable, + BranchProbability UnhandledProbs, + MachineBasicBlock *CurMBB, + MachineIRBuilder &MIB, + MachineBasicBlock *SwitchMBB); bool lowerBitTestWorkItem( SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h index 9f1d6f7b4f952..6b7cb8d9ce45a 100644 --- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -35,7 +35,8 @@ enum CaseClusterKind { /// A cluster of cases suitable for jump table lowering. CC_JumpTable, /// A cluster of cases suitable for bit test lowering. - CC_BitTests + CC_BitTests, + CC_And }; /// A cluster of case labels. @@ -141,6 +142,8 @@ struct CaseBlock { BranchProbability TrueProb, FalseProb; bool IsUnpredictable; + bool EmitAnd = false; + // Constructor for SelectionDAG. 
CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, const Value *cmpmiddle, MachineBasicBlock *truebb, diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 541269ab6bfce..2374e33d16ee9 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -930,7 +930,14 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, const LLT i1Ty = LLT::scalar(1); // Build the compare. - if (!CB.CmpMHS) { + if (CB.EmitAnd) { + const LLT Ty = getLLTForType(*CB.CmpRHS->getType(), *DL); + Register CondLHS = getOrCreateVReg(*CB.CmpLHS); + Register C = getOrCreateVReg(*CB.CmpRHS); + Register And = MIB.buildAnd(Ty, CondLHS, C).getReg(0); + auto Zero = MIB.buildConstant(Ty, 0); + Cond = MIB.buildICmp(CmpInst::ICMP_EQ, i1Ty, And, Zero).getReg(0); + } else if (!CB.CmpMHS) { const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS); // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, @@ -1058,18 +1065,15 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W, } return true; } -bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, - Value *Cond, - MachineBasicBlock *Fallthrough, - bool FallthroughUnreachable, - BranchProbability UnhandledProbs, - MachineBasicBlock *CurMBB, - MachineIRBuilder &MIB, - MachineBasicBlock *SwitchMBB) { +bool IRTranslator::lowerSwitchAndOrRangeWorkItem( + SwitchCG::CaseClusterIt I, Value *Cond, MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable, BranchProbability UnhandledProbs, + MachineBasicBlock *CurMBB, MachineIRBuilder &MIB, + MachineBasicBlock *SwitchMBB) { using namespace SwitchCG; const Value *RHS, *LHS, *MHS; CmpInst::Predicate Pred; - if (I->Low == I->High) { + if (I->Low == I->High || I->Kind == CC_And) { // Check Cond == I->Low. 
Pred = CmpInst::ICMP_EQ; LHS = Cond; @@ -1087,6 +1091,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, // The false probability is the sum of all unhandled cases. CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs); + CB.EmitAnd = I->Kind == CC_And; emitSwitchCase(CB, SwitchMBB, MIB); return true; @@ -1326,10 +1331,11 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, } break; } + case CC_And: case CC_Range: { - if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough, - FallthroughUnreachable, UnhandledProbs, - CurMBB, MIB, SwitchMBB)) { + if (!lowerSwitchAndOrRangeWorkItem(I, Cond, Fallthrough, + FallthroughUnreachable, UnhandledProbs, + CurMBB, MIB, SwitchMBB)) { LLVM_DEBUG(dbgs() << "Failed to lower switch range"); return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 62ba801f69929..be6e3774452cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2887,7 +2887,17 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType()); // Build the setcc now. - if (!CB.CmpMHS) { + if (CB.EmitAnd) { + SDLoc dl = getCurSDLoc(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), CB.CmpRHS->getType(), true); + SDValue C = DAG.getConstant(*cast<ConstantInt>(CB.CmpRHS), dl, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue CondLHS = getValue(CB.CmpLHS); + SDValue And = DAG.getNode(ISD::AND, dl, C.getValueType(), CondLHS, C); + Cond = DAG.getSetCC(dl, MVT::i1, And, Zero, ISD::SETEQ); + } else if (!CB.CmpMHS) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. 
if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && @@ -12308,6 +12318,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } break; } + case CC_And: case CC_Range: { const Value *RHS, *LHS, *MHS; ISD::CondCode CC; @@ -12319,6 +12330,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. + assert(I->Kind != CC_And && "CC_And must be handled above"); CC = ISD::SETLE; LHS = I->Low; MHS = Cond; @@ -12333,6 +12345,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, getCurSDLoc(), I->Prob, UnhandledProbs); + CB.EmitAnd = I->Kind == CC_And; if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 038c499fe236e..9c6448cb0e173 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -362,6 +362,47 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters, } } Clusters.resize(DstIndex); + + // Don't try to fold clusters checking for zero and a power-of-2 constant, if + // larger ranges may be lowered as balanced binary trees later on, which won't + // work correctly after applying the transform below. + if (Clusters.size() > 4) + return; + + // Check if the clusters contain one checking for 0 and another one checking + // for a power-of-2 constant with matching destinations. Those clusters can be + // combined to a single one with CC_And. 
+ unsigned ZeroIdx = -1; + for (const auto &[Idx, C] : enumerate(Clusters)) { + if (C.Kind != CC_Range || C.Low != C.High) + continue; + if (C.Low->isZero()) { + ZeroIdx = Idx; + break; + } + } + if (ZeroIdx == -1u) + return; + + unsigned Pow2Idx = -1; + for (const auto &[Idx, C] : enumerate(Clusters)) { + if (C.Kind != CC_Range || C.Low != C.High || C.MBB != Clusters[ZeroIdx].MBB) + continue; + if (C.Low->getValue().isPowerOf2()) { + Pow2Idx = Idx; + break; + } + } + if (Pow2Idx == -1u) + return; + + APInt Pow2 = Clusters[Pow2Idx].Low->getValue(); + APInt NewC = ~Pow2; + Clusters[ZeroIdx].Low = ConstantInt::get(SI->getContext(), NewC); + Clusters[ZeroIdx].High = ConstantInt::get(SI->getContext(), NewC); + Clusters[ZeroIdx].Kind = CC_And; + Clusters[ZeroIdx].Prob += Clusters[Pow2Idx].Prob; + Clusters.erase(Clusters.begin() + Pow2Idx); } bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters, diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll index 775ab3fe110e0..716b7c0ee860c 100644 --- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll +++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll @@ -5,57 +5,50 @@ define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) { ; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w9, #100 ; =0x64 -; CHECK-NEXT: mov w8, #20 ; =0x14 +; CHECK-NEXT: mov w8, #100 ; =0x64 +; CHECK-NEXT: mov w9, #223 ; =0xdf ; CHECK-NEXT: LBB0_1: ; %loop.header ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands w10, w0, #0xff -; CHECK-NEXT: b.eq LBB0_6 +; CHECK-NEXT: tst w0, w9 +; CHECK-NEXT: b.eq LBB0_4 ; CHECK-NEXT: ; %bb.2: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: cmp w10, #32 -; CHECK-NEXT: b.eq LBB0_6 -; CHECK-NEXT: ; %bb.3: ; %loop.header -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: and w10, w0, #0xff ; 
CHECK-NEXT: cmp w10, #124 -; CHECK-NEXT: b.eq LBB0_7 -; CHECK-NEXT: ; %bb.4: ; %loop.latch +; CHECK-NEXT: b.eq LBB0_5 +; CHECK-NEXT: ; %bb.3: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: subs w9, w9, #1 +; CHECK-NEXT: subs w8, w8, #1 ; CHECK-NEXT: b.ne LBB0_1 -; CHECK-NEXT: ; %bb.5: -; CHECK-NEXT: mov w8, #20 ; =0x14 -; CHECK-NEXT: LBB0_6: ; %common.ret -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: LBB0_4: +; CHECK-NEXT: mov w0, #20 ; =0x14 ; CHECK-NEXT: ret -; CHECK-NEXT: LBB0_7: ; %e2 +; CHECK-NEXT: LBB0_5: ; %e2 ; CHECK-NEXT: mov w0, #30 ; =0x1e ; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_with_matching_dests_0_and_pow2_3_cases: ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: mov w8, w0 -; GISEL-NEXT: mov w0, #20 ; =0x14 ; GISEL-NEXT: mov w9, #100 ; =0x64 +; GISEL-NEXT: mov w10, #223 ; =0xdf +; GISEL-NEXT: mov w0, #20 ; =0x14 ; GISEL-NEXT: LBB0_1: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: ands w10, w8, #0xff -; GISEL-NEXT: b.eq LBB0_5 +; GISEL-NEXT: tst w8, w10 +; GISEL-NEXT: b.eq LBB0_4 ; GISEL-NEXT: ; %bb.2: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; GISEL-NEXT: cmp w10, #32 +; GISEL-NEXT: and w11, w8, #0xff +; GISEL-NEXT: cmp w11, #124 ; GISEL-NEXT: b.eq LBB0_5 -; GISEL-NEXT: ; %bb.3: ; %loop.header -; GISEL-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; GISEL-NEXT: cmp w10, #124 -; GISEL-NEXT: b.eq LBB0_6 -; GISEL-NEXT: ; %bb.4: ; %loop.latch +; GISEL-NEXT: ; %bb.3: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB0_1 Depth=1 ; GISEL-NEXT: subs w9, w9, #1 ; GISEL-NEXT: b.ne LBB0_1 -; GISEL-NEXT: LBB0_5: ; %common.ret +; GISEL-NEXT: LBB0_4: ; %common.ret ; GISEL-NEXT: ret -; GISEL-NEXT: LBB0_6: ; %e2 +; GISEL-NEXT: LBB0_5: ; %e2 ; GISEL-NEXT: mov w0, #30 ; =0x1e ; GISEL-NEXT: ret entry: @@ -84,59 +77,55 @@ e2: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(i8 %v) { ; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_swapped: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: 
mov w9, #100 ; =0x64 -; CHECK-NEXT: mov w8, #20 ; =0x14 +; CHECK-NEXT: mov w8, #100 ; =0x64 +; CHECK-NEXT: mov w9, #223 ; =0xdf ; CHECK-NEXT: LBB1_1: ; %loop.header ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands w10, w0, #0xff -; CHECK-NEXT: b.eq LBB1_6 +; CHECK-NEXT: tst w0, w9 +; CHECK-NEXT: b.eq LBB1_5 ; CHECK-NEXT: ; %bb.2: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: cmp w10, #32 -; CHECK-NEXT: b.eq LBB1_6 -; CHECK-NEXT: ; %bb.3: ; %loop.header -; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: and w10, w0, #0xff ; CHECK-NEXT: cmp w10, #124 -; CHECK-NEXT: b.eq LBB1_7 -; CHECK-NEXT: ; %bb.4: ; %loop.latch +; CHECK-NEXT: b.eq LBB1_6 +; CHECK-NEXT: ; %bb.3: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: subs w9, w9, #1 +; CHECK-NEXT: subs w8, w8, #1 ; CHECK-NEXT: b.ne LBB1_1 -; CHECK-NEXT: ; %bb.5: -; CHECK-NEXT: mov w8, #10 ; =0xa -; CHECK-NEXT: LBB1_6: ; %common.ret -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: mov w0, #10 ; =0xa +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_5: +; CHECK-NEXT: mov w0, #20 ; =0x14 ; CHECK-NEXT: ret -; CHECK-NEXT: LBB1_7: ; %e2 +; CHECK-NEXT: LBB1_6: ; %e2 ; CHECK-NEXT: mov w0, #30 ; =0x1e ; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_swapped: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: mov w9, #100 ; =0x64 -; GISEL-NEXT: mov w8, #20 ; =0x14 +; GISEL-NEXT: mov w8, #100 ; =0x64 +; GISEL-NEXT: mov w9, #223 ; =0xdf ; GISEL-NEXT: LBB1_1: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: ands w10, w0, #0xff -; GISEL-NEXT: b.eq LBB1_6 +; GISEL-NEXT: tst w0, w9 +; GISEL-NEXT: b.eq LBB1_5 ; GISEL-NEXT: ; %bb.2: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; GISEL-NEXT: cmp w10, #32 -; GISEL-NEXT: b.eq LBB1_6 -; GISEL-NEXT: ; %bb.3: ; %loop.header -; GISEL-NEXT: ; in Loop: Header=BB1_1 Depth=1 +; GISEL-NEXT: and w10, w0, #0xff ; GISEL-NEXT: cmp w10, #124 -; 
GISEL-NEXT: b.eq LBB1_7 -; GISEL-NEXT: ; %bb.4: ; %loop.latch +; GISEL-NEXT: b.eq LBB1_6 +; GISEL-NEXT: ; %bb.3: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; GISEL-NEXT: subs w9, w9, #1 +; GISEL-NEXT: subs w8, w8, #1 ; GISEL-NEXT: b.ne LBB1_1 -; GISEL-NEXT: ; %bb.5: -; GISEL-NEXT: mov w8, #10 ; =0xa -; GISEL-NEXT: LBB1_6: ; %common.ret -; GISEL-NEXT: mov w0, w8 +; GISEL-NEXT: ; %bb.4: +; GISEL-NEXT: mov w0, #10 ; =0xa ; GISEL-NEXT: ret -; GISEL-NEXT: LBB1_7: ; %e2 +; GISEL-NEXT: LBB1_5: +; GISEL-NEXT: mov w0, #20 ; =0x14 +; GISEL-NEXT: ret +; GISEL-NEXT: LBB1_6: ; %e2 ; GISEL-NEXT: mov w0, #30 ; =0x1e ; GISEL-NEXT: ret entry: @@ -168,66 +157,64 @@ e2: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(i8 %v, i1 %c) { ; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_with_phi: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: tbz w1, #0, LBB2_8 +; CHECK-NEXT: tbz w1, #0, LBB2_6 ; CHECK-NEXT: ; %bb.1: ; %loop.header.preheader -; CHECK-NEXT: mov w9, #100 ; =0x64 -; CHECK-NEXT: mov w8, #20 ; =0x14 +; CHECK-NEXT: mov w8, #100 ; =0x64 +; CHECK-NEXT: mov w9, #223 ; =0xdf ; CHECK-NEXT: LBB2_2: ; %loop.header ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands w10, w0, #0xff +; CHECK-NEXT: tst w0, w9 ; CHECK-NEXT: b.eq LBB2_7 ; CHECK-NEXT: ; %bb.3: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: cmp w10, #32 -; CHECK-NEXT: b.eq LBB2_7 -; CHECK-NEXT: ; %bb.4: ; %loop.header -; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: and w10, w0, #0xff ; CHECK-NEXT: cmp w10, #124 -; CHECK-NEXT: b.eq LBB2_9 -; CHECK-NEXT: ; %bb.5: ; %loop.latch +; CHECK-NEXT: b.eq LBB2_8 +; CHECK-NEXT: ; %bb.4: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: subs w9, w9, #1 +; CHECK-NEXT: subs w8, w8, #1 ; CHECK-NEXT: b.ne LBB2_2 -; CHECK-NEXT: ; %bb.6: -; CHECK-NEXT: mov w8, #10 ; =0xa -; CHECK-NEXT: LBB2_7: ; %common.ret -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ; %bb.5: +; 
CHECK-NEXT: mov w0, #10 ; =0xa ; CHECK-NEXT: ret -; CHECK-NEXT: LBB2_8: +; CHECK-NEXT: LBB2_6: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret -; CHECK-NEXT: LBB2_9: ; %e2 +; CHECK-NEXT: LBB2_7: +; CHECK-NEXT: mov w0, #20 ; =0x14 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB2_8: ; %e2 ; CHECK-NEXT: mov w0, #30 ; =0x1e ; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_with_phi: ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: mov w8, wzr -; GISEL-NEXT: tbz w1, #0, LBB2_7 +; GISEL-NEXT: tbz w1, #0, LBB2_6 ; GISEL-NEXT: ; %bb.1: ; %loop.header.preheader -; GISEL-NEXT: mov w9, #100 ; =0x64 -; GISEL-NEXT: mov w8, #20 ; =0x14 +; GISEL-NEXT: mov w8, #100 ; =0x64 +; GISEL-NEXT: mov w9, #223 ; =0xdf ; GISEL-NEXT: LBB2_2: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: ands w10, w0, #0xff +; GISEL-NEXT: tst w0, w9 ; GISEL-NEXT: b.eq LBB2_7 ; GISEL-NEXT: ; %bb.3: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GISEL-NEXT: cmp w10, #32 -; GISEL-NEXT: b.eq LBB2_7 -; GISEL-NEXT: ; %bb.4: ; %loop.header -; GISEL-NEXT: ; in Loop: Header=BB2_2 Depth=1 +; GISEL-NEXT: and w10, w0, #0xff ; GISEL-NEXT: cmp w10, #124 ; GISEL-NEXT: b.eq LBB2_8 -; GISEL-NEXT: ; %bb.5: ; %loop.latch +; GISEL-NEXT: ; %bb.4: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GISEL-NEXT: subs w9, w9, #1 +; GISEL-NEXT: subs w8, w8, #1 ; GISEL-NEXT: b.ne LBB2_2 -; GISEL-NEXT: ; %bb.6: +; GISEL-NEXT: ; %bb.5: ; GISEL-NEXT: mov w8, #10 ; =0xa -; GISEL-NEXT: LBB2_7: ; %common.ret +; GISEL-NEXT: LBB2_6: ; %common.ret ; GISEL-NEXT: mov w0, w8 ; GISEL-NEXT: ret +; GISEL-NEXT: LBB2_7: +; GISEL-NEXT: mov w0, #20 ; =0x14 +; GISEL-NEXT: ret ; GISEL-NEXT: LBB2_8: ; %e2 ; GISEL-NEXT: mov w0, #30 ; =0x1e ; GISEL-NEXT: ret @@ -361,21 +348,18 @@ define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(ptr %start) { ; CHECK-NEXT: LBB4_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrb w9, [x8], #1 -; CHECK-NEXT: cbz w9, 
LBB4_4 +; CHECK-NEXT: tst w9, #0xffffffdf +; CHECK-NEXT: b.eq LBB4_4 ; CHECK-NEXT: ; %bb.2: ; %loop ; CHECK-NEXT: ; in Loop: Header=BB4_1 Depth=1 ; CHECK-NEXT: cmp w9, #124 -; CHECK-NEXT: b.eq LBB4_5 -; CHECK-NEXT: ; %bb.3: ; %loop -; CHECK-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; CHECK-NEXT: cmp w9, #32 ; CHECK-NEXT: b.ne LBB4_1 +; CHECK-NEXT: ; %bb.3: ; %e2.loopexit +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret ; CHECK-NEXT: LBB4_4: ; %e1 ; CHECK-NEXT: mov w0, #-1 ; =0xffffffff ; CHECK-NEXT: ret -; CHECK-NEXT: LBB4_5: ; %e2.loopexit -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_in_loop_with_matching_dests_0_and_pow2_3_cases: ; GISEL: ; %bb.0: ; %entry @@ -383,21 +367,18 @@ define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(ptr %start) { ; GISEL-NEXT: LBB4_1: ; %loop ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: ldrb w9, [x8], #1 -; GISEL-NEXT: cbz w9, LBB4_4 +; GISEL-NEXT: tst w9, #0xffffffdf +; GISEL-NEXT: b.eq LBB4_4 ; GISEL-NEXT: ; %bb.2: ; %loop ; GISEL-NEXT: ; in Loop: Header=BB4_1 Depth=1 ; GISEL-NEXT: cmp w9, #124 -; GISEL-NEXT: b.eq LBB4_5 -; GISEL-NEXT: ; %bb.3: ; %loop -; GISEL-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; GISEL-NEXT: cmp w9, #32 ; GISEL-NEXT: b.ne LBB4_1 +; GISEL-NEXT: ; %bb.3: ; %e2.loopexit +; GISEL-NEXT: mov w0, wzr +; GISEL-NEXT: ret ; GISEL-NEXT: LBB4_4: ; %e1 ; GISEL-NEXT: mov w0, #-1 ; =0xffffffff ; GISEL-NEXT: ret -; GISEL-NEXT: LBB4_5: ; %e2.loopexit -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: ret entry: br label %loop @@ -581,13 +562,9 @@ define void @test_successor_with_loop_phi(ptr %A, ptr %B) { ; GISEL-NEXT: ldr w8, [x0] ; GISEL-NEXT: str wzr, [x0] ; GISEL-NEXT: mov x0, x1 -; GISEL-NEXT: cbz w8, LBB7_1 -; GISEL-NEXT: ; %bb.2: ; %loop -; GISEL-NEXT: ; in Loop: Header=BB7_1 Depth=1 -; GISEL-NEXT: cmp w8, #4 -; GISEL-NEXT: mov x0, x1 +; GISEL-NEXT: tst w8, #0xfffffffb ; GISEL-NEXT: b.eq LBB7_1 -; GISEL-NEXT: ; %bb.3: ; %exit +; GISEL-NEXT: ; %bb.2: ; %exit ; GISEL-NEXT: ret 
entry: br label %loop @@ -842,42 +819,40 @@ e1: define void @merge_with_stores(ptr %A, i16 %v) { ; CHECK-LABEL: merge_with_stores: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: sub w9, w8, #10 -; CHECK-NEXT: cmp w9, #2 -; CHECK-NEXT: b.lo LBB11_4 +; CHECK-NEXT: mov w8, #65533 ; =0xfffd +; CHECK-NEXT: tst w1, w8 +; CHECK-NEXT: b.eq LBB11_3 ; CHECK-NEXT: ; %bb.1: ; %entry -; CHECK-NEXT: cbz w8, LBB11_5 -; CHECK-NEXT: ; %bb.2: ; %entry +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: sub w8, w8, #10 ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.eq LBB11_5 -; CHECK-NEXT: ; %bb.3: ; %default.dst -; CHECK-NEXT: strh wzr, [x0] -; CHECK-NEXT: ret -; CHECK-NEXT: LBB11_4: ; %other.dst +; CHECK-NEXT: b.hs LBB11_4 +; CHECK-NEXT: ; %bb.2: ; %other.dst ; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: strh w8, [x0, #36] -; CHECK-NEXT: LBB11_5: ; %pow2.dst +; CHECK-NEXT: LBB11_3: ; %pow2.dst +; CHECK-NEXT: ret +; CHECK-NEXT: LBB11_4: ; %default.dst +; CHECK-NEXT: strh wzr, [x0] ; CHECK-NEXT: ret ; ; GISEL-LABEL: merge_with_stores: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: and w8, w1, #0xffff -; GISEL-NEXT: sub w9, w8, #10 -; GISEL-NEXT: cmp w9, #1 -; GISEL-NEXT: b.ls LBB11_4 +; GISEL-NEXT: mov w8, #65533 ; =0xfffd +; GISEL-NEXT: tst w1, w8 +; GISEL-NEXT: b.eq LBB11_3 ; GISEL-NEXT: ; %bb.1: ; %entry -; GISEL-NEXT: cbz w8, LBB11_5 -; GISEL-NEXT: ; %bb.2: ; %entry -; GISEL-NEXT: cmp w8, #2 -; GISEL-NEXT: b.eq LBB11_5 -; GISEL-NEXT: ; %bb.3: ; %default.dst -; GISEL-NEXT: strh wzr, [x0] -; GISEL-NEXT: ret -; GISEL-NEXT: LBB11_4: ; %other.dst +; GISEL-NEXT: mov w8, #-10 ; =0xfffffff6 +; GISEL-NEXT: add w8, w8, w1, uxth +; GISEL-NEXT: cmp w8, #1 +; GISEL-NEXT: b.hi LBB11_4 +; GISEL-NEXT: ; %bb.2: ; %other.dst ; GISEL-NEXT: mov w8, #1 ; =0x1 ; GISEL-NEXT: strh w8, [x0, #36] -; GISEL-NEXT: LBB11_5: ; %pow2.dst +; GISEL-NEXT: LBB11_3: ; %pow2.dst +; GISEL-NEXT: ret +; GISEL-NEXT: LBB11_4: ; %default.dst +; GISEL-NEXT: strh wzr, [x0] ; GISEL-NEXT: ret entry: switch 
i16 %v, label %default.dst [