@@ -2666,74 +2666,24 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
2666
2666
}
2667
2667
case Instruction::Store: {
2668
2668
// Check if the stores are consecutive or if we need to swizzle them.
2669
- llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand ()->getType ();
2670
- // Make sure all stores in the bundle are simple - we can't vectorize
2671
- // atomic or volatile stores.
2672
- SmallVector<Value *, 4 > PointerOps (VL.size ());
2673
- ValueList Operands (VL.size ());
2674
- auto POIter = PointerOps.begin ();
2675
- auto OIter = Operands.begin ();
2676
- for (Value *V : VL) {
2677
- auto *SI = cast<StoreInst>(V);
2678
- if (!SI->isSimple ()) {
2669
+ for (unsigned i = 0 , e = VL.size () - 1 ; i < e; ++i)
2670
+ if (!isConsecutiveAccess (VL[i], VL[i + 1 ], *DL, *SE)) {
2679
2671
BS.cancelScheduling (VL, VL0);
2680
2672
newTreeEntry (VL, None /* not vectorized*/ , S, UserTreeIdx,
2681
2673
ReuseShuffleIndicies);
2682
- LLVM_DEBUG (dbgs () << " SLP: Gathering non-simple stores .\n " );
2674
+ LLVM_DEBUG (dbgs () << " SLP: Non-consecutive store .\n " );
2683
2675
return ;
2684
2676
}
2685
- *POIter = SI->getPointerOperand ();
2686
- *OIter = SI->getValueOperand ();
2687
- ++POIter;
2688
- ++OIter;
2689
- }
2690
2677
2691
- OrdersType CurrentOrder;
2692
- // Check the order of pointer operands.
2693
- if (llvm::sortPtrAccesses (PointerOps, *DL, *SE, CurrentOrder)) {
2694
- Value *Ptr0;
2695
- Value *PtrN;
2696
- if (CurrentOrder.empty ()) {
2697
- Ptr0 = PointerOps.front ();
2698
- PtrN = PointerOps.back ();
2699
- } else {
2700
- Ptr0 = PointerOps[CurrentOrder.front ()];
2701
- PtrN = PointerOps[CurrentOrder.back ()];
2702
- }
2703
- const SCEV *Scev0 = SE->getSCEV (Ptr0);
2704
- const SCEV *ScevN = SE->getSCEV (PtrN);
2705
- const auto *Diff =
2706
- dyn_cast<SCEVConstant>(SE->getMinusSCEV (ScevN, Scev0));
2707
- uint64_t Size = DL->getTypeAllocSize (ScalarTy);
2708
- // Check that the sorted pointer operands are consecutive.
2709
- if (Diff && Diff->getAPInt () == (VL.size () - 1 ) * Size) {
2710
- if (CurrentOrder.empty ()) {
2711
- // Original stores are consecutive and does not require reordering.
2712
- ++NumOpsWantToKeepOriginalOrder;
2713
- TreeEntry *TE = newTreeEntry (VL, Bundle /* vectorized*/ , S,
2714
- UserTreeIdx, ReuseShuffleIndicies);
2715
- TE->setOperandsInOrder ();
2716
- buildTree_rec (Operands, Depth + 1 , {TE, 0 });
2717
- LLVM_DEBUG (dbgs () << " SLP: added a vector of stores.\n " );
2718
- } else {
2719
- // Need to reorder.
2720
- auto I = NumOpsWantToKeepOrder.try_emplace (CurrentOrder).first ;
2721
- ++(I->getSecond ());
2722
- TreeEntry *TE =
2723
- newTreeEntry (VL, Bundle /* vectorized*/ , S, UserTreeIdx,
2724
- ReuseShuffleIndicies, I->getFirst ());
2725
- TE->setOperandsInOrder ();
2726
- buildTree_rec (Operands, Depth + 1 , {TE, 0 });
2727
- LLVM_DEBUG (dbgs () << " SLP: added a vector of jumbled stores.\n " );
2728
- }
2729
- return ;
2730
- }
2731
- }
2678
+ TreeEntry *TE = newTreeEntry (VL, Bundle /* vectorized*/ , S, UserTreeIdx,
2679
+ ReuseShuffleIndicies);
2680
+ LLVM_DEBUG (dbgs () << " SLP: added a vector of stores.\n " );
2732
2681
2733
- BS.cancelScheduling (VL, VL0);
2734
- newTreeEntry (VL, None /* not vectorized*/ , S, UserTreeIdx,
2735
- ReuseShuffleIndicies);
2736
- LLVM_DEBUG (dbgs () << " SLP: Non-consecutive store.\n " );
2682
+ ValueList Operands;
2683
+ for (Value *V : VL)
2684
+ Operands.push_back (cast<Instruction>(V)->getOperand (0 ));
2685
+ TE->setOperandsInOrder ();
2686
+ buildTree_rec (Operands, Depth + 1 , {TE, 0 });
2737
2687
return ;
2738
2688
}
2739
2689
case Instruction::Call: {
@@ -3231,23 +3181,15 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
3231
3181
}
3232
3182
case Instruction::Store: {
3233
3183
// We know that we can merge the stores. Calculate the cost.
3234
- bool IsReorder = !E->ReorderIndices .empty ();
3235
- auto *SI =
3236
- cast<StoreInst>(IsReorder ? VL[E->ReorderIndices .front ()] : VL0);
3237
- MaybeAlign Alignment (SI->getAlignment ());
3184
+ MaybeAlign alignment (cast<StoreInst>(VL0)->getAlignment ());
3238
3185
int ScalarEltCost =
3239
- TTI->getMemoryOpCost (Instruction::Store, ScalarTy, Alignment , 0 , VL0);
3186
+ TTI->getMemoryOpCost (Instruction::Store, ScalarTy, alignment , 0 , VL0);
3240
3187
if (NeedToShuffleReuses) {
3241
3188
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size ()) * ScalarEltCost;
3242
3189
}
3243
3190
int ScalarStCost = VecTy->getNumElements () * ScalarEltCost;
3244
- int VecStCost = TTI->getMemoryOpCost (Instruction::Store,
3245
- VecTy, Alignment, 0 , VL0);
3246
- if (IsReorder) {
3247
- // TODO: Merge this shuffle with the ReuseShuffleCost.
3248
- VecStCost += TTI->getShuffleCost (
3249
- TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
3250
- }
3191
+ int VecStCost =
3192
+ TTI->getMemoryOpCost (Instruction::Store, VecTy, alignment, 0 , VL0);
3251
3193
return ReuseShuffleCost + VecStCost - ScalarStCost;
3252
3194
}
3253
3195
case Instruction::Call: {
@@ -4109,22 +4051,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
4109
4051
return V;
4110
4052
}
4111
4053
case Instruction::Store: {
4112
- bool IsReorder = !E->ReorderIndices .empty ();
4113
- auto *SI = cast<StoreInst>(
4114
- IsReorder ? E->Scalars [E->ReorderIndices .front ()] : VL0);
4054
+ StoreInst *SI = cast<StoreInst>(VL0);
4115
4055
unsigned Alignment = SI->getAlignment ();
4116
4056
unsigned AS = SI->getPointerAddressSpace ();
4117
4057
4118
4058
setInsertPointAfterBundle (E);
4119
4059
4120
4060
Value *VecValue = vectorizeTree (E->getOperand (0 ));
4121
- if (IsReorder) {
4122
- OrdersType Mask;
4123
- inversePermutation (E->ReorderIndices , Mask);
4124
- VecValue = Builder.CreateShuffleVector (
4125
- VecValue, UndefValue::get (VecValue->getType ()), E->ReorderIndices ,
4126
- " reorder_shuffle" );
4127
- }
4128
4061
Value *ScalarPtr = SI->getPointerOperand ();
4129
4062
Value *VecPtr = Builder.CreateBitCast (ScalarPtr, VecTy->getPointerTo (AS));
4130
4063
StoreInst *ST = Builder.CreateStore (VecValue, VecPtr);
@@ -5414,14 +5347,6 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
5414
5347
<< " \n " );
5415
5348
5416
5349
R.buildTree (Chain);
5417
- Optional<ArrayRef<unsigned >> Order = R.bestOrder ();
5418
- if (Order) {
5419
- // TODO: reorder tree nodes without tree rebuilding.
5420
- SmallVector<Value *, 4 > ReorderedOps (Chain.rbegin (), Chain.rend ());
5421
- llvm::transform (*Order, ReorderedOps.begin (),
5422
- [Chain](const unsigned Idx) { return Chain[Idx]; });
5423
- R.buildTree (ReorderedOps);
5424
- }
5425
5350
if (R.isTreeTinyAndNotFullyVectorizable ())
5426
5351
return false ;
5427
5352
0 commit comments