@@ -2666,24 +2666,74 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
2666
2666
}
2667
2667
case Instruction::Store: {
2668
2668
// Check if the stores are consecutive or if we need to swizzle them.
2669
- for (unsigned i = 0 , e = VL.size () - 1 ; i < e; ++i)
2670
- if (!isConsecutiveAccess (VL[i], VL[i + 1 ], *DL, *SE)) {
2669
+ llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand ()->getType ();
2670
+ // Make sure all stores in the bundle are simple - we can't vectorize
2671
+ // atomic or volatile stores.
2672
+ SmallVector<Value *, 4 > PointerOps (VL.size ());
2673
+ ValueList Operands (VL.size ());
2674
+ auto POIter = PointerOps.begin ();
2675
+ auto OIter = Operands.begin ();
2676
+ for (Value *V : VL) {
2677
+ auto *SI = cast<StoreInst>(V);
2678
+ if (!SI->isSimple ()) {
2671
2679
BS.cancelScheduling (VL, VL0);
2672
2680
newTreeEntry (VL, None /* not vectorized*/ , S, UserTreeIdx,
2673
2681
ReuseShuffleIndicies);
2674
- LLVM_DEBUG (dbgs () << " SLP: Non-consecutive store .\n " );
2682
+ LLVM_DEBUG (dbgs () << " SLP: Gathering non-simple stores .\n " );
2675
2683
return ;
2676
2684
}
2685
+ *POIter = SI->getPointerOperand ();
2686
+ *OIter = SI->getValueOperand ();
2687
+ ++POIter;
2688
+ ++OIter;
2689
+ }
2677
2690
2678
- TreeEntry *TE = newTreeEntry (VL, Bundle /* vectorized*/ , S, UserTreeIdx,
2679
- ReuseShuffleIndicies);
2680
- LLVM_DEBUG (dbgs () << " SLP: added a vector of stores.\n " );
2691
+ OrdersType CurrentOrder;
2692
+ // Check the order of pointer operands.
2693
+ if (llvm::sortPtrAccesses (PointerOps, *DL, *SE, CurrentOrder)) {
2694
+ Value *Ptr0;
2695
+ Value *PtrN;
2696
+ if (CurrentOrder.empty ()) {
2697
+ Ptr0 = PointerOps.front ();
2698
+ PtrN = PointerOps.back ();
2699
+ } else {
2700
+ Ptr0 = PointerOps[CurrentOrder.front ()];
2701
+ PtrN = PointerOps[CurrentOrder.back ()];
2702
+ }
2703
+ const SCEV *Scev0 = SE->getSCEV (Ptr0);
2704
+ const SCEV *ScevN = SE->getSCEV (PtrN);
2705
+ const auto *Diff =
2706
+ dyn_cast<SCEVConstant>(SE->getMinusSCEV (ScevN, Scev0));
2707
+ uint64_t Size = DL->getTypeAllocSize (ScalarTy);
2708
+ // Check that the sorted pointer operands are consecutive.
2709
+ if (Diff && Diff->getAPInt () == (VL.size () - 1 ) * Size) {
2710
+ if (CurrentOrder.empty ()) {
2711
+ // Original stores are consecutive and does not require reordering.
2712
+ ++NumOpsWantToKeepOriginalOrder;
2713
+ TreeEntry *TE = newTreeEntry (VL, Bundle /* vectorized*/ , S,
2714
+ UserTreeIdx, ReuseShuffleIndicies);
2715
+ TE->setOperandsInOrder ();
2716
+ buildTree_rec (Operands, Depth + 1 , {TE, 0 });
2717
+ LLVM_DEBUG (dbgs () << " SLP: added a vector of stores.\n " );
2718
+ } else {
2719
+ // Need to reorder.
2720
+ auto I = NumOpsWantToKeepOrder.try_emplace (CurrentOrder).first ;
2721
+ ++(I->getSecond ());
2722
+ TreeEntry *TE =
2723
+ newTreeEntry (VL, Bundle /* vectorized*/ , S, UserTreeIdx,
2724
+ ReuseShuffleIndicies, I->getFirst ());
2725
+ TE->setOperandsInOrder ();
2726
+ buildTree_rec (Operands, Depth + 1 , {TE, 0 });
2727
+ LLVM_DEBUG (dbgs () << " SLP: added a vector of jumbled stores.\n " );
2728
+ }
2729
+ return ;
2730
+ }
2731
+ }
2681
2732
2682
- ValueList Operands;
2683
- for (Value *V : VL)
2684
- Operands.push_back (cast<Instruction>(V)->getOperand (0 ));
2685
- TE->setOperandsInOrder ();
2686
- buildTree_rec (Operands, Depth + 1 , {TE, 0 });
2733
+ BS.cancelScheduling (VL, VL0);
2734
+ newTreeEntry (VL, None /* not vectorized*/ , S, UserTreeIdx,
2735
+ ReuseShuffleIndicies);
2736
+ LLVM_DEBUG (dbgs () << " SLP: Non-consecutive store.\n " );
2687
2737
return ;
2688
2738
}
2689
2739
case Instruction::Call: {
@@ -3181,15 +3231,23 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
3181
3231
}
3182
3232
case Instruction::Store: {
3183
3233
// We know that we can merge the stores. Calculate the cost.
3184
- MaybeAlign alignment (cast<StoreInst>(VL0)->getAlignment ());
3234
+ bool IsReorder = !E->ReorderIndices .empty ();
3235
+ auto *SI =
3236
+ cast<StoreInst>(IsReorder ? VL[E->ReorderIndices .front ()] : VL0);
3237
+ MaybeAlign Alignment (SI->getAlignment ());
3185
3238
int ScalarEltCost =
3186
- TTI->getMemoryOpCost (Instruction::Store, ScalarTy, alignment , 0 , VL0);
3239
+ TTI->getMemoryOpCost (Instruction::Store, ScalarTy, Alignment , 0 , VL0);
3187
3240
if (NeedToShuffleReuses) {
3188
3241
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size ()) * ScalarEltCost;
3189
3242
}
3190
3243
int ScalarStCost = VecTy->getNumElements () * ScalarEltCost;
3191
- int VecStCost =
3192
- TTI->getMemoryOpCost (Instruction::Store, VecTy, alignment, 0 , VL0);
3244
+ int VecStCost = TTI->getMemoryOpCost (Instruction::Store,
3245
+ VecTy, Alignment, 0 , VL0);
3246
+ if (IsReorder) {
3247
+ // TODO: Merge this shuffle with the ReuseShuffleCost.
3248
+ VecStCost += TTI->getShuffleCost (
3249
+ TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
3250
+ }
3193
3251
return ReuseShuffleCost + VecStCost - ScalarStCost;
3194
3252
}
3195
3253
case Instruction::Call: {
@@ -4051,13 +4109,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
4051
4109
return V;
4052
4110
}
4053
4111
case Instruction::Store: {
4054
- StoreInst *SI = cast<StoreInst>(VL0);
4112
+ bool IsReorder = !E->ReorderIndices .empty ();
4113
+ auto *SI = cast<StoreInst>(
4114
+ IsReorder ? E->Scalars [E->ReorderIndices .front ()] : VL0);
4055
4115
unsigned Alignment = SI->getAlignment ();
4056
4116
unsigned AS = SI->getPointerAddressSpace ();
4057
4117
4058
4118
setInsertPointAfterBundle (E);
4059
4119
4060
4120
Value *VecValue = vectorizeTree (E->getOperand (0 ));
4121
+ if (IsReorder) {
4122
+ OrdersType Mask;
4123
+ inversePermutation (E->ReorderIndices , Mask);
4124
+ VecValue = Builder.CreateShuffleVector (
4125
+ VecValue, UndefValue::get (VecValue->getType ()), E->ReorderIndices ,
4126
+ " reorder_shuffle" );
4127
+ }
4061
4128
Value *ScalarPtr = SI->getPointerOperand ();
4062
4129
Value *VecPtr = Builder.CreateBitCast (ScalarPtr, VecTy->getPointerTo (AS));
4063
4130
StoreInst *ST = Builder.CreateStore (VecValue, VecPtr);
@@ -5347,6 +5414,14 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
5347
5414
<< " \n " );
5348
5415
5349
5416
R.buildTree (Chain);
5417
+ Optional<ArrayRef<unsigned >> Order = R.bestOrder ();
5418
+ if (Order) {
5419
+ // TODO: reorder tree nodes without tree rebuilding.
5420
+ SmallVector<Value *, 4 > ReorderedOps (Chain.rbegin (), Chain.rend ());
5421
+ llvm::transform (*Order, ReorderedOps.begin (),
5422
+ [Chain](const unsigned Idx) { return Chain[Idx]; });
5423
+ R.buildTree (ReorderedOps);
5424
+ }
5350
5425
if (R.isTreeTinyAndNotFullyVectorizable ())
5351
5426
return false ;
5352
5427
0 commit comments