Revert "[SLP] Make sure instructions are ordered when computing spill cost."

fhahn · fhahn · commit 04b85e2bcbff · 2020-07-07T23:15:01.000+01:00
This seems to break the llvm-clang-x86_64-expensive-checks-win builder (http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/24371). This reverts commit eb46137.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3760,24 +3760,11 @@ int BoUpSLP::getSpillCost() const {
   SmallPtrSet<Instruction*, 4> LiveValues;
   Instruction *PrevInst = nullptr;
 
-  // The entries in VectorizableTree are not necessarily ordered by their
-  // position in basic blocks. Collect them and order them by dominance so later
-  // instructions are guaranteed to be visited first. For instructions in
-  // different basic blocks, we only scan to the beginning of the block, so
-  // their order does not matter, as long as all instructions in a basic block
-  // are grouped together. Using dominance ensures a deterministic order.
-  SmallVector<Instruction *, 16> OrderedScalars;
   for (const auto &TEPtr : VectorizableTree) {
     Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
     if (!Inst)
       continue;
-    OrderedScalars.push_back(Inst);
-  }
-  llvm::stable_sort(OrderedScalars, [this](Instruction *A, Instruction *B) {
-    return !DT->dominates(A, B);
-  });
 
-  for (Instruction *Inst : OrderedScalars) {
     if (!PrevInst) {
       PrevInst = Inst;
       continue;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-order.ll
@@ -13,19 +13,22 @@ define void @test(i64* %ptr, i64* noalias %res) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[CALL_I_I:%.*]] = call i32* @get_ptr()
+; CHECK-NEXT:    [[L_0_0:%.*]] = load i32, i32* [[CALL_I_I]], align 2
 ; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i32, i32* [[CALL_I_I]], i32 2
+; CHECK-NEXT:    [[L_1_0:%.*]] = load i32, i32* [[GEP_1]], align 2
+; CHECK-NEXT:    [[EXT_0_0:%.*]] = zext i32 [[L_0_0]] to i64
+; CHECK-NEXT:    [[EXT_1_0:%.*]] = zext i32 [[L_1_0]] to i64
+; CHECK-NEXT:    [[SUB_1:%.*]] = sub nsw i64 [[EXT_0_0]], [[EXT_1_0]]
 ; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr i32, i32* [[CALL_I_I]], i32 1
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[CALL_I_I]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 2
+; CHECK-NEXT:    [[L_0_1:%.*]] = load i32, i32* [[GEP_2]], align 2
 ; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr i32, i32* [[CALL_I_I]], i32 3
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[GEP_1]] to <2 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
-; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <2 x i64> [[TMP4]], [[TMP5]]
-; CHECK-NEXT:    [[RES_1:%.*]] = getelementptr i64, i64* [[RES:%.*]], i64 1
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64* [[RES]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 8
+; CHECK-NEXT:    [[L_1_1:%.*]] = load i32, i32* [[GEP_3]], align 2
+; CHECK-NEXT:    [[EXT_0_1:%.*]] = zext i32 [[L_0_1]] to i64
+; CHECK-NEXT:    [[EXT_1_1:%.*]] = zext i32 [[L_1_1]] to i64
+; CHECK-NEXT:    [[SUB_2:%.*]] = sub nsw i64 [[EXT_0_1]], [[EXT_1_1]]
+; CHECK-NEXT:    store i64 [[SUB_1]], i64* [[RES:%.*]], align 8
+; CHECK-NEXT:    [[RES_1:%.*]] = getelementptr i64, i64* [[RES]], i64 1
+; CHECK-NEXT:    store i64 [[SUB_2]], i64* [[RES_1]], align 8
 ; CHECK-NEXT:    [[C:%.*]] = call i1 @cond()
 ; CHECK-NEXT:    br i1 [[C]], label [[FOR_BODY]], label [[EXIT:%.*]]
 ; CHECK:       exit: