Skip to content

Commit ffd8a26

Browse files
committed
Revert "[NFC] factor out unrolling decision logic"
The reverted patch added a requirement for C++17, while LLVM is supposed to build with C++14 (https://llvm.org/docs/CodingStandards.html#c-standard-versions). A note was posted to the original review thread (https://reviews.llvm.org/D106001). This reverts commit 4d55983. Differential Revision: https://reviews.llvm.org/D108314
1 parent 9dbc968 commit ffd8a26

File tree

1 file changed

+111
-199
lines changed

1 file changed

+111
-199
lines changed

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Lines changed: 111 additions & 199 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@
6565
#include <cassert>
6666
#include <cstdint>
6767
#include <limits>
68-
#include <optional>
6968
#include <string>
7069
#include <tuple>
7170
#include <utility>
@@ -320,16 +319,6 @@ struct EstimatedUnrollCost {
320319
unsigned RolledDynamicCost;
321320
};
322321

323-
struct PragmaInfo {
324-
PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
325-
: UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
326-
PragmaEnableUnroll(PEU) {}
327-
const bool UserUnrollCount;
328-
const bool PragmaFullUnroll;
329-
const unsigned PragmaCount;
330-
const bool PragmaEnableUnroll;
331-
};
332-
333322
} // end anonymous namespace
334323

335324
/// Figure out if the loop is worth full unrolling.
@@ -758,132 +747,13 @@ class UnrollCostEstimator {
758747

759748
// Returns loop size estimation for unrolled loop, given the unrolling
760749
// configuration specified by UP.
761-
uint64_t
762-
getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
763-
const unsigned CountOverwrite = 0) const {
750+
uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
764751
assert(LoopSize >= UP.BEInsns &&
765752
"LoopSize should not be less than BEInsns!");
766-
if (CountOverwrite)
767-
return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
768-
UP.BEInsns;
769-
else
770-
return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
771-
UP.BEInsns;
753+
return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
772754
}
773755
};
774756

775-
static Optional<unsigned>
776-
shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
777-
const unsigned TripMultiple, const unsigned TripCount,
778-
const UnrollCostEstimator UCE,
779-
const TargetTransformInfo::UnrollingPreferences &UP) {
780-
781-
// Using unroll pragma
782-
// 1st priority is unroll count set by "unroll-count" option.
783-
784-
if (PInfo.UserUnrollCount) {
785-
if (UP.AllowRemainder &&
786-
UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
787-
return (unsigned)UnrollCount;
788-
}
789-
790-
// 2nd priority is unroll count set by pragma.
791-
if (PInfo.PragmaCount > 0) {
792-
if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
793-
UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
794-
return PInfo.PragmaCount;
795-
}
796-
797-
if (PInfo.PragmaFullUnroll && TripCount != 0) {
798-
if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
799-
return TripCount;
800-
}
801-
// If we did not return by this point, continue to the other priorities.
802-
return None;
803-
}
804-
805-
static Optional<unsigned> shouldFullUnroll(
806-
Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
807-
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
808-
const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
809-
const TargetTransformInfo::UnrollingPreferences &UP) {
810-
811-
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
812-
// When computing the unrolled size, note that BEInsns are not replicated
813-
// like the rest of the loop body.
814-
if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
815-
return FullUnrollTripCount;
816-
817-
} else {
818-
// The loop isn't that small, but we still can fully unroll it if that
819-
// helps to remove a significant number of instructions.
820-
// To check that, run additional analysis on the loop.
821-
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
822-
L, FullUnrollTripCount, DT, SE, EphValues, TTI,
823-
UP.Threshold * UP.MaxPercentThresholdBoost / 100,
824-
UP.MaxIterationsCountToAnalyze)) {
825-
unsigned Boost =
826-
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
827-
if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
828-
return FullUnrollTripCount;
829-
}
830-
}
831-
}
832-
}
833-
return None;
834-
}
835-
836-
static Optional<unsigned>
837-
shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
838-
const UnrollCostEstimator UCE,
839-
const TargetTransformInfo::UnrollingPreferences &UP) {
840-
841-
unsigned count = UP.Count;
842-
if (TripCount) {
843-
if (!UP.Partial) {
844-
LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
845-
<< "-unroll-allow-partial not given\n");
846-
count = 0;
847-
return count;
848-
}
849-
if (count == 0)
850-
count = TripCount;
851-
if (UP.PartialThreshold != NoThreshold) {
852-
// Reduce unroll count to be modulo of TripCount for partial unrolling.
853-
if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
854-
count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
855-
(LoopSize - UP.BEInsns);
856-
if (count > UP.MaxCount)
857-
count = UP.MaxCount;
858-
while (count != 0 && TripCount % count != 0)
859-
count--;
860-
if (UP.AllowRemainder && count <= 1) {
861-
// If there is no Count that is modulo of TripCount, set Count to
862-
// largest power-of-two factor that satisfies the threshold limit.
863-
// As we'll create fixup loop, do the type of unrolling only if
864-
// remainder loop is allowed.
865-
count = UP.DefaultUnrollRuntimeCount;
866-
while (count != 0 &&
867-
UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
868-
count >>= 1;
869-
}
870-
if (count < 2) {
871-
count = 0;
872-
}
873-
} else {
874-
count = TripCount;
875-
}
876-
if (count > UP.MaxCount)
877-
count = UP.MaxCount;
878-
879-
LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
880-
881-
return count;
882-
}
883-
884-
// If we did not return by this point, continue to the other priorities.
885-
return None;
886-
}
887757
// Returns true if unroll count was set explicitly.
888758
// Calculates unroll count and writes it to UP.Count.
889759
// Unless IgnoreUser is true, will also use metadata and command-line options
@@ -901,18 +771,7 @@ bool llvm::computeUnrollCount(
901771
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
902772

903773
UnrollCostEstimator UCE(*L, LoopSize);
904-
Optional<unsigned> UnrollFactor;
905-
906-
const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
907-
const bool PragmaFullUnroll = hasUnrollFullPragma(L);
908-
const unsigned PragmaCount = unrollCountPragmaValue(L);
909-
const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
910-
911-
const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
912-
PragmaEnableUnroll || UserUnrollCount;
913774

914-
PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
915-
PragmaEnableUnroll);
916775
// Use an explicit peel count that has been specified for testing. In this
917776
// case it's not permitted to also specify an explicit unroll count.
918777
if (PP.PeelCount) {
@@ -924,29 +783,47 @@ bool llvm::computeUnrollCount(
924783
UP.Runtime = false;
925784
return true;
926785
}
786+
927787
// Check for explicit Count.
928788
// 1st priority is unroll count set by "unroll-count" option.
929-
// 2nd priority is unroll count set by pragma.
930-
UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
789+
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
790+
if (UserUnrollCount) {
791+
UP.Count = UnrollCount;
792+
UP.AllowExpensiveTripCount = true;
793+
UP.Force = true;
794+
if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
795+
return true;
796+
}
931797

932-
if (UnrollFactor) {
933-
UP.Count = *UnrollFactor;
798+
// 2nd priority is unroll count set by pragma.
799+
unsigned PragmaCount = unrollCountPragmaValue(L);
800+
if (PragmaCount > 0) {
801+
UP.Count = PragmaCount;
802+
UP.Runtime = true;
803+
UP.AllowExpensiveTripCount = true;
804+
UP.Force = true;
805+
if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
806+
UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
807+
return true;
808+
}
809+
bool PragmaFullUnroll = hasUnrollFullPragma(L);
810+
if (PragmaFullUnroll && TripCount != 0) {
811+
UP.Count = TripCount;
812+
if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
813+
return false;
814+
}
934815

935-
if (UserUnrollCount || (PragmaCount > 0)) {
936-
UP.AllowExpensiveTripCount = true;
937-
UP.Force = true;
938-
}
939-
UP.Runtime |= (PragmaCount > 0);
940-
return ExplicitUnroll;
941-
} else {
942-
if (ExplicitUnroll && TripCount != 0) {
943-
// If the loop has an unrolling pragma, we want to be more aggressive with
944-
// unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
945-
// value which is larger than the default limits.
946-
UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
947-
UP.PartialThreshold =
948-
std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
949-
}
816+
bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
817+
bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
818+
PragmaEnableUnroll || UserUnrollCount;
819+
820+
if (ExplicitUnroll && TripCount != 0) {
821+
// If the loop has an unrolling pragma, we want to be more aggressive with
822+
// unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
823+
// value which is larger than the default limits.
824+
UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
825+
UP.PartialThreshold =
826+
std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
950827
}
951828

952829
// 3rd priority is full unroll count.
@@ -976,20 +853,28 @@ bool llvm::computeUnrollCount(
976853
unsigned FullUnrollTripCount =
977854
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
978855
UP.Count = FullUnrollTripCount;
979-
980-
UnrollFactor =
981-
shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
982-
983-
// if shouldFullUnroll can do the unrolling, some side parameters should be
984-
// set
985-
if (UnrollFactor) {
986-
UP.Count = *UnrollFactor;
987-
UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
988-
TripCount = FullUnrollTripCount;
989-
TripMultiple = UP.UpperBound ? 1 : TripMultiple;
990-
return ExplicitUnroll;
991-
} else {
992-
UP.Count = FullUnrollTripCount;
856+
if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
857+
// When computing the unrolled size, note that BEInsns are not replicated
858+
// like the rest of the loop body.
859+
if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
860+
UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
861+
return ExplicitUnroll;
862+
} else {
863+
// The loop isn't that small, but we still can fully unroll it if that
864+
// helps to remove a significant number of instructions.
865+
// To check that, run additional analysis on the loop.
866+
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
867+
L, FullUnrollTripCount, DT, SE, EphValues, TTI,
868+
UP.Threshold * UP.MaxPercentThresholdBoost / 100,
869+
UP.MaxIterationsCountToAnalyze)) {
870+
unsigned Boost =
871+
getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
872+
if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
873+
UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
874+
return ExplicitUnroll;
875+
}
876+
}
877+
}
993878
}
994879

995880
// 4th priority is loop peeling.
@@ -1000,31 +885,39 @@ bool llvm::computeUnrollCount(
1000885
return ExplicitUnroll;
1001886
}
1002887

1003-
// Before starting partial unrolling, set UP.Partial to true,
1004-
// if user explicitly asked for unrolling
1005-
if (TripCount)
1006-
UP.Partial |= ExplicitUnroll;
1007-
1008888
// 5th priority is partial unrolling.
1009889
// Try partial unroll only when TripCount could be statically calculated.
1010-
UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
1011-
1012-
if (UnrollFactor) {
1013-
UP.Count = *UnrollFactor;
1014-
1015-
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1016-
UP.Count != TripCount)
1017-
ORE->emit([&]() {
1018-
return OptimizationRemarkMissed(DEBUG_TYPE,
1019-
"FullUnrollAsDirectedTooLarge",
1020-
L->getStartLoc(), L->getHeader())
1021-
<< "Unable to fully unroll loop as directed by unroll pragma "
1022-
"because "
1023-
"unrolled size is too large.";
1024-
});
1025-
890+
if (TripCount) {
891+
UP.Partial |= ExplicitUnroll;
892+
if (!UP.Partial) {
893+
LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
894+
<< "-unroll-allow-partial not given\n");
895+
UP.Count = 0;
896+
return false;
897+
}
898+
if (UP.Count == 0)
899+
UP.Count = TripCount;
1026900
if (UP.PartialThreshold != NoThreshold) {
1027-
if (UP.Count == 0) {
901+
// Reduce unroll count to be modulo of TripCount for partial unrolling.
902+
if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
903+
UP.Count =
904+
(std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
905+
(LoopSize - UP.BEInsns);
906+
if (UP.Count > UP.MaxCount)
907+
UP.Count = UP.MaxCount;
908+
while (UP.Count != 0 && TripCount % UP.Count != 0)
909+
UP.Count--;
910+
if (UP.AllowRemainder && UP.Count <= 1) {
911+
// If there is no Count that is modulo of TripCount, set Count to
912+
// largest power-of-two factor that satisfies the threshold limit.
913+
// As we'll create fixup loop, do the type of unrolling only if
914+
// remainder loop is allowed.
915+
UP.Count = UP.DefaultUnrollRuntimeCount;
916+
while (UP.Count != 0 &&
917+
UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
918+
UP.Count >>= 1;
919+
}
920+
if (UP.Count < 2) {
1028921
if (PragmaEnableUnroll)
1029922
ORE->emit([&]() {
1030923
return OptimizationRemarkMissed(DEBUG_TYPE,
@@ -1034,8 +927,25 @@ bool llvm::computeUnrollCount(
1034927
"pragma "
1035928
"because unrolled size is too large.";
1036929
});
930+
UP.Count = 0;
1037931
}
932+
} else {
933+
UP.Count = TripCount;
1038934
}
935+
if (UP.Count > UP.MaxCount)
936+
UP.Count = UP.MaxCount;
937+
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
938+
UP.Count != TripCount)
939+
ORE->emit([&]() {
940+
return OptimizationRemarkMissed(DEBUG_TYPE,
941+
"FullUnrollAsDirectedTooLarge",
942+
L->getStartLoc(), L->getHeader())
943+
<< "Unable to fully unroll loop as directed by unroll pragma "
944+
"because "
945+
"unrolled size is too large.";
946+
});
947+
LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
948+
<< "\n");
1039949
return ExplicitUnroll;
1040950
}
1041951
assert(TripCount == 0 &&
@@ -1072,6 +982,8 @@ bool llvm::computeUnrollCount(
1072982
UP.AllowExpensiveTripCount = true;
1073983
}
1074984
}
985+
986+
// Reduce count based on the type of unrolling and the threshold values.
1075987
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1076988
if (!UP.Runtime) {
1077989
LLVM_DEBUG(
@@ -1106,7 +1018,7 @@ bool llvm::computeUnrollCount(
11061018

11071019
using namespace ore;
11081020

1109-
if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
1021+
if (PragmaCount > 0 && !UP.AllowRemainder)
11101022
ORE->emit([&]() {
11111023
return OptimizationRemarkMissed(DEBUG_TYPE,
11121024
"DifferentUnrollCountFromDirected",

0 commit comments

Comments
 (0)