65
65
#include < cassert>
66
66
#include < cstdint>
67
67
#include < limits>
68
- #include < optional>
69
68
#include < string>
70
69
#include < tuple>
71
70
#include < utility>
@@ -320,16 +319,6 @@ struct EstimatedUnrollCost {
320
319
unsigned RolledDynamicCost;
321
320
};
322
321
323
- struct PragmaInfo {
324
- PragmaInfo (bool UUC, bool PFU, unsigned PC, bool PEU)
325
- : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
326
- PragmaEnableUnroll (PEU) {}
327
- const bool UserUnrollCount;
328
- const bool PragmaFullUnroll;
329
- const unsigned PragmaCount;
330
- const bool PragmaEnableUnroll;
331
- };
332
-
333
322
} // end anonymous namespace
334
323
335
324
// / Figure out if the loop is worth full unrolling.
@@ -758,132 +747,13 @@ class UnrollCostEstimator {
758
747
759
748
// Returns loop size estimation for unrolled loop, given the unrolling
760
749
// configuration specified by UP.
761
- uint64_t
762
- getUnrolledLoopSize (const TargetTransformInfo::UnrollingPreferences &UP,
763
- const unsigned CountOverwrite = 0 ) const {
750
+ uint64_t getUnrolledLoopSize (TargetTransformInfo::UnrollingPreferences &UP) {
764
751
assert (LoopSize >= UP.BEInsns &&
765
752
" LoopSize should not be less than BEInsns!" );
766
- if (CountOverwrite)
767
- return static_cast <uint64_t >(LoopSize - UP.BEInsns ) * CountOverwrite +
768
- UP.BEInsns ;
769
- else
770
- return static_cast <uint64_t >(LoopSize - UP.BEInsns ) * UP.Count +
771
- UP.BEInsns ;
753
+ return (uint64_t )(LoopSize - UP.BEInsns ) * UP.Count + UP.BEInsns ;
772
754
}
773
755
};
774
756
775
- static Optional<unsigned >
776
- shouldPragmaUnroll (Loop *L, const PragmaInfo &PInfo,
777
- const unsigned TripMultiple, const unsigned TripCount,
778
- const UnrollCostEstimator UCE,
779
- const TargetTransformInfo::UnrollingPreferences &UP) {
780
-
781
- // Using unroll pragma
782
- // 1st priority is unroll count set by "unroll-count" option.
783
-
784
- if (PInfo.UserUnrollCount ) {
785
- if (UP.AllowRemainder &&
786
- UCE.getUnrolledLoopSize (UP, (unsigned )UnrollCount) < UP.Threshold )
787
- return (unsigned )UnrollCount;
788
- }
789
-
790
- // 2nd priority is unroll count set by pragma.
791
- if (PInfo.PragmaCount > 0 ) {
792
- if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0 )) &&
793
- UCE.getUnrolledLoopSize (UP, PInfo.PragmaCount ) < PragmaUnrollThreshold)
794
- return PInfo.PragmaCount ;
795
- }
796
-
797
- if (PInfo.PragmaFullUnroll && TripCount != 0 ) {
798
- if (UCE.getUnrolledLoopSize (UP, TripCount) < PragmaUnrollThreshold)
799
- return TripCount;
800
- }
801
- // if didn't return until here, should continue to other priorties
802
- return None;
803
- }
804
-
805
- static Optional<unsigned > shouldFullUnroll (
806
- Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
807
- ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
808
- const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
809
- const TargetTransformInfo::UnrollingPreferences &UP) {
810
-
811
- if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount ) {
812
- // When computing the unrolled size, note that BEInsns are not replicated
813
- // like the rest of the loop body.
814
- if (UCE.getUnrolledLoopSize (UP) < UP.Threshold ) {
815
- return FullUnrollTripCount;
816
-
817
- } else {
818
- // The loop isn't that small, but we still can fully unroll it if that
819
- // helps to remove a significant number of instructions.
820
- // To check that, run additional analysis on the loop.
821
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost (
822
- L, FullUnrollTripCount, DT, SE, EphValues, TTI,
823
- UP.Threshold * UP.MaxPercentThresholdBoost / 100 ,
824
- UP.MaxIterationsCountToAnalyze )) {
825
- unsigned Boost =
826
- getFullUnrollBoostingFactor (*Cost, UP.MaxPercentThresholdBoost );
827
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100 ) {
828
- return FullUnrollTripCount;
829
- }
830
- }
831
- }
832
- }
833
- return None;
834
- }
835
-
836
- static Optional<unsigned >
837
- shouldPartialUnroll (const unsigned LoopSize, const unsigned TripCount,
838
- const UnrollCostEstimator UCE,
839
- const TargetTransformInfo::UnrollingPreferences &UP) {
840
-
841
- unsigned count = UP.Count ;
842
- if (TripCount) {
843
- if (!UP.Partial ) {
844
- LLVM_DEBUG (dbgs () << " will not try to unroll partially because "
845
- << " -unroll-allow-partial not given\n " );
846
- count = 0 ;
847
- return count;
848
- }
849
- if (count == 0 )
850
- count = TripCount;
851
- if (UP.PartialThreshold != NoThreshold) {
852
- // Reduce unroll count to be modulo of TripCount for partial unrolling.
853
- if (UCE.getUnrolledLoopSize (UP, count) > UP.PartialThreshold )
854
- count = (std::max (UP.PartialThreshold , UP.BEInsns + 1 ) - UP.BEInsns ) /
855
- (LoopSize - UP.BEInsns );
856
- if (count > UP.MaxCount )
857
- count = UP.MaxCount ;
858
- while (count != 0 && TripCount % count != 0 )
859
- count--;
860
- if (UP.AllowRemainder && count <= 1 ) {
861
- // If there is no Count that is modulo of TripCount, set Count to
862
- // largest power-of-two factor that satisfies the threshold limit.
863
- // As we'll create fixup loop, do the type of unrolling only if
864
- // remainder loop is allowed.
865
- count = UP.DefaultUnrollRuntimeCount ;
866
- while (count != 0 &&
867
- UCE.getUnrolledLoopSize (UP, count) > UP.PartialThreshold )
868
- count >>= 1 ;
869
- }
870
- if (count < 2 ) {
871
- count = 0 ;
872
- }
873
- } else {
874
- count = TripCount;
875
- }
876
- if (count > UP.MaxCount )
877
- count = UP.MaxCount ;
878
-
879
- LLVM_DEBUG (dbgs () << " partially unrolling with count: " << count << " \n " );
880
-
881
- return count;
882
- }
883
-
884
- // if didn't return until here, should continue to other priorties
885
- return None;
886
- }
887
757
// Returns true if unroll count was set explicitly.
888
758
// Calculates unroll count and writes it to UP.Count.
889
759
// Unless IgnoreUser is true, will also use metadata and command-line options
@@ -901,18 +771,7 @@ bool llvm::computeUnrollCount(
901
771
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
902
772
903
773
UnrollCostEstimator UCE (*L, LoopSize);
904
- Optional<unsigned > UnrollFactor;
905
-
906
- const bool UserUnrollCount = UnrollCount.getNumOccurrences () > 0 ;
907
- const bool PragmaFullUnroll = hasUnrollFullPragma (L);
908
- const unsigned PragmaCount = unrollCountPragmaValue (L);
909
- const bool PragmaEnableUnroll = hasUnrollEnablePragma (L);
910
-
911
- const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
912
- PragmaEnableUnroll || UserUnrollCount;
913
774
914
- PragmaInfo PInfo (UserUnrollCount, PragmaFullUnroll, PragmaCount,
915
- PragmaEnableUnroll);
916
775
// Use an explicit peel count that has been specified for testing. In this
917
776
// case it's not permitted to also specify an explicit unroll count.
918
777
if (PP.PeelCount ) {
@@ -924,29 +783,47 @@ bool llvm::computeUnrollCount(
924
783
UP.Runtime = false ;
925
784
return true ;
926
785
}
786
+
927
787
// Check for explicit Count.
928
788
// 1st priority is unroll count set by "unroll-count" option.
929
- // 2nd priority is unroll count set by pragma.
930
- UnrollFactor = shouldPragmaUnroll (L, PInfo, TripMultiple, TripCount, UCE, UP);
789
+ bool UserUnrollCount = UnrollCount.getNumOccurrences () > 0 ;
790
+ if (UserUnrollCount) {
791
+ UP.Count = UnrollCount;
792
+ UP.AllowExpensiveTripCount = true ;
793
+ UP.Force = true ;
794
+ if (UP.AllowRemainder && UCE.getUnrolledLoopSize (UP) < UP.Threshold )
795
+ return true ;
796
+ }
931
797
932
- if (UnrollFactor) {
933
- UP.Count = *UnrollFactor;
798
+ // 2nd priority is unroll count set by pragma.
799
+ unsigned PragmaCount = unrollCountPragmaValue (L);
800
+ if (PragmaCount > 0 ) {
801
+ UP.Count = PragmaCount;
802
+ UP.Runtime = true ;
803
+ UP.AllowExpensiveTripCount = true ;
804
+ UP.Force = true ;
805
+ if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0 )) &&
806
+ UCE.getUnrolledLoopSize (UP) < PragmaUnrollThreshold)
807
+ return true ;
808
+ }
809
+ bool PragmaFullUnroll = hasUnrollFullPragma (L);
810
+ if (PragmaFullUnroll && TripCount != 0 ) {
811
+ UP.Count = TripCount;
812
+ if (UCE.getUnrolledLoopSize (UP) < PragmaUnrollThreshold)
813
+ return false ;
814
+ }
934
815
935
- if (UserUnrollCount || (PragmaCount > 0 )) {
936
- UP.AllowExpensiveTripCount = true ;
937
- UP.Force = true ;
938
- }
939
- UP.Runtime |= (PragmaCount > 0 );
940
- return ExplicitUnroll;
941
- } else {
942
- if (ExplicitUnroll && TripCount != 0 ) {
943
- // If the loop has an unrolling pragma, we want to be more aggressive with
944
- // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
945
- // value which is larger than the default limits.
946
- UP.Threshold = std::max<unsigned >(UP.Threshold , PragmaUnrollThreshold);
947
- UP.PartialThreshold =
948
- std::max<unsigned >(UP.PartialThreshold , PragmaUnrollThreshold);
949
- }
816
+ bool PragmaEnableUnroll = hasUnrollEnablePragma (L);
817
+ bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
818
+ PragmaEnableUnroll || UserUnrollCount;
819
+
820
+ if (ExplicitUnroll && TripCount != 0 ) {
821
+ // If the loop has an unrolling pragma, we want to be more aggressive with
822
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
823
+ // value which is larger than the default limits.
824
+ UP.Threshold = std::max<unsigned >(UP.Threshold , PragmaUnrollThreshold);
825
+ UP.PartialThreshold =
826
+ std::max<unsigned >(UP.PartialThreshold , PragmaUnrollThreshold);
950
827
}
951
828
952
829
// 3rd priority is full unroll count.
@@ -976,20 +853,28 @@ bool llvm::computeUnrollCount(
976
853
unsigned FullUnrollTripCount =
977
854
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
978
855
UP.Count = FullUnrollTripCount;
979
-
980
- UnrollFactor =
981
- shouldFullUnroll (L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
982
-
983
- // if shouldFullUnroll can do the unrolling, some side parameteres should be
984
- // set
985
- if (UnrollFactor) {
986
- UP.Count = *UnrollFactor;
987
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
988
- TripCount = FullUnrollTripCount;
989
- TripMultiple = UP.UpperBound ? 1 : TripMultiple;
990
- return ExplicitUnroll;
991
- } else {
992
- UP.Count = FullUnrollTripCount;
856
+ if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount ) {
857
+ // When computing the unrolled size, note that BEInsns are not replicated
858
+ // like the rest of the loop body.
859
+ if (UCE.getUnrolledLoopSize (UP) < UP.Threshold ) {
860
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
861
+ return ExplicitUnroll;
862
+ } else {
863
+ // The loop isn't that small, but we still can fully unroll it if that
864
+ // helps to remove a significant number of instructions.
865
+ // To check that, run additional analysis on the loop.
866
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost (
867
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
868
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100 ,
869
+ UP.MaxIterationsCountToAnalyze )) {
870
+ unsigned Boost =
871
+ getFullUnrollBoostingFactor (*Cost, UP.MaxPercentThresholdBoost );
872
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100 ) {
873
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
874
+ return ExplicitUnroll;
875
+ }
876
+ }
877
+ }
993
878
}
994
879
995
880
// 4th priority is loop peeling.
@@ -1000,31 +885,39 @@ bool llvm::computeUnrollCount(
1000
885
return ExplicitUnroll;
1001
886
}
1002
887
1003
- // Before starting partial unrolling, set up.partial to true,
1004
- // if user explicitly asked for unrolling
1005
- if (TripCount)
1006
- UP.Partial |= ExplicitUnroll;
1007
-
1008
888
// 5th priority is partial unrolling.
1009
889
// Try partial unroll only when TripCount could be statically calculated.
1010
- UnrollFactor = shouldPartialUnroll (LoopSize, TripCount, UCE, UP);
1011
-
1012
- if (UnrollFactor) {
1013
- UP.Count = *UnrollFactor;
1014
-
1015
- if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
1016
- UP.Count != TripCount)
1017
- ORE->emit ([&]() {
1018
- return OptimizationRemarkMissed (DEBUG_TYPE,
1019
- " FullUnrollAsDirectedTooLarge" ,
1020
- L->getStartLoc (), L->getHeader ())
1021
- << " Unable to fully unroll loop as directed by unroll pragma "
1022
- " because "
1023
- " unrolled size is too large." ;
1024
- });
1025
-
890
+ if (TripCount) {
891
+ UP.Partial |= ExplicitUnroll;
892
+ if (!UP.Partial ) {
893
+ LLVM_DEBUG (dbgs () << " will not try to unroll partially because "
894
+ << " -unroll-allow-partial not given\n " );
895
+ UP.Count = 0 ;
896
+ return false ;
897
+ }
898
+ if (UP.Count == 0 )
899
+ UP.Count = TripCount;
1026
900
if (UP.PartialThreshold != NoThreshold) {
1027
- if (UP.Count == 0 ) {
901
+ // Reduce unroll count to be modulo of TripCount for partial unrolling.
902
+ if (UCE.getUnrolledLoopSize (UP) > UP.PartialThreshold )
903
+ UP.Count =
904
+ (std::max (UP.PartialThreshold , UP.BEInsns + 1 ) - UP.BEInsns ) /
905
+ (LoopSize - UP.BEInsns );
906
+ if (UP.Count > UP.MaxCount )
907
+ UP.Count = UP.MaxCount ;
908
+ while (UP.Count != 0 && TripCount % UP.Count != 0 )
909
+ UP.Count --;
910
+ if (UP.AllowRemainder && UP.Count <= 1 ) {
911
+ // If there is no Count that is modulo of TripCount, set Count to
912
+ // largest power-of-two factor that satisfies the threshold limit.
913
+ // As we'll create fixup loop, do the type of unrolling only if
914
+ // remainder loop is allowed.
915
+ UP.Count = UP.DefaultUnrollRuntimeCount ;
916
+ while (UP.Count != 0 &&
917
+ UCE.getUnrolledLoopSize (UP) > UP.PartialThreshold )
918
+ UP.Count >>= 1 ;
919
+ }
920
+ if (UP.Count < 2 ) {
1028
921
if (PragmaEnableUnroll)
1029
922
ORE->emit ([&]() {
1030
923
return OptimizationRemarkMissed (DEBUG_TYPE,
@@ -1034,8 +927,25 @@ bool llvm::computeUnrollCount(
1034
927
" pragma "
1035
928
" because unrolled size is too large." ;
1036
929
});
930
+ UP.Count = 0 ;
1037
931
}
932
+ } else {
933
+ UP.Count = TripCount;
1038
934
}
935
+ if (UP.Count > UP.MaxCount )
936
+ UP.Count = UP.MaxCount ;
937
+ if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
938
+ UP.Count != TripCount)
939
+ ORE->emit ([&]() {
940
+ return OptimizationRemarkMissed (DEBUG_TYPE,
941
+ " FullUnrollAsDirectedTooLarge" ,
942
+ L->getStartLoc (), L->getHeader ())
943
+ << " Unable to fully unroll loop as directed by unroll pragma "
944
+ " because "
945
+ " unrolled size is too large." ;
946
+ });
947
+ LLVM_DEBUG (dbgs () << " partially unrolling with count: " << UP.Count
948
+ << " \n " );
1039
949
return ExplicitUnroll;
1040
950
}
1041
951
assert (TripCount == 0 &&
@@ -1072,6 +982,8 @@ bool llvm::computeUnrollCount(
1072
982
UP.AllowExpensiveTripCount = true ;
1073
983
}
1074
984
}
985
+
986
+ // Reduce count based on the type of unrolling and the threshold values.
1075
987
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
1076
988
if (!UP.Runtime ) {
1077
989
LLVM_DEBUG (
@@ -1106,7 +1018,7 @@ bool llvm::computeUnrollCount(
1106
1018
1107
1019
using namespace ore ;
1108
1020
1109
- if (unrollCountPragmaValue (L) > 0 && !UP.AllowRemainder )
1021
+ if (PragmaCount > 0 && !UP.AllowRemainder )
1110
1022
ORE->emit ([&]() {
1111
1023
return OptimizationRemarkMissed (DEBUG_TYPE,
1112
1024
" DifferentUnrollCountFromDirected" ,
0 commit comments