Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/StaticDataProfileInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class StaticDataProfileInfo {
LLVM_ABI std::optional<uint64_t>
getConstantProfileCount(const Constant *C) const;

/// Classifies the hotness of constant \p C using its profile count and the
/// thresholds provided by \p PSI.
///
/// Returns a section-prefix-like string when the profile count yields a
/// verdict ("unlikely" for a cold count, "" for a lukewarm one), and
/// std::nullopt when no verdict can be derived from counts, e.g. when \p C has
/// no profile count at all.
/// NOTE(review): the hot-count path is presumed to return "hot"; confirm
/// against the definition in StaticDataProfileInfo.cpp.
LLVM_ABI std::optional<StringRef>
getDataHotnessBasedOnProfileCount(const Constant *C,
const ProfileSummaryInfo *PSI) const;

public:
StaticDataProfileInfo() = default;

Expand Down
11 changes: 11 additions & 0 deletions llvm/include/llvm/ProfileData/InstrProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,11 @@ class IndexedMemProfReader {
LLVM_ABI DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
getMemProfCallerCalleePairs() const;

/// Returns a non-owning pointer to the data access profile data held by this
/// reader. The caller must not free it.
/// NOTE(review): the result is whatever `DataAccessProfileData.get()` yields,
/// so it is presumably null when no such data has been loaded -- confirm.
memprof::DataAccessProfData *getDataAccessProfileData() const {
return DataAccessProfileData.get();
}

// Return the entire MemProf profile.
LLVM_ABI memprof::AllMemProfData getAllMemProfData() const;

Expand Down Expand Up @@ -900,6 +905,12 @@ class LLVM_ABI IndexedInstrProfReader : public InstrProfReader {
return MemProfReader.getSummary();
}

/// Returns a non-owning pointer to the data access profile data, forwarded
/// from the underlying MemProf reader. The caller must not free it.
/// Will be null if unavailable (version < 4).
memprof::DataAccessProfData *getDataAccessProfileData() const {
return MemProfReader.getDataAccessProfileData();
}

Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
Error printBinaryIds(raw_ostream &OS) override;
};
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/IR/PassManager.h"
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Compiler.h"

Expand All @@ -36,6 +37,10 @@ class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

private:
// Annotates the section prefix of global variables in `M` based on the data
// access profile `DataAccessProf`: variables with sampled accesses get the
// "hot" prefix and variables known to be cold get the "unlikely" prefix.
// This is a no-op when `DataAccessProf` is null or when annotation is not
// enabled on the command line.
// Returns true if the section prefix of at least one global variable was set.
bool
annotateGlobalVariables(Module &M,
const memprof::DataAccessProfData *DataAccessProf);
std::string MemoryProfileFileName;
IntrusiveRefCntPtr<vfs::FileSystem> FS;
};
Expand Down
46 changes: 41 additions & 5 deletions llvm/lib/Analysis/StaticDataProfileInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,15 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
return I->second;
}

StringRef StaticDataProfileInfo::getConstantSectionPrefix(
std::optional<StringRef>
StaticDataProfileInfo::getDataHotnessBasedOnProfileCount(
const Constant *C, const ProfileSummaryInfo *PSI) const {
auto Count = getConstantProfileCount(C);
// The constant `C` doesn't have a profile count. `C` might be an external
// linkage global variable, whose PGO-based counter is not tracked within one
// IR module.
if (!Count)
return "";
return std::nullopt;
// The accumulated counter shows the constant is hot. Return 'hot' whether
// this variable is seen by unprofiled functions or not.
if (PSI->isHotCount(*Count))
Expand All @@ -41,14 +45,46 @@ StringRef StaticDataProfileInfo::getConstantSectionPrefix(
// assign it to unlikely sections, even if the counter says 'cold'. So return
// an empty prefix before checking whether the counter is cold.
if (ConstantWithoutCounts.count(C))
return "";
return std::nullopt;
// The accumulated counter shows the constant is cold. Return 'unlikely'.
if (PSI->isColdCount(*Count))
if (PSI->isColdCount(*Count)) {
return "unlikely";
// The counter says lukewarm. Return an empty prefix.
}
return "";
}

/// Merges two hotness verdicts for the same piece of data.
///
/// \p SectionPrefix is the prefix already attached to the data and must be
/// either "hot" or "unlikely" (checked by assertion). \p Hotness is the
/// verdict coming from another source.
///
/// A "hot" verdict from either side wins. Otherwise the existing prefix is
/// "unlikely" and \p Hotness is returned unchanged.
static StringRef reconcileHotness(StringRef SectionPrefix, StringRef Hotness) {
  const bool PrefixIsHot = SectionPrefix == "hot";
  assert((PrefixIsHot || SectionPrefix == "unlikely") &&
         "Section prefix must be 'hot' or 'unlikely'");

  // Hot on either side makes the result hot.
  if (PrefixIsHot || Hotness == "hot")
    return "hot";

  // The prefix is not hot, so it can only be 'unlikely' at this point; defer
  // to the other verdict.
  assert(SectionPrefix == "unlikely" && "Section prefix must be 'unlikely'.");
  return Hotness;
}

/// Merges two optional hotness verdicts into a single section prefix.
///
/// When both \p SectionPrefix and \p Hotness are present they are combined by
/// reconcileHotness(). When only one is present it is returned as is, and when
/// neither is present the result is the empty string.
static StringRef
reconcileOptionalHotness(std::optional<StringRef> SectionPrefix,
                         std::optional<StringRef> Hotness) {
  // Both verdicts are available: reconcile them.
  if (SectionPrefix && Hotness)
    return reconcileHotness(*SectionPrefix, *Hotness);

  // Only the existing section prefix is available.
  if (SectionPrefix)
    return *SectionPrefix;

  // No section prefix: use the other verdict, or "" if it is absent too.
  return Hotness.value_or("");
}

/// Returns the section prefix to use for constant \p C.
///
/// The count-based hotness of \p C is computed first. For a global variable
/// it is then reconciled with the section prefix already set on the variable;
/// for any other constant the count-based hotness is returned directly, with
/// an empty string standing in for "no verdict".
StringRef StaticDataProfileInfo::getConstantSectionPrefix(
    const Constant *C, const ProfileSummaryInfo *PSI) const {
  const std::optional<StringRef> CountHotness =
      getDataHotnessBasedOnProfileCount(C, PSI);

  const auto *GV = dyn_cast<GlobalVariable>(C);
  if (!GV)
    return CountHotness.value_or("");

  return reconcileOptionalHotness(GV->getSectionPrefix(), CountHotness);
}

bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
Info.reset(new StaticDataProfileInfo());
return false;
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/CodeGen/StaticDataAnnotator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,6 @@ bool StaticDataAnnotator::runOnModule(Module &M) {
if (GV.isDeclarationForLinker())
continue;

// The implementation below assumes prior passes don't set section prefixes,
// and specifically do 'assign' rather than 'update'. So report error if a
// section prefix is already set.
if (auto maybeSectionPrefix = GV.getSectionPrefix();
maybeSectionPrefix && !maybeSectionPrefix->empty())
llvm::report_fatal_error("Global variable " + GV.getName() +
" already has a section prefix " +
*maybeSectionPrefix);

StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI);
if (SectionPrefix.empty())
continue;
Expand Down
80 changes: 78 additions & 2 deletions llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProfCommon.h"
Expand Down Expand Up @@ -75,6 +76,17 @@ static cl::opt<unsigned> MinMatchedColdBytePercent(
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
cl::desc("Min percent of cold bytes matched to hint allocation cold"));

// Hidden flag (off by default) that enables annotating global variables with
// a static data section prefix.
static cl::opt<bool> AnnotationStaticDataPrefix(
"annotate-static-data-prefix", cl::init(false), cl::Hidden,
cl::desc("If true, annotate the static data section prefix"));

// Hidden debugging flag (off by default) that prints the section prefix of
// each global variable to errs(). Note that the printing code lives in
// MemProfUsePass::annotateGlobalVariables and is only reached when
// -annotate-static-data-prefix is also set and data access profile data is
// available.
static cl::opt<bool>
PrintStaticDataPrefix("print-static-data-prefix", cl::init(false),
cl::Hidden,
cl::desc("If true, print the static data section "
"prefix in errs(). This option is "
"meant for debugging."));

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
Expand Down Expand Up @@ -674,8 +686,9 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
}

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
// Return immediately if the module doesn't contain any function.
if (M.empty())
// Return immediately if the module doesn't contain any function or global
// variables.
if (M.empty() && M.globals().empty())
return PreservedAnalyses::all();

LLVM_DEBUG(dbgs() << "Read in memory profile:");
Expand Down Expand Up @@ -703,6 +716,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::all();
}

const bool Changed =
annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());

// If the module doesn't contain any function, return after we process all
// global variables.
if (M.empty())
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();

auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
Expand Down Expand Up @@ -752,3 +773,58 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {

return PreservedAnalyses::none();
}

/// Sets the section prefix of the global variables defined in \p M according
/// to the data access profile \p DataAccessProf.
///
/// A variable with a non-zero sampled access count is marked "hot"; a variable
/// the profile reports as a known cold symbol is marked "unlikely"; any other
/// variable is left without a prefix. Declarations and string literals (names
/// starting with ".str") are skipped.
///
/// Does nothing and returns false when annotation is disabled on the command
/// line, when \p M has no global variables, or when \p DataAccessProf is null.
///
/// \returns true if at least one global variable was annotated.
bool MemProfUsePass::annotateGlobalVariables(
    Module &M, const memprof::DataAccessProfData *DataAccessProf) {
  if (!AnnotationStaticDataPrefix)
    return false;
  if (M.globals().empty() || !DataAccessProf)
    return false;

  bool Changed = false;
  for (GlobalVariable &GVar : M.globals()) {
    assert(!GVar.getSectionPrefix().has_value() &&
           "GVar shouldn't have section prefix yet");
    if (GVar.isDeclarationForLinker())
      continue;

    StringRef Name = GVar.getName();
    // String literals are skipped because their mangled names don't stay
    // stable across binary releases.
    // TODO: Track string content hash in the profiles and compute it inside
    // the compiler to categorize the hotness of string literals.
    if (Name.starts_with(".str"))
      continue;

    // The profile's look-up methods canonicalize the variable name
    // themselves, so the raw name is passed through here.
    std::optional<DataAccessProfRecord> Record =
        DataAccessProf->getProfileRecord(Name);

    // Hot: the variable has a non-zero sampled access count.
    // Cold: the variable was seen in the profiled binary but has no access
    // sample. Anything else is left unannotated.
    const bool HasAccessSamples = Record && Record->AccessCount > 0;
    if (!HasAccessSamples && !DataAccessProf->isKnownColdSymbol(Name))
      continue;

    GVar.setSectionPrefix(HasAccessSamples ? "hot" : "unlikely");
    Changed = true;
  }

  if (!PrintStaticDataPrefix)
    return Changed;

  // The optimization remark emitter needs an llvm::Function, and there is no
  // well-defined function to associate with a global variable, so the result
  // is reported on errs() instead.
  for (const GlobalVariable &GVar : M.globals()) {
    if (GVar.isDeclarationForLinker())
      continue;

    StringRef Name = GVar.getName();
    if (auto Prefix = GVar.getSectionPrefix())
      errs() << "Global variable " << Name
             << " has section prefix: " << *Prefix << "\n";
    else
      errs() << "Global variable " << Name << " has no section prefix\n";
  }
  return Changed;
}
28 changes: 18 additions & 10 deletions llvm/test/CodeGen/X86/global-variable-partition.ll
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,24 @@ target triple = "x86_64-unknown-linux-gnu"
; UNIQ-NEXT: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,6
; AGG-NEXT: .section .data.rel.ro.unlikely.,"aw",@progbits

; Currently static-data-splitter only analyzes access from code.
; @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
; and @cold_relro_array. A follow-up item is to analyze indirect access via data
; and prune the unlikely list.
; For @bss2

; @bss2 and @data3 are indirectly accessed via @hot_relro_array and
; @cold_relro_array, and actually hot due to accesses via @hot_relro_array.
; Under the hood, the static data splitter pass analyzes accesses from code but
; won't aggressively propagate the hotness of @hot_relro_array into the array
; elements -- instead, this pass reconciles the hotness information from both
; global variable section prefix and PGO counters.

; @bss2 has a section prefix 'hot' in the IR. StaticDataProfileInfo reconciles
; it into a hot prefix.
; COMMON: .type bss2,@object
; SYM-NEXT: .section .bss.unlikely.bss2,"aw",@nobits
; UNIQ-NEXT: .section .bss.unlikely.,"aw",@nobits,unique,7
; AGG-NEXT: .section .bss.unlikely.,"aw",@nobits
; SYM-NEXT: .section .bss.hot.bss2,"aw",@nobits
; UNIQ-NEXT: .section .bss.hot.,"aw",@nobits,unique,7
; AGG-NEXT: .section .bss.hot.,"aw",@nobits

; For @data3
; @data3 doesn't have data access profile coverage and thereby doesn't have a
; section prefix. PGO counter analysis categorizes it as cold, so it will have
; section name `.data.unlikely`.
; COMMON: .type data3,@object
; SYM-NEXT: .section .data.unlikely.data3,"aw",@progbits
; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8
Expand Down Expand Up @@ -133,7 +140,7 @@ target triple = "x86_64-unknown-linux-gnu"
@cold_data = internal global i32 4
@cold_data_custom_foo_section = internal global i32 100, section "foo"
@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2]
@bss2 = internal global i32 0
@bss2 = internal global i32 0, !section_prefix !17
@data3 = internal global i32 3
@data_with_unknown_hotness = private global i32 5
@hot_data_custom_bar_section = internal global i32 101 #0
Expand Down Expand Up @@ -227,3 +234,4 @@ attributes #0 = {"data-section"="bar"}
!14 = !{!"function_entry_count", i64 100000}
!15 = !{!"function_entry_count", i64 1}
!16 = !{!"branch_weights", i32 1, i32 99999}
!17 = !{!"section_prefix", !"hot"}
87 changes: 87 additions & 0 deletions llvm/test/Transforms/PGOProfile/data-access-profile.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; RUN: rm -rf %t && split-file %s %t && cd %t

;; Read a text profile and merge it into indexed profile.
; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata

;; Run optimizer pass on the IR, and check the section prefix.
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -annotate-static-data-prefix \
; RUN: -S input.ll -o - 2>&1 | FileCheck %s

;; Repeat the command line above and enable -print-static-data-prefix. Test both IR and log.
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -annotate-static-data-prefix \
; RUN: -print-static-data-prefix -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,CHECK

; LOG: Global variable .str has no section prefix
; LOG: Global variable var1 has section prefix: hot
; LOG: Global variable var2.llvm.125 has section prefix: hot
; LOG: Global variable foo has section prefix: unlikely
; LOG: Global variable bar has no section prefix

;; String literals are not annotated.
; CHECK: @.str = unnamed_addr constant [5 x i8] c"abcde"
; CHECK-NOT: section_prefix
; CHECK: @var1 = global i32 123, !section_prefix !0

;; @var2.llvm.125 will be canonicalized to @var2 for profile look-up.
; CHECK-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
; CHECK-NEXT: @foo = global i8 2, !section_prefix !1

;; @bar is not seen in hot symbol or known symbol set, so it doesn't get
;; a section prefix. It's up to the linker to decide how to map input sections
;; to output, and one conservative practice is to map unlikely-prefixed ones to
;; unlikely output section, and map the rest (hot-prefixed or prefix-less) to
;; the canonical output section.
; CHECK-NEXT: @bar = global i16 3

; CHECK: !0 = !{!"section_prefix", !"hot"}
; CHECK-NEXT: !1 = !{!"section_prefix", !"unlikely"}

;--- memprof.yaml
---
HeapProfileRecords:
- GUID: 0xdeadbeef12345678
AllocSites:
- Callstack:
- { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
- { Function: 0x2222222222222222, LineOffset: 22, Column: 20, IsInlineFrame: false }
MemInfoBlock:
AllocCount: 111
TotalSize: 222
TotalLifetime: 333
TotalLifetimeAccessDensity: 444
CallSites:
- Frames:
- { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
- { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
CalleeGuids: [ 0x100, 0x200 ]
DataAccessProfiles:
SampledRecords:
- Symbol: var1
AccessCount: 1000
- Symbol: var2
AccessCount: 5
- Hash: 101010
AccessCount: 145
KnownColdSymbols:
- foo
KnownColdStrHashes: [ 999, 1001 ]
...
;--- input.ll

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = unnamed_addr constant [5 x i8] c"abcde"
@var1 = global i32 123
@var2.llvm.125 = global i64 0
@foo = global i8 2
@bar = global i16 3

define i32 @func() {
%a = load i32, ptr @var1
%b = load i32, ptr @var2.llvm.125
%ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b)
ret i32 %ret
}

declare i32 @func_taking_arbitrary_param(...)