diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index deb5cd17d8fd9..134195059f9e6 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -729,6 +729,11 @@ class IndexedMemProfReader { LLVM_ABI DenseMap> getMemProfCallerCalleePairs() const; + // Returns non-owned pointer to data access profile data (may be null). + LLVM_ABI memprof::DataAccessProfData *getDataAccessProfileData() const { + return DataAccessProfileData.get(); + } + // Return the entire MemProf profile. LLVM_ABI memprof::AllMemProfData getAllMemProfData() const; @@ -900,6 +905,12 @@ class LLVM_ABI IndexedInstrProfReader : public InstrProfReader { return MemProfReader.getSummary(); } + /// Returns non-owned pointer to the data access profile data. + /// Will be null if unavailable (version < 4). + memprof::DataAccessProfData *getDataAccessProfileData() const { + return MemProfReader.getDataAccessProfileData(); + } + Error readBinaryIds(std::vector &BinaryIds) override; Error printBinaryIds(raw_ostream &OS) override; }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h index 6170bf48e4695..1fbb2bcb194ef 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h @@ -14,6 +14,7 @@ #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/Support/Compiler.h" @@ -36,6 +37,11 @@ class MemProfUsePass : public PassInfoMixin { LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: + // Annotate global variables' section prefix based on data access profile. + // Returns true if any global variable is annotated and false otherwise. 
+ bool + annotateGlobalVariables(Module &M, + const memprof::DataAccessProfData *DataAccessProf); std::string MemoryProfileFileName; IntrusiveRefCntPtr FS; }; diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp index a9a0731f16d90..ecb2f2dbc552b 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/DataAccessProf.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/MemProfCommon.h" @@ -75,6 +76,10 @@ static cl::opt MinMatchedColdBytePercent( "memprof-matching-cold-threshold", cl::init(100), cl::Hidden, cl::desc("Min percent of cold bytes matched to hint allocation cold")); +static cl::opt AnnotateStaticDataSectionPrefix( + "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden, + cl::desc("If true, annotate the static data section prefix")); + // Matching statistics STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); STATISTIC(NumOfMemProfMismatch, @@ -90,6 +95,14 @@ STATISTIC(NumOfMemProfMatchedAllocs, "Number of matched memory profile allocs."); STATISTIC(NumOfMemProfMatchedCallSites, "Number of matched memory profile callsites."); +STATISTIC(NumOfMemProfHotGlobalVars, + "Number of global vars annotated with 'hot' section prefix."); +STATISTIC(NumOfMemProfColdGlobalVars, + "Number of global vars annotated with 'unlikely' section prefix."); +STATISTIC(NumOfMemProfUnknownGlobalVars, + "Number of global vars with unknown hotness (no section prefix)."); +STATISTIC(NumOfMemProfExplicitSectionGlobalVars, + "Number of global vars with user-specified section (not annotated)."); static void addCallsiteMetadata(Instruction &I, ArrayRef InlinedCallStack, @@ -674,11 +687,12 @@ 
MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, } PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { - // Return immediately if the module doesn't contain any function. - if (M.empty()) + // Return immediately if the module doesn't contain any functions or global + // variables. + if (M.empty() && M.globals().empty()) return PreservedAnalyses::all(); - LLVM_DEBUG(dbgs() << "Read in memory profile:"); + LLVM_DEBUG(dbgs() << "Read in memory profile:\n"); auto &Ctx = M.getContext(); auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS); if (Error E = ReaderOrErr.takeError()) { @@ -703,6 +717,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::all(); } + const bool Changed = + annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData()); + + // If the module doesn't contain any function, return after we process all + // global variables. + if (M.empty()) + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); + auto &FAM = AM.getResult(M).getManager(); TargetLibraryInfo &TLI = FAM.getResult(*M.begin()); @@ -752,3 +774,95 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { return PreservedAnalyses::none(); } + +// Returns true iff the global variable has a custom section set either by +// __attribute__((section("name"))) +// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate) +// or #pragma clang section directives +// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section). 
+static bool hasExplicitSectionName(const GlobalVariable &GVar) { + if (GVar.hasSection()) + return true; + + auto Attrs = GVar.getAttributes(); + if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") || + Attrs.hasAttribute("relro-section") || + Attrs.hasAttribute("rodata-section")) + return true; + return false; +} + +bool MemProfUsePass::annotateGlobalVariables( + Module &M, const memprof::DataAccessProfData *DataAccessProf) { + if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) + return false; + + if (!DataAccessProf) { + M.getContext().diagnose(DiagnosticInfoPGOProfile( + MemoryProfileFileName.data(), + StringRef("Data access profiles not found in memprof. Ignore " + "-memprof-annotate-static-data-prefix."), + DS_Warning)); + return false; + } + + bool Changed = false; + // Iterate all global variables in the module and annotate them based on + // data access profiles. Note it's up to the linker to decide how to map input + // sections to output sections, and one conservative practice is to map + // unlikely-prefixed ones to unlikely output section, and map the rest + // (hot-prefixed or prefix-less) to the canonical output section. + for (GlobalVariable &GVar : M.globals()) { + assert(!GVar.getSectionPrefix().has_value() && + "GVar shouldn't have section prefix yet"); + if (GVar.isDeclarationForLinker()) + continue; + + if (hasExplicitSectionName(GVar)) { + ++NumOfMemProfExplicitSectionGlobalVars; + LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName() + << " has explicit section name. Skip annotating.\n"); + continue; + } + + StringRef Name = GVar.getName(); + // Skip string literals as their mangled names don't stay stable across + // binary releases. + // TODO: Track string content hash in the profiles and compute it inside the + // compiler to categorize the hotness of string literals. 
+ if (Name.starts_with(".str")) { + + LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n"); + continue; + } + + // DataAccessProfData's get* methods will canonicalize the name under the + // hood before looking it up, so the optimizer doesn't need to do it. + std::optional Record = + DataAccessProf->getProfileRecord(Name); + // Annotate a global variable as hot if it has non-zero sampled count, and + // annotate it as cold if it's seen in the profiled binary + // file but doesn't have any access sample. + // For logging, optimization remark emitter requires an llvm::Function, but + // it's not well defined how to associate a global variable with a function. + // So we just print out the static data section prefix in LLVM_DEBUG. + if (Record && Record->AccessCount > 0) { + ++NumOfMemProfHotGlobalVars; + GVar.setSectionPrefix("hot"); + Changed = true; + LLVM_DEBUG(dbgs() << "Global variable " << Name + << " is annotated as hot\n"); + } else if (DataAccessProf->isKnownColdSymbol(Name)) { + ++NumOfMemProfColdGlobalVars; + GVar.setSectionPrefix("unlikely"); + Changed = true; + LLVM_DEBUG(dbgs() << "Global variable " << Name + << " is annotated as unlikely\n"); + } else { + ++NumOfMemProfUnknownGlobalVars; + LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n"); + } + } + + return Changed; +} diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll new file mode 100644 index 0000000000000..29198f34ccbba --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll @@ -0,0 +1,112 @@ +; REQUIRES: asserts +; asserts are required for -debug-only= + +; RUN: rm -rf %t && split-file %s %t && cd %t + +;; Read a text profile and merge it into indexed profile. 
+; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata + +;; Run optimizer pass on an IR module without IR functions, and test that global +;; variables in the module could be annotated (i.e., no early return). +; RUN: opt -passes='memprof-use' -memprof-annotate-static-data-prefix \ +; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT + +;; Run optimizer pass on the IR, and check the section prefix. +; RUN: opt -passes='memprof-use' -memprof-annotate-static-data-prefix \ +; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT + +;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix. +;; The output text IR shouldn't have `section_prefix` +; RUN: opt -passes='memprof-use' \ +; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix" + +; LOG: Skip annotating string literal .str +; LOG: Global variable var1 is annotated as hot +; LOG: Global variable var2.llvm.125 is annotated as hot +; LOG: Global variable bar is not annotated +; LOG: Global variable foo is annotated as unlikely +; LOG: Global variable var3 has explicit section name. Skip annotating. +; LOG: Global variable var4 has explicit section name. Skip annotating. + +;; String literals are not annotated. +; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde" +; PREFIX-NOT: section_prefix +; PREFIX: @var1 = global i32 123, !section_prefix !0 + +;; @var2.llvm.125 will be canonicalized to @var2 for profile look-up. +; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0 + +;; @bar is not seen in hot symbol or known symbol set, so it won't get a section +;; prefix. Test this by testing that there is no section_prefix between @bar and +;; @foo. +; PREFIX-NEXT: @bar = global i16 3 +; PREFIX-NOT: !section_prefix + +;; @foo is listed in KnownColdSymbols, so it is annotated as unlikely. 
+; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1 + +; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1" +; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0 + +; PREFIX: attributes #0 = { "rodata-section"="sec2" } + +; PREFIX: !0 = !{!"section_prefix", !"hot"} +; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"} + +; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix. +; STAT: 2 memprof - Number of global vars with user-specified section (not annotated). +; STAT: 2 memprof - Number of global vars annotated with 'hot' section prefix. +; STAT: 1 memprof - Number of global vars with unknown hotness (no section prefix). + +;--- memprof.yaml +--- +DataAccessProfiles: + SampledRecords: + - Symbol: var1 + AccessCount: 1000 + - Symbol: var2 + AccessCount: 5 + - Hash: 101010 + AccessCount: 145 + KnownColdSymbols: + - foo + KnownColdStrHashes: [ 999, 1001 ] +... +;--- input.ll + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = unnamed_addr constant [5 x i8] c"abcde" +@var1 = global i32 123 +@var2.llvm.125 = global i64 0 +@bar = global i16 3 +@foo = global i8 2 +@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1" +@var4 = constant [1 x i64][i64 98765] #0 + +define i32 @func() { + %a = load i32, ptr @var1 + %b = load i32, ptr @var2.llvm.125 + %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b) + ret i32 %ret +} + +declare i32 @func_taking_arbitrary_param(...) 
+ +attributes #0 = { "rodata-section"="sec2" } + +;--- funcless-module.ll + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = unnamed_addr constant [5 x i8] c"abcde" +@var1 = global i32 123 +@var2.llvm.125 = global i64 0 +@bar = global i16 3 +@foo = global i8 2 +@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1" +@var4 = constant [1 x i64][i64 98765] #0 + +attributes #0 = { "rodata-section"="sec2" }