Skip to content

Commit df3e39f

Browse files
committed
[InstrProfiling] Use !associated metadata for counters, data and values
Input sections whose names are C identifiers, such as __llvm_prf_*, are GC roots, so they cannot be discarded. In LLD, the SHF_LINK_ORDER flag overrides the C identifier name semantics. The !associated metadata may be attached to a global object declaration with a single argument that references another global object, and it gets lowered to the SHF_LINK_ORDER flag. When a function symbol is discarded by the linker, setting up !associated metadata allows the linker to discard the counters, data and values associated with that function symbol. Note that !associated metadata is only supported on ELF; it does not have any effect on non-ELF targets. Differential Revision: https://reviews.llvm.org/D76802
1 parent 93345e8 commit df3e39f

File tree

10 files changed

+172
-2
lines changed

10 files changed

+172
-2
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,9 @@ getInstrProfOptions(const CodeGenOptions &CodeGenOpts,
615615
Options.NoRedZone = CodeGenOpts.DisableRedZone;
616616
Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput;
617617
Options.Atomic = CodeGenOpts.AtomicProfileUpdate;
618+
std::pair<int, int> BinutilsVersion =
619+
llvm::TargetMachine::parseBinutilsVersion(CodeGenOpts.BinutilsVersion);
620+
Options.CounterLinkOrder = BinutilsVersion >= std::make_pair(2, 36);
618621
return Options;
619622
}
620623

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// REQUIRES: linux, lld-available
2+
3+
// RUN: %clang_profgen=%t.profraw -fuse-ld=lld -fcoverage-mapping -mllvm -counter-link-order -mllvm -enable-name-compression=false -DCODE=1 -ffunction-sections -fdata-sections -Wl,--gc-sections -o %t %s
4+
// RUN: %run %t
5+
// RUN: llvm-profdata merge -o %t.profdata %t.profraw
6+
// RUN: llvm-profdata show --all-functions %t.profdata | FileCheck %s -check-prefix=PROF
7+
// RUN: llvm-cov show %t -instr-profile %t.profdata | FileCheck %s -check-prefix=COV
8+
// RUN: llvm-nm %t | FileCheck %s -check-prefix=NM
9+
// RUN: llvm-readelf -x __llvm_prf_names %t | FileCheck %s -check-prefix=PRF_NAMES
10+
// RUN: llvm-readelf -x __llvm_prf_cnts %t | FileCheck %s -check-prefix=PRF_CNTS
11+
12+
// RUN: %clang_lto_profgen=%t.lto.profraw -fuse-ld=lld -fcoverage-mapping -mllvm -counter-link-order -mllvm -enable-name-compression=false -DCODE=1 -ffunction-sections -fdata-sections -Wl,--gc-sections -flto -o %t.lto %s
13+
// RUN: %run %t.lto
14+
// RUN: llvm-profdata merge -o %t.lto.profdata %t.lto.profraw
15+
// RUN: llvm-profdata show --all-functions %t.lto.profdata | FileCheck %s -check-prefix=PROF
16+
// RUN: llvm-cov show %t.lto -instr-profile %t.lto.profdata | FileCheck %s -check-prefix=COV
17+
// RUN: llvm-nm %t.lto | FileCheck %s -check-prefix=NM
18+
// RUN: llvm-readelf -x __llvm_prf_names %t.lto | FileCheck %s -check-prefix=PRF_NAMES
19+
// RUN: llvm-readelf -x __llvm_prf_cnts %t.lto | FileCheck %s -check-prefix=PRF_CNTS
20+
21+
// Note: We expect foo() and some of the profiling data associated with it to
22+
// be garbage collected.
23+
24+
// Note: When there is no code in a program, we expect to see the exact same
25+
// set of external functions provided by the profile runtime.
26+
27+
// RUN: %clang_profgen -fuse-ld=lld -fcoverage-mapping -mllvm -counter-link-order -ffunction-sections -fdata-sections -Wl,--gc-sections -shared -o %t.nocode.so %s
28+
// RUN: llvm-nm -jgU %t.nocode.so | grep -vE "__start_.*|__stop_.*" > %t.nocode.syms
29+
// RUN: llvm-nm -jgU %t | grep -vE "main|_start|_IO_stdin_used|__libc_.*" > %t.code.syms
30+
// RUN: diff %t.nocode.syms %t.code.syms
31+
32+
// Note: We also check the IR instrumentation and expect foo() to be garbage
33+
// collected as well.
34+
35+
// RUN: %clang_pgogen=%t.pgo.profraw -fuse-ld=lld -mllvm -counter-link-order -DCODE=1 -ffunction-sections -fdata-sections -Wl,--gc-sections -o %t.pgo %s
36+
// RUN: %run %t.pgo
37+
// RUN: llvm-profdata merge -o %t.pgo.profdata %t.pgo.profraw
38+
// RUN: llvm-profdata show --all-functions %t.pgo.profdata | FileCheck %s -check-prefix=PGO
39+
// RUN: llvm-nm %t.pgo | FileCheck %s -check-prefix=NM
40+
41+
#ifdef CODE
42+
43+
// COV: [[@LINE+1]]{{ *}}|{{ *}}0|void foo()
44+
void foo() {}
45+
46+
// COV: [[@LINE+1]]{{ *}}|{{ *}}1|int main
47+
int main() { return 0; }
48+
49+
#endif // CODE
50+
51+
// NM-NOT: foo
52+
53+
// PROF: Counters:
54+
// PROF-NEXT: main:
55+
// PROF-NEXT: Hash:
56+
// PROF-NEXT: Counters: 1
57+
// PROF-NEXT: Function count: 1
58+
// PROF-NEXT: Instrumentation level: Front-end
59+
// PROF-NEXT: Functions shown: 1
60+
// PROF-NEXT: Total functions: 1
61+
// PROF-NEXT: Maximum function count:
62+
// PROF-NEXT: Maximum internal block count:
63+
64+
// Note: We don't expect the names of garbage collected functions to disappear
65+
// from __llvm_prf_names, because collectPGOFuncNameStrings() glues the names
66+
// together.
67+
68+
// PRF_NAMES: Hex dump of section '__llvm_prf_names':
69+
// PRF_NAMES-NEXT: {{.*}} 0800666f 6f016d61 696e {{.*$}}
70+
// | | f o o # m a i n
71+
// | |___________|
72+
// | |
73+
// UncompressedLen = 8 |
74+
// |
75+
// CompressedLen = 0
76+
77+
// Note: We expect the profile counters for garbage collected functions to also
78+
// be garbage collected.
79+
80+
// PRF_CNTS: Hex dump of section '__llvm_prf_cnts':
81+
// PRF_CNTS-NEXT: {{.*}} 00000000 00000000 {{.*$}}
82+
83+
// PGO: Counters:
84+
// PGO-NEXT: main:
85+
// PGO-NEXT: Hash:
86+
// PGO-NEXT: Counters: 1
87+
// PGO-NEXT: Instrumentation level: IR
88+
// PGO-NEXT: Functions shown: 1
89+
// PGO-NEXT: Total functions: 1
90+
// PGO-NEXT: Maximum function count:
91+
// PGO-NEXT: Maximum internal block count:

llvm/include/llvm/Transforms/Instrumentation.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ struct InstrProfOptions {
132132
// Use BFI to guide register promotion
133133
bool UseBFIInPromotion = false;
134134

135+
// Use !associated metadata to enable linker GC
136+
bool CounterLinkOrder = false;
137+
135138
// Name of the profile file to use as output
136139
std::string InstrProfileOutput;
137140

llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ class InstrProfiling : public PassInfoMixin<InstrProfiling> {
8383
/// Returns true if profile counter update register promotion is enabled.
8484
bool isCounterPromotionEnabled() const;
8585

86+
/// Returns true if the use of !associated metadata is enabled.
87+
bool isCounterLinkOrderEnabled() const;
88+
8689
/// Count the number of instrumented value sites for the function.
8790
void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
8891

llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ cl::opt<bool> RuntimeCounterRelocation(
6969
cl::desc("Enable relocating counters at runtime."),
7070
cl::init(false));
7171

72+
cl::opt<bool> CounterLinkOrder(
73+
"counter-link-order",
74+
cl::desc("Set counter associated metadata to enable garbage collection at link time."),
75+
cl::init(false));
76+
7277
cl::opt<bool> ValueProfileStaticAlloc(
7378
"vp-static-alloc",
7479
cl::desc("Do static counter allocation for value profiler"),
@@ -479,6 +484,13 @@ bool InstrProfiling::isCounterPromotionEnabled() const {
479484
return Options.DoCounterPromotion;
480485
}
481486

487+
// Returns true if the use of !associated metadata is enabled.
//
// The -counter-link-order command-line option takes precedence when it was
// given explicitly; otherwise the CounterLinkOrder field of the pass options
// decides.
bool InstrProfiling::isCounterLinkOrderEnabled() const {
488+
// Only honor the flag's value if it actually occurred on the command line, so
// that its default does not mask the value carried in Options.
if (CounterLinkOrder.getNumOccurrences() > 0)
489+
return CounterLinkOrder;
490+
491+
return Options.CounterLinkOrder;
492+
}
493+
482494
void InstrProfiling::promoteCounterLoadStores(Function *F) {
483495
if (!isCounterPromotionEnabled())
484496
return;
@@ -850,6 +862,12 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
850862
CounterPtr->setAlignment(Align(8));
851863
MaybeSetComdat(CounterPtr);
852864
CounterPtr->setLinkage(Linkage);
865+
// We need to associate the counter variable with its function because the ELF
866+
// section name (that is __llvm_prf_cnts) is a C identifier and considered a GC
867+
// root in the absence of the SHF_LINK_ORDER flag.
868+
if (isCounterLinkOrderEnabled())
869+
CounterPtr->setMetadata(LLVMContext::MD_associated,
870+
MDNode::get(Ctx, ValueAsMetadata::get(Fn)));
853871

854872
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
855873
// Allocate statically the array of pointers to value profile nodes for
@@ -871,6 +889,10 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
871889
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
872890
ValuesVar->setAlignment(Align(8));
873891
MaybeSetComdat(ValuesVar);
892+
if (isCounterLinkOrderEnabled())
893+
ValuesVar->setMetadata(
894+
LLVMContext::MD_associated,
895+
MDNode::get(Ctx, ValueAsMetadata::get(CounterPtr)));
874896
ValuesPtrExpr =
875897
ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
876898
}
@@ -905,6 +927,9 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
905927
Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
906928
MaybeSetComdat(Data);
907929
Data->setLinkage(Linkage);
930+
if (isCounterLinkOrderEnabled())
931+
Data->setMetadata(LLVMContext::MD_associated,
932+
MDNode::get(Ctx, ValueAsMetadata::get(CounterPtr)));
908933

909934
PD.RegionCounters = CounterPtr;
910935
PD.DataVar = Data;

llvm/test/Instrumentation/InstrProfiling/icall.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
; RUN: opt < %s -mtriple=mips64-unknown-linux -instrprof -vp-static-alloc=true -S | FileCheck %s --check-prefix=STATIC-SEXT
1212
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -vp-static-alloc=false -instrprof -S | FileCheck %s --check-prefix=DYN
1313
; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -vp-static-alloc=false -S | FileCheck %s --check-prefix=DYN
14+
; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -counter-link-order -vp-static-alloc=true -S | FileCheck %s --check-prefix=METADATA
1415

1516

1617
@__profn_foo = private constant [3 x i8] c"foo"
@@ -57,3 +58,12 @@ attributes #0 = { nounwind }
5758
; STATIC: declare void @__llvm_profile_instrument_target(i64, i8*, i32)
5859
; STATIC-EXT: declare void @__llvm_profile_instrument_target(i64, i8*, i32 zeroext)
5960
; STATIC-SEXT: declare void @__llvm_profile_instrument_target(i64, i8*, i32 signext)
61+
62+
; METADATA: @__profc_foo = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", align 8, !associated !0
63+
; METADATA: @__profvp_foo = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", align 8, !associated !1
64+
; METADATA: @__profc_bar = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", comdat($__profd_bar), align 8, !associated !2
65+
; METADATA: @__profvp_bar = private global [1 x i64] zeroinitializer, section "{{[^"]+}}", comdat($__profd_bar), align 8, !associated !3
66+
; METADATA: !0 = !{i32 (i32 ()*)* @foo}
67+
; METADATA: !1 = !{[1 x i64]* @__profc_foo}
68+
; METADATA: !2 = !{i32 (i32 ()*)* @bar}
69+
; METADATA: !3 = !{[1 x i64]* @__profc_bar}

llvm/test/Instrumentation/InstrProfiling/linkage.ll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=instrprof -S | FileCheck %s --check-prefixes=POSIX,LINUX
99
; RUN: opt < %s -mtriple=x86_64-unknown-fuchsia -passes=instrprof -S | FileCheck %s --check-prefixes=POSIX,LINUX
1010
; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -passes=instrprof -S | FileCheck %s --check-prefixes=COFF
11+
; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -counter-link-order -S | FileCheck %s --check-prefixes=LINUX,POSIX,METADATA
12+
; RUN: opt < %s -mtriple=x86_64-unknown-linux -passes=instrprof -counter-link-order -S | FileCheck %s --check-prefixes=LINUX,POSIX,METADATA
1113

1214
; MACHO: @__llvm_profile_runtime = external global i32
1315
; LINUX-NOT: @__llvm_profile_runtime = external global i32
@@ -19,7 +21,9 @@
1921
@__profn_foo_extern = linkonce_odr hidden constant [10 x i8] c"foo_extern"
2022

2123
; POSIX: @__profc_foo = hidden global
24+
; METADATA-SAME: !associated !0
2225
; POSIX: @__profd_foo = hidden global
26+
; METADATA-SAME: !associated !1
2327
; COFF: @__profc_foo = internal global
2428
; COFF-NOT: comdat
2529
; COFF: @__profd_foo = internal global
@@ -29,7 +33,9 @@ define void @foo() {
2933
}
3034

3135
; POSIX: @__profc_foo_weak = weak hidden global
36+
; METADATA: !associated !2
3237
; POSIX: @__profd_foo_weak = weak hidden global
38+
; METADATA: !associated !3
3339
; COFF: @__profc_foo_weak = internal global
3440
; COFF: @__profd_foo_weak = internal global
3541
define weak void @foo_weak() {
@@ -38,7 +44,9 @@ define weak void @foo_weak() {
3844
}
3945

4046
; POSIX: @"__profc_linkage.ll:foo_internal" = internal global
47+
; METADATA-SAME: !associated !4
4148
; POSIX: @"__profd_linkage.ll:foo_internal" = internal global
49+
; METADATA-SAME: !associated !5
4250
; COFF: @"__profc_linkage.ll:foo_internal" = internal global
4351
; COFF: @"__profd_linkage.ll:foo_internal" = internal global
4452
define internal void @foo_internal() {
@@ -47,7 +55,9 @@ define internal void @foo_internal() {
4755
}
4856

4957
; POSIX: @__profc_foo_inline = linkonce_odr hidden global
58+
; METADATA-SAME: !associated !6
5059
; POSIX: @__profd_foo_inline = linkonce_odr hidden global
60+
; METADATA-SAME: !associated !7
5161
; COFF: @__profc_foo_inline = internal global{{.*}} section ".lprfc$M", align 8
5262
; COFF: @__profd_foo_inline = internal global{{.*}} section ".lprfd$M", align 8
5363
define linkonce_odr void @foo_inline() {
@@ -56,7 +66,9 @@ define linkonce_odr void @foo_inline() {
5666
}
5767

5868
; LINUX: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_cnts", comdat($__profd_foo_extern), align 8
69+
; METADATA-SAME: !associated !8
5970
; LINUX: @__profd_foo_extern = linkonce_odr hidden global {{.*}}section "__llvm_prf_data", comdat, align 8
71+
; METADATA-SAME: !associated !9
6072
; MACHO: @__profc_foo_extern = linkonce_odr hidden global
6173
; MACHO: @__profd_foo_extern = linkonce_odr hidden global
6274
; COFF: @__profc_foo_extern = linkonce_odr hidden global {{.*}}section ".lprfc$M", comdat, align 8
@@ -75,3 +87,14 @@ declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
7587
; COFF: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} comdat {
7688
; LINUX-NOT: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} {
7789
; LINUX-NOT: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime
90+
91+
; METADATA: !0 = !{void ()* @foo}
92+
; METADATA: !1 = !{[1 x i64]* @__profc_foo}
93+
; METADATA: !2 = !{void ()* @foo_weak}
94+
; METADATA: !3 = !{[1 x i64]* @__profc_foo_weak}
95+
; METADATA: !4 = !{void ()* @foo_internal}
96+
; METADATA: !5 = !{[1 x i64]* @"__profc_linkage.ll:foo_internal"}
97+
; METADATA: !6 = !{void ()* @foo_inline}
98+
; METADATA: !7 = !{[1 x i64]* @__profc_foo_inline}
99+
; METADATA: !8 = !{void ()* @foo_extern}
100+
; METADATA: !9 = !{[1 x i64]* @__profc_foo_extern}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
; RUN: opt < %s -pgo-instr-gen -instrprof -counter-link-order -S | FileCheck %s
2+
; RUN: opt < %s -passes=pgo-instr-gen,instrprof -counter-link-order -S | FileCheck %s
3+
4+
; CHECK: @__profc_foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8, !associated !0
5+
; CHECK: @__profd_foo = private global {{.*}}, section "__llvm_prf_data", align 8, !associated !1
6+
7+
define void @foo() {
8+
ret void
9+
}
10+
11+
; CHECK: !0 = !{void ()* @foo}
12+
; CHECK: !1 = !{[1 x i64]* @__profc_foo}

llvm/test/Transforms/PGOProfile/counter_promo.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ bb12: ; preds = %bb9
6060
; ATOMIC_PROMO: atomicrmw add {{.*}} @__profc_foo{{.*}}0), i64 %[[LIVEOUT1]] seq_cst
6161
; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}1), i64 %[[LIVEOUT2]] seq_cst
6262
; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}2), i64 %[[LIVEOUT3]] seq_cst
63-
; PROMO-NOT: @__profc_foo
63+
; PROMO-NOT: @__profc_foo{{.*}})
6464

6565

6666
}

llvm/test/Transforms/PGOProfile/counter_promo_mexits.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ bb15_0: ; preds = %bb11
6969
; PROMO-NEXT: %pgocount{{.*}} = load {{.*}} @__profc_foo{{.*}} 4)
7070
; PROMO-NEXT: add
7171
; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}4)
72-
; PROMO-NOT: @__profc_foo
72+
; PROMO-NOT: @__profc_foo{{.*}})
7373

7474

7575
bb15: ; preds = %bb14, %bb4

0 commit comments

Comments
 (0)