Skip to content

Commit 0be9cf2

Browse files
committed
[Attributor] Add "free"-based heap2stack deduction
Summary: If there is a unique free of the allocated that has to be reached from the malloc, we can apply the heap-2-stack transformation even if the pointer escapes. Reviewers: hfinkel, sstefan1, uenoku Subscribers: hiraditya, bollu, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68958
1 parent ed7bcb2 commit 0be9cf2

File tree

4 files changed

+101
-32
lines changed

4 files changed

+101
-32
lines changed

llvm/include/llvm/Analysis/MustExecute.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,30 @@ struct MustBeExecutedContextExplorer {
420420
}
421421
///}
422422

423+
/// Helper to look for \p I in the context of \p PP.
424+
///
425+
/// The context is expanded until \p I was found or no more expansion is
426+
/// possible.
427+
///
428+
/// \returns True, iff \p I was found.
429+
bool findInContextOf(const Instruction *I, const Instruction *PP) {
430+
auto EIt = begin(PP), EEnd = end(PP);
431+
return findInContextOf(I, EIt, EEnd);
432+
}
433+
434+
/// Helper to look for \p I in the context defined by \p EIt and \p EEnd.
435+
///
436+
/// The context is expanded until \p I was found or no more expansion is
437+
/// possible.
438+
///
439+
/// \returns True, iff \p I was found.
440+
bool findInContextOf(const Instruction *I, iterator &EIt, iterator &EEnd) {
441+
bool Found = EIt.count(I);
442+
while (!Found && EIt != EEnd)
443+
Found = (++EIt).getCurrentInst() == I;
444+
return Found;
445+
}
446+
423447
/// Return the next instruction that is guaranteed to be executed after \p PP.
424448
///
425449
/// \param It The iterator that is used to traverse the must be

llvm/lib/Transforms/IPO/Attributor.cpp

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -728,12 +728,10 @@ struct AAFromMustBeExecutedContext : public Base {
728728

729729
SetVector<const Use *> NextUses;
730730

731+
auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
731732
for (const Use *U : Uses) {
732733
if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
733-
auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
734-
bool Found = EIt.count(UserI);
735-
while (!Found && ++EIt != EEnd)
736-
Found = EIt.getCurrentInst() == UserI;
734+
bool Found = Explorer.findInContextOf(UserI, EIt, EEnd);
737735
if (Found && Base::followUse(A, U, UserI))
738736
for (const Use &Us : UserI->uses())
739737
NextUses.insert(&Us);
@@ -3634,7 +3632,21 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
36343632
const Function *F = getAssociatedFunction();
36353633
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
36363634

3635+
MustBeExecutedContextExplorer &Explorer =
3636+
A.getInfoCache().getMustBeExecutedContextExplorer();
3637+
3638+
auto FreeCheck = [&](Instruction &I) {
3639+
const auto &Frees = FreesForMalloc.lookup(&I);
3640+
if (Frees.size() != 1)
3641+
return false;
3642+
Instruction *UniqueFree = *Frees.begin();
3643+
return Explorer.findInContextOf(UniqueFree, I.getNextNode());
3644+
};
3645+
36373646
auto UsesCheck = [&](Instruction &I) {
3647+
bool ValidUsesOnly = true;
3648+
bool MustUse = true;
3649+
36383650
SmallPtrSet<const Use *, 8> Visited;
36393651
SmallVector<const Use *, 8> Worklist;
36403652

@@ -3652,10 +3664,12 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
36523664
continue;
36533665
if (auto *SI = dyn_cast<StoreInst>(UserI)) {
36543666
if (SI->getValueOperand() == U->get()) {
3655-
LLVM_DEBUG(dbgs() << "[H2S] escaping store to memory: " << *UserI << "\n");
3656-
return false;
3667+
LLVM_DEBUG(dbgs()
3668+
<< "[H2S] escaping store to memory: " << *UserI << "\n");
3669+
ValidUsesOnly = false;
3670+
} else {
3671+
// A store into the malloc'ed memory is fine.
36573672
}
3658-
// A store into the malloc'ed memory is fine.
36593673
continue;
36603674
}
36613675

@@ -3673,8 +3687,14 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
36733687

36743688
// Record malloc.
36753689
if (isFreeCall(UserI, TLI)) {
3676-
FreesForMalloc[&I].insert(
3677-
cast<Instruction>(const_cast<User *>(UserI)));
3690+
if (MustUse) {
3691+
FreesForMalloc[&I].insert(
3692+
cast<Instruction>(const_cast<User *>(UserI)));
3693+
} else {
3694+
LLVM_DEBUG(dbgs() << "[H2S] free potentially on different mallocs: "
3695+
<< *UserI << "\n");
3696+
ValidUsesOnly = false;
3697+
}
36783698
continue;
36793699
}
36803700

@@ -3688,22 +3708,25 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
36883708

36893709
if (!NoCaptureAA.isAssumedNoCapture() || !NoFreeAA.isAssumedNoFree()) {
36903710
LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
3691-
return false;
3711+
ValidUsesOnly = false;
36923712
}
36933713
continue;
36943714
}
36953715

3696-
if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI)) {
3716+
if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
3717+
isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
3718+
MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI));
36973719
for (Use &U : UserI->uses())
36983720
Worklist.push_back(&U);
36993721
continue;
37003722
}
37013723

3702-
// Unknown user.
3724+
// Unknown user for which we can not track uses further (in a way that
3725+
// makes sense).
37033726
LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n");
3704-
return false;
3727+
ValidUsesOnly = false;
37053728
}
3706-
return true;
3729+
return ValidUsesOnly;
37073730
};
37083731

37093732
auto MallocCallocCheck = [&](Instruction &I) {
@@ -3720,7 +3743,7 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
37203743
if (IsMalloc) {
37213744
if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
37223745
if (Size->getValue().sle(MaxHeapToStackSize))
3723-
if (UsesCheck(I)) {
3746+
if (UsesCheck(I) || FreeCheck(I)) {
37243747
MallocCalls.insert(&I);
37253748
return true;
37263749
}
@@ -3730,7 +3753,7 @@ ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
37303753
if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
37313754
if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
37323755
.sle(MaxHeapToStackSize))
3733-
if (!Overflow && UsesCheck(I)) {
3756+
if (!Overflow && (UsesCheck(I) || FreeCheck(I))) {
37343757
MallocCalls.insert(&I);
37353758
return true;
37363759
}
@@ -3756,8 +3779,10 @@ struct AAHeapToStackFunction final : public AAHeapToStackImpl {
37563779
/// See AbstractAttribute::trackStatistics()
37573780
void trackStatistics() const override {
37583781
STATS_DECL(MallocCalls, Function,
3759-
"Number of MallocCalls converted to allocas");
3760-
BUILD_STAT_NAME(MallocCalls, Function) += MallocCalls.size();
3782+
"Number of malloc calls converted to allocas");
3783+
for (auto *C : MallocCalls)
3784+
if (!BadMallocCalls.count(C))
3785+
++BUILD_STAT_NAME(MallocCalls, Function);
37613786
}
37623787
};
37633788

llvm/test/Transforms/FunctionAttrs/heap_to_stack.ll

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ declare void @func_throws(...)
88

99
declare void @sync_func(i8* %p)
1010

11-
declare void @sync_will_return(i8* %p) willreturn
11+
declare void @sync_will_return(i8* %p) willreturn nounwind
1212

1313
declare void @no_sync_func(i8* nocapture %p) nofree nosync willreturn
1414

@@ -202,11 +202,11 @@ define i32 @test_lifetime() {
202202
}
203203

204204
; TEST 11
205-
; FIXME: should be ok
206205

207206
define void @test11() {
208207
%1 = tail call noalias i8* @malloc(i64 4)
209-
; CHECK: @malloc(i64 4)
208+
; CHECK: test11
209+
; CHECK-NEXT: alloc
210210
; CHECK-NEXT: @sync_will_return(i8* %1)
211211
tail call void @sync_will_return(i8* %1)
212212
tail call void @free(i8* %1)
@@ -330,9 +330,29 @@ define void @test16b(i8 %v, i8** %P) {
330330
%1 = tail call noalias i8* @malloc(i64 4)
331331
; CHECK-NEXT: store i8* %1, i8** %P
332332
store i8* %1, i8** %P
333-
; CHECK-NEXT: @no_sync_func(i8* %1)
333+
; CHECK-NEXT: @no_sync_func(i8* nocapture %1)
334334
tail call void @no_sync_func(i8* %1)
335335
; CHECK-NEXT: @free(i8* %1)
336336
tail call void @free(i8* %1)
337337
ret void
338338
}
339+
340+
define void @test16c(i8 %v, i8** %P) {
341+
; CHECK: %1 = alloca
342+
%1 = tail call noalias i8* @malloc(i64 4)
343+
; CHECK-NEXT: store i8* %1, i8** %P
344+
store i8* %1, i8** %P
345+
; CHECK-NEXT: @no_sync_func(i8* nocapture %1)
346+
tail call void @no_sync_func(i8* %1) nounwind
347+
; CHECK-NOT: @free
348+
tail call void @free(i8* %1)
349+
ret void
350+
}
351+
352+
define void @test16d(i8 %v, i8** %P) {
353+
; CHECK: %1 = tail call noalias i8* @malloc(i64 4)
354+
%1 = tail call noalias i8* @malloc(i64 4)
355+
; CHECK-NEXT: store i8* %1, i8** %P
356+
store i8* %1, i8** %P
357+
ret void
358+
}

llvm/test/Transforms/FunctionAttrs/noalias_returned.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ define void @test12_1() {
204204
; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nonnull align 4 dereferenceable(1) [[A]])
205205
; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nonnull align 4 dereferenceable(1) [[A]])
206206
; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]])
207-
; CHECK-NEXT: tail call void @use_nocapture(i8* noalias [[B]])
207+
; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]])
208208
; CHECK-NEXT: ret void
209209
;
210210
%A = alloca i8, align 4
@@ -221,10 +221,10 @@ define void @test12_2(){
221221
; CHECK-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4)
222222
; FIXME: This should be @use_nocapture(i8* noalias [[A]])
223223
; CHECK-NEXT: tail call void @use_nocapture(i8* nocapture [[A]])
224-
; FIXME: This should be @use_nocapture(i8* noalias [[A]])
225-
; CHECK-NEXT: tail call void @use_nocapture(i8* [[A]])
224+
; FIXME: This should be @use_nocapture(i8* noalias nocapture [[A]])
225+
; CHECK-NEXT: tail call void @use_nocapture(i8* nocapture [[A]])
226226
; CHECK-NEXT: tail call void @use(i8* [[A]])
227-
; CHECK-NEXT: tail call void @use_nocapture(i8* [[A]])
227+
; CHECK-NEXT: tail call void @use_nocapture(i8* nocapture [[A]])
228228
; CHECK-NEXT: ret void
229229
;
230230
%A = tail call noalias i8* @malloc(i64 4)
@@ -239,7 +239,7 @@ declare void @two_args(i8* nocapture , i8* nocapture)
239239
define void @test12_3(){
240240
; CHECK-LABEL: @test12_3(
241241
%A = tail call noalias i8* @malloc(i64 4)
242-
; CHECK: tail call void @two_args(i8* nocapture %A, i8* %A)
242+
; CHECK: tail call void @two_args(i8* nocapture %A, i8* nocapture %A)
243243
tail call void @two_args(i8* %A, i8* %A)
244244
ret void
245245
}
@@ -252,17 +252,17 @@ define void @test12_4(){
252252
%A_1 = getelementptr i8, i8* %A, i64 1
253253
%B_0 = getelementptr i8, i8* %B, i64 0
254254

255-
; CHECK: tail call void @two_args(i8* noalias %A, i8* noalias %B)
255+
; CHECK: tail call void @two_args(i8* noalias nocapture %A, i8* noalias nocapture %B)
256256
tail call void @two_args(i8* %A, i8* %B)
257257

258-
; CHECK: tail call void @two_args(i8* %A, i8* nocapture %A_0)
258+
; CHECK: tail call void @two_args(i8* nocapture %A, i8* nocapture %A_0)
259259
tail call void @two_args(i8* %A, i8* %A_0)
260260

261-
; CHECK: tail call void @two_args(i8* %A, i8* %A_1)
261+
; CHECK: tail call void @two_args(i8* nocapture %A, i8* nocapture %A_1)
262262
tail call void @two_args(i8* %A, i8* %A_1)
263263

264-
; FIXME: This should be @two_args(i8* noalias %A_0, i8* noalias %B_0)
265-
; CHECK: tail call void @two_args(i8* %A_0, i8* nocapture %B_0)
264+
; FIXME: This should be @two_args(i8* noalias nocapture %A_0, i8* noalias nocapture %B_0)
265+
; CHECK: tail call void @two_args(i8* nocapture %A_0, i8* nocapture %B_0)
266266
tail call void @two_args(i8* %A_0, i8* %B_0)
267267
ret void
268268
}

0 commit comments

Comments
 (0)