Skip to content

Commit 564fb58

Browse files
committed
[WinEH] Allocate space in funclets stack to save XMM CSRs
Summary: This is an alternate approach to D63396. Currently funclets reuse the same stack slots that are used in the parent function for saving callee-saved xmm registers. If the parent function modifies a callee-saved xmm register before an exception is thrown, the catch handler will overwrite the original saved value. This patch allocates space in the funclet's stack for saving callee-saved xmm registers and uses RSP instead of RBP to access memory. Signed-off-by: Pengfei Wang <pengfei.wang@intel.com> Reviewers: rnk, RKSimon, craig.topper, annita.zhang, LuoYuanke, andrew.w.kaylor Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66596 Signed-off-by: Pengfei Wang <pengfei.wang@intel.com> llvm-svn: 370005
1 parent 6a808d5 commit 564fb58

File tree

8 files changed

+208
-19
lines changed

8 files changed

+208
-19
lines changed

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1389,9 +1389,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
13891389
int FI;
13901390
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
13911391
if (X86::FR64RegClass.contains(Reg)) {
1392+
int Offset;
13921393
unsigned IgnoredFrameReg;
1393-
int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
1394-
Offset += SEHFrameOffset;
1394+
if (IsWin64Prologue && IsFunclet)
1395+
Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
1396+
else
1397+
Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) +
1398+
SEHFrameOffset;
13951399

13961400
HasWinCFI = true;
13971401
assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
@@ -1547,9 +1551,13 @@ X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
15471551

15481552
unsigned
15491553
X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
1554+
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
15501555
// This is the size of the pushed CSRs.
1551-
unsigned CSSize =
1552-
MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
1556+
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
1557+
// This is the size of callee saved XMMs.
1558+
const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
1559+
unsigned XMMSize = WinEHXMMSlotInfo.size() *
1560+
TRI->getSpillSize(X86::VR128RegClass);
15531561
// This is the amount of stack a funclet needs to allocate.
15541562
unsigned UsedSize;
15551563
EHPersonality Personality =
@@ -1569,7 +1577,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
15691577
unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment());
15701578
// Subtract out the size of the callee saved registers. This is how much stack
15711579
// each funclet will allocate.
1572-
return FrameSizeMinusRBP - CSSize;
1580+
return FrameSizeMinusRBP + XMMSize - CSSize;
15731581
}
15741582

15751583
static bool isTailCallOpcode(unsigned Opc) {
@@ -1843,6 +1851,20 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
18431851
return Offset + FPDelta;
18441852
}
18451853

1854+
int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
1855+
int FI, unsigned &FrameReg) const {
1856+
const MachineFrameInfo &MFI = MF.getFrameInfo();
1857+
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1858+
const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
1859+
const auto it = WinEHXMMSlotInfo.find(FI);
1860+
1861+
if (it == WinEHXMMSlotInfo.end())
1862+
return getFrameIndexReference(MF, FI, FrameReg);
1863+
1864+
FrameReg = TRI->getStackRegister();
1865+
return alignTo(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second;
1866+
}
1867+
18461868
int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
18471869
int FI, unsigned &FrameReg,
18481870
int Adjustment) const {
@@ -1941,6 +1963,8 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
19411963
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
19421964

19431965
unsigned CalleeSavedFrameSize = 0;
1966+
unsigned XMMCalleeSavedFrameSize = 0;
1967+
auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
19441968
int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
19451969

19461970
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -2018,12 +2042,20 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
20182042
unsigned Size = TRI->getSpillSize(*RC);
20192043
unsigned Align = TRI->getSpillAlignment(*RC);
20202044
// ensure alignment
2021-
SpillSlotOffset -= std::abs(SpillSlotOffset) % Align;
2045+
assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
2046+
SpillSlotOffset = -alignTo(-SpillSlotOffset, Align);
2047+
20222048
// spill into slot
20232049
SpillSlotOffset -= Size;
20242050
int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
20252051
CSI[i - 1].setFrameIdx(SlotIndex);
20262052
MFI.ensureMaxAlignment(Align);
2053+
2054+
// Save the start offset and size of XMM in stack frame for funclets.
2055+
if (X86::VR128RegClass.contains(Reg)) {
2056+
WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2057+
XMMCalleeSavedFrameSize += Size;
2058+
}
20272059
}
20282060

20292061
return true;

llvm/lib/Target/X86/X86FrameLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ class X86FrameLowering : public TargetFrameLowering {
9999
int getFrameIndexReference(const MachineFunction &MF, int FI,
100100
unsigned &FrameReg) const override;
101101

102+
int getWin64EHFrameIndexRef(const MachineFunction &MF,
103+
int FI, unsigned &SPReg) const;
102104
int getFrameIndexReferenceSP(const MachineFunction &MF,
103105
int FI, unsigned &SPReg, int Adjustment) const;
104106
int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,

llvm/lib/Target/X86/X86MachineFunctionInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
3636
/// is stashed.
3737
signed char RestoreBasePointerOffset = 0;
3838

39+
/// WinEHXMMSlotInfo - Slot information of XMM registers in the stack frame
40+
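/// in bytes. Maps the frame index of a callee-saved XMM spill slot to the
/// running byte offset of that slot (the total size of the XMM spill slots
/// recorded before it).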
41+
DenseMap<int, unsigned> WinEHXMMSlotInfo;
42+
3943
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
4044
/// stack frame in bytes.
4145
unsigned CalleeSavedFrameSize = 0;
@@ -120,6 +124,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
120124
void setRestoreBasePointer(const MachineFunction *MF);
121125
int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
122126

127+
DenseMap<int, unsigned>& getWinEHXMMSlotInfo() { return WinEHXMMSlotInfo; }
128+
const DenseMap<int, unsigned>& getWinEHXMMSlotInfo() const {
129+
return WinEHXMMSlotInfo; }
130+
123131
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
124132
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
125133

llvm/lib/Target/X86/X86RegisterInfo.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -692,12 +692,27 @@ static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
692692
return true;
693693
}
694694

695+
static bool isFuncletReturnInstr(MachineInstr &MI) {
696+
switch (MI.getOpcode()) {
697+
case X86::CATCHRET:
698+
case X86::CLEANUPRET:
699+
return true;
700+
default:
701+
return false;
702+
}
703+
llvm_unreachable("impossible");
704+
}
705+
695706
void
696707
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
697708
int SPAdj, unsigned FIOperandNum,
698709
RegScavenger *RS) const {
699710
MachineInstr &MI = *II;
700-
MachineFunction &MF = *MI.getParent()->getParent();
711+
MachineBasicBlock &MBB = *MI.getParent();
712+
MachineFunction &MF = *MBB.getParent();
713+
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
714+
bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
715+
: isFuncletReturnInstr(*MBBI);
701716
const X86FrameLowering *TFI = getFrameLowering(MF);
702717
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
703718

@@ -709,6 +724,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
709724
MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
710725
"Return instruction can only reference SP relative frame objects");
711726
FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0);
727+
} else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
728+
FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
712729
} else {
713730
FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr);
714731
}

llvm/test/CodeGen/X86/avx512-intel-ocl.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
184184
; WIN64-KNL-LABEL: test_prolog_epilog:
185185
; WIN64-KNL: # %bb.0:
186186
; WIN64-KNL-NEXT: pushq %rbp
187-
; WIN64-KNL-NEXT: subq $1328, %rsp # imm = 0x530
187+
; WIN64-KNL-NEXT: subq $1264, %rsp # imm = 0x4F0
188188
; WIN64-KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
189189
; WIN64-KNL-NEXT: kmovw %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
190190
; WIN64-KNL-NEXT: kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
@@ -232,14 +232,14 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
232232
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload
233233
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload
234234
; WIN64-KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 2-byte Reload
235-
; WIN64-KNL-NEXT: leaq 1200(%rbp), %rsp
235+
; WIN64-KNL-NEXT: leaq 1136(%rbp), %rsp
236236
; WIN64-KNL-NEXT: popq %rbp
237237
; WIN64-KNL-NEXT: retq
238238
;
239239
; WIN64-SKX-LABEL: test_prolog_epilog:
240240
; WIN64-SKX: # %bb.0:
241241
; WIN64-SKX-NEXT: pushq %rbp
242-
; WIN64-SKX-NEXT: subq $1328, %rsp # imm = 0x530
242+
; WIN64-SKX-NEXT: subq $1264, %rsp # imm = 0x4F0
243243
; WIN64-SKX-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
244244
; WIN64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
245245
; WIN64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -287,7 +287,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
287287
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 8-byte Reload
288288
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 8-byte Reload
289289
; WIN64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 # 8-byte Reload
290-
; WIN64-SKX-NEXT: leaq 1200(%rbp), %rsp
290+
; WIN64-SKX-NEXT: leaq 1136(%rbp), %rsp
291291
; WIN64-SKX-NEXT: popq %rbp
292292
; WIN64-SKX-NEXT: retq
293293
;
@@ -346,7 +346,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
346346
; X64-SKX: ## %bb.0:
347347
; X64-SKX-NEXT: pushq %rsi
348348
; X64-SKX-NEXT: pushq %rdi
349-
; X64-SKX-NEXT: subq $1192, %rsp ## imm = 0x4A8
349+
; X64-SKX-NEXT: subq $1064, %rsp ## imm = 0x428
350350
; X64-SKX-NEXT: kmovq %k7, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
351351
; X64-SKX-NEXT: kmovq %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
352352
; X64-SKX-NEXT: kmovq %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
@@ -388,7 +388,7 @@ define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x fl
388388
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 8-byte Reload
389389
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 8-byte Reload
390390
; X64-SKX-NEXT: kmovq {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 8-byte Reload
391-
; X64-SKX-NEXT: addq $1192, %rsp ## imm = 0x4A8
391+
; X64-SKX-NEXT: addq $1064, %rsp ## imm = 0x428
392392
; X64-SKX-NEXT: popq %rdi
393393
; X64-SKX-NEXT: popq %rsi
394394
; X64-SKX-NEXT: retq

llvm/test/CodeGen/X86/catchpad-realign-savexmm.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,18 @@ catch:
5151
; CHECK: popq %rbp
5252
; CHECK: retq
5353
; CHECK: .seh_handlerdata
54+
; CHECK: # %catch
55+
; CHECK: movq %rdx, 16(%rsp)
56+
; CHECK: pushq %rbp
57+
; CHECK: .seh_pushreg 5
58+
; CHECK: subq $48, %rsp
59+
; CHECK: .seh_stackalloc 48
60+
; CHECK: leaq 64(%rdx), %rbp
61+
; CHECK: movapd %xmm6, 32(%rsp)
62+
; CHECK: .seh_savexmm 6, 32
63+
; CHECK: .seh_endprologue
64+
; CHECK: movapd 32(%rsp), %xmm6
65+
; CHECK: leaq .LBB0_1(%rip), %rax
66+
; CHECK: addq $48, %rsp
67+
; CHECK: popq %rbp
68+
; CHECK: retq # CATCHRET
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
; RUN: llc -mtriple=x86_64-pc-windows-msvc -mattr=+avx < %s | FileCheck %s
2+
3+
; void bar(int a, int b, int c, int d, int e);
4+
; void baz(int x);
5+
;
6+
; void foo(int a, int b, int c, int d, int e)
7+
; {
8+
; __asm("nop" ::: "bx", "cx", "xmm5", "xmm6", "ymm7");
9+
; try {
10+
; bar(a, b, c, d, e);
11+
; }
12+
; catch (...) {
13+
; baz(a);
14+
; if (a)
15+
; __asm("nop" ::: "xmm8");
16+
; }
17+
; }
18+
19+
%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
20+
21+
$"??_R0H@8" = comdat any
22+
23+
@"??_7type_info@@6B@" = external constant i8*
24+
@"??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
25+
26+
declare dso_local i32 @__CxxFrameHandler3(...)
27+
declare dso_local void @"?bar@@YAXHHHHH@Z"(i32, i32, i32, i32, i32)
28+
declare dso_local void @"?baz@@YAXH@Z"(i32)
29+
30+
define dso_local void @"?foo@@YAXHHHHH@Z"(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
31+
entry:
32+
%e.addr = alloca i32, align 4
33+
%d.addr = alloca i32, align 4
34+
%c.addr = alloca i32, align 4
35+
%b.addr = alloca i32, align 4
36+
%a.addr = alloca i32, align 4
37+
store i32 %e, i32* %e.addr, align 4
38+
store i32 %d, i32* %d.addr, align 4
39+
store i32 %c, i32* %c.addr, align 4
40+
store i32 %b, i32* %b.addr, align 4
41+
store i32 %a, i32* %a.addr, align 4
42+
call void asm sideeffect "nop", "~{bx},~{cx},~{xmm5},~{xmm6},~{ymm7}"()
43+
%0 = load i32, i32* %e.addr, align 4
44+
%1 = load i32, i32* %d.addr, align 4
45+
%2 = load i32, i32* %c.addr, align 4
46+
%3 = load i32, i32* %b.addr, align 4
47+
%4 = load i32, i32* %a.addr, align 4
48+
invoke void @"?bar@@YAXHHHHH@Z"(i32 %4, i32 %3, i32 %2, i32 %1, i32 %0)
49+
to label %invoke.cont unwind label %catch.dispatch
50+
51+
catch.dispatch: ; preds = %entry
52+
%5 = catchswitch within none [label %catch] unwind to caller
53+
54+
catch: ; preds = %catch.dispatch
55+
%6 = catchpad within %5 [i8* null, i32 64, i8* null]
56+
%7 = load i32, i32* %a.addr, align 4
57+
call void @"?baz@@YAXH@Z"(i32 %7) [ "funclet"(token %6) ]
58+
%8 = load i32, i32* %a.addr, align 4
59+
%tobool = icmp ne i32 %8, 0
60+
br i1 %tobool, label %if.then, label %if.end
61+
62+
if.then: ; preds = %catch
63+
call void asm sideeffect "nop", "~{xmm8}"() [ "funclet"(token %6) ]
64+
br label %if.end
65+
66+
invoke.cont: ; preds = %entry
67+
br label %try.cont
68+
69+
if.end: ; preds = %if.then, %catch
70+
catchret from %6 to label %catchret.dest
71+
72+
catchret.dest: ; preds = %if.end
73+
br label %try.cont
74+
75+
try.cont: ; preds = %catchret.dest, %invoke.cont
76+
ret void
77+
}
78+
79+
; CHECK: # %catch
80+
; CHECK: movq %rdx, 16(%rsp)
81+
; CHECK: pushq %rbp
82+
; CHECK: .seh_pushreg 5
83+
; CHECK: pushq %rbx
84+
; CHECK: .seh_pushreg 3
85+
; CHECK: subq $88, %rsp
86+
; CHECK: .seh_stackalloc 88
87+
; CHECK: leaq 112(%rdx), %rbp
88+
; CHECK: vmovaps %xmm8, 48(%rsp)
89+
; CHECK: .seh_savexmm 8, 48
90+
; CHECK: vmovaps %xmm7, 64(%rsp)
91+
; CHECK: .seh_savexmm 7, 64
92+
; CHECK: vmovaps %xmm6, 80(%rsp)
93+
; CHECK: .seh_savexmm 6, 80
94+
; CHECK: .seh_endprologue
95+
; CHECK: movl -{{[0-9]+}}(%rbp), %ecx
96+
; CHECK: vmovaps 80(%rsp), %xmm6
97+
; CHECK: vmovaps 64(%rsp), %xmm7
98+
; CHECK: vmovaps 48(%rsp), %xmm8
99+
; CHECK: leaq .LBB0_1(%rip), %rax
100+
; CHECK: addq $88, %rsp
101+
; CHECK: popq %rbx
102+
; CHECK: popq %rbp
103+
; CHECK: retq # CATCHRET
104+
105+
; CHECK-LABEL: "$handlerMap$0$?foo@@YAXHHHHH@Z":
106+
; CHECK-NEXT: .long 64 # Adjectives
107+
; CHECK-NEXT: .long 0 # Type
108+
; CHECK-NEXT: .long 0 # CatchObjOffset
109+
; CHECK-NEXT: .long "?catch$2@?0??foo@@YAXHHHHH@Z@4HA"@IMGREL # Handler
110+
; Sum of:
111+
; 16 RDX store offset
112+
; 16 two pushes
113+
; 88 stack alloc
114+
; CHECK-NEXT: .long 120 # ParentFrameOffset
115+

0 commit comments

Comments
 (0)