Skip to content

Commit e5972f2

Browse files
committed
[AMDGPU] Simplify VCCZ bug handling
Summary: VCCZBugHandledSet was used to make sure we don't apply the same workaround more than once to a single cbranch instruction, but it's not necessary because the workaround involves inserting an s_waitcnt instruction, which is enough for subsequent iterations to detect that no further workaround is necessary.

Also beef up the test case to check that the workaround was only applied once. I have also manually verified that the test still passes even if I hack the big do-while loop in runOnMachineFunction to run a minimum of five iterations.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69621
1 parent c75cd3c commit e5972f2

File tree

2 files changed

+5
-7
lines changed

2 files changed

+5
-7
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -372,7 +372,6 @@ class SIInsertWaitcnts : public MachineFunctionPass {
372372
AMDGPU::IsaVersion IV;
373373

374374
DenseSet<MachineInstr *> TrackedWaitcntSet;
375-
DenseSet<MachineInstr *> VCCZBugHandledSet;
376375

377376
struct BlockInfo {
378377
MachineBasicBlock *MBB;
@@ -1388,8 +1387,7 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
13881387
}
13891388

13901389
bool VCCZBugWorkAround = false;
1391-
if (readsVCCZ(Inst) &&
1392-
(!VCCZBugHandledSet.count(&Inst))) {
1390+
if (readsVCCZ(Inst)) {
13931391
if (ScoreBrackets.getScoreLB(LGKM_CNT) <
13941392
ScoreBrackets.getScoreUB(LGKM_CNT) &&
13951393
ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
@@ -1431,7 +1429,6 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
14311429
TII->get(ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
14321430
TRI->getVCC())
14331431
.addReg(TRI->getVCC());
1434-
VCCZBugHandledSet.insert(&Inst);
14351432
Modified = true;
14361433
}
14371434

@@ -1471,7 +1468,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
14711468
RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
14721469

14731470
TrackedWaitcntSet.clear();
1474-
VCCZBugHandledSet.clear();
14751471
RpotIdxMap.clear();
14761472
BlockInfos.clear();
14771473

llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
22
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
3-
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOVCCZ-BUG %s
3+
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
44

55
; GCN-FUNC: {{^}}vccz_workaround:
66
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
77
; GCN: v_cmp_neq_f32_e64 {{[^,]*}}, s{{[0-9]+}}, 0{{$}}
88
; VCCZ-BUG: s_waitcnt lgkmcnt(0)
99
; VCCZ-BUG: s_mov_b64 vcc, vcc
10-
; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
10+
; GCN-NOT: s_mov_b64 vcc, vcc
1111
; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
1212
; GCN: buffer_store_dword
1313
; GCN: [[EXIT]]:
@@ -28,6 +28,8 @@ endif:
2828

2929
; GCN-FUNC: {{^}}vccz_noworkaround:
3030
; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
31+
; GCN-NOT: s_waitcnt lgkmcnt(0)
32+
; GCN-NOT: s_mov_b64 vcc, vcc
3133
; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
3234
; GCN: buffer_store_dword
3335
; GCN: [[EXIT]]:

0 commit comments

Comments
 (0)