Skip to content

Commit c226646

Browse files
committed
Resubmit: [DA][TTI][AMDGPU] Add option to select GPUDA with TTI
Summary: Enable the new divergence analysis by default for AMDGPU. Resubmit with test updates since GPUDA was causing failures on Windows. Reviewers: rampitec, nhaehnle, arsenm, thakis Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73315
1 parent c5bd3d0 commit c226646

18 files changed

+41
-16
lines changed

llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ class LegacyDivergenceAnalysis : public FunctionPass {
5454

5555
private:
5656
// Whether analysis should be performed by GPUDivergenceAnalysis.
57-
bool shouldUseGPUDivergenceAnalysis(const Function &F) const;
57+
bool shouldUseGPUDivergenceAnalysis(const Function &F,
58+
const TargetTransformInfo &TTI) const;
5859

5960
// (optional) handle to new DivergenceAnalysis
6061
std::unique_ptr<GPUDivergenceAnalysis> gpuDA;

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,10 @@ class TargetTransformInfo {
342342
/// branches.
343343
bool hasBranchDivergence() const;
344344

345+
/// Return true if the target prefers to use GPU divergence analysis to
346+
/// replace the legacy version.
347+
bool useGPUDivergenceAnalysis() const;
348+
345349
/// Returns whether V is a source of divergence.
346350
///
347351
/// This function provides the target-dependent information for
@@ -1198,6 +1202,7 @@ class TargetTransformInfo::Concept {
11981202
virtual int
11991203
getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
12001204
virtual bool hasBranchDivergence() = 0;
1205+
virtual bool useGPUDivergenceAnalysis() = 0;
12011206
virtual bool isSourceOfDivergence(const Value *V) = 0;
12021207
virtual bool isAlwaysUniform(const Value *V) = 0;
12031208
virtual unsigned getFlatAddressSpace() = 0;
@@ -1452,6 +1457,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
14521457
return Impl.getUserCost(U, Operands);
14531458
}
14541459
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1460+
bool useGPUDivergenceAnalysis() override { return Impl.useGPUDivergenceAnalysis(); }
14551461
bool isSourceOfDivergence(const Value *V) override {
14561462
return Impl.isSourceOfDivergence(V);
14571463
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ class TargetTransformInfoImplBase {
152152

153153
bool hasBranchDivergence() { return false; }
154154

155+
bool useGPUDivergenceAnalysis() { return false; }
156+
155157
bool isSourceOfDivergence(const Value *V) { return false; }
156158

157159
bool isAlwaysUniform(const Value *V) { return false; }

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
207207

208208
bool hasBranchDivergence() { return false; }
209209

210+
bool useGPUDivergenceAnalysis() { return false; }
211+
210212
bool isSourceOfDivergence(const Value *V) { return false; }
211213

212214
bool isAlwaysUniform(const Value *V) { return false; }

llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -301,14 +301,13 @@ FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
301301
void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
302302
AU.addRequired<DominatorTreeWrapperPass>();
303303
AU.addRequired<PostDominatorTreeWrapperPass>();
304-
if (UseGPUDA)
305-
AU.addRequired<LoopInfoWrapperPass>();
304+
AU.addRequired<LoopInfoWrapperPass>();
306305
AU.setPreservesAll();
307306
}
308307

309308
bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
310-
const Function &F) const {
311-
if (!UseGPUDA)
309+
const Function &F, const TargetTransformInfo &TTI) const {
310+
if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis()))
312311
return false;
313312

314313
// GPUDivergenceAnalysis requires a reducible CFG.
@@ -337,7 +336,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
337336
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
338337
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
339338

340-
if (shouldUseGPUDivergenceAnalysis(F)) {
339+
if (shouldUseGPUDivergenceAnalysis(F, TTI)) {
341340
// run the new GPU divergence analysis
342341
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
343342
gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ bool TargetTransformInfo::hasBranchDivergence() const {
212212
return TTIImpl->hasBranchDivergence();
213213
}
214214

215+
bool TargetTransformInfo::useGPUDivergenceAnalysis() const {
216+
return TTIImpl->useGPUDivergenceAnalysis();
217+
}
218+
215219
bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
216220
return TTIImpl->isSourceOfDivergence(V);
217221
}

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ static cl::opt<unsigned> UnrollThresholdIf(
6969
cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
7070
cl::init(150), cl::Hidden);
7171

72+
static cl::opt<bool> UseLegacyDA(
73+
"amdgpu-use-legacy-divergence-analysis",
74+
cl::desc("Enable legacy divergence analysis for AMDGPU"),
75+
cl::init(false), cl::Hidden);
76+
7277
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
7378
unsigned Depth = 0) {
7479
const Instruction *I = dyn_cast<Instruction>(Cond);
@@ -601,6 +606,11 @@ static bool isArgPassedInSGPR(const Argument *A) {
601606
}
602607
}
603608

609+
/// \returns true if the new GPU divergence analysis is enabled.
610+
bool GCNTTIImpl::useGPUDivergenceAnalysis() const {
611+
return !UseLegacyDA;
612+
}
613+
604614
/// \returns true if the result of the value could potentially be
605615
/// different across workitems in a wavefront.
606616
bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
136136
HasFP32Denormals(ST->hasFP32Denormals(F)) { }
137137

138138
bool hasBranchDivergence() { return true; }
139+
bool useGPUDivergenceAnalysis() const;
139140

140141
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
141142
TTI::UnrollingPreferences &UP);

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
1+
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
22

33
; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst
44
define i32 @test1(i32* %ptr, i32 %val) #0 {

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/intrinsics.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
1+
; RUN: opt -mtriple=amdgcn-- -analyze -amdgpu-use-legacy-divergence-analysis -divergence %s | FileCheck %s
22

33
; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
44
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 {

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/kernel-args.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
1+
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
22

33
; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'test_amdgpu_ps':
44
; CHECK: DIVERGENT:

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.buffer.atomic.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
1+
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
22

33
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.buffer.atomic.swap.i32(
44
define float @buffer_atomic_swap(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
1+
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
22

33
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
44
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/loads.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
1+
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
22

33
; Test that we consider loads from flat and private addrspaces to be divergent.
44

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/no-return-blocks.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
1+
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
22

33
; CHECK: DIVERGENT: %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp2
44
; CHECK: DIVERGENT: %tmp10 = load volatile float, float addrspace(1)* %tmp5, align 4

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/phi-undef.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
1+
; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
22

33
; CHECK-LABEL: 'test1':
44
; CHECK-NEXT: DIVERGENT: i32 %bound

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/unreachable-loop-block.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
1+
; RUN: opt %s -mtriple amdgcn-- -amdgpu-use-legacy-divergence-analysis -analyze -divergence | FileCheck %s
22

33
; CHECK: DIVERGENT: %tmp = cmpxchg volatile
44
define amdgpu_kernel void @unreachable_loop(i32 %tidx) #0 {

llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/workitem-intrinsics.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence %s | FileCheck %s
1+
; RUN: opt -mtriple amdgcn-unknown-amdhsa -amdgpu-use-legacy-divergence-analysis -analyze -divergence %s | FileCheck %s
22

33
declare i32 @llvm.amdgcn.workitem.id.x() #0
44
declare i32 @llvm.amdgcn.workitem.id.y() #0

0 commit comments

Comments
 (0)