Skip to content

Commit 87c46a3

Browse files
committed
AMDGPU: Don't error on ds.ordered intrinsic in function
These should be assumed to be called from a compute context. Also don't use a 2 entry switch over constants.
1 parent 36b887e commit 87c46a3

File tree

2 files changed

+60
-29
lines changed

2 files changed

+60
-29
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6081,6 +6081,28 @@ static unsigned getBufferOffsetForMMO(SDValue VOffset,
60816081
cast<ConstantSDNode>(Offset)->getSExtValue();
60826082
}
60836083

6084+
static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
6085+
switch (MF.getFunction().getCallingConv()) {
6086+
case CallingConv::AMDGPU_PS:
6087+
return 1;
6088+
case CallingConv::AMDGPU_VS:
6089+
return 2;
6090+
case CallingConv::AMDGPU_GS:
6091+
return 3;
6092+
case CallingConv::AMDGPU_HS:
6093+
case CallingConv::AMDGPU_LS:
6094+
case CallingConv::AMDGPU_ES:
6095+
report_fatal_error("ds_ordered_count unsupported for this calling conv");
6096+
case CallingConv::AMDGPU_CS:
6097+
case CallingConv::AMDGPU_KERNEL:
6098+
case CallingConv::C:
6099+
case CallingConv::Fast:
6100+
default:
6101+
// Assume other calling conventions are various compute callable functions
6102+
return 0;
6103+
}
6104+
}
6105+
60846106
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
60856107
SelectionDAG &DAG) const {
60866108
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -6096,8 +6118,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
60966118
unsigned IndexOperand = M->getConstantOperandVal(7);
60976119
unsigned WaveRelease = M->getConstantOperandVal(8);
60986120
unsigned WaveDone = M->getConstantOperandVal(9);
6099-
unsigned ShaderType;
6100-
unsigned Instruction;
61016121

61026122
unsigned OrderedCountIndex = IndexOperand & 0x3f;
61036123
IndexOperand &= ~0x3f;
@@ -6116,36 +6136,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
61166136
if (IndexOperand)
61176137
report_fatal_error("ds_ordered_count: bad index operand");
61186138

6119-
switch (IntrID) {
6120-
case Intrinsic::amdgcn_ds_ordered_add:
6121-
Instruction = 0;
6122-
break;
6123-
case Intrinsic::amdgcn_ds_ordered_swap:
6124-
Instruction = 1;
6125-
break;
6126-
}
6127-
61286139
if (WaveDone && !WaveRelease)
61296140
report_fatal_error("ds_ordered_count: wave_done requires wave_release");
61306141

6131-
switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
6132-
case CallingConv::AMDGPU_CS:
6133-
case CallingConv::AMDGPU_KERNEL:
6134-
ShaderType = 0;
6135-
break;
6136-
case CallingConv::AMDGPU_PS:
6137-
ShaderType = 1;
6138-
break;
6139-
case CallingConv::AMDGPU_VS:
6140-
ShaderType = 2;
6141-
break;
6142-
case CallingConv::AMDGPU_GS:
6143-
ShaderType = 3;
6144-
break;
6145-
default:
6146-
report_fatal_error("ds_ordered_count unsupported for this calling conv");
6147-
}
6148-
6142+
unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
6143+
unsigned ShaderType = getDSShaderTypeValue(DAG.getMachineFunction());
61496144
unsigned Offset0 = OrderedCountIndex << 2;
61506145
unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
61516146
(Instruction << 4);

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,42 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
5757
ret float %r
5858
}
5959

60+
; FUNC-LABEL: {{^}}ds_ordered_add_default_cc:
61+
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
62+
; GCN: s_mov_b32 m0, 0{{$}}
63+
; VIGFX9-NEXT: s_nop 0
64+
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
65+
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
66+
; Exercises llvm.amdgcn.ds.ordered.add from a function that carries no explicit
; calling convention on its define line. The check lines for this function
; expect ds_ordered_count with offset 772 and the gds modifier.
define float @ds_ordered_add_default_cc() {
67+
; Trailing operands are (i32 1, i1 true, i1 true); presumably these are the
; ordered-count index, wave_release and wave_done - confirm against the
; intrinsic definition.
%val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
68+
; Reinterpret the i32 result as float so it matches the return type.
%r = bitcast i32 %val to float
69+
ret float %r
70+
}
71+
72+
; FUNC-LABEL: {{^}}ds_ordered_add_fastcc:
73+
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
74+
; GCN: s_mov_b32 m0, 0{{$}}
75+
; VIGFX9-NEXT: s_nop 0
76+
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
77+
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
78+
; Exercises llvm.amdgcn.ds.ordered.add from a fastcc function. The check lines
; for this function expect ds_ordered_count with offset 772 and the gds
; modifier, the same encoding expected for the default calling convention test.
define fastcc float @ds_ordered_add_fastcc() {
79+
; Trailing operands are (i32 1, i1 true, i1 true); presumably these are the
; ordered-count index, wave_release and wave_done - confirm against the
; intrinsic definition.
%val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
80+
; Reinterpret the i32 result as float so it matches the return type.
%r = bitcast i32 %val to float
81+
ret float %r
82+
}
83+
84+
; FUNC-LABEL: {{^}}ds_ordered_add_func:
85+
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
86+
; GCN: s_mov_b32 m0, 0{{$}}
87+
; VIGFX9-NEXT: s_nop 0
88+
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
89+
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
90+
; Exercises llvm.amdgcn.ds.ordered.add from a plain (non-entry-point) function.
; The check lines for this function expect ds_ordered_count with offset 772 and
; the gds modifier.
; NOTE(review): the signature and operands are the same as in
; ds_ordered_add_default_cc - confirm whether both tests are needed.
define float @ds_ordered_add_func() {
  ; Trailing operands are (i32 1, i1 true, i1 true); presumably these are the
  ; ordered-count index, wave_release and wave_done - confirm against the
  ; intrinsic definition.
  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
  ; Reinterpret the i32 result as float so it matches the return type.
  %r = bitcast i32 %val to float
  ret float %r
}
95+
6096
; FUNC-LABEL: {{^}}ds_ordered_add_ps:
6197
; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
6298
; GCN: s_mov_b32 m0, s0

0 commit comments

Comments
 (0)