Skip to content

Commit 9dbc968

Browse files
committed
[AMDGPU] Fix atomic float max/min intrinsics
Hooked up raw.buffer.atomic.fmin/max.f64. This instruction should be available on GFX6, GFX7, and GFX10. It was implemented for GFX90a with a different name. Added intrinsic def for image_atomic_fmin/fmax; the instruction defs were already there. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D108208 Change-Id: I473f98d28b2afbeeb2c27822d9686b5e86634e2f
1 parent fd51ab6 commit 9dbc968

File tree

3 files changed

+1280
-14
lines changed

3 files changed

+1280
-14
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -684,7 +684,14 @@ class AMDGPUDimAtomicProfile<string opmod,
684684
let IsAtomic = true;
685685
}
686686

687-
class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
687+
class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim,
688+
list<AMDGPUArg> dataargs>
689+
: AMDGPUDimAtomicProfile<opmod, dim, dataargs> {
690+
let RetTypes = [llvm_anyfloat_ty];
691+
}
692+
693+
class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
694+
: AMDGPUDimProfile<"GET_RESINFO", dim> {
688695
let RetTypes = [llvm_anyfloat_ty];
689696
let DataArgs = [];
690697
let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
@@ -860,26 +867,35 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
860867
// atomic intrinsics
861868
//////////////////////////////////////////////////////////////////////////
862869
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
863-
multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
864-
foreach dim = AMDGPUDims.All in {
865-
def !strconcat(NAME, "_", dim.Name)
866-
: AMDGPUImageDimIntrinsic<
867-
AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
868-
[], [SDNPMemOperand]>;
869-
}
870+
multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs,
871+
int isFloat = 0> {
872+
foreach dim = AMDGPUDims.All in {
873+
def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic<
874+
!if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>,
875+
AMDGPUDimAtomicProfile<opmod, dim, dataargs>),
876+
[], [SDNPMemOperand]>;
877+
}
870878
}
871879

872-
multiclass AMDGPUImageDimAtomic<string opmod> {
873-
defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
880+
multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> {
881+
defm ""
882+
: AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">],
883+
isFloat>;
884+
}
885+
886+
multiclass AMDGPUImageDimFloatAtomic<string opmod> {
887+
defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>;
874888
}
875889

876890
defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
877891
defm int_amdgcn_image_atomic_add : AMDGPUImageDimAtomic<"ATOMIC_ADD">;
878892
defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
879893
defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
880894
defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
895+
defm int_amdgcn_image_atomic_fmin : AMDGPUImageDimFloatAtomic<"ATOMIC_FMIN">;
881896
defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
882897
defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
898+
defm int_amdgcn_image_atomic_fmax : AMDGPUImageDimFloatAtomic<"ATOMIC_FMAX">;
883899
defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
884900
defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
885901
defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
@@ -1015,8 +1031,10 @@ def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
10151031
def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
10161032
def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
10171033
def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
1034+
def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
10181035
def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
10191036
def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
1037+
def int_amdgcn_raw_buffer_atomic_fmax : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
10201038
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
10211039
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
10221040
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
@@ -1036,10 +1054,6 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
10361054
// gfx908 intrinsic
10371055
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
10381056

1039-
// gfx90a intrinsics
1040-
def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
1041-
def int_amdgcn_raw_buffer_atomic_fmax : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
1042-
10431057
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
10441058
!if(NoRtn, [], [data_ty]),
10451059
[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1438,6 +1438,13 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
14381438
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">;
14391439
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">;
14401440

1441+
let SubtargetPredicate = isGFX6GFX7GFX10 in {
1442+
defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f32, "BUFFER_ATOMIC_FMIN">;
1443+
defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f32, "BUFFER_ATOMIC_FMAX">;
1444+
defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_FMIN_X2">;
1445+
defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_FMAX_X2">;
1446+
}
1447+
14411448
class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
14421449
(ops node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5, node:$src6, node:$src7),
14431450
(vt (Op $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7)),

0 commit comments

Comments
 (0)