diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll index 3b4db4a4075fd..d45705edce2c8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll @@ -18,6 +18,16 @@ define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -74,6 +84,16 @@ define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_cube: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_cube: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -132,6 +152,16 @@ define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_2darray: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4 v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_2darray: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -190,6 +220,16 @@ define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_c_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_c_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -246,6 +286,16 @@ define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4_cl v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_cl_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -306,6 +356,16 @@ define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_c_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_c_cl_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -362,6 +422,16 @@ define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_b_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_b_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -418,6 +488,16 @@ define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_c_b_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_c_b_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -476,6 +556,16 @@ define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_b_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_b_cl_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -538,6 +628,16 @@ define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_c_b_cl_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: s_mov_b32 s12, exec_lo +; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_c_b_cl_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: s_mov_b32 s12, exec_lo @@ -591,6 +691,13 @@ define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inre ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_l_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX10-NEXT: image_gather4_l v[0:3], [v0, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_l_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l @@ -636,6 +743,13 @@ define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> in ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_c_l_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX10-NEXT: image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_c_l_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l @@ -677,6 +791,13 @@ define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_lz_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX10-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_lz_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l @@ -718,6 +839,13 @@ define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> i ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog ; +; GFX10-LABEL: gather4_c_lz_2d: +; GFX10: ; %bb.0: ; %main_body +; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX10-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: ; return to shader part epilog +; ; GFX11-TRUE16-LABEL: gather4_c_lz_2d: ; GFX11-TRUE16: ; %bb.0: ; %main_body ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l @@ -773,5 +901,4 @@ attributes #0 = { nounwind } attributes #1 = { nounwind readonly } attributes #2 = { nounwind readnone } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX10: {{.*}} ; GFX12: {{.*}}