diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 894d717bbbbd5..01751dfe9eb62 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2763,7 +2763,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { #endif report_fatal_error("Do not know how to promote this operator's result!"); - case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break; + case ISD::BITCAST: + R = PromoteFloatRes_BITCAST(N); + break; + case ISD::FREEZE: + R = PromoteFloatRes_FREEZE(N); + break; case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break; case ISD::EXTRACT_VECTOR_ELT: R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break; @@ -2876,6 +2881,21 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) { return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, Cast); } +// Promote the float result of a FREEZE: bitcast the operand to an integer of +// the same bit width, freeze that integer, and feed it to the +// GetPromotionOpcode(VT, NVT) node, which produces the promoted type NVT. +SDValue DAGTypeLegalizer::PromoteFloatRes_FREEZE(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + // Input type isn't guaranteed to be a scalar int so bitcast if not. The + // bitcast will be legalized further if necessary. 
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), + N->getOperand(0).getValueType().getSizeInBits()); + SDValue Cast = DAG.getBitcast(IVT, N->getOperand(0)); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, + DAG.getFreeze(Cast)); +} + SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) { ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N); EVT VT = N->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 50247cebb91b1..720393158aa5e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -756,6 +756,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void PromoteFloatResult(SDNode *N, unsigned ResNo); SDValue PromoteFloatRes_BITCAST(SDNode *N); + SDValue PromoteFloatRes_FREEZE(SDNode *N); SDValue PromoteFloatRes_BinOp(SDNode *N); SDValue PromoteFloatRes_UnaryWithTwoFPResults(SDNode *N); SDValue PromoteFloatRes_ConstantFP(SDNode *N); diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll index 6f665d76b517b..b1732b905e4c1 100644 --- a/llvm/test/CodeGen/AMDGPU/freeze.ll +++ b/llvm/test/CodeGen/AMDGPU/freeze.ll @@ -1,10 +1,91 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX6,GFX6-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX7,GFX7-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck 
-check-prefixes=GFX8,GFX8-GISEL %s + +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX8-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s + ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s + ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2i32: +; GFX7-SDAG: ; 
%bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v2i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -27,6 +108,80 @@ define void @freeze_v2i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dword v4, v[2:3], s[4:7], 0 addr64 offset:8 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:8 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:8 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; 
GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[4:6], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v3i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx3 v[4:6], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx3 v[2:3], v[4:6] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v3i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx3 v[4:6], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx3 v[2:3], v[4:6], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -49,6 +204,74 @@ define void @freeze_v3i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 
s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v4i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v4i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -71,6 +294,96 @@ define void @freeze_v4i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v5i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v5i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v5i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:16 +; 
GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dword v8, v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v5i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v5i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dword v8, v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v8 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v5i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dword v8, v[0:1], off 
offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dword v[2:3], v8, off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v5i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -125,6 +438,96 @@ define void @freeze_v5i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v6i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v6i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v6i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v6i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v6i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2 +; GFX8-GISEL-NEXT: 
v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v6i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx2 v[8:9], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v6i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -179,6 +582,102 @@ define void @freeze_v6i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v7i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v10, v[0:1], s[4:7], 0 addr64 offset:24 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dword v10, v[2:3], s[4:7], 0 addr64 offset:24 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: 
freeze_v7i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:24 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:24 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v7i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[8:10], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[8:10], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v7i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[8:10], v[0:1], s[4:7], 
0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[8:10], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v7i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx3 v[8:10], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx3 v[0:1], v[8:10] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v7i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx3 v[8:10], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[8:10], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v7i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -233,6 +732,96 @@ define void @freeze_v7i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v8i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v8i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; 
GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v8i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -287,6 +876,118 @@ define void @freeze_v8i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v9i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v12, v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dword v12, v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v9i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: 
s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v9i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v12, v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dword v12, v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v9i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v9i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dword v14, v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dword v[12:13], v14 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v9i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dword v12, v[0:1], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX9-GISEL-NEXT: global_store_dword v[2:3], v12, off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v9i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -353,6 +1054,118 @@ define void @freeze_v9i32(ptr addrspace(1) %ptra, ptr addrspace(1) 
%ptrb) { } define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v10i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v10i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v10i32: 
+; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v10i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v10i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; 
GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[12:13], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[14:15], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v10i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-NEXT: global_load_dwordx2 v[12:13], v[0:1], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[12:13], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v10i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -389,6 +1202,124 @@ define void @freeze_v10i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v11i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: 
s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v14, v[0:1], s[4:7], 0 addr64 offset:40 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dword v14, v[2:3], s[4:7], 0 addr64 offset:40 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[12:13], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v11i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:40 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[12:13], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 
offset:40 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v11i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[12:14], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx3 v[12:14], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v11i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[12:14], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[12:14], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: 
freeze_v11i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx3 v[12:14], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx3 v[15:16], v[12:14] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v11i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-NEXT: global_load_dwordx3 v[12:14], v[0:1], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx3 v[2:3], v[12:14], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v11i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -425,6 +1356,118 @@ define void @freeze_v11i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: 
freeze_v12i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v12i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v12i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: 
s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v12i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v12i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], 
v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v12i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v12i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -460,6 +1503,140 @@ define void @freeze_v12i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { ret void } define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v13i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 
+; GFX6-SDAG-NEXT: buffer_load_dword v16, v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dword v16, v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v13i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v13i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v16, v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dword v16, v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v13i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: 
buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v13i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dword v18, v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v18 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v13i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off 
offset:32 +; GFX9-GISEL-NEXT: global_load_dword v16, v[0:1], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dword v[2:3], v16, off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v13i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -538,6 +1715,140 @@ define void @freeze_v13i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v14i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v14i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; 
GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v14i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v14i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 
addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v14i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v14i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], 
v[12:13] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[16:17], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[18:19], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v14i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx2 v[16:17], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[16:17], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v14i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -616,6 +1927,150 @@ define void @freeze_v14i32(ptr addrspace(1) 
%ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v15i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v18, v[0:1], s[4:7], 0 addr64 offset:56 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dword v18, v[2:3], s[4:7], 0 addr64 offset:56 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v15i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], 
s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:56 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:56 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v15i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v18, v[0:1], s[4:7], 0 addr64 offset:56 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[16:17], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dword v18, v[2:3], s[4:7], 0 addr64 offset:56 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[16:17], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: 
buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v15i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[16:18], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[16:18], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v15i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx3 v[16:18], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; 
GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx3 v[2:3], v[16:18] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v15i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx3 v[16:18], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[16:18], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v15i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -694,6 +2149,141 @@ define void @freeze_v15i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v16i32: +; 
GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v16i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 
v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v16i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v16i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: 
s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v16i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v16i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v16i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -772,6 +2362,160 @@ define void @freeze_v16i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v17i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v20, v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dword v20, v[2:3], s[4:7], 0 addr64 
offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v17i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v17i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v20, v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dword v20, v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v17i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], 
s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v17i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 64, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dword v20, v[18:19] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15] +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, 
vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v20 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v17i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dword v20, v[0:1], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dword v[2:3], v20, off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v17i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -862,6 +2606,161 @@ define void @freeze_v17i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v18i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], 
s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v18i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: 
buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v18i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v18i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v18i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 64, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[18:19] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 16, v2 +; GFX8-GISEL-NEXT: 
v_addc_u32_e32 v21, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[20:21], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15] +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19] +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v18i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx2 v[20:21], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[20:21], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v18i32: ; 
GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -952,6 +2851,169 @@ define void @freeze_v18i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v19i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v19i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: 
s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v19i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v22, v[0:1], s[4:7], 0 addr64 offset:72 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[20:21], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 
v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dword v22, v[2:3], s[4:7], 0 addr64 offset:72 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[20:21], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v19i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[20:22], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: 
s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[20:22], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v19i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 32, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx3 v[20:22], v[20:21] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx3 v[6:7], v[20:22] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX9-GISEL-LABEL: freeze_v19i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx3 v[20:22], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[20:22], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v19i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1042,6 +3104,163 @@ define void @freeze_v19i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v20i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: 
buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v20i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 
0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v20i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(4) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v20i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], 
v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v20i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 
v6, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[20:23] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v20i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v20i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1132,6 +3351,185 @@ define void @freeze_v20i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v21i32(ptr addrspace(1) 
%ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v21i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dword v24, v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dword v24, v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v21i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; 
GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v21i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dword v24, v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], 
s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dword v24, v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v21i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 
v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v21i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x50 +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dword v26, v[8:9] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[24:25], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 0x50, v2 +; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, 
vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] +; GFX8-GISEL-NEXT: flat_store_dword v[6:7], v26 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v21i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: global_load_dword v24, v[0:1], off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dword v[2:3], v24, off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v21i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1234,6 +3632,185 @@ define void @freeze_v21i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; 
GFX6-SDAG-LABEL: freeze_v22i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[24:25], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[24:25], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v22i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: 
buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v22i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[24:25], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], 
s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[24:25], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(5) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v22i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: 
buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v22i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x50 +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[24:25], v[8:9] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v26, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v27, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[26:27], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 0x50, v2 +; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] +; 
GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(4) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[6:7], v[24:25] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v22i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: global_load_dwordx2 v[24:25], v[0:1], off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(5) +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[24:25], off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v22i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1336,6 +3913,235 @@ define void @freeze_v22i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void 
@freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v30i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v30i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; 
GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v30i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v30i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-GISEL-LABEL: freeze_v30i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v34, 0x50 +; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v34 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 +; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v32, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[32:33], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v32, vcc, v2, v34 +; GFX8-GISEL-NEXT: v_add_u32_e64 v34, s[4:5], 64, v2 +; 
GFX8-GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] +; GFX8-GISEL-NEXT: v_addc_u32_e64 v33, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 0x60, v2 +; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[34:35], v[20:23] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[32:33], v[24:27] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[28:31] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v30i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 +; GFX9-GISEL-NEXT: s_nop 0 +; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; 
GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v30i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1462,6 +4268,239 @@ define void @freeze_v30i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v31i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 
offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v31i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; 
GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(8) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(8) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(6) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v31i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: 
buffer_load_dword v34, v[0:1], s[4:7], 0 addr64 offset:120 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[32:33], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dword v34, v[2:3], s[4:7], 0 addr64 offset:120 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[32:33], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(8) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v31i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], 
v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[32:34], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx3 v[32:34], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v31i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v35, 0x50 +; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v35 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 +; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] +; GFX8-GISEL-NEXT: flat_load_dwordx3 v[32:34], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[4:7] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v35 +; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5] +; 
GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx3 v[2:3], v[32:34] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v31i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 +; GFX9-GISEL-NEXT: global_load_dwordx3 v[32:34], v[0:1], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: 
global_store_dwordx4 v[2:3], v[24:27], off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx3 v[2:3], v[32:34], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v31i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1588,6 +4627,233 @@ define void @freeze_v31i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v32i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; 
GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v32i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: 
buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v32i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; 
GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v32i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 
v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v32i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50 +; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 +; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] +; GFX8-GISEL-NEXT: 
flat_load_dwordx4 v[28:31], v[28:29] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38 +; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v32i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], 
v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v32i32: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1714,6 +4980,74 @@ define void @freeze_v32i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: 
buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_i32: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_i32: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; 
GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1736,6 +5070,74 @@ define void @freeze_i32(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_i64: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_i64: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_i64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_i64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1758,6 +5160,74 @@ define void @freeze_i64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_float: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], 
s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_float: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_float: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_float: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_float: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_float: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_float: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1780,6 +5250,74 @@ define void @freeze_float(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_i128: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_i128: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_i128: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_i128: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_i128: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_i128: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_i128: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1802,6 +5340,96 @@ define void @freeze_i128(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_i256: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 
offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_i256: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_i256: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_i256: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: 
buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_i256: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_i256: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_i256: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1856,6 +5484,74 @@ define void @freeze_i256(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_i16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_i16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_i16: +; GFX8: ; 
%bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_ushort v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_short v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_short v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1878,6 +5574,74 @@ define void @freeze_i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2i16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 
s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2i16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1900,6 +5664,99 @@ define void @freeze_v2i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; 
GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3i16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3i16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3i16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0 +; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8 +; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3i16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off +; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2 +; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3i16: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1946,6 +5803,74 @@ define void @freeze_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4i16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4i16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; 
GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v4i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v4i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1968,6 +5893,74 @@ define void @freeze_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: 
buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8i16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8i16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v8i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v8i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v8i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) @@ -1990,6 +5983,96 @@ define void @freeze_v8i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v16i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v16i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v16i16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 
offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v16i16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v16i16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v16i16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off 
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v16i16: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2044,6 +6127,74 @@ define void @freeze_v16i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_f16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_f16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], 
s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_ushort v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_short v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_short v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2066,6 +6217,74 @@ define void @freeze_f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2f16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; 
GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2f16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v2f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2088,6 +6307,99 @@ define void @freeze_v2f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3f16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3f16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX7-SDAG-LABEL: freeze_v3f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0 
+; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8 +; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off +; GFX9-GISEL-NEXT: global_store_short_d16_hi v[2:3], v0, off offset:2 +; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3f16: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2134,6 +6446,74 @@ define void @freeze_v3f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4f16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4f16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v4f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v4f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v4f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2156,6 +6536,74 @@ define void @freeze_v4f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8f16: +; GFX6-SDAG: ; 
%bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8f16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; 
GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v8f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v8f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v8f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2178,6 +6626,96 @@ define void @freeze_v8f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v16f16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v16f16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; 
GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v16f16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v16f16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v16f16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: 
flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v16f16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v16f16: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2232,6 +6770,80 @@ define void @freeze_v16f16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_bf16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_bf16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_bf16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_bf16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_bf16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_ushort v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_short v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 
s[30:31] +; +; GFX9-LABEL: freeze_bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_short v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2254,6 +6866,74 @@ define void @freeze_bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2bf16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2bf16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2bf16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; 
GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2bf16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v2bf16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2276,6 +6956,109 @@ define void @freeze_v2bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3bf16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; 
GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX6-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX6-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX6-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX6-SDAG-NEXT: v_alignbit_b32 v0, v4, v0, 16 +; GFX6-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3bf16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX6-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3bf16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v1, 1.0, v1 +; GFX7-SDAG-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v4 +; GFX7-SDAG-NEXT: 
v_lshrrev_b32_e32 v1, 16, v1 +; GFX7-SDAG-NEXT: v_alignbit_b32 v0, v4, v0, 16 +; GFX7-SDAG-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3bf16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_store_short v4, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX7-GISEL-NEXT: buffer_store_short v1, v[2:3], s[4:7], 0 addr64 offset:4 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3bf16: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 2, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 4, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0 +; GFX8-GISEL-NEXT: flat_store_short v[4:5], v8 +; GFX8-GISEL-NEXT: flat_store_short v[6:7], v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3bf16: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off +; GFX9-GISEL-NEXT: 
global_store_short_d16_hi v[2:3], v0, off offset:2 +; GFX9-GISEL-NEXT: global_store_short v[2:3], v1, off offset:4 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3bf16: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2322,6 +7105,74 @@ define void @freeze_v3bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4bf16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4bf16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4bf16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4bf16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v4bf16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v4bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v4bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2344,6 +7195,74 @@ define void @freeze_v4bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v8bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8bf16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8bf16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8bf16: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8bf16: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v8bf16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v8bf16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: 
global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v8bf16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2366,6 +7285,74 @@ define void @freeze_v8bf16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_f64: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_f64: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt 
vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_f64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_f64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2388,6 +7375,74 @@ define void @freeze_f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2f64: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: 
freeze_v2f64: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v2f64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2f64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: 
s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2f64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2410,6 +7465,96 @@ define void @freeze_v2f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3f64: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3f64: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[8:9] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) 
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3f64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2464,6 +7609,96 @@ define void @freeze_v3f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4f64: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4f64: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 
addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v4f64: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 
v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v4f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v4f64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2518,6 +7753,141 @@ define void @freeze_v4f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v8f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8f64: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: 
buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8f64: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8f64: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 
offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8f64: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v8f64: +; GFX8-GISEL: ; 
%bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v8f64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], 
off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v8f64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2596,6 +7966,74 @@ define void @freeze_v8f64(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_p0: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_p0: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_p0: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_p0: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_p0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_p0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2618,6 +8056,74 @@ define void @freeze_p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2p0: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], 
s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2p0: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2p0: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2p0: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v2p0: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2p0: +; GFX9: ; %bb.0: +; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2p0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2640,6 +8146,105 @@ define void @freeze_v2p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3p0: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3p0: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 +; 
GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3p0: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3p0: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3p0: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: 
flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3p0: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off +; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3p0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2699,6 +8304,96 @@ define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4p0: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], 
s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4p0: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4p0: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4p0: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; 
GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v4p0: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v4p0: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v4p0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2753,6 +8448,141 @@ define void @freeze_v4p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v8p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8p0: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 
0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8p0: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], 
s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8p0: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8p0: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: 
buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v8p0: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v8p0: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: 
global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v8p0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2831,6 +8661,233 @@ define void @freeze_v8p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v16p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v16p0: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 +; 
GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v16p0: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: s_waitcnt 
vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v16p0: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: 
s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v16p0: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: 
buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v16p0: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50 +; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 +; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: 
v_mov_b32_e32 v14, 0x70 +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38 +; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31] +; GFX8-GISEL-NEXT: s_waitcnt 
vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v16p0: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v16p0: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2957,6 +9014,74 @@ define void @freeze_v16p0(ptr addrspace(1) %ptra, ptr 
addrspace(1) %ptrb) { } define void @freeze_p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_p1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_p1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_p1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_p1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: 
s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_p1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_p1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_p1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2979,6 +9104,74 @@ define void @freeze_p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2p1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2p1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], 
s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2p1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2p1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v2p1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2p1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2p1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3001,6 +9194,105 @@ define void @freeze_v2p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3p1(ptr addrspace(1) 
%ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3p1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3p1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3p1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[8:9], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3p1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3p1: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3p1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off +; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3p1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3060,6 +9352,96 @@ define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4p1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4p1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; 
GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4p1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4p1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v4p1: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0 +; 
GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v4p1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v4p1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3114,6 +9496,141 @@ define void @freeze_v4p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v8p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8p1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], 
s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8p1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8p1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: 
buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8p1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v8p1: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[0:1] +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[8:11] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v8p1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; 
GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v8p1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3192,6 +9709,233 @@ define void @freeze_v8p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v16p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v16p1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 
offset:80 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v16p1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX6-GISEL-NEXT: 
s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v16p1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:80 +; 
GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v16p1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[16:19], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[20:23], v[0:1], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[24:27], v[0:1], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[28:31], v[0:1], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[32:35], v[0:1], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[8:11], v[2:3], s[4:7], 0 addr64 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[12:15], v[2:3], s[4:7], 0 addr64 offset:32 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[16:19], v[2:3], s[4:7], 0 addr64 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; 
GFX7-GISEL-NEXT: buffer_store_dwordx4 v[20:23], v[2:3], s[4:7], 0 addr64 offset:64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[24:27], v[2:3], s[4:7], 0 addr64 offset:80 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[28:31], v[2:3], s[4:7], 0 addr64 offset:96 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[32:35], v[2:3], s[4:7], 0 addr64 offset:112 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v16p1: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 48, v0 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v20, vcc, 64, v0 +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v38, 0x50 +; GFX8-GISEL-NEXT: v_add_u32_e32 v24, vcc, v0, v38 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x60 +; GFX8-GISEL-NEXT: v_add_u32_e32 v28, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: v_mov_b32_e32 v14, 0x70 +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v14 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[12:15], v[12:13] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[16:19], v[16:17] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[20:23], v[20:21] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[24:27], v[24:25] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[28:31], v[28:29] +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[32:35], v[0:1] +; 
GFX8-GISEL-NEXT: v_add_u32_e32 v36, vcc, 16, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 32, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[36:37], v[4:7] +; GFX8-GISEL-NEXT: s_nop 0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 48, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[8:11] +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, v2, v38 +; GFX8-GISEL-NEXT: v_add_u32_e64 v8, s[4:5], 64, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v9, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: s_mov_b64 s[4:5], vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 0x60, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0x70, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e64 v7, s[4:5], 0, v3, s[4:5] +; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[12:15] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[16:19] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[8:9], v[20:23] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[6:7], v[24:27] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[10:11], v[28:31] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[32:35] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v16p1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:32 +; 
GFX9-GISEL-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:48 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:64 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:80 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:96 +; GFX9-GISEL-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[8:11], off offset:16 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[12:15], off offset:32 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[16:19], off offset:48 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[20:23], off offset:64 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[24:27], off offset:80 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[28:31], off offset:96 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[32:35], off offset:112 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v16p1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3318,6 +10062,45 @@ define void @freeze_v16p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { +; GFX6-LABEL: freeze_p3: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 m0, -1 +; GFX6-NEXT: ds_read_b32 v0, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: ds_write_b32 v1, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: freeze_p3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 m0, -1 +; GFX7-NEXT: ds_read_b32 v0, v0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: ds_write_b32 v1, v0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_p3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_read_b32 v0, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ds_write_b32 v1, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_p3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_read_b32 v0, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b32 v1, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_p3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3342,6 +10125,45 @@ define void @freeze_p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { } define void @freeze_v2p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { +; GFX6-LABEL: freeze_v2p3: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 m0, -1 +; GFX6-NEXT: ds_read_b64 v[2:3], v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: ds_write_b64 v1, v[2:3] +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: freeze_v2p3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 m0, -1 +; GFX7-NEXT: ds_read_b64 v[2:3], v0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: ds_write_b64 v1, v[2:3] +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v2p3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_read_b64 v[2:3], v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ds_write_b64 v1, v[2:3] +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2p3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_read_b64 v[2:3], v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b64 v1, v[2:3] +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2p3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3366,6 +10188,65 @@ define void @freeze_v2p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { } define void @freeze_v3p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3p3: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0 +; GFX6-SDAG-NEXT: s_mov_b32 m0, -1 +; GFX6-SDAG-NEXT: ds_read_b32 v4, v2 +; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1 +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1) +; GFX6-SDAG-NEXT: ds_write_b32 v0, v4 +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1) +; GFX6-SDAG-NEXT: ds_write_b64 v1, v[2:3] +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3p3: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v0 +; GFX6-GISEL-NEXT: ds_read_b32 v0, v0 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(1) +; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3] +; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 8, v1 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(1) +; GFX6-GISEL-NEXT: ds_write_b32 v1, v0 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: freeze_v3p3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 m0, -1 +; GFX7-NEXT: ds_read_b96 v[2:4], v0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: ds_write_b96 v1, v[2:4] +; GFX7-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v3p3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_read_b96 v[2:4], v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ds_write_b96 v1, v[2:4] +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v3p3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_read_b96 v[2:4], v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b96 v1, v[2:4] +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v3p3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3390,6 +10271,50 @@ define void @freeze_v3p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { } define void @freeze_v4p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { +; GFX6-LABEL: freeze_v4p3: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 m0, -1 +; GFX6-NEXT: ds_read_b64 v[2:3], v0 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, 8, v0 +; GFX6-NEXT: ds_read_b64 v[4:5], v0 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, 8, v1 +; GFX6-NEXT: s_waitcnt lgkmcnt(1) +; GFX6-NEXT: ds_write_b64 v1, v[2:3] +; GFX6-NEXT: s_waitcnt lgkmcnt(1) +; GFX6-NEXT: ds_write_b64 v0, v[4:5] +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: freeze_v4p3: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 m0, -1 +; GFX7-NEXT: ds_read_b128 v[2:5], v0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: ds_write_b128 v1, v[2:5] +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_v4p3: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_read_b128 v[2:5], v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ds_write_b128 v1, 
v[2:5] +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v4p3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ds_read_b128 v[2:5], v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ds_write_b128 v1, v[2:5] +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v4p3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3414,6 +10339,105 @@ define void @freeze_v4p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { } define void @freeze_v8p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8p3: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0 +; GFX6-SDAG-NEXT: s_mov_b32 m0, -1 +; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v2 +; GFX6-SDAG-NEXT: ds_read_b64 v[4:5], v4 +; GFX6-SDAG-NEXT: ds_read_b64 v[6:7], v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0 +; GFX6-SDAG-NEXT: ds_read_b64 v[8:9], v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 16, v1 +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(2) +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[4:5] +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 24, v1 +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[2:3] +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1 +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3) +; GFX6-SDAG-NEXT: ds_write_b64 v1, v[6:7] +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3) +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[8:9] +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8p3: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 8, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0 +; GFX6-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v0 +; GFX6-GISEL-NEXT: 
ds_read_b64 v[4:5], v4 +; GFX6-GISEL-NEXT: ds_read_b64 v[6:7], v6 +; GFX6-GISEL-NEXT: ds_read_b64 v[8:9], v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v1 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(2) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[4:5] +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 16, v1 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(2) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[6:7] +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v1 +; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3] +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[8:9] +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8p3: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 m0, -1 +; GFX7-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:16 +; GFX7-SDAG-NEXT: ds_read_b128 v[6:9], v0 +; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(1) +; GFX7-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:16 +; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(1) +; GFX7-SDAG-NEXT: ds_write_b128 v1, v[6:9] +; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8p3: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX7-GISEL-NEXT: ds_read_b128 v[2:5], v0 +; GFX7-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 +; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(1) +; GFX7-GISEL-NEXT: ds_write_b128 v1, v[2:5] +; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(1) +; GFX7-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 +; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v8p3: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX8-GISEL-NEXT: ds_read_b128 v[2:5], v0 +; GFX8-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(1) +; GFX8-GISEL-NEXT: ds_write_b128 
v1, v[2:5] +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(1) +; GFX8-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v8p3: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: ds_read_b128 v[2:5], v0 +; GFX9-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-GISEL-NEXT: ds_write_b128 v1, v[2:5] +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(1) +; GFX9-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v8p3: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3468,6 +10492,164 @@ define void @freeze_v8p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { } define void @freeze_v16p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v16p3: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0 +; GFX6-SDAG-NEXT: s_mov_b32 m0, -1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 40, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 32, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v16, vcc, 56, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 48, v0 +; GFX6-SDAG-NEXT: ds_read_b64 v[2:3], v2 +; GFX6-SDAG-NEXT: ds_read_b64 v[4:5], v4 +; GFX6-SDAG-NEXT: ds_read_b64 v[6:7], v6 +; GFX6-SDAG-NEXT: ds_read_b64 v[8:9], v0 +; GFX6-SDAG-NEXT: ds_read_b64 v[10:11], v10 +; GFX6-SDAG-NEXT: ds_read_b64 v[12:13], v12 +; GFX6-SDAG-NEXT: ds_read_b64 v[14:15], v14 +; GFX6-SDAG-NEXT: ds_read_b64 v[16:17], v16 +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 48, v1 +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(3) +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[10:11] +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 56, v1 +; 
GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(1) +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[16:17] +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 32, v1 +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[14:15] +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 40, v1 +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[12:13] +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 16, v1 +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[4:5] +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 24, v1 +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[2:3] +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v1 +; GFX6-SDAG-NEXT: ds_write_b64 v1, v[8:9] +; GFX6-SDAG-NEXT: ds_write_b64 v0, v[6:7] +; GFX6-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v16p3: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 8, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0 +; GFX6-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX6-GISEL-NEXT: ds_read_b64 v[2:3], v0 +; GFX6-GISEL-NEXT: ds_read_b64 v[4:5], v4 +; GFX6-GISEL-NEXT: ds_read_b64 v[6:7], v6 +; GFX6-GISEL-NEXT: ds_read_b64 v[8:9], v8 +; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 56, v0 +; GFX6-GISEL-NEXT: ds_read_b64 v[10:11], v10 +; GFX6-GISEL-NEXT: ds_read_b64 v[12:13], v12 +; GFX6-GISEL-NEXT: ds_read_b64 v[14:15], v14 +; GFX6-GISEL-NEXT: ds_read_b64 v[16:17], v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 8, v1 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[4:5] +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 16, v1 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[6:7] +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 24, v1 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[8:9] +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 32, v1 +; 
GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[10:11] +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 40, v1 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[12:13] +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 48, v1 +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(6) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[14:15] +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 56, v1 +; GFX6-GISEL-NEXT: ds_write_b64 v1, v[2:3] +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(7) +; GFX6-GISEL-NEXT: ds_write_b64 v0, v[16:17] +; GFX6-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v16p3: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 m0, -1 +; GFX7-SDAG-NEXT: ds_read_b128 v[2:5], v0 offset:32 +; GFX7-SDAG-NEXT: ds_read_b128 v[6:9], v0 offset:48 +; GFX7-SDAG-NEXT: ds_read_b128 v[10:13], v0 +; GFX7-SDAG-NEXT: ds_read_b128 v[14:17], v0 offset:16 +; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3) +; GFX7-SDAG-NEXT: ds_write_b128 v1, v[2:5] offset:32 +; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3) +; GFX7-SDAG-NEXT: ds_write_b128 v1, v[6:9] offset:48 +; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3) +; GFX7-SDAG-NEXT: ds_write_b128 v1, v[10:13] +; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(3) +; GFX7-SDAG-NEXT: ds_write_b128 v1, v[14:17] offset:16 +; GFX7-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v16p3: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX7-GISEL-NEXT: ds_read_b128 v[2:5], v0 +; GFX7-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 +; GFX7-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32 +; GFX7-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48 +; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX7-GISEL-NEXT: ds_write_b128 v1, v[2:5] +; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX7-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 +; 
GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX7-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32 +; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX7-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48 +; GFX7-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v16p3: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_mov_b32 m0, -1 +; GFX8-GISEL-NEXT: ds_read_b128 v[2:5], v0 +; GFX8-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 +; GFX8-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32 +; GFX8-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48 +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX8-GISEL-NEXT: ds_write_b128 v1, v[2:5] +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX8-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX8-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32 +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX8-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48 +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v16p3: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: ds_read_b128 v[2:5], v0 +; GFX9-GISEL-NEXT: ds_read_b128 v[6:9], v0 offset:16 +; GFX9-GISEL-NEXT: ds_read_b128 v[10:13], v0 offset:32 +; GFX9-GISEL-NEXT: ds_read_b128 v[14:17], v0 offset:48 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX9-GISEL-NEXT: ds_write_b128 v1, v[2:5] +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX9-GISEL-NEXT: ds_write_b128 v1, v[6:9] offset:16 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX9-GISEL-NEXT: ds_write_b128 v1, v[10:13] offset:32 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(3) +; GFX9-GISEL-NEXT: ds_write_b128 v1, v[14:17] offset:48 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v16p3: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) @@ -3546,6 +10728,42 @@ define void @freeze_v16p3(ptr addrspace(3) %ptra, ptr addrspace(3) %ptrb) { } define void @freeze_p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { +; GFX6-LABEL: freeze_p5: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-NEXT: s_waitcnt vmcnt(0) +; GFX6-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-LABEL: freeze_p5: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_p5: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_p5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_p5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3568,6 +10786,88 @@ define void @freeze_p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { } define void @freeze_v2p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2p5: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword 
v2, v2, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v1 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2p5: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 4, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 4, v1 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2p5: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v1 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2p5: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 4, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 
0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 4, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v2p5: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 4, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 4, v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1) +; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v2p5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen +; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v2p5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3594,6 +10894,114 @@ define void @freeze_v2p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { } define void @freeze_v3p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3p5: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0 +; GFX6-SDAG-NEXT: buffer_load_dword 
v0, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 4, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 8, v1 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v5, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3p5: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 4, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 8, v1 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX6-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3p5: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 8, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 4, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 8, v1 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword 
v2, v4, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(2) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v5, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3p5: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 4, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 8, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX7-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3p5: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 4, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 8, v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(2) +; GFX8-GISEL-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: 
freeze_v3p5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen +; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 +; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(2) +; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v3p5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3623,6 +11031,140 @@ define void @freeze_v3p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { } define void @freeze_v4p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4p5: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v0 +; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 12, v0 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 4, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 12, v1 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v7, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: 
freeze_v4p5: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 4, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 8, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 12, v1 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX6-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4p5: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 8, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 4, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 12, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 4, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 12, v1 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v3, v5, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v2, v6, 
s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(3) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v7, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4p5: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 4, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 8, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 12, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX7-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v4p5: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 4, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 8, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v7, 
vcc, 12, v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: buffer_store_dword v2, v5, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: buffer_store_dword v3, v6, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(3) +; GFX8-GISEL-NEXT: buffer_store_dword v4, v7, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v4p5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen +; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 +; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 +; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 +; GFX9-NEXT: s_waitcnt vmcnt(3) +; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v4p5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3655,6 +11197,244 @@ define void @freeze_v4p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { } define void @freeze_v8p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8p5: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 20, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 12, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 4, v0 
+; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 28, v0 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, 4, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 8, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v11, vcc, 12, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 16, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v13, vcc, 20, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v15, vcc, 28, v1 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX6-SDAG-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v7, v9, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v6, v10, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v5, v11, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v4, v12, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v3, v13, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v2, v14, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v15, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8p5: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, 12, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0 +; GFX6-GISEL-NEXT: 
v_add_i32_e32 v8, vcc, 28, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 4, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 8, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v11, vcc, 12, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 16, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v13, vcc, 20, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 24, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v15, vcc, 28, v1 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX6-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8p5: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 24, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v3, 
vcc, 20, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 16, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 12, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 8, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 4, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 28, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v9, vcc, 4, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, 8, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 12, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v12, vcc, 16, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, 20, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, 24, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v15, vcc, 28, v1 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(1) +; GFX7-SDAG-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v7, v9, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v6, v10, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v5, v11, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v4, v12, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v3, v13, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v2, v14, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v15, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8p5: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 
v4, vcc, 12, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 16, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 20, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 24, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 28, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v9, vcc, 4, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 8, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 12, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 16, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 20, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 24, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 28, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dword v7, v14, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX7-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: 
freeze_v8p5: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 12, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 20, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 24, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 28, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 4, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 8, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 12, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 16, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 20, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 24, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 28, v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: buffer_store_dword v2, v9, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: buffer_store_dword v3, v10, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: buffer_store_dword v4, v11, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: buffer_store_dword v5, v12, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: buffer_store_dword v6, v13, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: buffer_store_dword v7, v14, 
s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(7) +; GFX8-GISEL-NEXT: buffer_store_dword v8, v15, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v8p5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen +; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 +; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 +; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 +; GFX9-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16 +; GFX9-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20 +; GFX9-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24 +; GFX9-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28 +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20 +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24 +; GFX9-NEXT: s_waitcnt vmcnt(7) +; GFX9-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v8p5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3717,6 +11497,446 @@ define void @freeze_v8p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { } define void @freeze_v16p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v16p5: +; 
GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 16, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 12, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 8, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 4, v0 +; GFX6-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 56, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 52, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 48, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, 44, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v11, vcc, 36, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v12, vcc, 28, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v13, vcc, 24, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v14, vcc, 20, v0 +; GFX6-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v16, vcc, 32, v0 +; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 60, v0 +; GFX6-SDAG-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v17, vcc, 4, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v18, vcc, 8, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v19, vcc, 12, v1 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14) +; 
GFX6-SDAG-NEXT: buffer_store_dword v6, v19, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14) +; GFX6-SDAG-NEXT: buffer_store_dword v7, v18, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14) +; GFX6-SDAG-NEXT: buffer_store_dword v8, v17, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt expcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 16, v1 +; GFX6-SDAG-NEXT: buffer_store_dword v5, v8, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt expcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 40, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v17, vcc, 20, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, 24, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v18, vcc, 28, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v19, vcc, 36, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, 44, v1 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9) +; GFX6-SDAG-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX6-SDAG-NEXT: buffer_store_dword v14, v17, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v13, v7, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v12, v18, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(9) +; GFX6-SDAG-NEXT: buffer_store_dword v16, v6, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen +; GFX6-SDAG-NEXT: buffer_store_dword v9, v8, s[0:3], 0 offen +; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, 48, v1 +; GFX6-SDAG-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt expcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, 52, v1 +; GFX6-SDAG-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt expcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, 56, v1 +; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, 60, v1 +; GFX6-SDAG-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(14) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX6-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v16p5: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 12, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 20, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 28, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v11, vcc, 36, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0 +; GFX6-GISEL-NEXT: v_add_i32_e32 v13, vcc, 44, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v15, vcc, 52, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v16, vcc, 56, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 60, v0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v17, vcc, 4, 
v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v18, vcc, 8, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v19, vcc, 12, v1 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX6-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt expcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v17, vcc, 20, v1 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(12) +; GFX6-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 40, v1 +; GFX6-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt expcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 24, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v18, vcc, 28, v1 +; GFX6-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt expcnt(0) +; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v19, vcc, 36, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v6, vcc, 44, v1 +; GFX6-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX6-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX6-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX6-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen +; GFX6-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX6-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 48, v1 +; GFX6-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 52, v1 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX6-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 56, v1 +; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 60, 
v1 +; GFX6-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX6-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v16p5: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 16, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 12, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 8, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 4, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, 56, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 52, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 48, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v9, vcc, 44, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, 40, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 36, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v12, vcc, 28, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, 24, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, 20, v0 +; GFX7-SDAG-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v4, v4, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v16, vcc, 32, v0 +; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 60, v0 +; 
GFX7-SDAG-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v17, vcc, 4, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v18, vcc, 8, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v19, vcc, 12, v1 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14) +; GFX7-SDAG-NEXT: buffer_store_dword v6, v19, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14) +; GFX7-SDAG-NEXT: buffer_store_dword v7, v18, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14) +; GFX7-SDAG-NEXT: buffer_store_dword v8, v17, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 16, v1 +; GFX7-SDAG-NEXT: buffer_store_dword v5, v8, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 40, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v17, vcc, 20, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v7, vcc, 24, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v18, vcc, 28, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, 32, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v19, vcc, 36, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 44, v1 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9) +; GFX7-SDAG-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(7) +; GFX7-SDAG-NEXT: buffer_store_dword v14, v17, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v13, v7, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v12, v18, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(9) +; GFX7-SDAG-NEXT: buffer_store_dword v16, v6, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen +; GFX7-SDAG-NEXT: buffer_store_dword v9, v8, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, 48, v1 +; GFX7-SDAG-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 52, v1 +; GFX7-SDAG-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen +; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, 56, v1 +; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, 60, v1 +; GFX7-SDAG-NEXT: 
buffer_store_dword v2, v3, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(14) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v16p5: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 4, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 8, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 12, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 16, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 20, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, 24, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v9, vcc, 28, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v10, vcc, 32, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 36, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 40, v0 +; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 44, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 48, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v15, vcc, 52, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v16, vcc, 56, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen +; 
GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 60, v0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, 4, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v18, vcc, 8, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v19, vcc, 12, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX7-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v17, vcc, 20, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(12) +; GFX7-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 40, v1 +; GFX7-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, 24, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v18, vcc, 28, v1 +; GFX7-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v5, vcc, 32, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v19, vcc, 36, v1 +; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 44, v1 +; GFX7-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX7-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX7-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX7-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen +; GFX7-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX7-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 48, v1 +; GFX7-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 52, v1 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX7-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: v_add_i32_e32 v2, vcc, 56, v1 +; 
GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 60, v1 +; GFX7-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX7-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v16p5: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 4, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 8, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 12, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 16, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 20, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 24, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 28, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 32, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 36, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 40, v0 +; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 44, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v5, v5, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v6, v6, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v7, v7, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v8, v8, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v9, v9, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v10, v10, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v11, v11, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v12, v12, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_load_dword v13, v13, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 48, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v14, v14, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v15, vcc, 52, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v15, v15, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 56, 
v0 +; GFX8-GISEL-NEXT: buffer_load_dword v16, v16, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 60, v0 +; GFX8-GISEL-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, 4, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 8, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v19, vcc, 12, v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX8-GISEL-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v17, vcc, 20, v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(12) +; GFX8-GISEL-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 40, v1 +; GFX8-GISEL-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 24, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v18, vcc, 28, v1 +; GFX8-GISEL-NEXT: buffer_store_dword v5, v19, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 32, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v19, vcc, 36, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 44, v1 +; GFX8-GISEL-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX8-GISEL-NEXT: buffer_store_dword v7, v17, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX8-GISEL-NEXT: buffer_store_dword v9, v18, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_store_dword v10, v5, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX8-GISEL-NEXT: buffer_store_dword v11, v19, s[0:3], 0 offen +; GFX8-GISEL-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX8-GISEL-NEXT: buffer_store_dword v13, v6, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 48, v1 +; GFX8-GISEL-NEXT: buffer_store_dword v14, v2, s[0:3], 0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 52, v1 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX8-GISEL-NEXT: buffer_store_dword v15, v2, s[0:3], 
0 offen +; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 56, v1 +; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 60, v1 +; GFX8-GISEL-NEXT: buffer_store_dword v16, v2, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(14) +; GFX8-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_v16p5: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen +; GFX9-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen offset:4 +; GFX9-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:8 +; GFX9-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:12 +; GFX9-NEXT: buffer_load_dword v6, v0, s[0:3], 0 offen offset:16 +; GFX9-NEXT: buffer_load_dword v7, v0, s[0:3], 0 offen offset:20 +; GFX9-NEXT: buffer_load_dword v8, v0, s[0:3], 0 offen offset:24 +; GFX9-NEXT: buffer_load_dword v9, v0, s[0:3], 0 offen offset:28 +; GFX9-NEXT: buffer_load_dword v10, v0, s[0:3], 0 offen offset:32 +; GFX9-NEXT: buffer_load_dword v11, v0, s[0:3], 0 offen offset:36 +; GFX9-NEXT: buffer_load_dword v12, v0, s[0:3], 0 offen offset:40 +; GFX9-NEXT: buffer_load_dword v13, v0, s[0:3], 0 offen offset:44 +; GFX9-NEXT: buffer_load_dword v14, v0, s[0:3], 0 offen offset:48 +; GFX9-NEXT: buffer_load_dword v15, v0, s[0:3], 0 offen offset:52 +; GFX9-NEXT: buffer_load_dword v16, v0, s[0:3], 0 offen offset:56 +; GFX9-NEXT: s_nop 0 +; GFX9-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:60 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen offset:8 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen offset:12 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v6, v1, 
s[0:3], 0 offen offset:16 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen offset:20 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen offset:24 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen offset:28 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen offset:32 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen offset:36 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v12, v1, s[0:3], 0 offen offset:40 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v13, v1, s[0:3], 0 offen offset:44 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v14, v1, s[0:3], 0 offen offset:48 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v15, v1, s[0:3], 0 offen offset:52 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v16, v1, s[0:3], 0 offen offset:56 +; GFX9-NEXT: s_waitcnt vmcnt(15) +; GFX9-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen offset:60 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_v16p5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3819,6 +12039,74 @@ define void @freeze_v16p5(ptr addrspace(5) %ptra, ptr addrspace(5) %ptrb) { } define void @freeze_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_i8: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 
s[30:31] +; +; GFX6-GISEL-LABEL: freeze_i8: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_i8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_i8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_i8: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_ubyte v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: flat_store_byte v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_i8: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: 
global_store_byte v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3841,6 +12129,92 @@ define void @freeze_i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2i8: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2i8: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2i8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt 
vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2i8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v2i8: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_ushort v0, v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 +; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: flat_store_short v[2:3], v0 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v2i8: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 +; GFX9-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off +; 
GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v2i8: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3888,6 +12262,137 @@ define void @freeze_v2i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3i8: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX6-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX6-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX6-SDAG-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX6-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3i8: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: 
v_lshlrev_b32_e32 v1, 16, v4 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX6-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3i8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX7-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX7-SDAG-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3i8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; 
GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v4 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX7-GISEL-NEXT: buffer_store_short v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: buffer_store_byte v1, v[2:3], s[4:7], 0 addr64 offset:2 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3i8: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dword v4, v[0:1] +; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0xff +; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 2, v2 +; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v4 +; GFX8-GISEL-NEXT: v_and_b32_sdwa v5, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v6, 8, v6 +; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4 +; GFX8-GISEL-NEXT: flat_store_short v[2:3], v4 +; GFX8-GISEL-NEXT: flat_store_byte v[0:1], v5 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3i8: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dword v0, v[0:1], off +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4 +; GFX9-GISEL-NEXT: 
v_and_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX9-GISEL-NEXT: global_store_short v[2:3], v0, off +; GFX9-GISEL-NEXT: global_store_byte_d16_hi v[2:3], v0, off offset:2 +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3i8: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3959,6 +12464,117 @@ define void @freeze_v3i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4i8: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4i8: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 
0xff, v4 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4i8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4i8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v5, 24, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 24, v5 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: 
buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v4i8: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dword v0, v[0:1] +; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 8 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0xff +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v0 +; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-GISEL-NEXT: v_and_b32_sdwa v6, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v6 +; GFX8-GISEL-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-GISEL-NEXT: flat_store_dword v[2:3], v0 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v4i8: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dword v0, v[0:1], off +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 8 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0xff +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v0 +; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-GISEL-NEXT: v_and_b32_sdwa v6, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_or_b32 v4, v0, v1, v4 +; GFX9-GISEL-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_or3_b32 v0, v4, v6, v0 +; GFX9-GISEL-NEXT: global_store_dword v[2:3], v0, off +; GFX9-GISEL-NEXT: 
s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v4i8: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4016,6 +12632,156 @@ define void @freeze_v4i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v8i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v8i8: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v8i8: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX6-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX6-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX6-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX6-GISEL-NEXT: v_and_b32_e32 v9, 0xff, 
v9 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v8i8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v8i8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 8, v0 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX7-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v9, 24, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; 
GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX7-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 8, v4 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v7, 8, v7 +; GFX7-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX7-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v7 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v5 +; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v8 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v1, v9 +; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v8i8: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 8 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v4, 0xff +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0 +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 +; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-GISEL-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v4, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-GISEL-NEXT: 
v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v8 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v10 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v9 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v8i8: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 8 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0xff +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v6, 8, v0 +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v7, 8, v1 +; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v6, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v5, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-GISEL-NEXT: v_and_b32_sdwa v8, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v9, v0, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v10, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v11, v1, v4 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v4, v6 +; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v4, v5 +; GFX9-GISEL-NEXT: v_or3_b32 v0, v0, v8, v9 +; GFX9-GISEL-NEXT: v_or3_b32 v1, v1, v10, v11 +; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v8i8: ; GFX10-SDAG: ; %bb.0: ; 
GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4090,6 +12856,234 @@ define void @freeze_v8i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v16i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v16i8: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v16i8: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX6-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX6-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7 +; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 
+; GFX6-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX6-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX6-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX6-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v15, 8, v15 +; GFX6-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX6-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX6-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX6-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX6-GISEL-NEXT: v_or_b32_e32 v4, v5, v9 +; GFX6-GISEL-NEXT: v_or_b32_e32 v5, v6, v12 +; GFX6-GISEL-NEXT: v_or_b32_e32 v6, v7, v15 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_or_b32_e32 v1, v4, v10 +; GFX6-GISEL-NEXT: v_or_b32_e32 v7, v5, v13 +; GFX6-GISEL-NEXT: v_or_b32_e32 v9, v6, v16 +; GFX6-GISEL-NEXT: v_or_b32_e32 v4, v0, v8 +; GFX6-GISEL-NEXT: v_or_b32_e32 v5, v1, v11 +; GFX6-GISEL-NEXT: v_or_b32_e32 v6, v7, v14 +; GFX6-GISEL-NEXT: v_or_b32_e32 v7, v9, v17 +; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v16i8: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: 
s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v16i8: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v0, 8, v4 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v12, 8, v6 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v15, 8, v7 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v4 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v5 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v13, 16, v6 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v16, 16, v7 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX7-GISEL-NEXT: v_and_b32_e32 v12, 0xff, v12 +; GFX7-GISEL-NEXT: v_and_b32_e32 v15, 0xff, v15 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v8, 24, v4 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v5 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v14, 24, v6 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v17, 24, v7 +; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v4 +; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v5 +; GFX7-GISEL-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX7-GISEL-NEXT: v_and_b32_e32 v7, 0xff, v7 +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v10, 0xff, v10 +; GFX7-GISEL-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX7-GISEL-NEXT: v_and_b32_e32 v16, 0xff, v16 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v0, 8, v0 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v12, 8, v12 +; GFX7-GISEL-NEXT: 
v_lshlrev_b32_e32 v15, 8, v15 +; GFX7-GISEL-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX7-GISEL-NEXT: v_and_b32_e32 v11, 0xff, v11 +; GFX7-GISEL-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX7-GISEL-NEXT: v_and_b32_e32 v17, 0xff, v17 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v13, 16, v13 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-GISEL-NEXT: v_or_b32_e32 v4, v5, v9 +; GFX7-GISEL-NEXT: v_or_b32_e32 v5, v6, v12 +; GFX7-GISEL-NEXT: v_or_b32_e32 v6, v7, v15 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v8, 24, v8 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v11, 24, v11 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v14, 24, v14 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v17, 24, v17 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_or_b32_e32 v1, v4, v10 +; GFX7-GISEL-NEXT: v_or_b32_e32 v7, v5, v13 +; GFX7-GISEL-NEXT: v_or_b32_e32 v9, v6, v16 +; GFX7-GISEL-NEXT: v_or_b32_e32 v4, v0, v8 +; GFX7-GISEL-NEXT: v_or_b32_e32 v5, v1, v11 +; GFX7-GISEL-NEXT: v_or_b32_e32 v6, v7, v14 +; GFX7-GISEL-NEXT: v_or_b32_e32 v7, v9, v17 +; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v16i8: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 8 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0xff +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4 +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6 +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7 +; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-GISEL-NEXT: v_and_b32_sdwa v12, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v13, v4, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v14, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v15, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v16, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v17, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v18, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_sdwa v6, v6, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_and_b32_sdwa v0, v7, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v12 +; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v5, v14 +; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v16 +; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v1, v18 +; GFX8-GISEL-NEXT: v_or_b32_e32 v4, v4, v13 +; GFX8-GISEL-NEXT: v_or_b32_e32 v5, v5, v15 +; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v17 +; GFX8-GISEL-NEXT: 
v_or_b32_e32 v7, v1, v0 +; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v16i8: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 8 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0xff +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v4 +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v5 +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v10, 8, v6 +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v11, 8, v7 +; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v9, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v10, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v1, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-GISEL-NEXT: v_and_b32_sdwa v12, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v13, v4, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v14, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v15, v5, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v16, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v17, v6, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v18, v7, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_b32_sdwa v19, v7, v0 dst_sel:BYTE_3 
dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-GISEL-NEXT: v_and_or_b32 v4, v4, v0, v8 +; GFX9-GISEL-NEXT: v_and_or_b32 v5, v5, v0, v9 +; GFX9-GISEL-NEXT: v_and_or_b32 v6, v6, v0, v10 +; GFX9-GISEL-NEXT: v_and_or_b32 v0, v7, v0, v1 +; GFX9-GISEL-NEXT: v_or3_b32 v4, v4, v12, v13 +; GFX9-GISEL-NEXT: v_or3_b32 v5, v5, v14, v15 +; GFX9-GISEL-NEXT: v_or3_b32 v6, v6, v16, v17 +; GFX9-GISEL-NEXT: v_or3_b32 v7, v0, v18, v19 +; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v16i8: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4198,6 +13192,80 @@ define void @freeze_v16i8(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_i1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_i1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: 
freeze_i1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_i1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_i1: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_ubyte v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX8-NEXT: flat_store_byte v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_i1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-NEXT: global_store_byte v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_i1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4222,6 +13290,100 @@ define void @freeze_i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { 
+; GFX6-SDAG-LABEL: freeze_v2i1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2i1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2i1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2i1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v2i1: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_ubyte v0, v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v2i1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v2i1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) @@ -4274,6 +13436,116 @@ define void @freeze_v2i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v3i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3i1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3i1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GFX6-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3i1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: 
buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3i1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GFX7-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3i1: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_ubyte v0, v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 +; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3i1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v1, 1, v0 +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v4, 2, v0 +; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3i1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4334,6 +13606,86 @@ define void @freeze_v3i1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_i1_vcc: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_i1_vcc: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: 
buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_i1_vcc: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_i1_vcc: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: freeze_i1_vcc: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: flat_load_dword v0, v[0:1] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-NEXT: flat_store_byte v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: freeze_i1_vcc: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-NEXT: global_store_byte v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-LABEL: freeze_i1_vcc: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4361,6 +13713,124 @@ define void @freeze_i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v2i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v2i1_vcc: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v2i1_vcc: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX6-GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v2i1_vcc: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v2i1_vcc: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 
+; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v2i1_vcc: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v2i1_vcc: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 3, v0 +; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v2i1_vcc: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4432,6 +13902,152 @@ define void @freeze_v2i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void 
@freeze_v3i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v3i1_vcc: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v4 +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v3i1_vcc: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; 
GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v3i1_vcc: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v4 +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v3i1_vcc: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx3 v[4:6], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; 
GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v4 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX7-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v3i1_vcc: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx3 v[4:6], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v3i1_vcc: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx3 v[4:6], v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX9-GISEL-NEXT: 
v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 2, v4 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v3i1_vcc: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4521,6 +14137,180 @@ define void @freeze_v3i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { } define void @freeze_v4i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { +; GFX6-SDAG-LABEL: freeze_v4i1_vcc: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX6-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX6-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX6-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v4 +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v1, 3, v5 +; GFX6-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-SDAG-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX6-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: freeze_v4i1_vcc: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX6-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX6-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX6-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v5, 1, v5 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v4, 2, v4 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX6-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v5 +; GFX6-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX6-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX6-GISEL-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: freeze_v4i1_vcc: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_mov_b32 s6, 0 +; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-SDAG-NEXT: s_mov_b32 s4, s6 +; GFX7-SDAG-NEXT: s_mov_b32 s5, s6 +; GFX7-SDAG-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, 
v4 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX7-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v4 +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v1, 3, v5 +; GFX7-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX7-SDAG-NEXT: buffer_store_byte v0, v[2:3], s[4:7], 0 addr64 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-GISEL-LABEL: freeze_v4i1_vcc: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-GISEL-NEXT: s_mov_b32 s6, 0 +; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX7-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX7-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX7-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v5, 1, v5 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v4, 2, v4 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX7-GISEL-NEXT: v_lshlrev_b32_e32 v1, 3, v5 +; GFX7-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX7-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX7-GISEL-NEXT: buffer_store_byte 
v0, v[2:3], s[4:7], 0 addr64 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: freeze_v4i1_vcc: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX8-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX8-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v5, 1, v5 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v4, 2, v4 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-GISEL-NEXT: v_lshlrev_b16_e32 v1, 3, v5 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX8-GISEL-NEXT: flat_store_byte v[2:3], v0 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: freeze_v4i1_vcc: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GFX9-GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: 
v_cndmask_b32_e64 v5, 0, 1, vcc +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX9-GISEL-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 1, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v5, 1, v5 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v4, 2, v4 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX9-GISEL-NEXT: v_lshlrev_b16_e32 v1, 3, v5 +; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX9-GISEL-NEXT: global_store_byte v[2:3], v0, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; ; GFX10-SDAG-LABEL: freeze_v4i1_vcc: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4626,3 +14416,5 @@ define void @freeze_v4i1_vcc(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) { store <4 x i1> %freeze, ptr addrspace(1) %ptrb ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX8-SDAG: {{.*}}