AMDGPU: Update tests to use modern buffer intrinsics

arsenm · arsenm · commit 20ca49b646b7 · 2020-01-16T13:49:43.000-05:00
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -77,13 +77,13 @@ entry:
   %e = getelementptr [2 x i32], [2 x i32] addrspace(5)* %v1a, i32 0, i32 %idx
   %x = load i32, i32 addrspace(5)* %e
   %xf = bitcast i32 %x to float
-  call void @llvm.amdgcn.buffer.store.f32(float %xf, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
+  call void @llvm.amdgcn.raw.buffer.store.f32(float %xf, <4 x i32> undef, i32 0, i32 0, i32 0)
   ret void
 }
 
 attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" }
 
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1)
+declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
 
 
 ; Check we have CS_NUM_USED_VGPRS in PAL metadata.
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -49,12 +49,12 @@ else:
 
 end:
   %r = phi float [ %v.if, %if ], [ %v.else, %else ]
-  call void @llvm.amdgcn.buffer.store.f32(float %r, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
+  call void @llvm.amdgcn.raw.buffer.store.f32(float %r, <4 x i32> undef, i32 0, i32 0, i32 0)
   ret void
 }
 
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
+declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
@@ -29,7 +29,7 @@ for.body:
   br i1 %cc, label %mid.loop, label %for.end
 
 mid.loop:
-  %v = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %tid, i32 %i, i1 false, i1 false)
+  %v = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %tid, i32 %i, i32 0, i32 0)
   %cc2 = fcmp oge float %v, 0.0
   br i1 %cc2, label %end.loop, label %for.end
 
@@ -48,7 +48,7 @@ end:
   ret void
 }
 
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
 
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
@@ -9,8 +9,8 @@
 ;CHECK: v_add_f32_e32
 define amdgpu_ps float @test1(i32 inreg %idx0, i32 inreg %idx1) {
 main_body:
-  %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
-  %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+  %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
   ret float %out.0
@@ -25,8 +25,8 @@ main_body:
 ;CHECK: v_add_f32_e32
 define amdgpu_ps float @test2(i32 inreg %idx0, i32 inreg %idx1) {
 main_body:
-  %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
-  %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+  %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %out.0 = bitcast float %out to i32
   %out.1 = call i32 @llvm.amdgcn.softwqm.i32(i32 %out.0)
@@ -45,10 +45,10 @@ main_body:
 ;CHECK: v_add_f32_e32
 define amdgpu_ps float @test_softwqm1(i32 inreg %idx0, i32 inreg %idx1) {
 main_body:
-  %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
-  %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+  %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
   %temp = fadd float %src0, %src1
-  call void @llvm.amdgcn.buffer.store.f32(float %temp, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %temp, <4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
   %out = fadd float %temp, %temp
   %out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
   ret float %out.0
@@ -67,11 +67,11 @@ main_body:
 ;CHECK: v_add_f32_e32
 define amdgpu_ps float @test_softwqm2(i32 inreg %idx0, i32 inreg %idx1) {
 main_body:
-  %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
-  %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+  %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
   %temp = fadd float %src0, %src1
   %temp.0 = call float @llvm.amdgcn.wqm.f32(float %temp)
-  call void @llvm.amdgcn.buffer.store.f32(float %temp.0, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %temp.0, <4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
   %out = fadd float %temp, %temp
   %out.0 = call float @llvm.amdgcn.softwqm.f32(float %out)
   ret float %out.0
@@ -89,9 +89,9 @@ main_body:
 ;CHECK-NOT: s_wqm_b64
 define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) {
 main_body:
-  %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
-  call void @llvm.amdgcn.buffer.store.f32(float %src0, <4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
-  %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+  %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %src0, <4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
   %temp = fadd float %src0, %src1
   %temp.0 = call float @llvm.amdgcn.wwm.f32(float %temp)
   %out = fadd float %temp.0, %temp.0
@@ -115,14 +115,14 @@ main_body:
   br i1 %cmp, label %IF, label %ELSE
 
 IF:
-  %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
-  %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+  %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %data.if = call float @llvm.amdgcn.softwqm.f32(float %out)
   br label %END
 
 ELSE:
-  call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %data, <4 x i32> undef, i32 %c, i32 0, i32 0, i32 0)
   br label %END
 
 END:
@@ -157,24 +157,24 @@ main_body:
   br i1 %cmp, label %IF, label %ELSE
 
 IF:
-  %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
-  %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+  %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0)
+  %src1 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i32 0, i32 0)
   %out = fadd float %src0, %src1
   %data.if = call float @llvm.amdgcn.softwqm.f32(float %out)
   br label %END
 
 ELSE:
-  call void @llvm.amdgcn.buffer.store.f32(float %data.sample, <4 x i32> undef, i32 %c, i32 0, i1 0, i1 0)
+  call void @llvm.amdgcn.struct.buffer.store.f32(float %data.sample, <4 x i32> undef, i32 %c, i32 0, i32 0, i32 0)
   br label %END
 
 END:
   %r = phi float [ %data.if, %IF ], [ %data, %ELSE ]
   ret float %r
 }
 
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
-declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
+declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #2
+declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #2
+declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #3
 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
 declare void @llvm.amdgcn.kill(i1) #1
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -22,7 +22,7 @@ loop:
   br i1 %tmp27, label %then, label %endif
 
 then:                                             ; preds = %bb
-  call void @llvm.amdgcn.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i1 false, i1 false) #1
+  call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 undef, i32 0, i32 0) ; legacy (vindex=0, offset=undef) maps to raw (offset=undef, soffset=0), matching the other raw conversions in this change
   br label %endif
 
 endif:                                             ; preds = %bb28, %bb
@@ -49,7 +49,7 @@ loop:
   %tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
   %tmp23 = add nuw i32 %tmp23phi, 1
   %tmp27 = icmp ult i32 %arg, %tmp23
-  call void @llvm.amdgcn.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i1 false, i1 false) #1
+  call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 undef, i32 0, i32 0) ; legacy (vindex=0, offset=undef) maps to raw (offset=undef, soffset=0), matching the other raw conversions in this change
   br i1 %tmp27, label %loop, label %loopexit
 
 loopexit:
@@ -76,7 +76,7 @@ loop:
   br i1 %tmp27, label %then, label %endif
 
 then:                                             ; preds = %bb
-  call void @llvm.amdgcn.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i1 false, i1 false) #1
+  call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 undef, i32 0, i32 0) ; legacy (vindex=0, offset=undef) maps to raw (offset=undef, soffset=0), matching the other raw conversions in this change
   br label %endif
 
 endif:                                             ; preds = %bb28, %bb
@@ -86,8 +86,6 @@ loopexit:
   ret void
 }
 
+declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
 
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #3
-
-attributes #3 = { nounwind writeonly }
-
+attributes #0 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -42,7 +42,7 @@
 ; W32: v_mov_b32_e32 v0, [[RES]]
 
 define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
-  %call = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i1 zeroext false, i1 zeroext false) #1
+  %call = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1
   ret float %call
 }
 
@@ -128,8 +128,8 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 
 define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %out0, float addrspace(1)* %out1) #0 {
 entry:
-  %val0 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i1 zeroext false, i1 zeroext false) #1
-  %val1 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %j, i32 %c, i32 0, i1 zeroext false, i1 zeroext false) #1
+  %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %c, i32 0, i32 0, i32 0) #1
+  %val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %c, i32 0, i32 0, i32 0) #1
   store volatile float %val0, float addrspace(1)* %out0
   store volatile float %val1, float addrspace(1)* %out1
   ret void
@@ -317,13 +317,13 @@ entry:
 define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %in, float addrspace(1)* %out) #0 {
 entry:
   %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" ()
-  %val0 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %i, i32 %live.out.reg, i32 0, i1 zeroext false, i1 zeroext false) #1
+  %val0 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %i, i32 %live.out.reg, i32 0, i32 0, i32 0) #1
   %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
   %cmp = icmp eq i32 %idx, 0
   br i1 %cmp, label %bb1, label %bb2
 
 bb1:
-  %val1 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %j, i32 %live.out.reg, i32 0, i1 zeroext false, i1 zeroext false) #1
+  %val1 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %j, i32 %live.out.reg, i32 0, i32 0, i32 0) #1
   br label %bb2
 
 bb2:
@@ -333,7 +333,7 @@ bb2:
 }
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #1
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
@@ -9,7 +9,7 @@ define amdgpu_vs float @test_none(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
 main_body:
   %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
   %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i1 0, i1 0)
+  %tmp7 = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 0, i32 0)
   ret float %tmp7
 }
 
@@ -19,7 +19,7 @@ define amdgpu_vs float @test_idxen(<4 x i32> addrspace(4)* inreg %base, i32 %i)
 main_body:
   %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
   %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i1 0, i1 0)
+  %tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i32 0, i32 0)
   ret float %tmp7
 }
 
@@ -29,7 +29,7 @@ define amdgpu_vs float @test_offen(<4 x i32> addrspace(4)* inreg %base, i32 %i)
 main_body:
   %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
   %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 0, i32 undef, i1 0, i1 0)
+  %tmp7 = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 0, i32 0)
   ret float %tmp7
 }
 
@@ -39,10 +39,12 @@ define amdgpu_vs float @test_both(<4 x i32> addrspace(4)* inreg %base, i32 %i) {
 main_body:
   %ptr = getelementptr <4 x i32>, <4 x i32> addrspace(4)* %base, i32 %i
   %tmp2 = load <4 x i32>, <4 x i32> addrspace(4)* %ptr, align 32
-  %tmp7 = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i1 0, i1 0)
+  %tmp7 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %tmp2, i32 undef, i32 undef, i32 0, i32 0)
   ret float %tmp7
 }
 
-declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) nounwind readonly
+declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #1
+declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) #1
 
 attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll b/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll
@@ -8,7 +8,7 @@
 ; CHECK: s_cbranch_vccnz
 define amdgpu_ps float @main(<4 x i32> inreg %rsrc) {
 main_body:
-  %v = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 true, i1 false)
+  %v = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i32 1) ; final operand 1 carries over the legacy call's 'i1 true' flag (presumably glc, bit 0 of the aux/cachepolicy immediate -- confirm against the intrinsic definition)
   %cc = fcmp une float %v, 1.000000e+00
   br i1 %cc, label %if, label %else
 
@@ -22,7 +22,6 @@ else:
   ret float %r
 }
 
-; Function Attrs: nounwind readonly
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0
+declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg) #0
 
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
@@ -39,9 +39,10 @@ entry:
 
 bb0:
   %desc = load <4 x i32>, <4 x i32>* %arg, align 8
-  tail call void @llvm.amdgcn.buffer.store.f32(float undef, <4 x i32> %desc, i32 0, i32 undef, i1 zeroext false, i1 zeroext false)
+  tail call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> %desc, i32 undef, i32 0, i32 0)
   br label %bb0
 }
 
-; Function Attrs: nounwind writeonly
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1 immarg, i1 immarg)
+declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
+
+attributes #0 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -38,7 +38,7 @@ bb:
   %tmp16 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp15, align 16, !tbaa !0
   %tmp17 = add i32 %arg5, %arg7
   %tmp16.cast = bitcast <4 x i32> %tmp16 to <4 x i32>
-  %tmp18 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i1 false, i1 false)
+  %tmp18 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %tmp16.cast, i32 %tmp17, i32 0, i32 0, i32 0)
   %tmp19 = extractelement <4 x float> %tmp18, i32 0
   %tmp20 = extractelement <4 x float> %tmp18, i32 1
   %tmp21 = extractelement <4 x float> %tmp18, i32 2
@@ -489,7 +489,7 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 
 declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #2
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/wait.ll b/llvm/test/CodeGen/AMDGPU/wait.ll
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll