
Conversation

arsenm (Contributor) commented Aug 24, 2025

This function is used to implement TargetInstrInfo::getRegClass and
conceptually should not depend on the dynamic state of the function.

arsenm added 2 commits August 23, 2025 23:44
This was not checking the alignment requirement for 64-bit
operands which accept inline immediates. Not all custom operand
types were handled in the switch, so round it out with explicit
handling of all enum values, and have the default case fall back
to the generic checks for unhandled cases.

Fixes #155095
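
As a minimal sketch of the pattern the message above describes, the following uses hypothetical enum values and predicates standing in for the real AMDGPU operand types and register checks; it illustrates the exhaustive-switch-plus-default structure, not the actual SIInstrInfo code:

```cpp
#include <iostream>

// Hypothetical stand-ins for the target's custom operand-type enum;
// the real AMDGPU enum has many more values.
enum OperandType {
  OPERAND_REG_32,          // plain 32-bit register operand
  OPERAND_REG_64,          // 64-bit register operand
  OPERAND_REG_INLINE_C_64, // 64-bit operand that also accepts inline immediates
};

// Placeholder predicates for the real register queries.
bool isEvenAligned(unsigned Reg) { return Reg % 2 == 0; }
bool passesDefaultChecks(unsigned Reg) { return Reg < 256; }

// Handle every enum value explicitly so the inline-immediate 64-bit
// case gets the alignment check too; anything unhandled falls back to
// the default checks.
bool isLegalRegOperand(OperandType Ty, unsigned Reg) {
  switch (Ty) {
  case OPERAND_REG_64:
  case OPERAND_REG_INLINE_C_64:
    // The bug being fixed: the inline-immediate flavor previously
    // missed this alignment requirement.
    return isEvenAligned(Reg) && passesDefaultChecks(Reg);
  case OPERAND_REG_32:
    return passesDefaultChecks(Reg);
  }
  return passesDefaultChecks(Reg); // unhandled values: default checks only
}

int main() {
  std::cout << isLegalRegOperand(OPERAND_REG_INLINE_C_64, 3) << '\n'; // 0
  std::cout << isLegalRegOperand(OPERAND_REG_INLINE_C_64, 4) << '\n'; // 1
}
```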
…egClass

This function is used to implement TargetInstrInfo::getRegClass and
conceptually should not depend on the dynamic state of the function.
arsenm (Contributor, Author) commented Aug 24, 2025

llvmbot (Member) commented Aug 24, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

This function is used to implement TargetInstrInfo::getRegClass and
conceptually should not depend on the dynamic state of the function.
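
As a concrete (and heavily simplified) sketch of the hazard, with hypothetical names: a register-class query that consults mutable per-function state such as MRI.reservedRegsFrozen() can return different answers for the same instruction at different points in the pipeline, while the hook is meant to be a pure function of the static instruction description. The specific classes below are illustrative only:

```cpp
#include <iostream>

// Stand-in for MachineRegisterInfo state that flips as compilation
// progresses (reservedRegsFrozen() becomes true once allocation begins).
struct MachineState {
  bool ReservedRegsFrozen = false;
};

enum RegClass { VReg64Align2, AVReg64Align2 };

// Before the change: the answer depends on *when* the query runs.
RegClass getRegClassStateful(const MachineState &MS) {
  return MS.ReservedRegsFrozen ? VReg64Align2 : AVReg64Align2;
}

// After the change: a pure function of the instruction description, so
// every phase of the pipeline sees the same register class.
RegClass getRegClassPure() { return AVReg64Align2; }

int main() {
  MachineState MS;
  std::cout << (getRegClassStateful(MS) == getRegClassPure()) << '\n'; // 1
  MS.ReservedRegsFrozen = true; // register allocation begins
  std::cout << (getRegClassStateful(MS) == getRegClassPure()) << '\n'; // 0
}
```

The test churn in the patch below (for example, vreg_64_align2 copies becoming av_64_align2) reflects the query now giving one consistent answer across the pipeline.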


Patch is 78.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155125.diff

23 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll (+18-18)
  • (modified) llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll (+6-5)
  • (modified) llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll (+16-16)
  • (modified) llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/flat-scratch.ll (+12-6)
  • (modified) llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (+78-78)
  • (modified) llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll (+6-6)
  • (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/global-i16-load-store.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/infer-addrspace-flat-atomic.ll (+6-6)
  • (modified) llvm/test/CodeGen/AMDGPU/mad_64_32.ll (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll (+4-4)
  • (modified) llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs.ll (+6-6)
  • (modified) llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll (+2-1)
  • (modified) llvm/test/CodeGen/AMDGPU/preload-kernargs.ll (+12-12)
  • (modified) llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll (+6-7)
  • (modified) llvm/test/CodeGen/AMDGPU/store-to-constant.ll (+10-10)
  • (modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+2-8)
  • (modified) llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll (+33-32)
  • (modified) llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll (+4-4)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ba37bdb203a7f..3b5b3687967c9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5942,7 +5942,7 @@ adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
                           const MachineRegisterInfo &MRI,
                           const MCInstrDesc &TID, unsigned RCID,
                           bool IsAllocatable) {
-  if ((IsAllocatable || !ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
+  if ((IsAllocatable || !ST.hasGFX90AInsts()) &&
       (((TID.mayLoad() || TID.mayStore()) &&
         !(TID.TSFlags & SIInstrFlags::Spill)) ||
        (TID.TSFlags & (SIInstrFlags::DS | SIInstrFlags::MIMG)))) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll
index 1da8cd6646d53..9666085341d83 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-branch-weight-metadata.ll
@@ -14,9 +14,9 @@ define void @uniform_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) n
 ; GFX9-NEXT:    s_mov_b32 s6, s19
 ; GFX9-NEXT:    s_mov_b32 s5, s18
 ; GFX9-NEXT:    s_mov_b32 s4, s17
-; GFX9-NEXT:    v_mov_b32_e32 v0, s16
-; GFX9-NEXT:    v_mov_b32_e32 v1, s21
-; GFX9-NEXT:    buffer_store_dword v0, v1, s[4:7], 0 offen
+; GFX9-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-NEXT:    v_mov_b32_e32 v0, s21
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[4:7], 0 offen
 ; GFX9-NEXT:  .LBB0_2: ; %if.end
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -61,9 +61,9 @@ define void @uniform_br_same_weight(i32 noundef inreg %value, ptr addrspace(8) n
 ; GFX9-NEXT:    s_mov_b32 s6, s19
 ; GFX9-NEXT:    s_mov_b32 s5, s18
 ; GFX9-NEXT:    s_mov_b32 s4, s17
-; GFX9-NEXT:    v_mov_b32_e32 v0, s16
-; GFX9-NEXT:    v_mov_b32_e32 v1, s21
-; GFX9-NEXT:    buffer_store_dword v0, v1, s[4:7], 0 offen
+; GFX9-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-NEXT:    v_mov_b32_e32 v0, s21
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[4:7], 0 offen
 ; GFX9-NEXT:  .LBB1_2: ; %if.end
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -108,9 +108,9 @@ define void @uniform_br_then_likely(i32 noundef inreg %value, ptr addrspace(8) n
 ; GFX9-NEXT:    s_mov_b32 s6, s19
 ; GFX9-NEXT:    s_mov_b32 s5, s18
 ; GFX9-NEXT:    s_mov_b32 s4, s17
-; GFX9-NEXT:    v_mov_b32_e32 v0, s16
-; GFX9-NEXT:    v_mov_b32_e32 v1, s21
-; GFX9-NEXT:    buffer_store_dword v0, v1, s[4:7], 0 offen
+; GFX9-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-NEXT:    v_mov_b32_e32 v0, s21
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[4:7], 0 offen
 ; GFX9-NEXT:  .LBB2_2: ; %if.end
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -156,9 +156,9 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-NEXT:    s_mov_b32 s6, s19
 ; GFX9-NEXT:    s_mov_b32 s5, s18
 ; GFX9-NEXT:    s_mov_b32 s4, s17
-; GFX9-NEXT:    v_mov_b32_e32 v0, s16
-; GFX9-NEXT:    v_mov_b32_e32 v1, s21
-; GFX9-NEXT:    buffer_store_dword v0, v1, s[4:7], 0 offen
+; GFX9-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-NEXT:    v_mov_b32_e32 v0, s21
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[4:7], 0 offen
 ; GFX9-NEXT:  .LBB3_2: ; %if.end
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -227,9 +227,9 @@ define void @divergent_br_same_weight(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-NEXT:    s_mov_b32 s6, s19
 ; GFX9-NEXT:    s_mov_b32 s5, s18
 ; GFX9-NEXT:    s_mov_b32 s4, s17
-; GFX9-NEXT:    v_mov_b32_e32 v0, s16
-; GFX9-NEXT:    v_mov_b32_e32 v1, s21
-; GFX9-NEXT:    buffer_store_dword v0, v1, s[4:7], 0 offen
+; GFX9-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-NEXT:    v_mov_b32_e32 v0, s21
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[4:7], 0 offen
 ; GFX9-NEXT:  .LBB4_2: ; %if.end
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -297,9 +297,9 @@ define void @divergent_br_then_likely(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-NEXT:    s_mov_b32 s6, s19
 ; GFX9-NEXT:    s_mov_b32 s5, s18
 ; GFX9-NEXT:    s_mov_b32 s4, s17
-; GFX9-NEXT:    v_mov_b32_e32 v0, s16
-; GFX9-NEXT:    v_mov_b32_e32 v1, s21
-; GFX9-NEXT:    buffer_store_dword v0, v1, s[4:7], 0 offen
+; GFX9-NEXT:    v_mov_b32_e32 v1, s16
+; GFX9-NEXT:    v_mov_b32_e32 v0, s21
+; GFX9-NEXT:    buffer_store_dword v1, v0, s[4:7], 0 offen
 ; GFX9-NEXT:  ; %bb.2: ; %if.end
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
index 51caa84450ff3..8b2f0a7ed63c1 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
@@ -166,13 +166,14 @@ define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addr
 ; GFX942-ARCH-FLAT:       ; %bb.0:
 ; GFX942-ARCH-FLAT-NEXT:    s_load_dword s2, s[4:5], 0x0
 ; GFX942-ARCH-FLAT-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX942-ARCH-FLAT-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942-ARCH-FLAT-NEXT:    s_mov_b32 s0, 0
+; GFX942-ARCH-FLAT-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX942-ARCH-FLAT-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX942-ARCH-FLAT-NEXT:    s_cmp_lg_u32 s2, -1
-; GFX942-ARCH-FLAT-NEXT:    s_cselect_b32 s0, s1, 0
-; GFX942-ARCH-FLAT-NEXT:    s_cselect_b32 s1, s2, 0
-; GFX942-ARCH-FLAT-NEXT:    v_mov_b32_e32 v0, s1
-; GFX942-ARCH-FLAT-NEXT:    v_mov_b32_e32 v1, s0
+; GFX942-ARCH-FLAT-NEXT:    s_cselect_b32 s1, s1, 0
+; GFX942-ARCH-FLAT-NEXT:    s_cselect_b32 s2, s2, 0
+; GFX942-ARCH-FLAT-NEXT:    v_mov_b32_e32 v0, s2
+; GFX942-ARCH-FLAT-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX942-ARCH-FLAT-NEXT:    flat_store_dword v[0:1], v2 sc0 sc1
 ; GFX942-ARCH-FLAT-NEXT:    s_waitcnt vmcnt(0)
 ; GFX942-ARCH-FLAT-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll b/llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll
index 614b1e38a530f..37040123ee20c 100644
--- a/llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll
@@ -8,7 +8,7 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
 ; CHECK-NEXT:    s_load_dwordx2 s[10:11], s[4:5], 0x0
 ; CHECK-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x10
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0x3e21eeb6
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0xa17f65f6
+; CHECK-NEXT:    v_mov_b32_e32 v20, 0
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_bitcmp1_b32 s0, 0
 ; CHECK-NEXT:    s_cselect_b64 s[16:17], -1, 0
@@ -22,6 +22,7 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
 ; CHECK-NEXT:    s_xor_b64 s[20:21], s[2:3], -1
 ; CHECK-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
 ; CHECK-NEXT:    v_accvgpr_write_b32 a2, v0
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0xa17f65f6
 ; CHECK-NEXT:    v_mov_b32_e32 v3, 0xbe927e4f
 ; CHECK-NEXT:    v_mov_b32_e32 v4, 0x19f4ec90
 ; CHECK-NEXT:    v_mov_b32_e32 v5, 0x3efa01a0
@@ -42,7 +43,7 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
 ; CHECK-NEXT:    v_mov_b32_e32 v18, 0x55555523
 ; CHECK-NEXT:    v_mov_b32_e32 v19, 0xbfd55555
 ; CHECK-NEXT:    s_and_b64 s[6:7], exec, s[18:19]
-; CHECK-NEXT:    v_mov_b32_e32 v20, 0
+; CHECK-NEXT:    v_mov_b32_e32 v21, v20
 ; CHECK-NEXT:    ; implicit-def: $vgpr30_vgpr31
 ; CHECK-NEXT:    ; implicit-def: $vgpr22_vgpr23
 ; CHECK-NEXT:    s_branch .LBB0_2
@@ -155,7 +156,6 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
 ; CHECK-NEXT:    s_cbranch_vccz .LBB0_1
 ; CHECK-NEXT:  ; %bb.16: ; %._crit_edge2105.i.i.i2330
 ; CHECK-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    v_mov_b32_e32 v21, v20
 ; CHECK-NEXT:    s_mov_b64 s[24:25], 0
 ; CHECK-NEXT:    global_store_dwordx2 v20, v[20:21], s[12:13]
 ; CHECK-NEXT:    s_branch .LBB0_1
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
index 96b191d5acead..2ce54f8a463c7 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
@@ -18,7 +18,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i3
   ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE1]]
   ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
@@ -40,7 +40,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32
   ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE1]]
   ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -62,7 +62,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32
   ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE1]]
   ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
@@ -86,7 +86,7 @@ define amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE1]]
   ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
@@ -107,7 +107,7 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32
   ; GFX90A_GFX942-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE1]]
   ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
   ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY8]], implicit $exec
@@ -135,7 +135,7 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32>
   ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE1]]
   ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
   ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
@@ -163,7 +163,7 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32>
   ; GFX90A_GFX942-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE1]]
   ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
   ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
@@ -193,7 +193,7 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+  ; GFX90A_GFX942-NEXT:   [[COPY9:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE1]]
   ; GFX90A_GFX942-NEXT:   [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE2]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
   ; GFX90A_GFX942-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
@@ -226,7 +226,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr
   ; GFX90A_GFX942-NEXT:   [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
   ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
@@ -254,7 +254,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr
   ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
   ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -282,7 +282,7 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr
   ; GFX90A_GFX942-NEXT:   [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
   ; GFX90A_GFX942-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
-  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+  ; GFX90A_GFX942-NEXT:   [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]]
   ; GFX90A_GFX942-NEXT:   BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
   ; GFX90A_GFX942-NEXT:   S_ENDPGM 0
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %v...
[truncated]

Base automatically changed from users/arsenm/amdgpu/add-machine-verifier-test-missed-unaligned-check to main August 25, 2025 08:43
arsenm merged commit 05f208a into main Aug 26, 2025
11 of 13 checks passed
arsenm deleted the users/arsenm/amdgpu/stop-checking-reservedRegsFrozen-in-adjustAllocatableRegClass branch August 26, 2025 11:09