Skip to content

Conversation

broxigarchen
Copy link
Contributor

@broxigarchen broxigarchen commented Jul 23, 2025

Make the assembler display the op_sel string in true16 mode when a hi16 register is used.

@broxigarchen broxigarchen marked this pull request as ready for review July 24, 2025 13:57
@llvmbot llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels Jul 24, 2025
@broxigarchen broxigarchen requested review from Sisyph and arsenm July 24, 2025 13:58
@llvmbot
Copy link
Member

llvmbot commented Jul 24, 2025

@llvm/pr-subscribers-mc

Author: Brox Chen (broxigarchen)

Changes

Make the assembler add op_sel in true16 mode when a hi16 register is used.


Full diff: https://github.com/llvm/llvm-project/pull/150315.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+20-5)
  • (modified) llvm/test/MC/AMDGPU/vinterp.s (+18-18)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 421fc429048ff..55d5e33d5bf9d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9196,6 +9196,7 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
 {
   OptionalImmIndexMap OptionalIdx;
   unsigned Opc = Inst.getOpcode();
+  const MCRegisterInfo *MRI = getMRI();
 
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
@@ -9237,15 +9238,29 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
     if (OpIdx == -1)
       break;
+    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
 
     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
 
-    if ((OpSel & (1 << J)) != 0)
-      ModVal |= SISrcMods::OP_SEL_0;
-    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
-        (OpSel & (1 << 3)) != 0)
-      ModVal |= SISrcMods::DST_OP_SEL;
+    if (SrcOp.isReg() &&
+        MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(SrcOp.getReg())) {
+      if (AMDGPU::isHi16Reg(SrcOp.getReg(), *MRI))
+        ModVal |= SISrcMods::OP_SEL_0;
+    } else {
+      if ((OpSel & (1 << J)) != 0)
+        ModVal |= SISrcMods::OP_SEL_0;
+    }
+    if (ModOps[J] == AMDGPU::OpName::src0_modifiers) {
+      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
+      if (DstIdx == -1)
+        return;
+      const MCOperand &DstOp = Inst.getOperand(DstIdx);
+      if (DstOp.isReg() &&
+          MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg()))
+        if (AMDGPU::isHi16Reg(DstOp.getReg(), *MRI))
+          ModVal |= SISrcMods::DST_OP_SEL;
+    }
 
     Inst.getOperand(ModIdx).setImm(ModVal);
   }
diff --git a/llvm/test/MC/AMDGPU/vinterp.s b/llvm/test/MC/AMDGPU/vinterp.s
index f224086b1c056..cccd985ce033d 100644
--- a/llvm/test/MC/AMDGPU/vinterp.s
+++ b/llvm/test/MC/AMDGPU/vinterp.s
@@ -99,16 +99,16 @@ v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:7
 // GCN: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:7 ; encoding: [0x00,0x07,0x02,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_f16_f32 v0, v1.h, v2, v3.l
-// GCN: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l wait_exp:0 ; encoding: [0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:0 ; encoding: [0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_f16_f32 v0, v1.l, v2, v3.h
-// GCN: v_interp_p10_f16_f32 v0, v1.l, v2, v3.h wait_exp:0 ; encoding: [0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_f16_f32 v0, v1.l, v2, v3.h op_sel:[0,0,1,0] wait_exp:0 ; encoding: [0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_f16_f32 v0, v1.h, v2, v3.h clamp wait_exp:5
-// GCN: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h clamp wait_exp:5 ; encoding: [0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h clamp op_sel:[1,0,1,0] wait_exp:5 ; encoding: [0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_f16_f32 v0, -v1.h, -v2, -v3.h clamp wait_exp:5
-// GCN: v_interp_p10_f16_f32 v0, -v1.h, -v2, -v3.h clamp wait_exp:5 ; encoding: [0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0xe4]
+// GCN: v_interp_p10_f16_f32 v0, -v1.h, -v2, -v3.h clamp op_sel:[1,0,1,0] wait_exp:5 ; encoding: [0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0xe4]
 
 v_interp_p2_f16_f32 v0.l, v1.l, v2, v3
 // GCN: v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x04]
@@ -135,19 +135,19 @@ v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
 // GCN: v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.l, v1.h, v2, v3
-// GCN: v_interp_p2_f16_f32 v0.l, v1.h, v2, v3 wait_exp:0 ; encoding: [0x00,0x08,0x03,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_f16_f32 v0.l, v1.h, v2, v3 op_sel:[1,0,0,0] wait_exp:0 ; encoding: [0x00,0x08,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.h, v1.l, v2, v3
-// GCN: v_interp_p2_f16_f32 v0.h, v1.l, v2, v3 wait_exp:0 ; encoding: [0x00,0x40,0x03,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_f16_f32 v0.h, v1.l, v2, v3 op_sel:[0,0,0,1] wait_exp:0 ; encoding: [0x00,0x40,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 wait_exp:5
-// GCN: v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 wait_exp:5 ; encoding: [0x00,0x4d,0x03,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0x4d,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 clamp wait_exp:5
-// GCN: v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 clamp wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.h, -v1.h, -v2, -v3 clamp wait_exp:5
-// GCN: v_interp_p2_f16_f32 v0.h, -v1.h, -v2, -v3 clamp wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0xe4]
+// GCN: v_interp_p2_f16_f32 v0.h, -v1.h, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0xe4]
 
 v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.l
 // GCN: v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.l wait_exp:0 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x04]
@@ -174,16 +174,16 @@ v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.l wait_exp:7
 // GCN: v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.l wait_exp:7 ; encoding: [0x00,0x07,0x04,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.l
-// GCN: v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.l wait_exp:0 ; encoding: [0x00,0x08,0x04,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:0 ; encoding: [0x00,0x08,0x04,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.h
-// GCN: v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.h wait_exp:0 ; encoding: [0x00,0x20,0x04,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.h op_sel:[0,0,1,0] wait_exp:0 ; encoding: [0x00,0x20,0x04,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.h clamp wait_exp:5
-// GCN: v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.h clamp wait_exp:5 ; encoding: [0x00,0xad,0x04,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.h clamp op_sel:[1,0,1,0] wait_exp:5 ; encoding: [0x00,0xad,0x04,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_rtz_f16_f32 v0, -v1.h, -v2, -v3.h clamp wait_exp:5
-// GCN: v_interp_p10_rtz_f16_f32 v0, -v1.h, -v2, -v3.h clamp wait_exp:5 ; encoding: [0x00,0xad,0x04,0xcd,0x01,0x05,0x0e,0xe4]
+// GCN: v_interp_p10_rtz_f16_f32 v0, -v1.h, -v2, -v3.h clamp op_sel:[1,0,1,0] wait_exp:5 ; encoding: [0x00,0xad,0x04,0xcd,0x01,0x05,0x0e,0xe4]
 
 v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3
 // GCN: v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x04]
@@ -210,16 +210,16 @@ v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
 // GCN: v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.l, v1.h, v2, v3
-// GCN: v_interp_p2_rtz_f16_f32 v0.l, v1.h, v2, v3 wait_exp:0 ; encoding: [0x00,0x08,0x05,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_rtz_f16_f32 v0.l, v1.h, v2, v3 op_sel:[1,0,0,0] wait_exp:0 ; encoding: [0x00,0x08,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.h, v1.l, v2, v3
-// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.l, v2, v3 wait_exp:0 ; encoding: [0x00,0x40,0x05,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.l, v2, v3 op_sel:[0,0,0,1] wait_exp:0 ; encoding: [0x00,0x40,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 wait_exp:5
-// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 wait_exp:5 ; encoding: [0x00,0x4d,0x05,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0x4d,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 clamp wait_exp:5
-// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 clamp wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.h, -v1.h, -v2, -v3 clamp wait_exp:5
-// GCN: v_interp_p2_rtz_f16_f32 v0.h, -v1.h, -v2, -v3 clamp wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0xe4]
+// GCN: v_interp_p2_rtz_f16_f32 v0.h, -v1.h, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0xe4]

@llvmbot
Copy link
Member

llvmbot commented Jul 24, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

Changes

Make the assembler add op_sel in true16 mode when a hi16 register is used.


Full diff: https://github.com/llvm/llvm-project/pull/150315.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+20-5)
  • (modified) llvm/test/MC/AMDGPU/vinterp.s (+18-18)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 421fc429048ff..55d5e33d5bf9d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9196,6 +9196,7 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
 {
   OptionalImmIndexMap OptionalIdx;
   unsigned Opc = Inst.getOpcode();
+  const MCRegisterInfo *MRI = getMRI();
 
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
@@ -9237,15 +9238,29 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
     if (OpIdx == -1)
       break;
+    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
 
     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
 
-    if ((OpSel & (1 << J)) != 0)
-      ModVal |= SISrcMods::OP_SEL_0;
-    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
-        (OpSel & (1 << 3)) != 0)
-      ModVal |= SISrcMods::DST_OP_SEL;
+    if (SrcOp.isReg() &&
+        MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(SrcOp.getReg())) {
+      if (AMDGPU::isHi16Reg(SrcOp.getReg(), *MRI))
+        ModVal |= SISrcMods::OP_SEL_0;
+    } else {
+      if ((OpSel & (1 << J)) != 0)
+        ModVal |= SISrcMods::OP_SEL_0;
+    }
+    if (ModOps[J] == AMDGPU::OpName::src0_modifiers) {
+      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
+      if (DstIdx == -1)
+        return;
+      const MCOperand &DstOp = Inst.getOperand(DstIdx);
+      if (DstOp.isReg() &&
+          MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg()))
+        if (AMDGPU::isHi16Reg(DstOp.getReg(), *MRI))
+          ModVal |= SISrcMods::DST_OP_SEL;
+    }
 
     Inst.getOperand(ModIdx).setImm(ModVal);
   }
diff --git a/llvm/test/MC/AMDGPU/vinterp.s b/llvm/test/MC/AMDGPU/vinterp.s
index f224086b1c056..cccd985ce033d 100644
--- a/llvm/test/MC/AMDGPU/vinterp.s
+++ b/llvm/test/MC/AMDGPU/vinterp.s
@@ -99,16 +99,16 @@ v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:7
 // GCN: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:7 ; encoding: [0x00,0x07,0x02,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_f16_f32 v0, v1.h, v2, v3.l
-// GCN: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l wait_exp:0 ; encoding: [0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:0 ; encoding: [0x00,0x08,0x02,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_f16_f32 v0, v1.l, v2, v3.h
-// GCN: v_interp_p10_f16_f32 v0, v1.l, v2, v3.h wait_exp:0 ; encoding: [0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_f16_f32 v0, v1.l, v2, v3.h op_sel:[0,0,1,0] wait_exp:0 ; encoding: [0x00,0x20,0x02,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_f16_f32 v0, v1.h, v2, v3.h clamp wait_exp:5
-// GCN: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h clamp wait_exp:5 ; encoding: [0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h clamp op_sel:[1,0,1,0] wait_exp:5 ; encoding: [0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_f16_f32 v0, -v1.h, -v2, -v3.h clamp wait_exp:5
-// GCN: v_interp_p10_f16_f32 v0, -v1.h, -v2, -v3.h clamp wait_exp:5 ; encoding: [0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0xe4]
+// GCN: v_interp_p10_f16_f32 v0, -v1.h, -v2, -v3.h clamp op_sel:[1,0,1,0] wait_exp:5 ; encoding: [0x00,0xad,0x02,0xcd,0x01,0x05,0x0e,0xe4]
 
 v_interp_p2_f16_f32 v0.l, v1.l, v2, v3
 // GCN: v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x03,0xcd,0x01,0x05,0x0e,0x04]
@@ -135,19 +135,19 @@ v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
 // GCN: v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.l, v1.h, v2, v3
-// GCN: v_interp_p2_f16_f32 v0.l, v1.h, v2, v3 wait_exp:0 ; encoding: [0x00,0x08,0x03,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_f16_f32 v0.l, v1.h, v2, v3 op_sel:[1,0,0,0] wait_exp:0 ; encoding: [0x00,0x08,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.h, v1.l, v2, v3
-// GCN: v_interp_p2_f16_f32 v0.h, v1.l, v2, v3 wait_exp:0 ; encoding: [0x00,0x40,0x03,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_f16_f32 v0.h, v1.l, v2, v3 op_sel:[0,0,0,1] wait_exp:0 ; encoding: [0x00,0x40,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 wait_exp:5
-// GCN: v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 wait_exp:5 ; encoding: [0x00,0x4d,0x03,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0x4d,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 clamp wait_exp:5
-// GCN: v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 clamp wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_f16_f32 v0.h, v1.h, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_f16_f32 v0.h, -v1.h, -v2, -v3 clamp wait_exp:5
-// GCN: v_interp_p2_f16_f32 v0.h, -v1.h, -v2, -v3 clamp wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0xe4]
+// GCN: v_interp_p2_f16_f32 v0.h, -v1.h, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x03,0xcd,0x01,0x05,0x0e,0xe4]
 
 v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.l
 // GCN: v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.l wait_exp:0 ; encoding: [0x00,0x00,0x04,0xcd,0x01,0x05,0x0e,0x04]
@@ -174,16 +174,16 @@ v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.l wait_exp:7
 // GCN: v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.l wait_exp:7 ; encoding: [0x00,0x07,0x04,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.l
-// GCN: v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.l wait_exp:0 ; encoding: [0x00,0x08,0x04,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.l op_sel:[1,0,0,0] wait_exp:0 ; encoding: [0x00,0x08,0x04,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.h
-// GCN: v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.h wait_exp:0 ; encoding: [0x00,0x20,0x04,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_rtz_f16_f32 v0, v1.l, v2, v3.h op_sel:[0,0,1,0] wait_exp:0 ; encoding: [0x00,0x20,0x04,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.h clamp wait_exp:5
-// GCN: v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.h clamp wait_exp:5 ; encoding: [0x00,0xad,0x04,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p10_rtz_f16_f32 v0, v1.h, v2, v3.h clamp op_sel:[1,0,1,0] wait_exp:5 ; encoding: [0x00,0xad,0x04,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p10_rtz_f16_f32 v0, -v1.h, -v2, -v3.h clamp wait_exp:5
-// GCN: v_interp_p10_rtz_f16_f32 v0, -v1.h, -v2, -v3.h clamp wait_exp:5 ; encoding: [0x00,0xad,0x04,0xcd,0x01,0x05,0x0e,0xe4]
+// GCN: v_interp_p10_rtz_f16_f32 v0, -v1.h, -v2, -v3.h clamp op_sel:[1,0,1,0] wait_exp:5 ; encoding: [0x00,0xad,0x04,0xcd,0x01,0x05,0x0e,0xe4]
 
 v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3
 // GCN: v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x05,0xcd,0x01,0x05,0x0e,0x04]
@@ -210,16 +210,16 @@ v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
 // GCN: v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.l, v1.h, v2, v3
-// GCN: v_interp_p2_rtz_f16_f32 v0.l, v1.h, v2, v3 wait_exp:0 ; encoding: [0x00,0x08,0x05,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_rtz_f16_f32 v0.l, v1.h, v2, v3 op_sel:[1,0,0,0] wait_exp:0 ; encoding: [0x00,0x08,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.h, v1.l, v2, v3
-// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.l, v2, v3 wait_exp:0 ; encoding: [0x00,0x40,0x05,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.l, v2, v3 op_sel:[0,0,0,1] wait_exp:0 ; encoding: [0x00,0x40,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 wait_exp:5
-// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 wait_exp:5 ; encoding: [0x00,0x4d,0x05,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0x4d,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 clamp wait_exp:5
-// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 clamp wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0x04]
+// GCN: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0x04]
 
 v_interp_p2_rtz_f16_f32 v0.h, -v1.h, -v2, -v3 clamp wait_exp:5
-// GCN: v_interp_p2_rtz_f16_f32 v0.h, -v1.h, -v2, -v3 clamp wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0xe4]
+// GCN: v_interp_p2_rtz_f16_f32 v0.h, -v1.h, -v2, -v3 clamp op_sel:[1,0,0,1] wait_exp:5 ; encoding: [0x00,0xcd,0x05,0xcd,0x01,0x05,0x0e,0xe4]

@broxigarchen broxigarchen requested review from kosarev and jayfoad July 24, 2025 13:58
@broxigarchen
Copy link
Contributor Author

ping!

@broxigarchen
Copy link
Contributor Author

Ping! It seems this patch has been forgotten for a while.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU llvm:mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants