Skip to content

Conversation

RolandF77
Copy link
Collaborator

Add support for PPC Dense Math builtins mma_build_dmr and mma_disassemble_dmr builtins.

Copy link

github-actions bot commented Aug 11, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@RolandF77 RolandF77 marked this pull request as ready for review August 11, 2025 22:49
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:PowerPC clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:codegen IR generation bugs: mangling, exceptions, etc. llvm:ir labels Aug 11, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 11, 2025

@llvm/pr-subscribers-clang-codegen

@llvm/pr-subscribers-llvm-ir

Author: None (RolandF77)

Changes

Add support for PPC Dense Math builtins mma_build_dmr and mma_disassemble_dmr builtins.


Full diff: https://github.com/llvm/llvm-project/pull/153097.diff

8 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsPPC.def (+4)
  • (modified) clang/lib/CodeGen/TargetBuiltins/PPC.cpp (+6-1)
  • (modified) clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c (+27-9)
  • (modified) clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c (+4)
  • (modified) llvm/include/llvm/IR/IntrinsicsPowerPC.td (+10)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+41)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+2)
  • (modified) llvm/test/CodeGen/PowerPC/dmr-enable.ll (+63)
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index e7d6741eb695e..7f4b12b0f4e15 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1098,6 +1098,10 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmmr, "vW1024*W1024*", false,
                          "mma,isa-future-instructions")
 UNALIASED_CUSTOM_BUILTIN(mma_dmxor, "vW1024*W1024*", true,
                          "mma,isa-future-instructions")
+UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false,
+                         "mma,isa-future-instructions")
+UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false,
+                         "mma,isa-future-instructions")
 
 // MMA builtins with positive/negative multiply/accumulate.
 UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 270e9fc976f23..f3dd349651238 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -1152,10 +1152,15 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       CallOps.push_back(Acc);
     }
     if (BuiltinID == PPC::BI__builtin_mma_dmmr ||
-        BuiltinID == PPC::BI__builtin_mma_dmxor) {
+        BuiltinID == PPC::BI__builtin_mma_dmxor ||
+        BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) {
       Address Addr = EmitPointerWithAlignment(E->getArg(1));
       Ops[1] = Builder.CreateLoad(Addr);
     }
+    if (BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) {
+      dbgs() << "&&& No disassemble!!!\n";
+      return Builder.CreateAlignedStore(Ops[1], Ops[0], MaybeAlign());
+    }
     for (unsigned i=1; i<Ops.size(); i++)
       CallOps.push_back(Ops[i]);
     llvm::Function *F = CGM.getIntrinsic(ID);
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
index 4aafc09602228..c66f5e2a32919 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
@@ -93,18 +93,36 @@ void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsi
   *((__dmr1024 *)resp) = vdmr;
 }
 
-// CHECK-LABEL: @test_dmf_basic
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr %res1, align 128
-// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr %res2, align 128
-// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr %p, align 128
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
-// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr %res2, align 128
+// CHECK-LABEL: @test_dmf_basic(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
+// CHECK-NEXT:    store <1024 x i1> [[TMP1]], ptr [[RES1:%.*]], align 128
+// CHECK-NEXT:    [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2:%.*]], align 128
+// CHECK-NEXT:    [[TMP3:%.*]] = load <1024 x i1>, ptr [[P:%.*]], align 128
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
+// CHECK-NEXT:    store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128
+// CHECK-NEXT:    ret void
+//
 void test_dmf_basic(char *p, char *res1, char *res2) {
   __dmr1024 x[2];
   __builtin_mma_dmsetdmrz(&x[0]);
   __builtin_mma_dmmr((__dmr1024*)res1, &x[0]);
   __builtin_mma_dmxor((__dmr1024*)res2, (__dmr1024*)p);
 }
+
+// CHECK-LABEL: @test_dmf_basic2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[V:%.*]], align 16, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]])
+// CHECK-NEXT:    store <1024 x i1> [[TMP1]], ptr [[RES2:%.*]], align 128
+// CHECK-NEXT:    [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1:%.*]], align 128
+// CHECK-NEXT:    store <1024 x i1> [[TMP2]], ptr [[RES1:%.*]], align 128
+// CHECK-NEXT:    ret void
+//
+void test_dmf_basic2(char *p1, char *res1, char *res2,
+                     vector unsigned char *v) {
+  vector unsigned char vv = *v;
+  __builtin_mma_build_dmr((__dmr1024*)res2, vv, vv, vv, vv, vv, vv, vv, vv);
+  __builtin_mma_disassemble_dmr(res1, (__dmr1024*)p1);
+}
diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
index 5a92d6e982511..ea2b99b0e5b20 100644
--- a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
+++ b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
@@ -16,6 +16,8 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
   __builtin_mma_dmsetdmrz(&vdmr);
   __builtin_mma_dmmr(&vdmr, (__dmr1024*)vpp);
   __builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp);
+  __builtin_mma_build_dmr(&vdmr, vc, vc, vc, vc, vc, vc, vc, vc);
+  __builtin_mma_disassemble_dmr(vdmrp, &vdmr);
 
 // CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
 // CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
@@ -26,4 +28,6 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
 // CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions
 // CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions
 // CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_mma_build_dmr' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_mma_disassemble_dmr' needs target feature mma,isa-future-instructions
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 7dd9ff7f08b8b..edecde1f223a7 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1701,6 +1701,16 @@ let TargetPrefix = "ppc" in {
       DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
                              llvm_i32_ty], [IntrNoMem]>;
 
+  def int_ppc_mma_disassemble_dmr :
+      DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_v1024i1_ty],
+                            [IntrWriteMem, IntrArgMemOnly]>;
+
+  def int_ppc_mma_build_dmr :
+      DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+                             llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
+                             llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+                            [IntrNoMem]>;
+
   // MMA Reduced-Precision: Outer Product Intrinsic Definitions.
   defm int_ppc_mma_xvi4ger8 :
         PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2698bd6f37c59..1580682c6310b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11292,6 +11292,25 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getMergeValues(RetOps, dl);
   }
 
+  case Intrinsic::ppc_mma_build_dmr: {
+    SmallVector<SDValue, 8> Pairs;
+    SmallVector<SDValue, 8> Chains;
+    for (int i = 1; i < 9; i += 2) {
+      SDValue Hi = Op.getOperand(i);
+      SDValue Lo = Op.getOperand(i + 1);
+      if (Hi->getOpcode() == ISD::LOAD)
+        Chains.push_back(Hi.getValue(1));
+      if (Lo->getOpcode() == ISD::LOAD)
+        Chains.push_back(Lo.getValue(1));
+      Pairs.push_back(
+          DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
+    }
+    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+    SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
+    SDValue RetOps[] = {Value, TF};
+    return DAG.getMergeValues(RetOps, dl);
+  }
+
   case Intrinsic::ppc_mma_dmxxextfdmr512: {
     assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
     auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -11628,6 +11647,10 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
             Op.getOperand(0)),
         0);
   }
+  case Intrinsic::ppc_mma_disassemble_dmr: {
+    return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
+                        Op.getOperand(ArgStart + 1), MachinePointerInfo());
+  }
   default:
     break;
   }
@@ -12117,6 +12140,24 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
   return DAG.getMergeValues({DmrPValue, TF}, dl);
 }
 
+SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
+                                         const SDLoc &dl,
+                                         SelectionDAG &DAG) const {
+  SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
+                                Pairs[1]),
+             0);
+  SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
+  SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
+                                Pairs[2], Pairs[3]),
+             0);
+  SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
+  SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
+  const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+
+  return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops),
+                 0);
+}
+
 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                            SelectionDAG &DAG) const {
   SDLoc dl(Op);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 9755f0e272d16..644b0c5b3941e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1361,6 +1361,8 @@ namespace llvm {
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
+    SDValue DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
+                          const SDLoc &dl, SelectionDAG &DAG) const;
 
     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                             CallingConv::ID CallConv, bool isVarArg,
diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
index 1e3014405ac4e..a505ac4c2434a 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-enable.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
@@ -367,6 +367,69 @@ entry:
   ret void
 }
 
+define void @tbuild(ptr %p1, ptr %p2, ptr %res1, ptr %res2, ptr %v) {
+; CHECK-LABEL: tbuild:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv v3, 0(r7)
+; CHECK-NEXT:    vmr v2, v3
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r6)
+; CHECK-NEXT:    stxvp vsp36, 64(r6)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r6)
+; CHECK-NEXT:    stxvp vsp36, 0(r6)
+; CHECK-NEXT:    lxvp vsp34, 0(r3)
+; CHECK-NEXT:    lxvp vsp36, 32(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r3)
+; CHECK-NEXT:    lxvp vsp36, 96(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r5)
+; CHECK-NEXT:    stxvp vsp36, 64(r5)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r5)
+; CHECK-NEXT:    stxvp vsp36, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: tbuild:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v3, 0(r7)
+; CHECK-BE-NEXT:    vmr v2, v3
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <16 x i8>, ptr %v, align 16
+  %1 = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0)
+  store <1024 x i1> %1, ptr %res2, align 128
+  %2 = load <1024 x i1>, ptr %p1, align 128
+  tail call void @llvm.ppc.mma.disassemble.dmr(ptr %res1, <1024 x i1> %2)
+  ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+declare void @llvm.ppc.mma.disassemble.dmr(ptr, <1024 x i1>)
 declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
 declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
 declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)

@llvmbot
Copy link
Member

llvmbot commented Aug 11, 2025

@llvm/pr-subscribers-clang

Author: None (RolandF77)

Changes

Add support for PPC Dense Math builtins mma_build_dmr and mma_disassemble_dmr builtins.


Full diff: https://github.com/llvm/llvm-project/pull/153097.diff

8 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsPPC.def (+4)
  • (modified) clang/lib/CodeGen/TargetBuiltins/PPC.cpp (+6-1)
  • (modified) clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c (+27-9)
  • (modified) clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c (+4)
  • (modified) llvm/include/llvm/IR/IntrinsicsPowerPC.td (+10)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+41)
  • (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+2)
  • (modified) llvm/test/CodeGen/PowerPC/dmr-enable.ll (+63)
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index e7d6741eb695e..7f4b12b0f4e15 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1098,6 +1098,10 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmmr, "vW1024*W1024*", false,
                          "mma,isa-future-instructions")
 UNALIASED_CUSTOM_BUILTIN(mma_dmxor, "vW1024*W1024*", true,
                          "mma,isa-future-instructions")
+UNALIASED_CUSTOM_BUILTIN(mma_disassemble_dmr, "vv*W1024*", false,
+                         "mma,isa-future-instructions")
+UNALIASED_CUSTOM_BUILTIN(mma_build_dmr, "vW1024*VVVVVVVV", false,
+                         "mma,isa-future-instructions")
 
 // MMA builtins with positive/negative multiply/accumulate.
 UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
diff --git a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
index 270e9fc976f23..f3dd349651238 100644
--- a/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/PPC.cpp
@@ -1152,10 +1152,15 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
       CallOps.push_back(Acc);
     }
     if (BuiltinID == PPC::BI__builtin_mma_dmmr ||
-        BuiltinID == PPC::BI__builtin_mma_dmxor) {
+        BuiltinID == PPC::BI__builtin_mma_dmxor ||
+        BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) {
       Address Addr = EmitPointerWithAlignment(E->getArg(1));
       Ops[1] = Builder.CreateLoad(Addr);
     }
+    if (BuiltinID == PPC::BI__builtin_mma_disassemble_dmr) {
+      dbgs() << "&&& No disassemble!!!\n";
+      return Builder.CreateAlignedStore(Ops[1], Ops[0], MaybeAlign());
+    }
     for (unsigned i=1; i<Ops.size(); i++)
       CallOps.push_back(Ops[i]);
     llvm::Function *F = CGM.getIntrinsic(ID);
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
index 4aafc09602228..c66f5e2a32919 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-dmf.c
@@ -93,18 +93,36 @@ void test_pmdmxvi8gerx4spp(unsigned char *vdmrp, unsigned char *vpp, vector unsi
   *((__dmr1024 *)resp) = vdmr;
 }
 
-// CHECK-LABEL: @test_dmf_basic
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
-// CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr %res1, align 128
-// CHECK-NEXT: [[TMP2:%.*]] = load <1024 x i1>, ptr %res2, align 128
-// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr %p, align 128
-// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
-// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr %res2, align 128
+// CHECK-LABEL: @test_dmf_basic(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> [[TMP0]])
+// CHECK-NEXT:    store <1024 x i1> [[TMP1]], ptr [[RES1:%.*]], align 128
+// CHECK-NEXT:    [[TMP2:%.*]] = load <1024 x i1>, ptr [[RES2:%.*]], align 128
+// CHECK-NEXT:    [[TMP3:%.*]] = load <1024 x i1>, ptr [[P:%.*]], align 128
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
+// CHECK-NEXT:    store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128
+// CHECK-NEXT:    ret void
+//
 void test_dmf_basic(char *p, char *res1, char *res2) {
   __dmr1024 x[2];
   __builtin_mma_dmsetdmrz(&x[0]);
   __builtin_mma_dmmr((__dmr1024*)res1, &x[0]);
   __builtin_mma_dmxor((__dmr1024*)res2, (__dmr1024*)p);
 }
+
+// CHECK-LABEL: @test_dmf_basic2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[V:%.*]], align 16, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]], <16 x i8> [[TMP0]])
+// CHECK-NEXT:    store <1024 x i1> [[TMP1]], ptr [[RES2:%.*]], align 128
+// CHECK-NEXT:    [[TMP2:%.*]] = load <1024 x i1>, ptr [[P1:%.*]], align 128
+// CHECK-NEXT:    store <1024 x i1> [[TMP2]], ptr [[RES1:%.*]], align 128
+// CHECK-NEXT:    ret void
+//
+void test_dmf_basic2(char *p1, char *res1, char *res2,
+                     vector unsigned char *v) {
+  vector unsigned char vv = *v;
+  __builtin_mma_build_dmr((__dmr1024*)res2, vv, vv, vv, vv, vv, vv, vv, vv);
+  __builtin_mma_disassemble_dmr(res1, (__dmr1024*)p1);
+}
diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
index 5a92d6e982511..ea2b99b0e5b20 100644
--- a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
+++ b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
@@ -16,6 +16,8 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
   __builtin_mma_dmsetdmrz(&vdmr);
   __builtin_mma_dmmr(&vdmr, (__dmr1024*)vpp);
   __builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp);
+  __builtin_mma_build_dmr(&vdmr, vc, vc, vc, vc, vc, vc, vc, vc);
+  __builtin_mma_disassemble_dmr(vdmrp, &vdmr);
 
 // CHECK: error: '__builtin_mma_dmxvi8gerx4' needs target feature mma,paired-vector-memops
 // CHECK: error: '__builtin_mma_pmdmxvi8gerx4' needs target feature mma,paired-vector-memops
@@ -26,4 +28,6 @@ void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc)
 // CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions
 // CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions
 // CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_mma_build_dmr' needs target feature mma,isa-future-instructions
+// CHECK: error: '__builtin_mma_disassemble_dmr' needs target feature mma,isa-future-instructions
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 7dd9ff7f08b8b..edecde1f223a7 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1701,6 +1701,16 @@ let TargetPrefix = "ppc" in {
       DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty, llvm_v256i1_ty,
                              llvm_i32_ty], [IntrNoMem]>;
 
+  def int_ppc_mma_disassemble_dmr :
+      DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_v1024i1_ty],
+                            [IntrWriteMem, IntrArgMemOnly]>;
+
+  def int_ppc_mma_build_dmr :
+      DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+                             llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
+                             llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+                            [IntrNoMem]>;
+
   // MMA Reduced-Precision: Outer Product Intrinsic Definitions.
   defm int_ppc_mma_xvi4ger8 :
         PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2698bd6f37c59..1580682c6310b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11292,6 +11292,25 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return DAG.getMergeValues(RetOps, dl);
   }
 
+  case Intrinsic::ppc_mma_build_dmr: {
+    SmallVector<SDValue, 8> Pairs;
+    SmallVector<SDValue, 8> Chains;
+    for (int i = 1; i < 9; i += 2) {
+      SDValue Hi = Op.getOperand(i);
+      SDValue Lo = Op.getOperand(i + 1);
+      if (Hi->getOpcode() == ISD::LOAD)
+        Chains.push_back(Hi.getValue(1));
+      if (Lo->getOpcode() == ISD::LOAD)
+        Chains.push_back(Lo.getValue(1));
+      Pairs.push_back(
+          DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
+    }
+    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+    SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
+    SDValue RetOps[] = {Value, TF};
+    return DAG.getMergeValues(RetOps, dl);
+  }
+
   case Intrinsic::ppc_mma_dmxxextfdmr512: {
     assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
     auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -11628,6 +11647,10 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
             Op.getOperand(0)),
         0);
   }
+  case Intrinsic::ppc_mma_disassemble_dmr: {
+    return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
+                        Op.getOperand(ArgStart + 1), MachinePointerInfo());
+  }
   default:
     break;
   }
@@ -12117,6 +12140,24 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
   return DAG.getMergeValues({DmrPValue, TF}, dl);
 }
 
+SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
+                                         const SDLoc &dl,
+                                         SelectionDAG &DAG) const {
+  SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
+                                Pairs[1]),
+             0);
+  SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
+  SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
+                                Pairs[2], Pairs[3]),
+             0);
+  SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
+  SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
+  const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
+
+  return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops),
+                 0);
+}
+
 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                            SelectionDAG &DAG) const {
   SDLoc dl(Op);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 9755f0e272d16..644b0c5b3941e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1361,6 +1361,8 @@ namespace llvm {
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
+    SDValue DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
+                          const SDLoc &dl, SelectionDAG &DAG) const;
 
     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                             CallingConv::ID CallConv, bool isVarArg,
diff --git a/llvm/test/CodeGen/PowerPC/dmr-enable.ll b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
index 1e3014405ac4e..a505ac4c2434a 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-enable.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-enable.ll
@@ -367,6 +367,69 @@ entry:
   ret void
 }
 
+define void @tbuild(ptr %p1, ptr %p2, ptr %res1, ptr %res2, ptr %v) {
+; CHECK-LABEL: tbuild:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lxv v3, 0(r7)
+; CHECK-NEXT:    vmr v2, v3
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r6)
+; CHECK-NEXT:    stxvp vsp36, 64(r6)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r6)
+; CHECK-NEXT:    stxvp vsp36, 0(r6)
+; CHECK-NEXT:    lxvp vsp34, 0(r3)
+; CHECK-NEXT:    lxvp vsp36, 32(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r3)
+; CHECK-NEXT:    lxvp vsp36, 96(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r5)
+; CHECK-NEXT:    stxvp vsp36, 64(r5)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r5)
+; CHECK-NEXT:    stxvp vsp36, 0(r5)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: tbuild:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lxv v3, 0(r7)
+; CHECK-BE-NEXT:    vmr v2, v3
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp34, vsp34, 1
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp34, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = load <16 x i8>, ptr %v, align 16
+  %1 = tail call <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0, <16 x i8> %0)
+  store <1024 x i1> %1, ptr %res2, align 128
+  %2 = load <1024 x i1>, ptr %p1, align 128
+  tail call void @llvm.ppc.mma.disassemble.dmr(ptr %res1, <1024 x i1> %2)
+  ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.build.dmr(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+declare void @llvm.ppc.mma.disassemble.dmr(ptr, <1024 x i1>)
 declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
 declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
 declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)

@RolandF77 RolandF77 requested review from lei137 and maryammo August 11, 2025 22:55
Copy link
Contributor

@maryammo maryammo left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

// CHECK-NEXT: [[TMP3:%.*]] = load <1024 x i1>, ptr [[P:%.*]], align 128
// CHECK-NEXT: [[TMP4:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> [[TMP2]], <1024 x i1> [[TMP3]])
// CHECK-NEXT: store <1024 x i1> [[TMP4]], ptr [[RES2]], align 128
// CHECK-NEXT: ret void
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems irrelevant to this pr.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is but it was done by the tool to update test checks, not by me, so I'm not sure there's a way around it. Note that the test itself has not been changed.

SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
SDValue RetOps[] = {Value, TF};
return DAG.getMergeValues(RetOps, dl);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: perhaps we could remove the tmp variables when possible
return DAG.getMergeValues({Value, TF}, dl);

SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};

return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit : same here

@RolandF77 RolandF77 merged commit d1cbe6e into llvm:main Aug 25, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 25, 2025

LLVM Buildbot has detected a new failure on builder arc-builder running on arc-worker while building clang,llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/21070

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/X86/sse2-intrinsics-fast-isel.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
/buildbot/worker/arc-folder/build/bin/llc < /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | /buildbot/worker/arc-folder/build/bin/FileCheck /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll --check-prefixes=CHECK,X86,SSE,X86-SSE # RUN: at line 2
+ /buildbot/worker/arc-folder/build/bin/llc -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2
+ /buildbot/worker/arc-folder/build/bin/FileCheck /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll --check-prefixes=CHECK,X86,SSE,X86-SSE
LLVM ERROR: Cannot select: intrinsic %llvm.x86.sse2.clflush
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /buildbot/worker/arc-folder/build/bin/llc -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2
1.	Running pass 'Function Pass Manager' on module '<stdin>'.
2.	Running pass 'X86 DAG->DAG Instruction Selection' on function '@test_mm_clflush'
 #0 0x00000000023387b8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/buildbot/worker/arc-folder/build/bin/llc+0x23387b8)
 #1 0x00000000023356c5 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #2 0x00007fb07f97e630 __restore_rt sigaction.c:0:0
 #3 0x00007fb07e6ce3d7 raise (/usr/lib64/libc.so.6+0x363d7)
 #4 0x00007fb07e6cfac8 abort (/usr/lib64/libc.so.6+0x37ac8)
 #5 0x0000000000720bed llvm::json::operator==(llvm::json::Value const&, llvm::json::Value const&) (.cold) JSON.cpp:0:0
 #6 0x00000000020c3639 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/buildbot/worker/arc-folder/build/bin/llc+0x20c3639)
 #7 0x00000000020c816a llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/buildbot/worker/arc-folder/build/bin/llc+0x20c816a)
 #8 0x0000000000965b87 (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*) X86ISelDAGToDAG.cpp:0:0
 #9 0x00000000020bee9f llvm::SelectionDAGISel::DoInstructionSelection() (/buildbot/worker/arc-folder/build/bin/llc+0x20bee9f)
#10 0x00000000020cebf0 llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/buildbot/worker/arc-folder/build/bin/llc+0x20cebf0)
#11 0x00000000020d289a llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/buildbot/worker/arc-folder/build/bin/llc+0x20d289a)
#12 0x00000000020d34f5 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/buildbot/worker/arc-folder/build/bin/llc+0x20d34f5)
#13 0x00000000020be6af llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/buildbot/worker/arc-folder/build/bin/llc+0x20be6af)
#14 0x0000000001213837 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (.part.0) MachineFunctionPass.cpp:0:0
#15 0x00000000018695c2 llvm::FPPassManager::runOnFunction(llvm::Function&) (/buildbot/worker/arc-folder/build/bin/llc+0x18695c2)
#16 0x0000000001869961 llvm::FPPassManager::runOnModule(llvm::Module&) (/buildbot/worker/arc-folder/build/bin/llc+0x1869961)
#17 0x000000000186a577 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/buildbot/worker/arc-folder/build/bin/llc+0x186a577)
#18 0x0000000000803262 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
#19 0x0000000000729056 main (/buildbot/worker/arc-folder/build/bin/llc+0x729056)
#20 0x00007fb07e6ba555 __libc_start_main (/usr/lib64/libc.so.6+0x22555)
#21 0x00000000007f9486 _start (/buildbot/worker/arc-folder/build/bin/llc+0x7f9486)
/buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll:399:14: error: SSE-LABEL: expected string not found in input
; SSE-LABEL: test_mm_bsrli_si128:
             ^
<stdin>:170:21: note: scanning from here
test_mm_bslli_si128: # @test_mm_bslli_si128
                    ^
<stdin>:178:9: note: possible intended match here
 .globl test_mm_bsrli_si128 # 
        ^

Input file: <stdin>
Check file: /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

-dump-input=help explains the following input dump.
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:PowerPC clang:codegen IR generation bugs: mangling, exceptions, etc. clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category llvm:ir
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants