Skip to content

Conversation

maryammo
Copy link
Contributor

This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns.

@maryammo maryammo self-assigned this Jul 16, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 16, 2025

@llvm/pr-subscribers-backend-powerpc

Author: Maryam Moghadas (maryammo)

Changes

This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns.


Full diff: https://github.com/llvm/llvm-project/pull/149129.diff

7 Files Affected:

  • (modified) llvm/lib/Target/PowerPC/CMakeLists.txt (+1-1)
  • (modified) llvm/lib/Target/PowerPC/PPC.h (+2-2)
  • (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+43)
  • (modified) llvm/lib/Target/PowerPC/PPCTargetMachine.cpp (+2-2)
  • (renamed) llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp (+35-8)
  • (added) llvm/test/CodeGen/PowerPC/dmr-copy.ll (+245)
  • (modified) llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn (+1-1)
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 3808a26a0b92a..a5e1522753c8b 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -50,7 +50,7 @@ add_llvm_target(PowerPCCodeGen
   PPCTargetTransformInfo.cpp
   PPCTOCRegDeps.cpp
   PPCTLSDynamicCall.cpp
-  PPCVSXCopy.cpp
+  PPCVSXWACCCopy.cpp
   PPCReduceCRLogicals.cpp
   PPCVSXFMAMutate.cpp
   PPCVSXSwapRemoval.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 124dac4584312..a8f0f215ebee5 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -39,7 +39,7 @@ class ModulePass;
   FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM);
   FunctionPass *createPPCTOCRegDepsPass();
   FunctionPass *createPPCEarlyReturnPass();
-  FunctionPass *createPPCVSXCopyPass();
+  FunctionPass *createPPCVSXWACCCopyPass();
   FunctionPass *createPPCVSXFMAMutatePass();
   FunctionPass *createPPCVSXSwapRemovalPass();
   FunctionPass *createPPCReduceCRLogicalsPass();
@@ -64,7 +64,7 @@ class ModulePass;
   void initializePPCLoopInstrFormPrepPass(PassRegistry&);
   void initializePPCTOCRegDepsPass(PassRegistry&);
   void initializePPCEarlyReturnPass(PassRegistry&);
-  void initializePPCVSXCopyPass(PassRegistry&);
+  void initializePPCVSXWACCCopyPass(PassRegistry&);
   void initializePPCVSXFMAMutatePass(PassRegistry&);
   void initializePPCVSXSwapRemovalPass(PassRegistry&);
   void initializePPCReduceCRLogicalsPass(PassRegistry&);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 7c1550e99bae1..7cb7e05b55ca0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/StackMaps.h"
@@ -1863,6 +1864,48 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
         .addReg(SrcRegSub1)
         .addReg(SrcRegSub1, getKillRegState(KillSrc));
     return;
+  } else if ((PPC::WACCRCRegClass.contains(DestReg) ||
+              PPC::WACC_HIRCRegClass.contains(DestReg)) &&
+             (PPC::WACCRCRegClass.contains(SrcReg) ||
+              PPC::WACC_HIRCRegClass.contains(SrcReg))) {
+
+    Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512
+                                               : PPC::DMXXEXTFDMR512_HI;
+
+    RegScavenger RS;
+    RS.enterBasicBlockEnd(MBB);
+    RS.backward(std::next(I));
+
+    Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+                                                    /* RestoreAfter */ false, 0,
+                                                    /* AllowSpill */ false);
+
+    RS.setRegUsed(TmpReg1);
+    Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+                                                    /* RestoreAfter */ false, 0,
+                                                    /* AllowSpill */ false);
+
+    BuildMI(MBB, I, DL, get(Opc))
+        .addReg(TmpReg1, RegState::Define)
+        .addReg(TmpReg2, RegState::Define)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+
+    Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512
+                                                : PPC::DMXXINSTDMR512_HI;
+
+    BuildMI(MBB, I, DL, get(Opc), DestReg)
+        .addReg(TmpReg1, RegState::Kill)
+        .addReg(TmpReg2, RegState::Kill);
+
+    return;
+  } else if (PPC::DMRRCRegClass.contains(DestReg) &&
+             PPC::DMRRCRegClass.contains(SrcReg)) {
+
+    BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+
+    return;
+
   } else
     llvm_unreachable("Impossible reg-to-reg copy");
 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index b5c6ac111dff0..ae92d5eab20cd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -129,7 +129,7 @@ LLVMInitializePowerPCTarget() {
   initializePPCLoopInstrFormPrepPass(PR);
   initializePPCTOCRegDepsPass(PR);
   initializePPCEarlyReturnPass(PR);
-  initializePPCVSXCopyPass(PR);
+  initializePPCVSXWACCCopyPass(PR);
   initializePPCVSXFMAMutatePass(PR);
   initializePPCVSXSwapRemovalPass(PR);
   initializePPCReduceCRLogicalsPass(PR);
@@ -528,7 +528,7 @@ bool PPCPassConfig::addInstSelector() {
     addPass(createPPCCTRLoopsVerify());
 #endif
 
-  addPass(createPPCVSXCopyPass());
+  addPass(createPPCVSXWACCCopyPass());
   return false;
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
similarity index 76%
rename from llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
rename to llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
index 794095cd43769..044c945fc2049 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
@@ -1,4 +1,4 @@
-//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
+//===--------- PPCVSXWACCCopy.cpp - VSX and WACC Copy Legalization --------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
 //
 // A pass which deals with the complexity of generating legal VSX register
 // copies to/from register classes which partially overlap with the VSX
-// register file.
+// register file and combines the wacc/wacc_hi copies when needed.
 //
 //===----------------------------------------------------------------------===//
 
@@ -29,12 +29,12 @@ using namespace llvm;
 #define DEBUG_TYPE "ppc-vsx-copy"
 
 namespace {
-  // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+  // PPCVSXWACCCopy pass - For copies between VSX registers and non-VSX registers
   // (Altivec and scalar floating-point registers), we need to transform the
   // copies into subregister copies with other restrictions.
-  struct PPCVSXCopy : public MachineFunctionPass {
+  struct PPCVSXWACCCopy : public MachineFunctionPass {
     static char ID;
-    PPCVSXCopy() : MachineFunctionPass(ID) {}
+    PPCVSXWACCCopy() : MachineFunctionPass(ID) {}
 
     const TargetInstrInfo *TII;
 
@@ -122,6 +122,33 @@ namespace {
           // Transform the original copy into a subregister extraction copy.
           SrcMO.setReg(NewVReg);
           SrcMO.setSubReg(PPC::sub_64);
+        } else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) &&
+                   IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) {
+          // Matches the pattern:
+          //   %a:waccrc = COPY %b.sub_wacc_hi:dmrrc
+          //   %c:wacc_hirc = COPY %a:waccrc
+          // And replaces it with:
+          //   %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc
+          MachineInstr *DefMI = MRI.getUniqueVRegDef(SrcMO.getReg());
+          if (!DefMI || !DefMI->isCopy())
+            continue;
+
+          MachineOperand &OrigSrc = DefMI->getOperand(1);
+
+          if (!IsRegInClass(OrigSrc.getReg(), &PPC::DMRRCRegClass, MRI))
+            continue;
+
+          if (OrigSrc.getSubReg() != PPC::sub_wacc_hi)
+            continue;
+
+          // Rewrite the second copy to use the original register's subreg
+          SrcMO.setReg(OrigSrc.getReg());
+          SrcMO.setSubReg(PPC::sub_wacc_hi);
+          Changed = true;
+
+          // Remove the intermediate copy if safe
+          if (MRI.use_nodbg_empty(DefMI->getOperand(0).getReg()))
+            DefMI->eraseFromParent();
         }
       }
 
@@ -151,9 +178,9 @@ namespace {
   };
   } // end anonymous namespace
 
-INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+INITIALIZE_PASS(PPCVSXWACCCopy, DEBUG_TYPE,
                 "PowerPC VSX Copy Legalization", false, false)
 
-char PPCVSXCopy::ID = 0;
+char PPCVSXWACCCopy::ID = 0;
 FunctionPass*
-llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+llvm::createPPCVSXWACCCopyPass() { return new PPCVSXWACCCopy(); }
diff --git a/llvm/test/CodeGen/PowerPC/dmr-copy.ll b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
new file mode 100644
index 0000000000000..d5a24309f94d5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noundef %vc, ptr noundef %resp) #0 {
+; CHECK-LABEL: test_wacc_copy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    std r31, -8(r1)
+; CHECK-NEXT:    std r30, -16(r1)
+; CHECK-NEXT:    mr r30, r1
+; CHECK-NEXT:    clrldi r0, r1, 57
+; CHECK-NEXT:    subfic r0, r0, -384
+; CHECK-NEXT:    stdux r1, r1, r0
+; CHECK-NEXT:    .cfi_def_cfa_register r30
+; CHECK-NEXT:    .cfi_offset r31, -8
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    mr r31, r1
+; CHECK-NEXT:    std r3, 360(r31)
+; CHECK-NEXT:    std r4, 352(r31)
+; CHECK-NEXT:    stxv v2, 336(r31)
+; CHECK-NEXT:    std r7, 328(r31)
+; CHECK-NEXT:    ld r3, 360(r31)
+; CHECK-NEXT:    lxvp vsp34, 0(r3)
+; CHECK-NEXT:    lxvp vsp36, 32(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r3)
+; CHECK-NEXT:    lxvp vsp36, 96(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 224(r31)
+; CHECK-NEXT:    stxvp vsp36, 192(r31)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 160(r31)
+; CHECK-NEXT:    stxvp vsp36, 128(r31)
+; CHECK-NEXT:    ld r3, 352(r31)
+; CHECK-NEXT:    lxv v2, 16(r3)
+; CHECK-NEXT:    lxv v3, 0(r3)
+; CHECK-NEXT:    stxv v2, 112(r31)
+; CHECK-NEXT:    stxv v3, 96(r31)
+; CHECK-NEXT:    lxv v2, 112(r31)
+; CHECK-NEXT:    lxv v3, 96(r31)
+; CHECK-NEXT:    lxv vs0, 336(r31)
+; CHECK-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 224(r31)
+; CHECK-NEXT:    stxvp vsp36, 192(r31)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 160(r31)
+; CHECK-NEXT:    stxvp vsp36, 128(r31)
+; CHECK-NEXT:    lxvp vsp34, 128(r31)
+; CHECK-NEXT:    lxvp vsp36, 160(r31)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 192(r31)
+; CHECK-NEXT:    lxvp vsp36, 224(r31)
+; CHECK-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT:    ld r3, 328(r31)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r3)
+; CHECK-NEXT:    stxvp vsp36, 64(r3)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r3)
+; CHECK-NEXT:    stxvp vsp36, 0(r3)
+; CHECK-NEXT:    mr r1, r30
+; CHECK-NEXT:    ld r31, -8(r1)
+; CHECK-NEXT:    ld r30, -16(r1)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test_wacc_copy:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    std r31, -8(r1)
+; CHECK-BE-NEXT:    std r30, -16(r1)
+; CHECK-BE-NEXT:    mr r30, r1
+; CHECK-BE-NEXT:    clrldi r0, r1, 57
+; CHECK-BE-NEXT:    subfic r0, r0, -384
+; CHECK-BE-NEXT:    stdux r1, r1, r0
+; CHECK-BE-NEXT:    mr r31, r1
+; CHECK-BE-NEXT:    std r3, 360(r31)
+; CHECK-BE-NEXT:    std r4, 352(r31)
+; CHECK-BE-NEXT:    stxv v2, 336(r31)
+; CHECK-BE-NEXT:    std r5, 328(r31)
+; CHECK-BE-NEXT:    ld r3, 360(r31)
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT:    stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT:    stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT:    ld r3, 352(r31)
+; CHECK-BE-NEXT:    lxv v2, 0(r3)
+; CHECK-BE-NEXT:    lxv v3, 16(r3)
+; CHECK-BE-NEXT:    stxv v3, 112(r31)
+; CHECK-BE-NEXT:    stxv v2, 96(r31)
+; CHECK-BE-NEXT:    lxv v2, 96(r31)
+; CHECK-BE-NEXT:    lxv v3, 112(r31)
+; CHECK-BE-NEXT:    lxv vs0, 336(r31)
+; CHECK-BE-NEXT:    dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT:    stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT:    stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT:    lxvp vsp34, 224(r31)
+; CHECK-BE-NEXT:    lxvp vsp36, 192(r31)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 160(r31)
+; CHECK-BE-NEXT:    lxvp vsp36, 128(r31)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    ld r3, 328(r31)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r3)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r3)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r3)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r3)
+; CHECK-BE-NEXT:    mr r1, r30
+; CHECK-BE-NEXT:    ld r31, -8(r1)
+; CHECK-BE-NEXT:    ld r30, -16(r1)
+; CHECK-BE-NEXT:    blr
+entry:
+  %vdmrp.addr = alloca ptr, align 8
+  %vpp.addr = alloca ptr, align 8
+  %vc.addr = alloca <16 x i8>, align 16
+  %resp.addr = alloca ptr, align 8
+  %vdmr = alloca <1024 x i1>, align 128
+  %vp = alloca <256 x i1>, align 32
+  store ptr %vdmrp, ptr %vdmrp.addr, align 8
+  store ptr %vpp, ptr %vpp.addr, align 8
+  store <16 x i8> %vc, ptr %vc.addr, align 16
+  store ptr %resp, ptr %resp.addr, align 8
+  %0 = load ptr, ptr %vdmrp.addr, align 8
+  %1 = load <1024 x i1>, ptr %0, align 128
+  store <1024 x i1> %1, ptr %vdmr, align 128
+  %2 = load ptr, ptr %vpp.addr, align 8
+  %3 = load <256 x i1>, ptr %2, align 32
+  store <256 x i1> %3, ptr %vp, align 32
+  %4 = load <256 x i1>, ptr %vp, align 32
+  %5 = load <16 x i8>, ptr %vc.addr, align 16
+  %6 = call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %4, <16 x i8> %5)
+  store <1024 x i1> %6, ptr %vdmr, align 128
+  %7 = load <1024 x i1>, ptr %vdmr, align 128
+  %8 = load ptr, ptr %resp.addr, align 8
+  store <1024 x i1> %7, ptr %8, align 128
+  ret void
+}
+
+define void @foo(ptr noundef readonly captures(none) %p1, ptr noundef readonly captures(none) %p2, ptr noundef writeonly captures(none) initializes((0, 128)) %res1, ptr noundef writeonly captures(none) initializes((0, 128)) %res2) local_unnamed_addr #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    dmsetdmrz dmr0
+; CHECK-NEXT:    lxvp vsp34, 0(r3)
+; CHECK-NEXT:    lxvp vsp36, 32(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r3)
+; CHECK-NEXT:    lxvp vsp36, 96(r3)
+; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT:    dmmr dmr2, dmr0
+; CHECK-NEXT:    dmxor dmr2, dmr1
+; CHECK-NEXT:    lxvp vsp34, 0(r4)
+; CHECK-NEXT:    lxvp vsp36, 32(r4)
+; CHECK-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT:    lxvp vsp34, 64(r4)
+; CHECK-NEXT:    lxvp vsp36, 96(r4)
+; CHECK-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT:    dmxor dmr0, dmr1
+; CHECK-NEXT:    dmmr dmr1, dmr2
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r5)
+; CHECK-NEXT:    stxvp vsp36, 64(r5)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r5)
+; CHECK-NEXT:    stxvp vsp36, 0(r5)
+; CHECK-NEXT:    dmmr dmr0, dmr0
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT:    stxvp vsp34, 96(r6)
+; CHECK-NEXT:    stxvp vsp36, 64(r6)
+; CHECK-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT:    stxvp vsp34, 32(r6)
+; CHECK-NEXT:    stxvp vsp36, 0(r6)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: foo:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    dmsetdmrz dmr0
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmmr dmr2, dmr0
+; CHECK-BE-NEXT:    dmxor dmr2, dmr1
+; CHECK-BE-NEXT:    lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT:    lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT:    lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT:    lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT:    dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT:    dmxor dmr0, dmr1
+; CHECK-BE-NEXT:    dmmr dmr1, dmr2
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT:    dmmr dmr0, dmr0
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT:    stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT:    stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT:    stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT:    stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+  %1 = load <1024 x i1>, ptr %p1, align 128
+  %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %1)
+  %3 = load <1024 x i1>, ptr %p2, align 128
+  %4 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %3)
+  %5 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %2)
+  store <1024 x i1> %5, ptr %res1, align 128
+  %6 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %4)
+  store <1024 x i1> %6, ptr %res2, align 128
+  ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="future" "target-features"="+64bit,+allow-unaligned-fp-access,+altivec,+bpermd,+cmpb,+crbits,+crypto,+direct-move,+extdiv,+fast-MFLR,+fcpsgn,+fpcvt,+fprnd,+fpu,+fre,+fres,+frsqrte,+frsqrtes,+fsqrt,+fuse-add-logical,+fuse-arith-add,+fuse-logical,+fuse-logical-add,+fuse-sha3,+fuse-store,+fusion,+hard-float,+icbt,+isa-future-instructions,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+isel,+ldbrx,+lfiwax,+mfocrf,+mma,+paired-vector-memops,+partword-atomics,+pcrelative-memops,+popcntd,+power10-vector,+power8-altivec,+power8-vector,+power9-altivec,+power9-vector,+ppc-postra-sched,+ppc-prera-sched,+predictable-select-expensive,+prefix-instrs,+quadword-atomics,+recipprec,+stfiwx,+two-const-nr,+vsx" }
+
+
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
index ea3615cee392a..8ab54156a8af2 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
@@ -93,7 +93,7 @@ static_library("LLVMPowerPCCodeGen") {
     "PPCTargetMachine.cpp",
     "PPCTargetObjectFile.cpp",
     "PPCTargetTransformInfo.cpp",
-    "PPCVSXCopy.cpp",
+    "PPCVSXWACCCopy.cpp",
     "PPCVSXFMAMutate.cpp",
     "PPCVSXSwapRemoval.cpp",
   ]

@maryammo maryammo requested review from RolandF77, redstar and lei137 July 16, 2025 16:02
Copy link

github-actions bot commented Jul 16, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

// %a:waccrc = COPY %b.sub_wacc_hi:dmrrc
// %c:wacc_hirc = COPY %a:waccrc
// And replaces it with:
// %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: indent off

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will run this through clang-format. Since doing so reformats this file significantly, I have deferred it until after the review is done.

Comment on lines +18 to +20
; CHECK-NEXT: .cfi_def_cfa_register r30
; CHECK-NEXT: .cfi_offset r31, -8
; CHECK-NEXT: .cfi_offset r30, -16
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is attribute #0 needed to reproduce the test?
Adding nounwind to the function will eliminate these .cfi directives.

Copy link
Contributor Author

@maryammo maryammo Jul 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, #0 is needed to reproduce the wacc copy, and it already contains nounwind.

Copy link
Contributor

@lei137 lei137 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM
Please address the nit before committing. Thx

@@ -122,6 +122,33 @@ namespace {
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
SrcMO.setSubReg(PPC::sub_64);
} else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) &&
IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to add a comment describing the type of copy this is meant to handle, similar to the blocks above.

Copy link
Contributor Author

@maryammo maryammo Aug 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a comment immediately after this condition that describes what it does.

This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register
classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns.
@maryammo maryammo merged commit 242d51a into llvm:main Aug 27, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Aug 27, 2025

LLVM Buildbot has detected a new failure on builder lldb-aarch64-windows running on linaro-armv8-windows-msvc-05 while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/141/builds/11136

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
UNSUPPORTED: lldb-api :: tools/lldb-dap/breakpoint/TestDAP_logpoints.py (1192 of 2295)
UNSUPPORTED: lldb-api :: tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py (1193 of 2295)
UNSUPPORTED: lldb-api :: tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py (1194 of 2295)
UNSUPPORTED: lldb-api :: tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py (1195 of 2295)
PASS: lldb-api :: tools/lldb-dap/cancel/TestDAP_cancel.py (1196 of 2295)
PASS: lldb-api :: tools/lldb-dap/commands/TestDAP_commands.py (1197 of 2295)
PASS: lldb-api :: tools/lldb-dap/completions/TestDAP_completions.py (1198 of 2295)
PASS: lldb-api :: tools/lldb-dap/console/TestDAP_console.py (1199 of 2295)
PASS: lldb-api :: tools/lldb-dap/console/TestDAP_redirection_to_console.py (1200 of 2295)
UNRESOLVED: lldb-api :: tools/lldb-dap/attach/TestDAP_attach.py (1201 of 2295)
******************** TEST 'lldb-api :: tools/lldb-dap/attach/TestDAP_attach.py' FAILED ********************
Script:
--
C:/Users/tcwg/scoop/apps/python/current/python.exe C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/llvm-project/lldb\test\API\dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/./lib --env LLVM_INCLUDE_DIR=C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/include --env LLVM_TOOLS_DIR=C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/./bin --arch aarch64 --build-dir C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/lldb-test-build.noindex --lldb-module-cache-dir C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/lldb-test-build.noindex/module-cache-lldb\lldb-api --clang-module-cache-dir C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/lldb-test-build.noindex/module-cache-clang\lldb-api --executable C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/./bin/lldb.exe --compiler C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/./bin/clang.exe --dsymutil C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/./bin/dsymutil.exe --make C:/Users/tcwg/scoop/shims/make.exe --llvm-tools-dir C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/./bin --lldb-obj-root C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/tools/lldb --lldb-libs-dir C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/./lib --cmake-build-type Release --skip-category=watchpoint C:\Users\tcwg\llvm-worker\lldb-aarch64-windows\llvm-project\lldb\test\API\tools\lldb-dap\attach -p TestDAP_attach.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision 242d51afe59ec87c31c35eb07c34075866269bd8)
  clang revision 242d51afe59ec87c31c35eb07c34075866269bd8
  llvm revision 242d51afe59ec87c31c35eb07c34075866269bd8
Skipping the following test categories: ['watchpoint', 'libc++', 'libstdcxx', 'dwo', 'dsym', 'gmodules', 'debugserver', 'objc', 'fork', 'pexpect']


--
Command Output (stderr):
--
========= DEBUG ADAPTER PROTOCOL LOGS =========

1756319518.769679308 (stdio) --> {"command":"initialize","type":"request","arguments":{"adapterID":"lldb-native","clientID":"vscode","columnsStartAt1":true,"linesStartAt1":true,"locale":"en-us","pathFormat":"path","supportsRunInTerminalRequest":true,"supportsVariablePaging":true,"supportsVariableType":true,"supportsStartDebuggingRequest":true,"supportsProgressReporting":true,"$__lldb_sourceInitFile":false},"seq":1}

1756319518.769898415 (stdio) queued (command=initialize seq=1)

1756319518.782240391 (stdio) <-- {"body":{"$__lldb_version":"lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision 242d51afe59ec87c31c35eb07c34075866269bd8)\n  clang revision 242d51afe59ec87c31c35eb07c34075866269bd8\n  llvm revision 242d51afe59ec87c31c35eb07c34075866269bd8","completionTriggerCharacters":["."," ","\t"],"exceptionBreakpointFilters":[{"description":"C++ Catch","filter":"cpp_catch","label":"C++ Catch","supportsCondition":true},{"description":"C++ Throw","filter":"cpp_throw","label":"C++ Throw","supportsCondition":true},{"description":"Objective-C Catch","filter":"objc_catch","label":"Objective-C Catch","supportsCondition":true},{"description":"Objective-C Throw","filter":"objc_throw","label":"Objective-C Throw","supportsCondition":true}],"supportTerminateDebuggee":true,"supportsBreakpointLocationsRequest":true,"supportsCancelRequest":true,"supportsCompletionsRequest":true,"supportsConditionalBreakpoints":true,"supportsConfigurationDoneRequest":true,"supportsDataBreakpoints":true,"supportsDelayedStackTraceLoading":true,"supportsDisassembleRequest":true,"supportsEvaluateForHovers":true,"supportsExceptionFilterOptions":true,"supportsExceptionInfoRequest":true,"supportsFunctionBreakpoints":true,"supportsHitConditionalBreakpoints":true,"supportsInstructionBreakpoints":true,"supportsLogPoints":true,"supportsModuleSymbolsRequest":true,"supportsModulesRequest":true,"supportsReadMemoryRequest":true,"supportsSetVariable":true,"supportsSteppingGranularity":true,"supportsValueFormattingOptions":true,"supportsWriteMemoryRequest":true},"command":"initialize","request_seq":1,"seq":0,"success":true,"type":"response"}

1756319518.782963514 (stdio) --> {"command":"attach","type":"request","arguments":{"program":"C:\\Users\\tcwg\\llvm-worker\\lldb-aarch64-windows\\build\\lldb-test-build.noindex\\tools\\lldb-dap\\attach\\TestDAP_attach.test_attach_command_process_failures\\d026f0e4-b077-41e3-9603-dce196a79047","initCommands":["settings clear --all","settings set symbols.enable-external-lookup false","settings set target.inherit-tcc true","settings set target.disable-aslr false","settings set target.detach-on-error false","settings set target.auto-apply-fixits false","settings set plugin.process.gdb-remote.packet-timeout 60","settings set symbols.clang-modules-cache-path \"C:/Users/tcwg/llvm-worker/lldb-aarch64-windows/build/lldb-test-build.noindex/module-cache-lldb\\lldb-api\"","settings set use-color false","settings set show-statusline false","settings set target.env-vars PATH="],"attachCommands":["script print(\"oops, forgot to attach to a process...\")"]},"seq":2}

1756319518.783033371 (stdio) queued (command=attach seq=2)

1756319518.783973694 (stdio) <-- {"body":{"category":"console","output":"Running initCommands:\n"},"event":"output","seq":0,"type":"event"}

1756319518.784025192 (stdio) <-- {"body":{"category":"console","output":"(lldb) settings clear --all\n"},"event":"output","seq":0,"type":"event"}

1756319518.784055233 (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set symbols.enable-external-lookup false\n"},"event":"output","seq":0,"type":"event"}

1756319518.784096718 (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set target.inherit-tcc true\n"},"event":"output","seq":0,"type":"event"}

1756319518.784127712 (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set target.disable-aslr false\n"},"event":"output","seq":0,"type":"event"}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants