-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[PowerPC] Add DMR and WACC COPY support #149129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-powerpc Author: Maryam Moghadas (maryammo) Changes: This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns. Full diff: https://github.com/llvm/llvm-project/pull/149129.diff 7 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 3808a26a0b92a..a5e1522753c8b 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -50,7 +50,7 @@ add_llvm_target(PowerPCCodeGen
PPCTargetTransformInfo.cpp
PPCTOCRegDeps.cpp
PPCTLSDynamicCall.cpp
- PPCVSXCopy.cpp
+ PPCVSXWACCCopy.cpp
PPCReduceCRLogicals.cpp
PPCVSXFMAMutate.cpp
PPCVSXSwapRemoval.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 124dac4584312..a8f0f215ebee5 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -39,7 +39,7 @@ class ModulePass;
FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM);
FunctionPass *createPPCTOCRegDepsPass();
FunctionPass *createPPCEarlyReturnPass();
- FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXWACCCopyPass();
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCVSXSwapRemovalPass();
FunctionPass *createPPCReduceCRLogicalsPass();
@@ -64,7 +64,7 @@ class ModulePass;
void initializePPCLoopInstrFormPrepPass(PassRegistry&);
void initializePPCTOCRegDepsPass(PassRegistry&);
void initializePPCEarlyReturnPass(PassRegistry&);
- void initializePPCVSXCopyPass(PassRegistry&);
+ void initializePPCVSXWACCCopyPass(PassRegistry&);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCVSXSwapRemovalPass(PassRegistry&);
void initializePPCReduceCRLogicalsPass(PassRegistry&);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 7c1550e99bae1..7cb7e05b55ca0 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -1863,6 +1864,48 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcRegSub1)
.addReg(SrcRegSub1, getKillRegState(KillSrc));
return;
+ } else if ((PPC::WACCRCRegClass.contains(DestReg) ||
+ PPC::WACC_HIRCRegClass.contains(DestReg)) &&
+ (PPC::WACCRCRegClass.contains(SrcReg) ||
+ PPC::WACC_HIRCRegClass.contains(SrcReg))) {
+
+ Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512
+ : PPC::DMXXEXTFDMR512_HI;
+
+ RegScavenger RS;
+ RS.enterBasicBlockEnd(MBB);
+ RS.backward(std::next(I));
+
+ Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
+
+ RS.setRegUsed(TmpReg1);
+ Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
+
+ BuildMI(MBB, I, DL, get(Opc))
+ .addReg(TmpReg1, RegState::Define)
+ .addReg(TmpReg2, RegState::Define)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512
+ : PPC::DMXXINSTDMR512_HI;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(TmpReg1, RegState::Kill)
+ .addReg(TmpReg2, RegState::Kill);
+
+ return;
+ } else if (PPC::DMRRCRegClass.contains(DestReg) &&
+ PPC::DMRRCRegClass.contains(SrcReg)) {
+
+ BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+
+ return;
+
} else
llvm_unreachable("Impossible reg-to-reg copy");
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index b5c6ac111dff0..ae92d5eab20cd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -129,7 +129,7 @@ LLVMInitializePowerPCTarget() {
initializePPCLoopInstrFormPrepPass(PR);
initializePPCTOCRegDepsPass(PR);
initializePPCEarlyReturnPass(PR);
- initializePPCVSXCopyPass(PR);
+ initializePPCVSXWACCCopyPass(PR);
initializePPCVSXFMAMutatePass(PR);
initializePPCVSXSwapRemovalPass(PR);
initializePPCReduceCRLogicalsPass(PR);
@@ -528,7 +528,7 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
- addPass(createPPCVSXCopyPass());
+ addPass(createPPCVSXWACCCopyPass());
return false;
}
diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
similarity index 76%
rename from llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
rename to llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
index 794095cd43769..044c945fc2049 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp
@@ -1,4 +1,4 @@
-//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===//
+//===-------------- PPCVSXWACCCopy.cpp - VSX and WACC Copy Legalization ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
//
// A pass which deals with the complexity of generating legal VSX register
// copies to/from register classes which partially overlap with the VSX
-// register file.
+// register file and combines the wacc/wacc_hi copies when needed.
//
//===----------------------------------------------------------------------===//
@@ -29,12 +29,12 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-vsx-copy"
namespace {
- // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // PPCVSXWACCCopy pass - For copies between VSX registers and non-VSX registers
// (Altivec and scalar floating-point registers), we need to transform the
// copies into subregister copies with other restrictions.
- struct PPCVSXCopy : public MachineFunctionPass {
+ struct PPCVSXWACCCopy : public MachineFunctionPass {
static char ID;
- PPCVSXCopy() : MachineFunctionPass(ID) {}
+ PPCVSXWACCCopy() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
@@ -122,6 +122,33 @@ namespace {
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
SrcMO.setSubReg(PPC::sub_64);
+ } else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) &&
+ IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) {
+ // Matches the pattern:
+ // %a:waccrc = COPY %b.sub_wacc_hi:dmrrc
+ // %c:wacc_hirc = COPY %a:waccrc
+ // And replaces it with:
+ // %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(SrcMO.getReg());
+ if (!DefMI || !DefMI->isCopy())
+ continue;
+
+ MachineOperand &OrigSrc = DefMI->getOperand(1);
+
+ if (!IsRegInClass(OrigSrc.getReg(), &PPC::DMRRCRegClass, MRI))
+ continue;
+
+ if (OrigSrc.getSubReg() != PPC::sub_wacc_hi)
+ continue;
+
+ // Rewrite the second copy to use the original register's subreg
+ SrcMO.setReg(OrigSrc.getReg());
+ SrcMO.setSubReg(PPC::sub_wacc_hi);
+ Changed = true;
+
+ // Remove the intermediate copy if safe
+ if (MRI.use_nodbg_empty(DefMI->getOperand(0).getReg()))
+ DefMI->eraseFromParent();
}
}
@@ -151,9 +178,9 @@ namespace {
};
} // end anonymous namespace
-INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+INITIALIZE_PASS(PPCVSXWACCCopy, DEBUG_TYPE,
"PowerPC VSX Copy Legalization", false, false)
-char PPCVSXCopy::ID = 0;
+char PPCVSXWACCCopy::ID = 0;
FunctionPass*
-llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+llvm::createPPCVSXWACCCopyPass() { return new PPCVSXWACCCopy(); }
diff --git a/llvm/test/CodeGen/PowerPC/dmr-copy.ll b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
new file mode 100644
index 0000000000000..d5a24309f94d5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/dmr-copy.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \
+; RUN: -mcpu=future -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test_wacc_copy(ptr noundef %vdmrp, ptr noundef %vpp, <16 x i8> noundef %vc, ptr noundef %resp) #0 {
+; CHECK-LABEL: test_wacc_copy:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: std r31, -8(r1)
+; CHECK-NEXT: std r30, -16(r1)
+; CHECK-NEXT: mr r30, r1
+; CHECK-NEXT: clrldi r0, r1, 57
+; CHECK-NEXT: subfic r0, r0, -384
+; CHECK-NEXT: stdux r1, r1, r0
+; CHECK-NEXT: .cfi_def_cfa_register r30
+; CHECK-NEXT: .cfi_offset r31, -8
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: mr r31, r1
+; CHECK-NEXT: std r3, 360(r31)
+; CHECK-NEXT: std r4, 352(r31)
+; CHECK-NEXT: stxv v2, 336(r31)
+; CHECK-NEXT: std r7, 328(r31)
+; CHECK-NEXT: ld r3, 360(r31)
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r31)
+; CHECK-NEXT: stxvp vsp36, 192(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r31)
+; CHECK-NEXT: stxvp vsp36, 128(r31)
+; CHECK-NEXT: ld r3, 352(r31)
+; CHECK-NEXT: lxv v2, 16(r3)
+; CHECK-NEXT: lxv v3, 0(r3)
+; CHECK-NEXT: stxv v2, 112(r31)
+; CHECK-NEXT: stxv v3, 96(r31)
+; CHECK-NEXT: lxv v2, 112(r31)
+; CHECK-NEXT: lxv v3, 96(r31)
+; CHECK-NEXT: lxv vs0, 336(r31)
+; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 224(r31)
+; CHECK-NEXT: stxvp vsp36, 192(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 160(r31)
+; CHECK-NEXT: stxvp vsp36, 128(r31)
+; CHECK-NEXT: lxvp vsp34, 128(r31)
+; CHECK-NEXT: lxvp vsp36, 160(r31)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 192(r31)
+; CHECK-NEXT: lxvp vsp36, 224(r31)
+; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-NEXT: ld r3, 328(r31)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r3)
+; CHECK-NEXT: stxvp vsp36, 64(r3)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r3)
+; CHECK-NEXT: stxvp vsp36, 0(r3)
+; CHECK-NEXT: mr r1, r30
+; CHECK-NEXT: ld r31, -8(r1)
+; CHECK-NEXT: ld r30, -16(r1)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: test_wacc_copy:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: std r31, -8(r1)
+; CHECK-BE-NEXT: std r30, -16(r1)
+; CHECK-BE-NEXT: mr r30, r1
+; CHECK-BE-NEXT: clrldi r0, r1, 57
+; CHECK-BE-NEXT: subfic r0, r0, -384
+; CHECK-BE-NEXT: stdux r1, r1, r0
+; CHECK-BE-NEXT: mr r31, r1
+; CHECK-BE-NEXT: std r3, 360(r31)
+; CHECK-BE-NEXT: std r4, 352(r31)
+; CHECK-BE-NEXT: stxv v2, 336(r31)
+; CHECK-BE-NEXT: std r5, 328(r31)
+; CHECK-BE-NEXT: ld r3, 360(r31)
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT: ld r3, 352(r31)
+; CHECK-BE-NEXT: lxv v2, 0(r3)
+; CHECK-BE-NEXT: lxv v3, 16(r3)
+; CHECK-BE-NEXT: stxv v3, 112(r31)
+; CHECK-BE-NEXT: stxv v2, 96(r31)
+; CHECK-BE-NEXT: lxv v2, 96(r31)
+; CHECK-BE-NEXT: lxv v3, 112(r31)
+; CHECK-BE-NEXT: lxv vs0, 336(r31)
+; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 224(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 192(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 160(r31)
+; CHECK-BE-NEXT: stxvp vsp34, 128(r31)
+; CHECK-BE-NEXT: lxvp vsp34, 224(r31)
+; CHECK-BE-NEXT: lxvp vsp36, 192(r31)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 160(r31)
+; CHECK-BE-NEXT: lxvp vsp36, 128(r31)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
+; CHECK-BE-NEXT: ld r3, 328(r31)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r3)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r3)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r3)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r3)
+; CHECK-BE-NEXT: mr r1, r30
+; CHECK-BE-NEXT: ld r31, -8(r1)
+; CHECK-BE-NEXT: ld r30, -16(r1)
+; CHECK-BE-NEXT: blr
+entry:
+ %vdmrp.addr = alloca ptr, align 8
+ %vpp.addr = alloca ptr, align 8
+ %vc.addr = alloca <16 x i8>, align 16
+ %resp.addr = alloca ptr, align 8
+ %vdmr = alloca <1024 x i1>, align 128
+ %vp = alloca <256 x i1>, align 32
+ store ptr %vdmrp, ptr %vdmrp.addr, align 8
+ store ptr %vpp, ptr %vpp.addr, align 8
+ store <16 x i8> %vc, ptr %vc.addr, align 16
+ store ptr %resp, ptr %resp.addr, align 8
+ %0 = load ptr, ptr %vdmrp.addr, align 8
+ %1 = load <1024 x i1>, ptr %0, align 128
+ store <1024 x i1> %1, ptr %vdmr, align 128
+ %2 = load ptr, ptr %vpp.addr, align 8
+ %3 = load <256 x i1>, ptr %2, align 32
+ store <256 x i1> %3, ptr %vp, align 32
+ %4 = load <256 x i1>, ptr %vp, align 32
+ %5 = load <16 x i8>, ptr %vc.addr, align 16
+ %6 = call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %4, <16 x i8> %5)
+ store <1024 x i1> %6, ptr %vdmr, align 128
+ %7 = load <1024 x i1>, ptr %vdmr, align 128
+ %8 = load ptr, ptr %resp.addr, align 8
+ store <1024 x i1> %7, ptr %8, align 128
+ ret void
+}
+
+define void @foo(ptr noundef readonly captures(none) %p1, ptr noundef readonly captures(none) %p2, ptr noundef writeonly captures(none) initializes((0, 128)) %res1, ptr noundef writeonly captures(none) initializes((0, 128)) %res2) local_unnamed_addr #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: dmsetdmrz dmr0
+; CHECK-NEXT: lxvp vsp34, 0(r3)
+; CHECK-NEXT: lxvp vsp36, 32(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r3)
+; CHECK-NEXT: lxvp vsp36, 96(r3)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmmr dmr2, dmr0
+; CHECK-NEXT: dmxor dmr2, dmr1
+; CHECK-NEXT: lxvp vsp34, 0(r4)
+; CHECK-NEXT: lxvp vsp36, 32(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-NEXT: lxvp vsp34, 64(r4)
+; CHECK-NEXT: lxvp vsp36, 96(r4)
+; CHECK-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-NEXT: dmxor dmr0, dmr1
+; CHECK-NEXT: dmmr dmr1, dmr2
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-NEXT: stxvp vsp34, 96(r5)
+; CHECK-NEXT: stxvp vsp36, 64(r5)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-NEXT: stxvp vsp34, 32(r5)
+; CHECK-NEXT: stxvp vsp36, 0(r5)
+; CHECK-NEXT: dmmr dmr0, dmr0
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-NEXT: stxvp vsp34, 96(r6)
+; CHECK-NEXT: stxvp vsp36, 64(r6)
+; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-NEXT: stxvp vsp34, 32(r6)
+; CHECK-NEXT: stxvp vsp36, 0(r6)
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: foo:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: dmsetdmrz dmr0
+; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmmr dmr2, dmr0
+; CHECK-BE-NEXT: dmxor dmr2, dmr1
+; CHECK-BE-NEXT: lxvp vsp34, 96(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 64(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi1, vsp36, vsp34, 1
+; CHECK-BE-NEXT: lxvp vsp34, 32(r4)
+; CHECK-BE-NEXT: lxvp vsp36, 0(r4)
+; CHECK-BE-NEXT: dmxxinstdmr512 wacc1, vsp36, vsp34, 0
+; CHECK-BE-NEXT: dmxor dmr0, dmr1
+; CHECK-BE-NEXT: dmmr dmr1, dmr2
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi1, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
+; CHECK-BE-NEXT: dmmr dmr0, dmr0
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
+; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
+; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
+; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
+; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
+; CHECK-BE-NEXT: blr
+entry:
+ %0 = tail call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+ %1 = load <1024 x i1>, ptr %p1, align 128
+ %2 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %1)
+ %3 = load <1024 x i1>, ptr %p2, align 128
+ %4 = tail call <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1> %0, <1024 x i1> %3)
+ %5 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %2)
+ store <1024 x i1> %5, ptr %res1, align 128
+ %6 = tail call <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1> %4)
+ store <1024 x i1> %6, ptr %res2, align 128
+ ret void
+}
+
+declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
+declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
+declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="future" "target-features"="+64bit,+allow-unaligned-fp-access,+altivec,+bpermd,+cmpb,+crbits,+crypto,+direct-move,+extdiv,+fast-MFLR,+fcpsgn,+fpcvt,+fprnd,+fpu,+fre,+fres,+frsqrte,+frsqrtes,+fsqrt,+fuse-add-logical,+fuse-arith-add,+fuse-logical,+fuse-logical-add,+fuse-sha3,+fuse-store,+fusion,+hard-float,+icbt,+isa-future-instructions,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+isel,+ldbrx,+lfiwax,+mfocrf,+mma,+paired-vector-memops,+partword-atomics,+pcrelative-memops,+popcntd,+power10-vector,+power8-altivec,+power8-vector,+power9-altivec,+power9-vector,+ppc-postra-sched,+ppc-prera-sched,+predictable-select-expensive,+prefix-instrs,+quadword-atomics,+recipprec,+stfiwx,+two-const-nr,+vsx" }
+
+
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
index ea3615cee392a..8ab54156a8af2 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn
@@ -93,7 +93,7 @@ static_library("LLVMPowerPCCodeGen") {
"PPCTargetMachine.cpp",
"PPCTargetObjectFile.cpp",
"PPCTargetTransformInfo.cpp",
- "PPCVSXCopy.cpp",
+ "PPCVSXWACCCopy.cpp",
"PPCVSXFMAMutate.cpp",
"PPCVSXSwapRemoval.cpp",
]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
// %a:waccrc = COPY %b.sub_wacc_hi:dmrrc | ||
// %c:wacc_hirc = COPY %a:waccrc | ||
// And replaces it with: | ||
// %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: indent off
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will run this through clang-format. Since it modifies this file significantly, I kept it for after the review is done.
; CHECK-NEXT: .cfi_def_cfa_register r30 | ||
; CHECK-NEXT: .cfi_offset r31, -8 | ||
; CHECK-NEXT: .cfi_offset r30, -16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the attribute #0 needed to reproduce the test? Adding nounwind to the function will eliminate these.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, #0 is needed to reproduce the wacc copy, and it already contains nounwind.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Please address the nit before committing. Thx
@@ -122,6 +122,33 @@ namespace { | |||
// Transform the original copy into a subregister extraction copy. | |||
SrcMO.setReg(NewVReg); | |||
SrcMO.setSubReg(PPC::sub_64); | |||
} else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) && | |||
IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to add a comment about the type of copy this is meant to handle, similar to the blocks above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a comment right after that describes what it does.
This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns.
LLVM Buildbot has detected a new failure on a builder (full details: https://lab.llvm.org/buildbot/#/builders/141/builds/11136). Here is the relevant piece of the build log for reference:
|
This patch updates PPCInstrInfo::copyPhysReg to support DMR and WACC register classes and extends the PPCVSXCopy pass to handle specific WACC copy patterns.