@@ -173,6 +173,7 @@ struct FoldCandidate {
173
173
174
174
class SIFoldOperandsImpl {
175
175
public:
176
+ MachineFunction *MF;
176
177
MachineRegisterInfo *MRI;
177
178
const SIInstrInfo *TII;
178
179
const SIRegisterInfo *TRI;
@@ -705,6 +706,36 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
705
706
}
706
707
707
708
MachineOperand *New = Fold.Def .OpToFold ;
709
+
710
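+ // Constrain the folded register's class so it is compatible with the use operand (taking subregisters into account); return false if no such class exists or the constraint fails.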
711
+ if (const TargetRegisterClass *OpRC =
712
+ TII->getRegClass (MI->getDesc (), Fold.UseOpNo , TRI, *MF)) {
713
+ const TargetRegisterClass *OldRC = MRI->getRegClass (Old.getReg ());
714
+ const TargetRegisterClass *NewRC = MRI->getRegClass (New->getReg ());
715
+ unsigned NewSubReg = New->getSubReg ();
716
+ unsigned OldSubReg = Old.getSubReg ();
717
+
718
+ const TargetRegisterClass *ConstrainRC = OpRC;
719
+ if (NewSubReg && OldSubReg) {
720
+ unsigned PreA, PreB;
721
+ ConstrainRC = TRI->getCommonSuperRegClass (OpRC, OldSubReg, NewRC,
722
+ NewSubReg, PreA, PreB);
723
+ } else if (OldSubReg) {
724
+ ConstrainRC = TRI->getMatchingSuperRegClass (OldRC, OpRC, OldSubReg);
725
+ } else if (NewSubReg) {
726
+ ConstrainRC = TRI->getMatchingSuperRegClass (NewRC, OpRC, NewSubReg);
727
+ }
728
+
729
+ if (!ConstrainRC)
730
+ return false ;
731
+
732
+ if (!MRI->constrainRegClass (New->getReg (), ConstrainRC)) {
733
+ LLVM_DEBUG (dbgs () << " Cannot constrain " << printReg (New->getReg (), TRI)
734
+ << TRI->getRegClassName (ConstrainRC) << ' \n ' );
735
+ return false ;
736
+ }
737
+ }
738
+
708
739
// Rework once the VS_16 register class is updated to include proper
709
740
// 16-bit SGPRs instead of 32-bit ones.
710
741
if (Old.getSubReg () == AMDGPU::lo16 && TRI->isSGPRReg (*MRI, New->getReg ()))
@@ -1429,30 +1460,9 @@ void SIFoldOperandsImpl::foldOperand(
1429
1460
return ;
1430
1461
}
1431
1462
1432
- if (!FoldingImmLike) {
1433
- if (OpToFold.isReg () && ST->needsAlignedVGPRs ()) {
1434
- // Don't fold if OpToFold doesn't hold an aligned register.
1435
- const TargetRegisterClass *RC =
1436
- TRI->getRegClassForReg (*MRI, OpToFold.getReg ());
1437
- assert (RC);
1438
- if (TRI->hasVectorRegisters (RC) && OpToFold.getSubReg ()) {
1439
- unsigned SubReg = OpToFold.getSubReg ();
1440
- if (const TargetRegisterClass *SubRC =
1441
- TRI->getSubRegisterClass (RC, SubReg))
1442
- RC = SubRC;
1443
- }
1444
-
1445
- if (!RC || !TRI->isProperlyAlignedRC (*RC))
1446
- return ;
1447
- }
1448
-
1449
- tryAddToFoldList (FoldList, UseMI, UseOpIdx, OpToFold);
1450
-
1451
- // FIXME: We could try to change the instruction from 64-bit to 32-bit
1452
- // to enable more folding opportunities. The shrink operands pass
1453
- // already does this.
1454
- return ;
1455
- }
1463
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
1464
+ // to enable more folding opportunities. The shrink operands pass
1465
+ // already does this.
1456
1466
1457
1467
tryAddToFoldList (FoldList, UseMI, UseOpIdx, OpToFold);
1458
1468
}
@@ -2747,6 +2757,7 @@ bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
2747
2757
}
2748
2758
2749
2759
bool SIFoldOperandsImpl::run (MachineFunction &MF) {
2760
+ this ->MF = &MF;
2750
2761
MRI = &MF.getRegInfo ();
2751
2762
ST = &MF.getSubtarget <GCNSubtarget>();
2752
2763
TII = ST->getInstrInfo ();
0 commit comments