Skip to content

Commit a96f1c9

Browse files
committed
AMDGPU: Stop special casing aligned VGPR targets in operand folding
Instead, perform a register class constraint check when performing the fold.
1 parent 640dc21 commit a96f1c9

File tree

3 files changed

+47
-42
lines changed

3 files changed

+47
-42
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ struct FoldCandidate {
173173

174174
class SIFoldOperandsImpl {
175175
public:
176+
MachineFunction *MF;
176177
MachineRegisterInfo *MRI;
177178
const SIInstrInfo *TII;
178179
const SIRegisterInfo *TRI;
@@ -705,6 +706,36 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
705706
}
706707

707708
MachineOperand *New = Fold.Def.OpToFold;
709+
710+
// Verify the register is compatible with the operand.
711+
if (const TargetRegisterClass *OpRC =
712+
TII->getRegClass(MI->getDesc(), Fold.UseOpNo, TRI, *MF)) {
713+
const TargetRegisterClass *OldRC = MRI->getRegClass(Old.getReg());
714+
const TargetRegisterClass *NewRC = MRI->getRegClass(New->getReg());
715+
unsigned NewSubReg = New->getSubReg();
716+
unsigned OldSubReg = Old.getSubReg();
717+
718+
const TargetRegisterClass *ConstrainRC = OpRC;
719+
if (NewSubReg && OldSubReg) {
720+
unsigned PreA, PreB;
721+
ConstrainRC = TRI->getCommonSuperRegClass(OpRC, OldSubReg, NewRC,
722+
NewSubReg, PreA, PreB);
723+
} else if (OldSubReg) {
724+
ConstrainRC = TRI->getMatchingSuperRegClass(OldRC, OpRC, OldSubReg);
725+
} else if (NewSubReg) {
726+
ConstrainRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, NewSubReg);
727+
}
728+
729+
if (!ConstrainRC)
730+
return false;
731+
732+
if (!MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
733+
LLVM_DEBUG(dbgs() << "Cannot constrain " << printReg(New->getReg(), TRI)
734+
<< TRI->getRegClassName(ConstrainRC) << '\n');
735+
return false;
736+
}
737+
}
738+
708739
// Rework once the VS_16 register class is updated to include proper
709740
// 16-bit SGPRs instead of 32-bit ones.
710741
if (Old.getSubReg() == AMDGPU::lo16 && TRI->isSGPRReg(*MRI, New->getReg()))
@@ -1429,30 +1460,9 @@ void SIFoldOperandsImpl::foldOperand(
14291460
return;
14301461
}
14311462

1432-
if (!FoldingImmLike) {
1433-
if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {
1434-
// Don't fold if OpToFold doesn't hold an aligned register.
1435-
const TargetRegisterClass *RC =
1436-
TRI->getRegClassForReg(*MRI, OpToFold.getReg());
1437-
assert(RC);
1438-
if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
1439-
unsigned SubReg = OpToFold.getSubReg();
1440-
if (const TargetRegisterClass *SubRC =
1441-
TRI->getSubRegisterClass(RC, SubReg))
1442-
RC = SubRC;
1443-
}
1444-
1445-
if (!RC || !TRI->isProperlyAlignedRC(*RC))
1446-
return;
1447-
}
1448-
1449-
tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);
1450-
1451-
// FIXME: We could try to change the instruction from 64-bit to 32-bit
1452-
// to enable more folding opportunities. The shrink operands pass
1453-
// already does this.
1454-
return;
1455-
}
1463+
// FIXME: We could try to change the instruction from 64-bit to 32-bit
1464+
// to enable more folding opportunities. The shrink operands pass
1465+
// already does this.
14561466

14571467
tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);
14581468
}
@@ -2747,6 +2757,7 @@ bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
27472757
}
27482758

27492759
bool SIFoldOperandsImpl::run(MachineFunction &MF) {
2760+
this->MF = &MF;
27502761
MRI = &MF.getRegInfo();
27512762
ST = &MF.getSubtarget<GCNSubtarget>();
27522763
TII = ST->getInstrInfo();

llvm/test/CodeGen/AMDGPU/si-fold-aligned-agprs.mir

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,8 @@ body: |
3131
; GFX90A: liveins: $vgpr0_vgpr1
3232
; GFX90A-NEXT: {{ $}}
3333
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
34-
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_64 = IMPLICIT_DEF
35-
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_64_align2 = COPY killed [[DEF]]
36-
; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec
34+
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_64_align2 = IMPLICIT_DEF
35+
; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, implicit $exec
3736
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
3837
%1:areg_64 = IMPLICIT_DEF
3938
%2:areg_64_align2 = COPY killed %1
@@ -105,9 +104,8 @@ body: |
105104
; GFX90A: liveins: $vgpr0_vgpr1
106105
; GFX90A-NEXT: {{ $}}
107106
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
108-
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_96 = IMPLICIT_DEF
109-
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_96_align2 = COPY killed [[DEF]]
110-
; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec
107+
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_96_align2 = IMPLICIT_DEF
108+
; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[DEF]], 0, 0, implicit $exec
111109
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
112110
%1:areg_96 = IMPLICIT_DEF
113111
%2:areg_96_align2 = COPY killed %1
@@ -234,9 +232,8 @@ body: |
234232
; GFX90A: liveins: $vgpr0_vgpr1
235233
; GFX90A-NEXT: {{ $}}
236234
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
237-
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_128 = IMPLICIT_DEF
238-
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY killed [[DEF]]
239-
; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec
235+
; GFX90A-NEXT: [[DEF:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF
236+
; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[DEF]], 0, 0, implicit $exec
240237
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
241238
%1:areg_128 = IMPLICIT_DEF
242239
%2:areg_128_align2 = COPY killed %1

llvm/test/CodeGen/AMDGPU/si-fold-aligned-vgprs.mir

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,8 @@ body: |
4646
; GFX90A: liveins: $vgpr0_vgpr1
4747
; GFX90A-NEXT: {{ $}}
4848
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
49-
; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
50-
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[DEF]]
51-
; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec
49+
; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
50+
; GFX90A-NEXT: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, implicit $exec
5251
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
5352
%1:vreg_64 = IMPLICIT_DEF
5453
%2:vreg_64_align2 = COPY killed %1
@@ -148,9 +147,8 @@ body: |
148147
; GFX90A: liveins: $vgpr0_vgpr1
149148
; GFX90A-NEXT: {{ $}}
150149
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
151-
; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_96 = IMPLICIT_DEF
152-
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_96_align2 = COPY killed [[DEF]]
153-
; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec
150+
; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF
151+
; GFX90A-NEXT: GLOBAL_STORE_DWORDX3 [[COPY]], [[DEF]], 0, 0, implicit $exec
154152
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
155153
%1:vreg_96 = IMPLICIT_DEF
156154
%2:vreg_96_align2 = COPY killed %1
@@ -326,9 +324,8 @@ body: |
326324
; GFX90A: liveins: $vgpr0_vgpr1
327325
; GFX90A-NEXT: {{ $}}
328326
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
329-
; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
330-
; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY killed [[DEF]]
331-
; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec
327+
; GFX90A-NEXT: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
328+
; GFX90A-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[DEF]], 0, 0, implicit $exec
332329
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
333330
%1:vreg_128 = IMPLICIT_DEF
334331
%2:vreg_128_align2 = COPY killed %1

0 commit comments

Comments
 (0)