Skip to content

Commit d2a9739

Browse files
committed
AMDGPU/GlobalISel: Eliminate SelectVOP3Mods_f32
Trivial type predicates should be moved into the tablegen pattern itself rather than checked inside complex patterns. This eliminates a redundant complex pattern and fixes source-modifier folding for select in GlobalISel. I have further patches that fully handle select in tablegen and remove all of the C++ selection, although doing so requires some ugliness to support the entire range of legal register types.
1 parent 46044a6 commit d2a9739

File tree

5 files changed

+234
-23
lines changed

5 files changed

+234
-23
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
252252
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
253253

254254
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
255-
bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const;
256255
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
257256
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
258257
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
@@ -2430,15 +2429,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
24302429
return isNoNanSrc(Src);
24312430
}
24322431

2433-
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src,
2434-
SDValue &SrcMods) const {
2435-
if (In.getValueType() == MVT::f32)
2436-
return SelectVOP3Mods(In, Src, SrcMods);
2437-
Src = In;
2438-
SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);;
2439-
return true;
2440-
}
2441-
24422432
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
24432433
if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
24442434
return false;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1120,6 +1120,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
11201120
}
11211121

11221122
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
1123+
if (selectImpl(I, *CoverageInfo))
1124+
return true;
1125+
11231126
MachineBasicBlock *BB = I.getParent();
11241127
const DebugLoc &DL = I.getDebugLoc();
11251128

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,8 +1282,6 @@ def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
12821282
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
12831283
// VOP3Mods, but the input source is known to never be NaN.
12841284
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
1285-
// VOP3Mods, but only allowed for f32 operands.
1286-
def VOP3Mods_f32 : ComplexPattern<fAny, 2, "SelectVOP3Mods_f32">;
12871285

12881286
def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
12891287

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -888,18 +888,22 @@ def : FMADModsPat<V_MAD_F16, AMDGPUfmad_ftz, f16> {
888888
let SubtargetPredicate = Has16BitInsts;
889889
}
890890

891-
multiclass SelectPat <ValueType vt> {
892-
def : GCNPat <
893-
(vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods),
894-
(VOP3Mods_f32 vt:$src2, i32:$src2_mods))),
895-
(V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0)
896-
>;
897-
}
891+
class VOPSelectModsPat <ValueType vt> : GCNPat <
892+
(vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
893+
(VOP3Mods vt:$src2, i32:$src2_mods))),
894+
(V_CNDMASK_B32_e64 FP32InputMods:$src2_mods, VSrc_b32:$src2,
895+
FP32InputMods:$src1_mods, VSrc_b32:$src1, SSrc_i1:$src0)
896+
>;
897+
898+
class VOPSelectPat <ValueType vt> : GCNPat <
899+
(vt (select i1:$src0, vt:$src1, vt:$src2)),
900+
(V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0)
901+
>;
898902

899-
defm : SelectPat <i16>;
900-
defm : SelectPat <i32>;
901-
defm : SelectPat <f16>;
902-
defm : SelectPat <f32>;
903+
def : VOPSelectModsPat <i32>;
904+
def : VOPSelectModsPat <f32>;
905+
def : VOPSelectPat <f16>;
906+
def : VOPSelectPat <i16>;
903907

904908
let AddedComplexity = 1 in {
905909
def : GCNPat <

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,3 +344,219 @@ body: |
344344
S_ENDPGM 0, implicit %5
345345
346346
...
347+
348+
# Fold source modifiers into VOP select
349+
---
350+
name: select_s32_vcc_fneg_lhs
351+
legalized: true
352+
regBankSelected: true
353+
354+
body: |
355+
bb.0:
356+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
357+
358+
; GCN-LABEL: name: select_s32_vcc_fneg_lhs
359+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
360+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
361+
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
362+
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
363+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
364+
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
365+
; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
366+
%0:vgpr(s32) = COPY $vgpr0
367+
%1:vgpr(s32) = COPY $vgpr1
368+
%2:vgpr(s32) = COPY $vgpr2
369+
%3:vgpr(s32) = COPY $vgpr3
370+
%4:vgpr(s32) = G_FNEG %2
371+
%5:vcc(s1) = G_ICMP intpred(eq), %0, %1
372+
%6:vgpr(s32) = G_SELECT %5, %4, %3
373+
S_ENDPGM 0, implicit %6
374+
375+
...
376+
377+
---
378+
name: select_s32_vcc_fneg_rhs
379+
legalized: true
380+
regBankSelected: true
381+
382+
body: |
383+
bb.0:
384+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
385+
386+
; GCN-LABEL: name: select_s32_vcc_fneg_rhs
387+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
388+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
389+
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
390+
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
391+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
392+
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
393+
; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
394+
%0:vgpr(s32) = COPY $vgpr0
395+
%1:vgpr(s32) = COPY $vgpr1
396+
%2:vgpr(s32) = COPY $vgpr2
397+
%3:vgpr(s32) = COPY $vgpr3
398+
%4:vgpr(s32) = G_FNEG %3
399+
%5:vcc(s1) = G_ICMP intpred(eq), %0, %1
400+
%6:vgpr(s32) = G_SELECT %5, %2, %4
401+
S_ENDPGM 0, implicit %6
402+
403+
...
404+
405+
---
406+
name: select_s32_vcc_fneg_fabs_lhs
407+
legalized: true
408+
regBankSelected: true
409+
410+
body: |
411+
bb.0:
412+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
413+
414+
; GCN-LABEL: name: select_s32_vcc_fneg_fabs_lhs
415+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
416+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
417+
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
418+
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
419+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
420+
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec
421+
; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
422+
%0:vgpr(s32) = COPY $vgpr0
423+
%1:vgpr(s32) = COPY $vgpr1
424+
%2:vgpr(s32) = COPY $vgpr2
425+
%3:vgpr(s32) = COPY $vgpr3
426+
%4:vgpr(s32) = G_FABS %3
427+
%5:vgpr(s32) = G_FNEG %4
428+
%6:vcc(s1) = G_ICMP intpred(eq), %0, %1
429+
%7:vgpr(s32) = G_SELECT %6, %5, %2
430+
S_ENDPGM 0, implicit %7
431+
432+
...
433+
434+
# Make sure we don't try to fold source modifiers into a non-32-bit value.
435+
---
436+
name: select_s16_vcc_fneg_lhs
437+
legalized: true
438+
regBankSelected: true
439+
440+
body: |
441+
bb.0:
442+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
443+
444+
; GCN-LABEL: name: select_s16_vcc_fneg_lhs
445+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
446+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
447+
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
448+
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
449+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
450+
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
451+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
452+
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
453+
; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
454+
%0:vgpr(s32) = COPY $vgpr0
455+
%1:vgpr(s32) = COPY $vgpr1
456+
%2:vgpr(s32) = COPY $vgpr2
457+
%3:vgpr(s32) = COPY $vgpr3
458+
%4:vgpr(s16) = G_TRUNC %0
459+
%5:vgpr(s16) = G_TRUNC %1
460+
%6:vgpr(s16) = G_FNEG %4
461+
%7:vcc(s1) = G_ICMP intpred(eq), %2, %3
462+
%8:vgpr(s16) = G_SELECT %7, %6, %5
463+
S_ENDPGM 0, implicit %8
464+
465+
...
466+
467+
468+
# Make sure we don't try to fold source modifiers into a vector
469+
---
470+
name: select_v2s16_vcc_fneg_lhs
471+
legalized: true
472+
regBankSelected: true
473+
474+
body: |
475+
bb.0:
476+
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
477+
478+
; GCN-LABEL: name: select_v2s16_vcc_fneg_lhs
479+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
480+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
481+
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
482+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
483+
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY2]], implicit $exec
484+
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
485+
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
486+
; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
487+
%0:vgpr(s32) = COPY $vgpr0
488+
%1:vgpr(s32) = COPY $vgpr1
489+
%2:vgpr(<2 x s16>) = COPY $vgpr2
490+
%3:vgpr(<2 x s16>) = COPY $vgpr3
491+
%4:vgpr(<2 x s16>) = G_FNEG %3
492+
%5:vcc(s1) = G_ICMP intpred(eq), %0, %1
493+
%6:vgpr(<2 x s16>) = G_SELECT %5, %4, %3
494+
S_ENDPGM 0, implicit %6
495+
496+
...
497+
498+
# Make sure we don't try to fold source modifiers into a scalar select
499+
500+
---
501+
name: select_s32_scc_fneg_lhs
502+
legalized: true
503+
regBankSelected: true
504+
505+
body: |
506+
bb.0:
507+
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
508+
509+
; GCN-LABEL: name: select_s32_scc_fneg_lhs
510+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
511+
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
512+
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
513+
; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
514+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
515+
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
516+
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
517+
; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
518+
; GCN: $scc = COPY [[COPY4]]
519+
; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc
520+
; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
521+
%0:sgpr(s32) = COPY $sgpr0
522+
%1:sgpr(s32) = COPY $sgpr1
523+
%2:sgpr(s32) = COPY $sgpr2
524+
%3:sgpr(s32) = COPY $sgpr3
525+
%4:sgpr(s32) = G_FNEG %2
526+
%5:sgpr(s32) = G_ICMP intpred(eq), %0, %1
527+
%6:sgpr(s32) = G_SELECT %5, %4, %3
528+
S_ENDPGM 0, implicit %6
529+
530+
...
531+
532+
---
533+
name: select_s32_scc_fneg_rhs
534+
legalized: true
535+
regBankSelected: true
536+
537+
body: |
538+
bb.0:
539+
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
540+
541+
; GCN-LABEL: name: select_s32_scc_fneg_rhs
542+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
543+
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
544+
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
545+
; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
546+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
547+
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc
548+
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
549+
; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
550+
; GCN: $scc = COPY [[COPY4]]
551+
; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc
552+
; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
553+
%0:sgpr(s32) = COPY $sgpr0
554+
%1:sgpr(s32) = COPY $sgpr1
555+
%2:sgpr(s32) = COPY $sgpr2
556+
%3:sgpr(s32) = COPY $sgpr3
557+
%4:sgpr(s32) = G_FNEG %3
558+
%5:sgpr(s32) = G_ICMP intpred(eq), %0, %1
559+
%6:sgpr(s32) = G_SELECT %5, %2, %4
560+
S_ENDPGM 0, implicit %6
561+
562+
...

0 commit comments

Comments
 (0)