Skip to content

Commit d3bf06b

Browse files
committed
[DAGCombiner] Add combine for (not (strict_fsetcc)) to create a strict_fsetcc with the opposite condition.
Unlike the existing code that I modified here, I only handle the case where the strict_fsetcc has a single use. Not sure exactly how to handle multiple uses. Testing this on X86 is hard because we already have other combines that get rid of the lowered version of the integer setcc that this xor will eventually become. So this combine really just saves a bunch of extra nodes being created. Not sure about other targets. Differential Revision: https://reviews.llvm.org/D71816
1 parent 118efa5 commit d3bf06b

File tree

2 files changed

+111
-4
lines changed

2 files changed

+111
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ namespace {
522522
SDValue rebuildSetCC(SDValue N);
523523

524524
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
525-
SDValue &CC) const;
525+
SDValue &CC, bool MatchStrict = false) const;
526526
bool isOneUseSetCC(SDValue N) const;
527527
bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
528528

@@ -814,14 +814,23 @@ static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
814814
// the appropriate nodes based on the type of node we are checking. This
815815
// simplifies life a bit for the callers.
816816
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
817-
SDValue &CC) const {
817+
SDValue &CC, bool MatchStrict) const {
818818
if (N.getOpcode() == ISD::SETCC) {
819819
LHS = N.getOperand(0);
820820
RHS = N.getOperand(1);
821821
CC = N.getOperand(2);
822822
return true;
823823
}
824824

825+
if (MatchStrict &&
826+
(N.getOpcode() == ISD::STRICT_FSETCC ||
827+
N.getOpcode() == ISD::STRICT_FSETCCS)) {
828+
LHS = N.getOperand(1);
829+
RHS = N.getOperand(2);
830+
CC = N.getOperand(3);
831+
return true;
832+
}
833+
825834
if (N.getOpcode() != ISD::SELECT_CC ||
826835
!TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
827836
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
@@ -7058,7 +7067,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
70587067
// fold !(x cc y) -> (x !cc y)
70597068
unsigned N0Opcode = N0.getOpcode();
70607069
SDValue LHS, RHS, CC;
7061-
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
7070+
if (TLI.isConstTrueVal(N1.getNode()) &&
7071+
isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
70627072
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
70637073
LHS.getValueType());
70647074
if (!LegalOperations ||
@@ -7071,6 +7081,21 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
70717081
case ISD::SELECT_CC:
70727082
return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
70737083
N0.getOperand(3), NotCC);
7084+
case ISD::STRICT_FSETCC:
7085+
case ISD::STRICT_FSETCCS: {
7086+
if (N0.hasOneUse()) {
7087+
// FIXME Can we handle multiple uses? Could we token factor the chain
7088+
// results from the new/old setcc?
7089+
SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7090+
N0.getOperand(0),
7091+
N0Opcode == ISD::STRICT_FSETCCS);
7092+
CombineTo(N, SetCC);
7093+
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7094+
recursivelyDeleteUnusedNodes(N0.getNode());
7095+
return SDValue(N, 0); // Return N so it doesn't get rechecked!
7096+
}
7097+
break;
7098+
}
70747099
}
70757100
}
70767101
}
@@ -13541,8 +13566,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
1354113566
}
1354213567

1354313568
if (N1.hasOneUse()) {
13569+
// rebuildSetCC calls visitXOR which may change the Chain when there is a
13570+
// STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
13571+
HandleSDNode ChainHandle(Chain);
1354413572
if (SDValue NewN1 = rebuildSetCC(N1))
13545-
return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
13573+
return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
13574+
ChainHandle.getValue(), NewN1, N2);
1354613575
}
1354713576

1354813577
return SDValue();

llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4136,6 +4136,84 @@ define i32 @test_f64_uno_s(i32 %a, i32 %b, double %f1, double %f2) #0 {
41364136
ret i32 %res
41374137
}
41384138

4139+
define void @foo(float %0, float %1) #0 {
4140+
; SSE-32-LABEL: foo:
4141+
; SSE-32: # %bb.0:
4142+
; SSE-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4143+
; SSE-32-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
4144+
; SSE-32-NEXT: jbe .LBB56_1
4145+
; SSE-32-NEXT: # %bb.2:
4146+
; SSE-32-NEXT: jmp bar # TAILCALL
4147+
; SSE-32-NEXT: .LBB56_1:
4148+
; SSE-32-NEXT: retl
4149+
;
4150+
; SSE-64-LABEL: foo:
4151+
; SSE-64: # %bb.0:
4152+
; SSE-64-NEXT: ucomiss %xmm1, %xmm0
4153+
; SSE-64-NEXT: jbe .LBB56_1
4154+
; SSE-64-NEXT: # %bb.2:
4155+
; SSE-64-NEXT: jmp bar # TAILCALL
4156+
; SSE-64-NEXT: .LBB56_1:
4157+
; SSE-64-NEXT: retq
4158+
;
4159+
; AVX-32-LABEL: foo:
4160+
; AVX-32: # %bb.0:
4161+
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
4162+
; AVX-32-NEXT: vucomiss {{[0-9]+}}(%esp), %xmm0
4163+
; AVX-32-NEXT: jbe .LBB56_1
4164+
; AVX-32-NEXT: # %bb.2:
4165+
; AVX-32-NEXT: jmp bar # TAILCALL
4166+
; AVX-32-NEXT: .LBB56_1:
4167+
; AVX-32-NEXT: retl
4168+
;
4169+
; AVX-64-LABEL: foo:
4170+
; AVX-64: # %bb.0:
4171+
; AVX-64-NEXT: vucomiss %xmm1, %xmm0
4172+
; AVX-64-NEXT: jbe .LBB56_1
4173+
; AVX-64-NEXT: # %bb.2:
4174+
; AVX-64-NEXT: jmp bar # TAILCALL
4175+
; AVX-64-NEXT: .LBB56_1:
4176+
; AVX-64-NEXT: retq
4177+
;
4178+
; X87-LABEL: foo:
4179+
; X87: # %bb.0:
4180+
; X87-NEXT: flds {{[0-9]+}}(%esp)
4181+
; X87-NEXT: flds {{[0-9]+}}(%esp)
4182+
; X87-NEXT: fucompp
4183+
; X87-NEXT: wait
4184+
; X87-NEXT: fnstsw %ax
4185+
; X87-NEXT: # kill: def $ah killed $ah killed $ax
4186+
; X87-NEXT: sahf
4187+
; X87-NEXT: jbe .LBB56_1
4188+
; X87-NEXT: # %bb.2:
4189+
; X87-NEXT: jmp bar # TAILCALL
4190+
; X87-NEXT: .LBB56_1:
4191+
; X87-NEXT: retl
4192+
;
4193+
; X87-CMOV-LABEL: foo:
4194+
; X87-CMOV: # %bb.0:
4195+
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
4196+
; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp)
4197+
; X87-CMOV-NEXT: fucompi %st(1), %st
4198+
; X87-CMOV-NEXT: fstp %st(0)
4199+
; X87-CMOV-NEXT: wait
4200+
; X87-CMOV-NEXT: jbe .LBB56_1
4201+
; X87-CMOV-NEXT: # %bb.2:
4202+
; X87-CMOV-NEXT: jmp bar # TAILCALL
4203+
; X87-CMOV-NEXT: .LBB56_1:
4204+
; X87-CMOV-NEXT: retl
4205+
%3 = call i1 @llvm.experimental.constrained.fcmp.f32( float %0, float %1, metadata !"ogt", metadata !"fpexcept.strict") #0
4206+
br i1 %3, label %4, label %5
4207+
4208+
4: ; preds = %2
4209+
tail call void @bar()
4210+
br label %5
4211+
4212+
5: ; preds = %4, %2
4213+
ret void
4214+
}
4215+
declare void @bar()
4216+
41394217
attributes #0 = { strictfp }
41404218

41414219
declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)

0 commit comments

Comments
 (0)