Skip to content

Commit 3180af4

Browse files
committed
[InstCombine] reassociate fsub+fsub into fsub+fadd
As discussed in the motivating PR44509: https://bugs.llvm.org/show_bug.cgi?id=44509 ...we can end up with worse code using fast-math than without. This is because the reassociate pass greedily transforms fsub into fneg/fadd and apparently (based on the regression tests seen here) expects instcombine to clean that up if it wasn't profitable. But we were missing this fold: (X - Y) - Z --> X - (Y + Z) There's another, more specific case that I think we should handle as shown in the "fake" fneg test (but missed with a real fneg), but that's another patch. That may be tricky to get right without conflicting with existing transforms for fneg. Differential Revision: https://reviews.llvm.org/D72521
1 parent 88380b9 commit 3180af4

File tree

4 files changed

+25
-21
lines changed

4 files changed

+25
-21
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2285,6 +2285,12 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
22852285
// complex pattern matching and remove this from InstCombine.
22862286
if (Value *V = FAddCombine(Builder).simplify(&I))
22872287
return replaceInstUsesWith(I, V);
2288+
2289+
// (X - Y) - Op1 --> X - (Y + Op1)
2290+
if (match(Op0, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
2291+
Value *FAdd = Builder.CreateFAddFMF(Y, Op1, &I);
2292+
return BinaryOperator::CreateFSubFMF(X, FAdd, &I);
2293+
}
22882294
}
22892295

22902296
return nullptr;

llvm/test/Transforms/InstCombine/fsub.ll

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,8 @@ define float @fsub_fmul_fneg2_extra_use3(float %x, float %y, float %z) {
642642
ret float %r
643643
}
644644

645+
; Negative test - can't reassociate without FMF.
646+
645647
define float @fsub_fsub(float %x, float %y, float %z) {
646648
; CHECK-LABEL: @fsub_fsub(
647649
; CHECK-NEXT: [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
@@ -653,6 +655,8 @@ define float @fsub_fsub(float %x, float %y, float %z) {
653655
ret float %xyz
654656
}
655657

658+
; Negative test - can't reassociate without enough FMF.
659+
656660
define float @fsub_fsub_nsz(float %x, float %y, float %z) {
657661
; CHECK-LABEL: @fsub_fsub_nsz(
658662
; CHECK-NEXT: [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
@@ -664,6 +668,8 @@ define float @fsub_fsub_nsz(float %x, float %y, float %z) {
664668
ret float %xyz
665669
}
666670

671+
; Negative test - can't reassociate without enough FMF.
672+
667673
define float @fsub_fsub_reassoc(float %x, float %y, float %z) {
668674
; CHECK-LABEL: @fsub_fsub_reassoc(
669675
; CHECK-NEXT: [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
@@ -677,8 +683,8 @@ define float @fsub_fsub_reassoc(float %x, float %y, float %z) {
677683

678684
define float @fsub_fsub_nsz_reassoc(float %x, float %y, float %z) {
679685
; CHECK-LABEL: @fsub_fsub_nsz_reassoc(
680-
; CHECK-NEXT: [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
681-
; CHECK-NEXT: [[XYZ:%.*]] = fsub reassoc nsz float [[XY]], [[Z:%.*]]
686+
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[Y:%.*]], [[Z:%.*]]
687+
; CHECK-NEXT: [[XYZ:%.*]] = fsub reassoc nsz float [[X:%.*]], [[TMP1]]
682688
; CHECK-NEXT: ret float [[XYZ]]
683689
;
684690
%xy = fsub float %x, %y
@@ -688,15 +694,17 @@ define float @fsub_fsub_nsz_reassoc(float %x, float %y, float %z) {
688694

689695
define <2 x double> @fsub_fsub_fast_vec(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
690696
; CHECK-LABEL: @fsub_fsub_fast_vec(
691-
; CHECK-NEXT: [[XY:%.*]] = fsub fast <2 x double> [[X:%.*]], [[Y:%.*]]
692-
; CHECK-NEXT: [[XYZ:%.*]] = fsub fast <2 x double> [[XY]], [[Z:%.*]]
697+
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x double> [[Y:%.*]], [[Z:%.*]]
698+
; CHECK-NEXT: [[XYZ:%.*]] = fsub fast <2 x double> [[X:%.*]], [[TMP1]]
693699
; CHECK-NEXT: ret <2 x double> [[XYZ]]
694700
;
695701
%xy = fsub fast <2 x double> %x, %y
696702
%xyz = fsub fast reassoc <2 x double> %xy, %z
697703
ret <2 x double> %xyz
698704
}
699705

706+
; Negative test - don't reassociate and increase instructions.
707+
700708
define float @fsub_fsub_nsz_reassoc_extra_use(float %x, float %y, float %z) {
701709
; CHECK-LABEL: @fsub_fsub_nsz_reassoc_extra_use(
702710
; CHECK-NEXT: [[XY:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
@@ -734,8 +742,8 @@ define float @fneg_fsub_nsz(float %x, float %y) {
734742

735743
define float @fake_fneg_fsub_fast(float %x, float %y) {
736744
; CHECK-LABEL: @fake_fneg_fsub_fast(
737-
; CHECK-NEXT: [[NEGX:%.*]] = fsub float -0.000000e+00, [[X:%.*]]
738-
; CHECK-NEXT: [[SUB:%.*]] = fsub fast float [[NEGX]], [[Y:%.*]]
745+
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
746+
; CHECK-NEXT: [[SUB:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
739747
; CHECK-NEXT: ret float [[SUB]]
740748
;
741749
%negx = fsub float -0.0, %x

llvm/test/Transforms/Reassociate/fast-SubReassociate.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,10 @@ define float @test3(float %A, float %B, float %C, float %D) {
7979
; With sub reassociation, constant folding can eliminate the two 12 constants.
8080

8181
define float @test4(float %A, float %B, float %C, float %D) {
82-
; FIXME: InstCombine should be able to get us to the following:
83-
; %sum = fadd fast float %B, %A
84-
; %sum1 = fadd fast float %sum, %C
85-
; %Q = fsub fast float %D, %sum1
86-
; ret i32 %Q
8782
; CHECK-LABEL: @test4(
88-
; CHECK-NEXT: [[B_NEG:%.*]] = fsub fast float -0.000000e+00, [[B:%.*]]
89-
; CHECK-NEXT: [[O_NEG:%.*]] = fsub fast float [[B_NEG]], [[A:%.*]]
90-
; CHECK-NEXT: [[P:%.*]] = fsub fast float [[O_NEG]], [[C:%.*]]
91-
; CHECK-NEXT: [[Q:%.*]] = fadd fast float [[P]], [[D:%.*]]
83+
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[B:%.*]], [[A:%.*]]
84+
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[TMP1]], [[C:%.*]]
85+
; CHECK-NEXT: [[Q:%.*]] = fsub fast float [[D:%.*]], [[TMP2]]
9286
; CHECK-NEXT: ret float [[Q]]
9387
;
9488
%M = fadd fast float 1.200000e+01, %A

llvm/test/Transforms/Reassociate/fast-basictest.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -669,13 +669,9 @@ define float @test19_reassoc(float %A, float %B) {
669669

670670
; With sub reassociation, constant folding can eliminate the uses of %a.
671671
define float @test20(float %a, float %b, float %c) nounwind {
672-
; FIXME: Should be able to generate the below, which may expose more
673-
; opportunites for FAdd reassociation.
674-
; %sum = fadd fast float %c, %b
675-
; %t7 = fsub fast float 0, %sum
676672
; CHECK-LABEL: @test20(
677-
; CHECK-NEXT: [[B_NEG:%.*]] = fsub fast float -0.000000e+00, [[B:%.*]]
678-
; CHECK-NEXT: [[T7:%.*]] = fsub fast float [[B_NEG]], [[C:%.*]]
673+
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[B:%.*]], [[C:%.*]]
674+
; CHECK-NEXT: [[T7:%.*]] = fsub fast float -0.000000e+00, [[TMP1]]
679675
; CHECK-NEXT: ret float [[T7]]
680676
;
681677
%t3 = fsub fast float %a, %b

0 commit comments

Comments
 (0)