Skip to content

Commit 5fc0367

Browse files
committed
Merging r371088 and r371095:
------------------------------------------------------------------------
r371088 | spatel | 2019-09-05 18:58:18 +0200 (Thu, 05 Sep 2019) | 1 line

[x86] add test for horizontal math bug (PR43225); NFC
------------------------------------------------------------------------
------------------------------------------------------------------------
r371095 | spatel | 2019-09-05 19:28:17 +0200 (Thu, 05 Sep 2019) | 3 lines

[x86] fix horizontal math bug exposed by improved demanded elements analysis (PR43225)

https://bugs.llvm.org/show_bug.cgi?id=43225
------------------------------------------------------------------------

llvm-svn: 371178
1 parent c255101 commit 5fc0367

File tree

2 files changed

+50
-5
lines changed

2 files changed

+50
-5
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33594,7 +33594,7 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
3359433594
}
3359533595

3359633596
/// Eliminate a redundant shuffle of a horizontal math op.
33597-
static SDValue foldShuffleOfHorizOp(SDNode *N) {
33597+
static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
3359833598
unsigned Opcode = N->getOpcode();
3359933599
if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
3360033600
if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
@@ -33625,6 +33625,25 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
3362533625
HOp.getOperand(0) != HOp.getOperand(1))
3362633626
return SDValue();
3362733627

33628+
// The shuffle that we are eliminating may have allowed the horizontal op to
33629+
// have an undemanded (undefined) operand. Duplicate the other (defined)
33630+
// operand to ensure that the results are defined across all lanes without the
33631+
// shuffle.
33632+
// Helper applied to the horizontal op at every point where the shuffle is
// dropped. If exactly one operand of HorizOp is undef, it returns a new node
// with the same opcode and value type whose two operands are both the defined
// operand; if neither operand is undef, it returns HorizOp unchanged.
// The lambda captures nothing, so the DAG is passed in explicitly.
auto updateHOp = [](SDValue HorizOp, SelectionDAG &DAG) {
33633+
SDValue X;
33634+
if (HorizOp.getOperand(0).isUndef()) {
33635+
// Both operands being undef is not expected here; in asserts builds this
// traps instead of duplicating an undef operand.
assert(!HorizOp.getOperand(1).isUndef() && "Not expecting foldable h-op");
33636+
X = HorizOp.getOperand(1);
33637+
} else if (HorizOp.getOperand(1).isUndef()) {
33638+
// This condition always holds on this path: the preceding branch already
// handled the case where operand 0 is undef.
assert(!HorizOp.getOperand(0).isUndef() && "Not expecting foldable h-op");
33639+
X = HorizOp.getOperand(0);
33640+
} else {
33641+
// Neither operand is undef: nothing to repair.
return HorizOp;
33642+
}
33643+
// Rebuild the op as (X, X), reusing the original opcode, debug location and
// result type.
return DAG.getNode(HorizOp.getOpcode(), SDLoc(HorizOp),
33644+
HorizOp.getValueType(), X, X);
33645+
};
33646+
3362833647
// When the operands of a horizontal math op are identical, the low half of
3362933648
// the result is the same as the high half. If a target shuffle is also
3363033649
// replicating low and high halves, we don't need the shuffle.
@@ -33635,7 +33654,7 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
3363533654
assert((HOp.getValueType() == MVT::v2f64 ||
3363633655
HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
3363733656
"Unexpected type for h-op");
33638-
return HOp;
33657+
return updateHOp(HOp, DAG);
3363933658
}
3364033659
return SDValue();
3364133660
}
@@ -33649,14 +33668,14 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
3364933668
(isTargetShuffleEquivalent(Mask, {0, 0}) ||
3365033669
isTargetShuffleEquivalent(Mask, {0, 1, 0, 1}) ||
3365133670
isTargetShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3})))
33652-
return HOp;
33671+
return updateHOp(HOp, DAG);
3365333672

3365433673
if (HOp.getValueSizeInBits() == 256 &&
3365533674
(isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}) ||
3365633675
isTargetShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) ||
3365733676
isTargetShuffleEquivalent(
3365833677
Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11})))
33659-
return HOp;
33678+
return updateHOp(HOp, DAG);
3366033679

3366133680
return SDValue();
3366233681
}
@@ -33710,7 +33729,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
3371033729
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
3371133730
return AddSub;
3371233731

33713-
if (SDValue HAddSub = foldShuffleOfHorizOp(N))
33732+
if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG))
3371433733
return HAddSub;
3371533734
}
3371633735

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s
3+
4+
; Eliminating a shuffle means we have to replace an undef operand of a horizontal op.
5+
6+
; Regression test for PR43225. The final shuffle reads only elements 0 and 2 of
; the hsub result %t42, and the expected output below has vhsubpd using %ymm2
; for both of its source operands.
; NOTE(review): presumably elements 0 and 2 of hsub.pd.256 depend only on its
; first operand (%z) - confirm against the instruction reference.
define void @PR43225(<4 x double>* %p0, <4 x double>* %p1, <4 x double> %x, <4 x double> %y, <4 x double> %z) nounwind {
7+
; CHECK-LABEL: PR43225:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: vmovaps (%rdi), %ymm0
10+
; CHECK-NEXT: vmovaps (%rsi), %ymm0
11+
; CHECK-NEXT: vhsubpd %ymm2, %ymm2, %ymm0
12+
; CHECK-NEXT: vmovapd %ymm0, (%rdi)
13+
; CHECK-NEXT: vzeroupper
14+
; CHECK-NEXT: retq
15+
; Volatile load from %p0; the expected output above still contains a load for it.
%t39 = load volatile <4 x double>, <4 x double>* %p0, align 32
16+
; Result is <%t39[1], %x[1], %t39[3], %x[3]>.
%shuffle11 = shufflevector <4 x double> %t39, <4 x double> %x, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
17+
; Volatile load from %p1; likewise still present in the expected output.
%t40 = load volatile <4 x double>, <4 x double>* %p1, align 32
18+
; %t41 is used only as the second operand of the hsub below.
%t41 = tail call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %shuffle11, <4 x double> %t40)
19+
%t42 = tail call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %z, <4 x double> %t41)
20+
; Result is <%t42[0], %t42[0], %t42[2], %t42[2]>; elements 1 and 3 of %t42 are
; never read.
%shuffle12 = shufflevector <4 x double> %t42, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
21+
store volatile <4 x double> %shuffle12, <4 x double>* %p0, align 32
22+
ret void
23+
}
24+
25+
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>)
26+
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>)

0 commit comments

Comments
 (0)