Skip to content

Commit a14aa7d

Browse files
committed
[X86][SSE] combineExtractWithShuffle - extract(bitcast(scalar_to_vector(x))) --> x
Removes some unnecessary gpr<-->fpu traffic
1 parent 52ec737 commit a14aa7d

File tree

6 files changed

+199
-214
lines changed

6 files changed

+199
-214
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37126,6 +37126,28 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
3712637126
}
3712737127
}
3712837128

37129+
// Handle extract(scalar_to_vector(scalar_value)) for integers.
37130+
// TODO: Move to DAGCombine?
37131+
if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
37132+
SrcBC.getValueType().isInteger() &&
37133+
(SrcBC.getScalarValueSizeInBits() % SrcSVT.getSizeInBits()) == 0 &&
37134+
SrcBC.getScalarValueSizeInBits() ==
37135+
SrcBC.getOperand(0).getValueSizeInBits()) {
37136+
unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcSVT.getSizeInBits();
37137+
if (IdxC.ult(Scale)) {
37138+
unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits();
37139+
SDValue Scl = SrcBC.getOperand(0);
37140+
EVT SclVT = Scl.getValueType();
37141+
if (Offset) {
37142+
Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl,
37143+
DAG.getShiftAmountConstant(Offset, SclVT, dl));
37144+
}
37145+
Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType());
37146+
Scl = DAG.getZExtOrTrunc(Scl, dl, VT);
37147+
return Scl;
37148+
}
37149+
}
37150+
3712937151
// Handle extract(truncate(x)) for 0'th index.
3713037152
// TODO: Treat this as a faux shuffle?
3713137153
// TODO: When can we use this for general indices?

llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,18 @@ target triple = "x86_64-unknown-linux-gnu"
1717
define i32 @main() nounwind uwtable {
1818
; CHECK-LABEL: main:
1919
; CHECK: # %bb.0: # %entry
20-
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
21-
; CHECK-NEXT: pextrb $1, %xmm0, %ecx
22-
; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
23-
; CHECK-NEXT: pextrb $1, %xmm1, %eax
20+
; CHECK-NEXT: movq {{.*}}(%rip), %rsi
21+
; CHECK-NEXT: movq {{.*}}(%rip), %rax
22+
; CHECK-NEXT: movq %rsi, %rdx
23+
; CHECK-NEXT: shrq $8, %rdx
24+
; CHECK-NEXT: movsbl %al, %ecx
25+
; CHECK-NEXT: shrq $8, %rax
2426
; CHECK-NEXT: cbtw
25-
; CHECK-NEXT: pextrb $0, %xmm0, %edx
26-
; CHECK-NEXT: pextrb $0, %xmm1, %esi
27-
; CHECK-NEXT: idivb %cl
28-
; CHECK-NEXT: movl %eax, %ecx
29-
; CHECK-NEXT: movsbl %sil, %eax
3027
; CHECK-NEXT: idivb %dl
31-
; CHECK-NEXT: movzbl %cl, %ecx
28+
; CHECK-NEXT: movl %eax, %edx
29+
; CHECK-NEXT: movl %ecx, %eax
30+
; CHECK-NEXT: idivb %sil
31+
; CHECK-NEXT: movzbl %dl, %ecx
3232
; CHECK-NEXT: movzbl %al, %eax
3333
; CHECK-NEXT: movd %eax, %xmm0
3434
; CHECK-NEXT: pinsrb $1, %ecx, %xmm0

0 commit comments

Comments
 (0)