Skip to content

Commit 1c21c19

Browse files
committed
Merging r371305 and r371307:
------------------------------------------------------------------------
r371305 | nikic | 2019-09-07 14:03:48 +0200 (Sat, 07 Sep 2019) | 1 line

[X86] Add test for PR43230; NFC
------------------------------------------------------------------------
------------------------------------------------------------------------
r371307 | nikic | 2019-09-07 14:13:44 +0200 (Sat, 07 Sep 2019) | 9 lines

[X86] Fix pshuflw formation from repeated shuffle mask (PR43230)

Fix for https://bugs.llvm.org/show_bug.cgi?id=43230.

When creating PSHUFLW from a repeated shuffle mask, we have to apply the checks to the repeated mask, not the original one. For the test case from PR43230 the inspected part of the original mask is all undef.

Differential Revision: https://reviews.llvm.org/D67314
------------------------------------------------------------------------

llvm-svn: 371378
1 parent 8cdf289 commit 1c21c19

File tree

2 files changed

+43
-2
lines changed

2 files changed

+43
-2
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31664,8 +31664,8 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
3166431664
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
3166531665
SmallVector<int, 4> RepeatedMask;
3166631666
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
31667-
ArrayRef<int> LoMask(Mask.data() + 0, 4);
31668-
ArrayRef<int> HiMask(Mask.data() + 4, 4);
31667+
ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
31668+
ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
3166931669

3167031670
// PSHUFLW: permute lower 4 elements only.
3167131671
if (isUndefOrInRange(LoMask, 0, 4) &&

llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4754,3 +4754,44 @@ define <16 x i16> @unpckh_v16i16(<16 x i16> %x, <16 x i16> %y) {
47544754
ret <16 x i16> %unpckh
47554755
}
47564756

4757+
define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
4758+
; AVX1-LABEL: pr43230:
4759+
; AVX1: # %bb.0:
4760+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
4761+
; AVX1-NEXT: vpsllw $12, %xmm1, %xmm2
4762+
; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
4763+
; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
4764+
; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2
4765+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4766+
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
4767+
; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
4768+
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1
4769+
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
4770+
; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1
4771+
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
4772+
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
4773+
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1
4774+
; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
4775+
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
4776+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4777+
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
4778+
; AVX1-NEXT: retq
4779+
;
4780+
; AVX2-LABEL: pr43230:
4781+
; AVX2: # %bb.0:
4782+
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
4783+
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
4784+
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
4785+
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
4786+
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
4787+
; AVX2-NEXT: retq
4788+
;
4789+
; AVX512VL-LABEL: pr43230:
4790+
; AVX512VL: # %bb.0:
4791+
; AVX512VL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
4792+
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
4793+
; AVX512VL-NEXT: retq
4794+
%shr = lshr <16 x i16> %a, %b
4795+
%shuf = shufflevector <16 x i16> zeroinitializer, <16 x i16> %shr, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 30, i32 15>
4796+
ret <16 x i16> %shuf
4797+
}

0 commit comments

Comments
 (0)