Skip to content

Commit e7e0437

Browse files
committed
[DAG] Enable ISD::EXTRACT_SUBVECTOR SimplifyMultipleUseDemandedBits handling
This allows SimplifyDemandedBits to call SimplifyMultipleUseDemandedBits to create a simpler ISD::EXTRACT_SUBVECTOR, which is particularly useful for cases where we're splitting into subvectors anyhow. (Differential Revision link garbled in extraction — the trailer duplicated the message body instead of the review URL.)
1 parent a095d14 commit e7e0437

File tree

10 files changed: +105 additions, -120 deletions

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,17 @@ bool TargetLowering::SimplifyDemandedBits(
970970
}
971971
if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
972972
return true;
973+
974+
// Attempt to avoid multi-use src if we don't need anything from it.
975+
if (!DemandedBits.isAllOnesValue() || !SrcElts.isAllOnesValue()) {
976+
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
977+
Src, DemandedBits, SrcElts, TLO.DAG, Depth + 1);
978+
if (DemandedSrc) {
979+
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
980+
Op.getOperand(1));
981+
return TLO.CombineTo(Op, NewOp);
982+
}
983+
}
973984
break;
974985
}
975986
case ISD::CONCAT_VECTORS: {

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,6 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
129129
define i32 @test_v3i32(<3 x i32> %a) nounwind {
130130
; CHECK-LABEL: test_v3i32:
131131
; CHECK: // %bb.0:
132-
; CHECK-NEXT: mov w8, #-1
133-
; CHECK-NEXT: mov v0.s[3], w8
134132
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
135133
; CHECK-NEXT: and v1.8b, v0.8b, v1.8b
136134
; CHECK-NEXT: mov w8, v0.s[1]

llvm/test/CodeGen/X86/avx-vperm2x128.ll

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -627,11 +627,11 @@ entry:
627627
define <4 x i64> @ld1_hi0_hi1_4i64(<4 x i64> %a, <4 x i64> * %pb) nounwind uwtable readnone ssp {
628628
; AVX1-LABEL: ld1_hi0_hi1_4i64:
629629
; AVX1: # %bb.0: # %entry
630-
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
631-
; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
632630
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
633631
; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
634-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
632+
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
633+
; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm1, %xmm1
634+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
635635
; AVX1-NEXT: retq
636636
;
637637
; AVX2-LABEL: ld1_hi0_hi1_4i64:
@@ -672,12 +672,11 @@ entry:
672672
define <8 x i32> @ld1_hi0_hi1_8i32(<8 x i32> %a, <8 x i32> * %pb) nounwind uwtable readnone ssp {
673673
; AVX1-LABEL: ld1_hi0_hi1_8i32:
674674
; AVX1: # %bb.0: # %entry
675-
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
676-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
677-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,2,3,4]
678-
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
679-
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
680-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
675+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
676+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,3,4]
677+
; AVX1-NEXT: vpaddd 16(%rdi), %xmm1, %xmm2
678+
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
679+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
681680
; AVX1-NEXT: retq
682681
;
683682
; AVX2-LABEL: ld1_hi0_hi1_8i32:

llvm/test/CodeGen/X86/bitcast-setcc-256.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -375,8 +375,6 @@ define void @bitcast_16i16_store(i16* %p, <16 x i16> %a0) {
375375
;
376376
; AVX2-LABEL: bitcast_16i16_store:
377377
; AVX2: # %bb.0:
378-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
379-
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
380378
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
381379
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
382380
; AVX2-NEXT: vpmovmskb %xmm0, %eax

llvm/test/CodeGen/X86/bitcast-vector-bool.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,6 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
296296
;
297297
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
298298
; AVX2: # %bb.0:
299-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
300-
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
301299
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
302300
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
303301
; AVX2-NEXT: vpmovmskb %xmm0, %ecx

llvm/test/CodeGen/X86/masked_load.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3198,8 +3198,6 @@ define <16 x i16> @load_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <1
31983198
;
31993199
; AVX2-LABEL: load_v16i16_v16i16:
32003200
; AVX2: ## %bb.0:
3201-
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
3202-
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
32033201
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
32043202
; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
32053203
; AVX2-NEXT: vpmovmskb %xmm0, %eax

llvm/test/CodeGen/X86/movmsk-cmp.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -418,8 +418,6 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
418418
;
419419
; AVX2-LABEL: allones_v16i16_sign:
420420
; AVX2: # %bb.0:
421-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
422-
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
423421
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
424422
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
425423
; AVX2-NEXT: vpmovmskb %xmm0, %eax
@@ -473,8 +471,6 @@ define i1 @allzeros_v16i16_sign(<16 x i16> %arg) {
473471
;
474472
; AVX2-LABEL: allzeros_v16i16_sign:
475473
; AVX2: # %bb.0:
476-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
477-
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
478474
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
479475
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
480476
; AVX2-NEXT: vpmovmskb %xmm0, %eax

llvm/test/CodeGen/X86/pr31956.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@ define <4 x float> @foo() {
1111
; CHECK: # %bb.0: # %entry
1212
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
1313
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2,3,4,5,6,7]
14-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
15-
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
16-
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
14+
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0],mem[0,2]
15+
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
1716
; CHECK-NEXT: vzeroupper
1817
; CHECK-NEXT: retq
1918
entry:

llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,29 +1068,27 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
10681068
;
10691069
; AVX2-64-LABEL: uitofp_v4i64_v4f32:
10701070
; AVX2-64: # %bb.0:
1071-
; AVX2-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
1072-
; AVX2-64-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
1073-
; AVX2-64-NEXT: vextracti128 $1, %ymm1, %xmm2
1074-
; AVX2-64-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
1075-
; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
1076-
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm2
1077-
; AVX2-64-NEXT: vpsrlq $1, %ymm0, %ymm3
1078-
; AVX2-64-NEXT: vpor %ymm2, %ymm3, %ymm2
1079-
; AVX2-64-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
1080-
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
1081-
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
1082-
; AVX2-64-NEXT: vmovq %xmm0, %rax
1083-
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
1071+
; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
1072+
; AVX2-64-NEXT: vpand %ymm1, %ymm0, %ymm1
1073+
; AVX2-64-NEXT: vpsrlq $1, %ymm0, %ymm2
1074+
; AVX2-64-NEXT: vpor %ymm1, %ymm2, %ymm1
1075+
; AVX2-64-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
1076+
; AVX2-64-NEXT: vpextrq $1, %xmm1, %rax
1077+
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
1078+
; AVX2-64-NEXT: vmovq %xmm1, %rax
1079+
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
10841080
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
1085-
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm0
1086-
; AVX2-64-NEXT: vmovq %xmm0, %rax
1081+
; AVX2-64-NEXT: vextracti128 $1, %ymm1, %xmm1
1082+
; AVX2-64-NEXT: vmovq %xmm1, %rax
10871083
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
10881084
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
1089-
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
1090-
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
1091-
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
1092-
; AVX2-64-NEXT: vaddps %xmm0, %xmm0, %xmm2
1093-
; AVX2-64-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
1085+
; AVX2-64-NEXT: vpextrq $1, %xmm1, %rax
1086+
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
1087+
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
1088+
; AVX2-64-NEXT: vaddps %xmm1, %xmm1, %xmm2
1089+
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm3
1090+
; AVX2-64-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
1091+
; AVX2-64-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
10941092
; AVX2-64-NEXT: vzeroupper
10951093
; AVX2-64-NEXT: retq
10961094
;

llvm/test/CodeGen/X86/vec_int_to_fp.ll

Lines changed: 66 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -2194,10 +2194,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
21942194
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
21952195
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
21962196
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
2197-
; AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
2198-
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm3, %ymm0
2199-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2200-
; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2197+
; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
22012198
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
22022199
; AVX2-NEXT: vzeroupper
22032200
; AVX2-NEXT: retq
@@ -2593,29 +2590,27 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
25932590
;
25942591
; AVX2-LABEL: uitofp_4i64_to_4f32:
25952592
; AVX2: # %bb.0:
2596-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
2597-
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
2598-
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
2599-
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
2600-
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
2601-
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm2
2602-
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3
2603-
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
2604-
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
2605-
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
2606-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
2607-
; AVX2-NEXT: vmovq %xmm0, %rax
2608-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
2593+
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
2594+
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1
2595+
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2
2596+
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
2597+
; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
2598+
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
2599+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
2600+
; AVX2-NEXT: vmovq %xmm1, %rax
2601+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
26092602
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
2610-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
2611-
; AVX2-NEXT: vmovq %xmm0, %rax
2603+
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
2604+
; AVX2-NEXT: vmovq %xmm1, %rax
26122605
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
26132606
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
2614-
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
2615-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
2616-
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
2617-
; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm2
2618-
; AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
2607+
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
2608+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
2609+
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
2610+
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
2611+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
2612+
; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
2613+
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
26192614
; AVX2-NEXT: vzeroupper
26202615
; AVX2-NEXT: retq
26212616
;
@@ -4512,29 +4507,27 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
45124507
; AVX2-LABEL: uitofp_load_4i64_to_4f32:
45134508
; AVX2: # %bb.0:
45144509
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
4515-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
4516-
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
4517-
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
4518-
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
4519-
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
4520-
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm2
4521-
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3
4522-
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
4523-
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
4510+
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
4511+
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1
4512+
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2
4513+
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
4514+
; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
45244515
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
4525-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
4516+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
45264517
; AVX2-NEXT: vmovq %xmm0, %rax
4527-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
4528-
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
4518+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
4519+
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
45294520
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
45304521
; AVX2-NEXT: vmovq %xmm0, %rax
4531-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
4532-
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
4522+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
4523+
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
45334524
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
4534-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
4535-
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
4536-
; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm2
4537-
; AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
4525+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
4526+
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
4527+
; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm1
4528+
; AVX2-NEXT: vmovdqa (%rdi), %xmm2
4529+
; AVX2-NEXT: vpackssdw 16(%rdi), %xmm2, %xmm2
4530+
; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
45384531
; AVX2-NEXT: vzeroupper
45394532
; AVX2-NEXT: retq
45404533
;
@@ -4993,50 +4986,47 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
49934986
; AVX2: # %bb.0:
49944987
; AVX2-NEXT: vmovaps (%rdi), %ymm0
49954988
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
4996-
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
4997-
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3
4998-
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
4999-
; AVX2-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
5000-
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [1,1,1,1]
5001-
; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm5
5002-
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm6
5003-
; AVX2-NEXT: vpor %ymm5, %ymm6, %ymm5
5004-
; AVX2-NEXT: vblendvpd %ymm1, %ymm5, %ymm1, %ymm1
4989+
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
4990+
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3
4991+
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4
4992+
; AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3
4993+
; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm1, %ymm1
50054994
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
5006-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm5
4995+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
50074996
; AVX2-NEXT: vmovq %xmm1, %rax
5008-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6
5009-
; AVX2-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[2,3]
4997+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
4998+
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
50104999
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
50115000
; AVX2-NEXT: vmovq %xmm1, %rax
5012-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6
5013-
; AVX2-NEXT: vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm6[0],xmm5[3]
5001+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
5002+
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
50145003
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
5015-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm1
5016-
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm5[0,1,2],xmm1[0]
5017-
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm5
5018-
; AVX2-NEXT: vblendvps %xmm3, %xmm5, %xmm1, %xmm1
5019-
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm2
5020-
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
5021-
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
5022-
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm3
5023-
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm4
5024-
; AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3
5025-
; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
5004+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
5005+
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
5006+
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm3
5007+
; AVX2-NEXT: vmovdqa (%rdi), %xmm4
5008+
; AVX2-NEXT: vmovdqa 32(%rdi), %xmm5
5009+
; AVX2-NEXT: vpackssdw 48(%rdi), %xmm5, %xmm5
5010+
; AVX2-NEXT: vblendvps %xmm5, %xmm3, %xmm1, %xmm1
5011+
; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm2
5012+
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3
5013+
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
5014+
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
50265015
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
5027-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm3
5016+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm2
50285017
; AVX2-NEXT: vmovq %xmm0, %rax
5029-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm4
5030-
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
5018+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3
5019+
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
50315020
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
50325021
; AVX2-NEXT: vmovq %xmm0, %rax
5033-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm4
5034-
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
5022+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3
5023+
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
50355024
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
5036-
; AVX2-NEXT: vcvtsi2ss %rax, %xmm7, %xmm0
5037-
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
5038-
; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm3
5039-
; AVX2-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0
5025+
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm0
5026+
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
5027+
; AVX2-NEXT: vaddps %xmm0, %xmm0, %xmm2
5028+
; AVX2-NEXT: vpackssdw 16(%rdi), %xmm4, %xmm3
5029+
; AVX2-NEXT: vblendvps %xmm3, %xmm2, %xmm0, %xmm0
50405030
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
50415031
; AVX2-NEXT: retq
50425032
;

0 commit comments

Comments
 (0)