@@ -2966,6 +2966,95 @@ define <8 x i16> @shuffle_extract_concat_insert(<4 x i16> %lhsa, <4 x i16> %rhsa
2966
2966
ret <8 x i16 > %7
2967
2967
}
2968
2968
2969
; Codegen test: builds an <8 x i16> result from two lanes extracted out of a
; sign-extended <8 x i8> load (%p0) plus two sign-extended scalar i8 loads
; (%p1, %p2), then passes that vector through a shufflevector.
; The "; <PREFIX>-LABEL/-NEXT:" lines are FileCheck expectations for the x86
; assembly emitted for this function, one group per check prefix
; (SSE2, SSSE3, SSE41, AVX).
; NOTE(review): these check lines look autogenerated; presumably they should be
; regenerated with the test-update script rather than edited by hand -- confirm
; against the file header / RUN lines, which are not visible in this chunk.
+ define <8 x i16 > @shuffle_scalar_to_vector_extract (<8 x i8 >* %p0 , i8* %p1 , i8* %p2 ) {
2970
+ ; SSE2-LABEL: shuffle_scalar_to_vector_extract:
2971
+ ; SSE2: # %bb.0:
2972
+ ; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2973
+ ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2974
+ ; SSE2-NEXT: psraw $8, %xmm1
2975
+ ; SSE2-NEXT: pextrw $7, %xmm1, %eax
2976
+ ; SSE2-NEXT: movd %eax, %xmm2
2977
+ ; SSE2-NEXT: movsbl (%rsi), %eax
2978
+ ; SSE2-NEXT: movd %eax, %xmm0
2979
+ ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2980
+ ; SSE2-NEXT: movsbl (%rdx), %eax
2981
+ ; SSE2-NEXT: movd %eax, %xmm0
2982
+ ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
2983
+ ; SSE2-NEXT: pxor %xmm0, %xmm0
2984
+ ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2985
+ ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
2986
+ ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
2987
+ ; SSE2-NEXT: retq
2988
+ ;
2989
+ ; SSSE3-LABEL: shuffle_scalar_to_vector_extract:
2990
+ ; SSSE3: # %bb.0:
2991
+ ; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2992
+ ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2993
+ ; SSSE3-NEXT: psraw $8, %xmm1
2994
+ ; SSSE3-NEXT: pextrw $7, %xmm1, %eax
2995
+ ; SSSE3-NEXT: movd %eax, %xmm2
2996
+ ; SSSE3-NEXT: movsbl (%rsi), %eax
2997
+ ; SSSE3-NEXT: movd %eax, %xmm0
2998
+ ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2999
+ ; SSSE3-NEXT: movsbl (%rdx), %eax
3000
+ ; SSSE3-NEXT: movd %eax, %xmm0
3001
+ ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
3002
+ ; SSSE3-NEXT: pxor %xmm0, %xmm0
3003
+ ; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3004
+ ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
3005
+ ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
3006
+ ; SSSE3-NEXT: retq
3007
+ ;
3008
+ ; SSE41-LABEL: shuffle_scalar_to_vector_extract:
3009
+ ; SSE41: # %bb.0:
3010
+ ; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
3011
+ ; SSE41-NEXT: pextrw $4, %xmm0, %eax
3012
+ ; SSE41-NEXT: pextrw $7, %xmm0, %ecx
3013
+ ; SSE41-NEXT: pxor %xmm0, %xmm0
3014
+ ; SSE41-NEXT: pinsrw $1, %eax, %xmm0
3015
+ ; SSE41-NEXT: movl $65531, %eax # imm = 0xFFFB
3016
+ ; SSE41-NEXT: pinsrw $2, %eax, %xmm0
3017
+ ; SSE41-NEXT: pinsrw $4, %ecx, %xmm0
3018
+ ; SSE41-NEXT: movsbl (%rsi), %eax
3019
+ ; SSE41-NEXT: pinsrw $5, %eax, %xmm0
3020
+ ; SSE41-NEXT: movsbl (%rdx), %eax
3021
+ ; SSE41-NEXT: pinsrw $6, %eax, %xmm0
3022
+ ; SSE41-NEXT: retq
3023
+ ;
3024
+ ; AVX-LABEL: shuffle_scalar_to_vector_extract:
3025
+ ; AVX: # %bb.0:
3026
+ ; AVX-NEXT: vpmovsxbw (%rdi), %xmm0
3027
+ ; AVX-NEXT: vpextrw $4, %xmm0, %eax
3028
+ ; AVX-NEXT: vpextrw $7, %xmm0, %ecx
3029
+ ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
3030
+ ; AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
3031
+ ; AVX-NEXT: movl $65531, %eax # imm = 0xFFFB
3032
+ ; AVX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
3033
+ ; AVX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
3034
+ ; AVX-NEXT: movsbl (%rsi), %eax
3035
+ ; AVX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
3036
+ ; AVX-NEXT: movsbl (%rdx), %eax
3037
+ ; AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
3038
+ ; AVX-NEXT: retq
; Load eight i8 values from %p0 and sign-extend each lane to i16.
3039
+ %tmp = load <8 x i8 >, <8 x i8 >* %p0 , align 1
3040
+ %tmp1 = sext <8 x i8 > %tmp to <8 x i16 >
; Load one byte from each of %p1 and %p2 and sign-extend both to i16.
3041
+ %tmp2 = load i8 , i8* %p1 , align 1
3042
+ %cvt1 = sext i8 %tmp2 to i16
3043
+ %tmp3 = load i8 , i8* %p2 , align 1
3044
+ %cvt2 = sext i8 %tmp3 to i16
; Pull lanes 4 and 7 out of the sign-extended vector.
3045
+ %tmp4 = extractelement <8 x i16 > %tmp1 , i32 4
3046
+ %tmp5 = extractelement <8 x i16 > %tmp1 , i32 7
; Build the vector lane by lane, starting from a constant whose only defined
; lane is lane 2 (-5). Lanes 0, 3 and 7 are explicitly written with undef;
; lane 1 <- %tmp4, lane 4 <- %tmp5, lane 5 <- %cvt1, lane 6 <- %cvt2.
3047
+ %tmp6 = insertelement <8 x i16 > <i16 undef , i16 undef , i16 -5 , i16 undef , i16 undef , i16 undef , i16 undef , i16 undef >, i16 undef , i32 0
3048
+ %tmp7 = insertelement <8 x i16 > %tmp6 , i16 %tmp4 , i32 1
3049
+ %tmp8 = insertelement <8 x i16 > %tmp7 , i16 undef , i32 3
3050
+ %tmp9 = insertelement <8 x i16 > %tmp8 , i16 %tmp5 , i32 4
3051
+ %tmp10 = insertelement <8 x i16 > %tmp9 , i16 %cvt1 , i32 5
3052
+ %tmp11 = insertelement <8 x i16 > %tmp10 , i16 %cvt2 , i32 6
3053
+ %tmp12 = insertelement <8 x i16 > %tmp11 , i16 undef , i32 7
; Mask index 10 selects element 2 of the second shuffle operand, which is
; undef here; all other result lanes pass straight through from %tmp12.
3054
+ %tmp13 = shufflevector <8 x i16 > %tmp12 , <8 x i16 > undef , <8 x i32 > <i32 0 , i32 1 , i32 10 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
3055
+ ret <8 x i16 > %tmp13
3056
+ }
3057
+
2969
3058
define void @PR43024 () {
2970
3059
; SSE2-LABEL: PR43024:
2971
3060
; SSE2: # %bb.0:
0 commit comments