diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 55a76f1172cb9..ae6c7726a618e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2082,13 +2082,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .scalarize(0) .lower(); - // TODO: Only Try to form v2s16 with legal packed instructions. - getActionDefinitionsBuilder(G_FSHR) - .legalFor({{S32, S32}}) - .lowerFor({{V2S16, V2S16}}) - .clampMaxNumElementsStrict(0, S16, 2) - .scalarize(0) - .lower(); + auto &FSHRActionDefs = getActionDefinitionsBuilder(G_FSHR); + FSHRActionDefs.legalFor({{S32, S32}}) + .clampMaxNumElementsStrict(0, S16, 2); + if (ST.hasVOP3PInsts()) + FSHRActionDefs.lowerFor({{V2S16, V2S16}}); + FSHRActionDefs.scalarize(0).lower(); if (ST.hasVOP3PInsts()) { getActionDefinitionsBuilder(G_FSHL) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index d1ba24673043d..b74c6eeabb239 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -3404,32 +3404,19 @@ define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { ; GFX6-LABEL: s_fshr_v2i16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_lshl_b32 s5, s5, 16 -; GFX6-NEXT: s_and_b32 s4, s4, 0xffff -; GFX6-NEXT: s_or_b32 s4, s5, s4 -; GFX6-NEXT: s_bfe_u32 s5, s2, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s0, s0, 1 -; GFX6-NEXT: s_lshr_b32 s5, s5, 14 -; GFX6-NEXT: s_or_b32 s0, s0, s5 -; GFX6-NEXT: s_bfe_u32 s5, s3, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s1, s1, 1 -; GFX6-NEXT: s_lshr_b32 s5, s5, 14 -; GFX6-NEXT: s_lshl_b32 s2, s2, 1 -; GFX6-NEXT: s_xor_b32 s4, s4, -1 -; GFX6-NEXT: s_or_b32 s1, s1, s5 -; GFX6-NEXT: s_lshr_b32 s5, s4, 16 ; GFX6-NEXT: s_and_b32 s6, s4, 15 ; GFX6-NEXT: s_andn2_b32 s4, 15, s4 -; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s0, s0, s6 -; GFX6-NEXT: s_lshr_b32 s2, s2, s4 -; GFX6-NEXT: s_lshl_b32 s3, s3, 1 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s2, s2, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s0, s4 +; GFX6-NEXT: s_lshr_b32 s2, s2, s6 ; GFX6-NEXT: s_or_b32 s0, s0, s2 ; GFX6-NEXT: s_and_b32 s2, s5, 15 ; GFX6-NEXT: s_andn2_b32 s4, 15, s5 -; GFX6-NEXT: s_lshl_b32 s1, s1, s2 -; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001 -; GFX6-NEXT: s_lshr_b32 s2, s2, s4 +; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: s_and_b32 s3, s3, 0xffff +; GFX6-NEXT: s_lshl_b32 s1, s1, s4 +; GFX6-NEXT: s_lshr_b32 s2, s3, s2 ; GFX6-NEXT: s_or_b32 s1, s1, s2 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 @@ -3439,33 +3426,22 @@ define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, < ; ; GFX8-LABEL: s_fshr_v2i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s5, 0xffff, s1 ; GFX8-NEXT: s_lshr_b32 s3, s0, 16 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16 -; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: s_lshr_b32 s5, s5, 15 -; GFX8-NEXT: s_lshl_b32 s1, s1, 1 -; GFX8-NEXT: s_or_b32 s0, s0, s5 -; GFX8-NEXT: s_lshl_b32 s3, s3, 1 -; GFX8-NEXT: s_lshr_b32 s5, s4, 15 -; GFX8-NEXT: s_xor_b32 s2, s2, -1 -; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 -; GFX8-NEXT: s_or_b32 s3, s3, s5 ; GFX8-NEXT: s_lshr_b32 s5, s2, 16 ; GFX8-NEXT: s_and_b32 s6, s2, 15 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2 -; GFX8-NEXT: s_lshr_b32 s1, s1, 1 -; GFX8-NEXT: s_lshl_b32 s0, s0, s6 -; GFX8-NEXT: s_lshr_b32 s1, s1, s2 -; GFX8-NEXT: s_lshl_b32 s4, s4, 1 +; GFX8-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX8-NEXT: s_lshl_b32 s0, s0, s2 +; GFX8-NEXT: s_lshr_b32 s1, s1, s6 ; GFX8-NEXT: s_or_b32 s0, s0, s1 ; GFX8-NEXT: s_and_b32 s1, s5, 15 -; GFX8-NEXT: s_lshl_b32 s1, s3, s1 -; GFX8-NEXT: s_and_b32 s3, 0xffff, s4 ; GFX8-NEXT: s_andn2_b32 s2, 15, s5 -; GFX8-NEXT: s_lshr_b32 s3, s3, 1 -; GFX8-NEXT: s_lshr_b32 s2, s3, s2 -; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: s_lshl_b32 s3, s3, 1 +; GFX8-NEXT: s_lshl_b32 s2, s3, s2 +; GFX8-NEXT: s_lshr_b32 s1, s4, s1 +; GFX8-NEXT: s_or_b32 s1, s2, s1 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16 @@ -3547,65 +3523,43 @@ define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { ; GFX6-LABEL: v_fshr_v2i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 -; GFX6-NEXT: v_bfe_u32 v5, v2, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX6-NEXT: v_bfe_u32 v5, v3, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5 -; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v4 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 -; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, v6, v2 +; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v5 -; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1 -; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fshr_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v1 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX8-NEXT: v_mov_b32_e32 v4, 1 -; GFX8-NEXT: v_mov_b32_e32 v5, 15 -; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_sdwa v6, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 -; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v1 -; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v2 -; GFX8-NEXT: v_xor_b32_e32 v7, -1, v2 -; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 -; GFX8-NEXT: v_lshlrev_b16_e32 v3, v4, v3 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v6 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4 -; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX8-NEXT: v_and_b32_sdwa v4, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v2 +; GFX8-NEXT: v_and_b32_e32 v3, 15, v2 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 +; GFX8-NEXT: v_lshlrev_b16_e32 v5, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_e32 v4, v4, v5 +; GFX8-NEXT: v_lshrrev_b16_e32 v3, v3, v1 +; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX8-NEXT: v_mov_b32_e32 v4, 15 ; GFX8-NEXT: v_mov_b32_e32 v5, -1 +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v5, 1 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1 -; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3657,13 +3611,11 @@ define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX6-LABEL: v_fshr_v2i16_4_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v2 +; GFX6-NEXT: v_bfe_u32 v2, v2, 4, 12 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v2 +; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -3716,35 +3668,22 @@ define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) { define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) { ; GFX6-LABEL: v_fshr_v2i16_ssv: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX6-NEXT: s_bfe_u32 s4, s2, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s0, s0, 1 -; GFX6-NEXT: s_lshr_b32 s4, s4, 14 -; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX6-NEXT: s_or_b32 s0, s0, s4 -; GFX6-NEXT: s_lshl_b32 s2, s2, 1 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 -; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001 -; GFX6-NEXT: s_bfe_u32 s4, s3, 0xf0001 -; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 -; GFX6-NEXT: s_lshl_b32 s1, s1, 1 -; GFX6-NEXT: s_lshr_b32 s4, s4, 14 -; GFX6-NEXT: s_lshl_b32 s3, s3, 1 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 +; GFX6-NEXT: s_and_b32 s0, s2, 0xffff +; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 +; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1 ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 -; GFX6-NEXT: s_or_b32 s1, s1, s4 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1 -; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001 -; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2 -; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: s_lshl_b32 s0, s1, 1 +; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 +; GFX6-NEXT: s_and_b32 s0, s3, 0xffff +; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 @@ -3753,36 +3692,24 @@ define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg % ; ; GFX8-LABEL: v_fshr_v2i16_ssv: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s4, 0xffff, s1 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v0 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: s_lshr_b32 s4, s4, 15 -; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX8-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-NEXT: s_or_b32 s0, s0, s4 -; GFX8-NEXT: s_lshl_b32 s1, s1, 1 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX8-NEXT: v_xor_b32_e32 v2, -1, v0 -; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 -; GFX8-NEXT: s_and_b32 s0, 0xffff, s1 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX8-NEXT: s_lshr_b32 s0, s0, 1 -; GFX8-NEXT: v_lshrrev_b16_e64 v2, v2, s0 -; GFX8-NEXT: s_lshr_b32 s4, s3, 15 -; GFX8-NEXT: s_lshl_b32 s3, s3, 1 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0 +; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, 15 ; GFX8-NEXT: v_mov_b32_e32 v3, -1 -; GFX8-NEXT: s_lshl_b32 s2, s2, 1 ; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: s_and_b32 s0, 0xffff, s3 -; GFX8-NEXT: s_or_b32 s2, s2, s4 +; GFX8-NEXT: s_lshr_b32 s3, s1, 16 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX8-NEXT: s_lshr_b32 s0, s0, 1 -; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2 -; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: s_lshl_b32 s0, s2, 1 +; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0 +; GFX8-NEXT: v_lshrrev_b16_e64 v2, v2, s3 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -3838,33 +3765,20 @@ define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg % define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) { ; GFX6-LABEL: v_fshr_v2i16_svs: ; GFX6: ; %bb.0: -; GFX6-NEXT: v_bfe_u32 v2, v0, 1, 15 -; GFX6-NEXT: s_lshl_b32 s3, s3, 16 -; GFX6-NEXT: s_and_b32 s2, s2, 0xffff +; GFX6-NEXT: s_and_b32 s4, s2, 15 +; GFX6-NEXT: s_andn2_b32 s2, 15, s2 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1 -; GFX6-NEXT: v_lshrrev_b32_e32 v2, 14, v2 -; GFX6-NEXT: v_bfe_u32 v3, v1, 1, 15 -; GFX6-NEXT: s_or_b32 s2, s3, s2 -; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 -; GFX6-NEXT: s_lshl_b32 s0, s1, 1 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3 -; GFX6-NEXT: v_or_b32_e32 v3, s0, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX6-NEXT: s_xor_b32 s0, s2, -1 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; GFX6-NEXT: s_lshr_b32 s1, s0, 16 -; GFX6-NEXT: s_and_b32 s2, s0, 15 -; GFX6-NEXT: s_andn2_b32 s0, 15, s0 -; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, s2, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0 -; GFX6-NEXT: s_and_b32 s0, s1, 15 -; GFX6-NEXT: s_andn2_b32 s1, 15, s1 -; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15 -; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, s0, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v1, s1, v1 -; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX6-NEXT: s_lshl_b32 s0, s0, s2 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s4, v0 +; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 +; GFX6-NEXT: s_and_b32 s0, s3, 15 +; GFX6-NEXT: s_andn2_b32 s2, 15, s3 +; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX6-NEXT: s_lshl_b32 s1, s1, s2 +; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1 +; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 @@ -3874,31 +3788,21 @@ define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, < ; GFX8-LABEL: v_fshr_v2i16_svs: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_lshr_b32 s2, s0, 16 +; GFX8-NEXT: s_lshr_b32 s3, s1, 16 +; GFX8-NEXT: s_and_b32 s4, s1, 15 +; GFX8-NEXT: s_andn2_b32 s1, 15, s1 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: v_lshrrev_b16_e32 v1, 15, v0 -; GFX8-NEXT: v_mov_b32_e32 v2, 15 +; GFX8-NEXT: s_lshl_b32 s0, s0, s1 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, s4, v0 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 -; GFX8-NEXT: s_lshl_b32 s0, s2, 1 -; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 -; GFX8-NEXT: v_mov_b32_e32 v4, 1 -; GFX8-NEXT: s_xor_b32 s0, s1, -1 -; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: s_lshr_b32 s1, s0, 16 -; GFX8-NEXT: s_and_b32 s2, s0, 15 -; GFX8-NEXT: s_andn2_b32 s0, 15, s0 -; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 -; GFX8-NEXT: v_lshrrev_b16_e32 v3, s0, v3 -; GFX8-NEXT: s_and_b32 s0, s1, 15 -; GFX8-NEXT: s_andn2_b32 s1, 15, s1 -; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0 -; GFX8-NEXT: v_lshlrev_b16_e32 v2, s0, v2 -; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0 -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1 +; GFX8-NEXT: s_and_b32 s0, s3, 15 +; GFX8-NEXT: s_andn2_b32 s1, 15, s3 +; GFX8-NEXT: s_lshl_b32 s2, s2, 1 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: s_lshl_b32 s1, s2, s1 +; GFX8-NEXT: v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v0, s1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: ; return to shader part epilog @@ -3963,32 +3867,19 @@ define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, < define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { ; GFX6-LABEL: v_fshr_v2i16_vss: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_lshl_b32 s3, s3, 16 -; GFX6-NEXT: s_and_b32 s2, s2, 0xffff -; GFX6-NEXT: s_or_b32 s2, s3, s2 -; GFX6-NEXT: s_bfe_u32 s3, s0, 0xf0001 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX6-NEXT: s_lshr_b32 s3, s3, 14 -; GFX6-NEXT: v_or_b32_e32 v0, s3, v0 -; GFX6-NEXT: s_bfe_u32 s3, s1, 0xf0001 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; GFX6-NEXT: s_lshr_b32 s3, s3, 14 -; GFX6-NEXT: s_lshl_b32 s0, s0, 1 -; GFX6-NEXT: s_xor_b32 s2, s2, -1 -; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 -; GFX6-NEXT: s_lshr_b32 s3, s2, 16 ; GFX6-NEXT: s_and_b32 s4, s2, 15 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2 -; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 -; GFX6-NEXT: s_lshr_b32 s0, s0, s2 -; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: s_and_b32 s0, s0, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s2, v0 +; GFX6-NEXT: s_lshr_b32 s0, s0, s4 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 ; GFX6-NEXT: s_and_b32 s0, s3, 15 ; GFX6-NEXT: s_andn2_b32 s2, 15, s3 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1 -; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001 -; GFX6-NEXT: s_lshr_b32 s0, s0, s2 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: s_and_b32 s1, s1, 0xffff +; GFX6-NEXT: v_lshlrev_b32_e32 v1, s2, v1 +; GFX6-NEXT: s_lshr_b32 s0, s1, s0 ; GFX6-NEXT: v_or_b32_e32 v1, s0, v1 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -3998,32 +3889,21 @@ define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, < ; ; GFX8-LABEL: v_fshr_v2i16_vss: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s3, 0xffff, s0 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0 -; GFX8-NEXT: s_lshr_b32 s3, s3, 15 -; GFX8-NEXT: v_mov_b32_e32 v2, 1 -; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 -; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: s_lshr_b32 s3, s2, 15 -; GFX8-NEXT: s_xor_b32 s1, s1, -1 -; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 -; GFX8-NEXT: v_or_b32_e32 v0, s3, v0 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16 ; GFX8-NEXT: s_and_b32 s4, s1, 15 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1 -; GFX8-NEXT: s_lshr_b32 s0, s0, 1 -; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v1 -; GFX8-NEXT: s_lshr_b32 s0, s0, s1 -; GFX8-NEXT: s_lshl_b32 s2, s2, 1 +; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0 +; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX8-NEXT: v_lshlrev_b16_e32 v1, s1, v1 +; GFX8-NEXT: s_lshr_b32 s0, s0, s4 +; GFX8-NEXT: v_mov_b32_e32 v2, 1 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 ; GFX8-NEXT: s_and_b32 s0, s3, 15 -; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0 -; GFX8-NEXT: s_and_b32 s0, 0xffff, s2 ; GFX8-NEXT: s_andn2_b32 s1, 15, s3 -; GFX8-NEXT: s_lshr_b32 s0, s0, 1 -; GFX8-NEXT: s_lshr_b32 s0, s0, s1 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0 +; GFX8-NEXT: s_lshr_b32 s0, s2, s0 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -4084,46 +3964,26 @@ define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, < define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) { ; GFX6-LABEL: s_fshr_v3i16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_and_b32 s7, s7, 0xffff -; GFX6-NEXT: s_and_b32 s6, s6, 0xffff -; GFX6-NEXT: s_lshl_b32 s7, s7, 16 -; GFX6-NEXT: s_or_b32 s6, s6, s7 -; GFX6-NEXT: s_and_b32 s7, s8, 0xffff -; GFX6-NEXT: s_bfe_u32 s8, s3, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s0, s0, 1 -; GFX6-NEXT: s_lshr_b32 s8, s8, 14 -; GFX6-NEXT: s_or_b32 s0, s0, s8 -; GFX6-NEXT: s_bfe_u32 s8, s4, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s1, s1, 1 -; GFX6-NEXT: s_lshr_b32 s8, s8, 14 -; GFX6-NEXT: s_lshl_b32 s3, s3, 1 -; GFX6-NEXT: s_xor_b32 s6, s6, -1 -; GFX6-NEXT: s_or_b32 s1, s1, s8 -; GFX6-NEXT: s_lshr_b32 s8, s6, 16 ; GFX6-NEXT: s_and_b32 s9, s6, 15 ; GFX6-NEXT: s_andn2_b32 s6, 15, s6 -; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s0, s0, s9 -; GFX6-NEXT: s_lshr_b32 s3, s3, s6 -; GFX6-NEXT: s_lshl_b32 s4, s4, 1 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s3, s3, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s0, s6 +; GFX6-NEXT: s_lshr_b32 s3, s3, s9 ; GFX6-NEXT: s_or_b32 s0, s0, s3 -; GFX6-NEXT: s_and_b32 s3, s8, 15 -; GFX6-NEXT: s_andn2_b32 s6, 15, s8 -; GFX6-NEXT: s_lshl_b32 s1, s1, s3 -; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001 -; GFX6-NEXT: s_lshr_b32 s3, s3, s6 -; GFX6-NEXT: s_or_b32 s1, s1, s3 -; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001 +; GFX6-NEXT: s_and_b32 s3, s7, 15 +; GFX6-NEXT: s_andn2_b32 s6, 15, s7 +; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: s_and_b32 s4, s4, 0xffff +; GFX6-NEXT: s_lshl_b32 s1, s1, s6 +; GFX6-NEXT: s_lshr_b32 s3, s4, s3 +; GFX6-NEXT: s_andn2_b32 s4, 15, s8 ; GFX6-NEXT: s_lshl_b32 s2, s2, 1 -; GFX6-NEXT: s_lshr_b32 s3, s3, 14 -; GFX6-NEXT: s_or_b32 s2, s2, s3 -; GFX6-NEXT: s_lshl_b32 s3, s5, 1 -; GFX6-NEXT: s_xor_b32 s4, s7, -1 -; GFX6-NEXT: s_and_b32 s5, s4, 15 -; GFX6-NEXT: s_andn2_b32 s4, 15, s4 -; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s2, s2, s5 -; GFX6-NEXT: s_lshr_b32 s3, s3, s4 +; GFX6-NEXT: s_or_b32 s1, s1, s3 +; GFX6-NEXT: s_and_b32 s3, s8, 15 +; GFX6-NEXT: s_lshl_b32 s2, s2, s4 +; GFX6-NEXT: s_and_b32 s4, s5, 0xffff +; GFX6-NEXT: s_lshr_b32 s3, s4, s3 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_or_b32 s2, s2, s3 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 @@ -4134,43 +3994,26 @@ define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, < ; ; GFX8-LABEL: s_fshr_v3i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s8, 0xffff, s2 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16 -; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: s_lshr_b32 s8, s8, 15 -; GFX8-NEXT: s_lshl_b32 s2, s2, 1 -; GFX8-NEXT: s_or_b32 s0, s0, s8 -; GFX8-NEXT: s_lshl_b32 s6, s6, 1 -; GFX8-NEXT: s_lshr_b32 s8, s7, 15 -; GFX8-NEXT: s_xor_b32 s4, s4, -1 -; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX8-NEXT: s_or_b32 s6, s6, s8 ; GFX8-NEXT: s_lshr_b32 s8, s4, 16 ; GFX8-NEXT: s_and_b32 s9, s4, 15 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4 -; GFX8-NEXT: s_lshr_b32 s2, s2, 1 -; GFX8-NEXT: s_lshl_b32 s0, s0, s9 -; GFX8-NEXT: s_lshr_b32 s2, s2, s4 -; GFX8-NEXT: s_lshl_b32 s7, s7, 1 +; GFX8-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX8-NEXT: s_lshl_b32 s0, s0, s4 +; GFX8-NEXT: s_lshr_b32 s2, s2, s9 ; GFX8-NEXT: s_or_b32 s0, s0, s2 ; GFX8-NEXT: s_and_b32 s2, s8, 15 -; GFX8-NEXT: s_lshl_b32 s2, s6, s2 -; GFX8-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX8-NEXT: s_andn2_b32 s4, 15, s8 -; GFX8-NEXT: s_lshr_b32 s6, s6, 1 -; GFX8-NEXT: s_lshr_b32 s4, s6, s4 -; GFX8-NEXT: s_or_b32 s2, s2, s4 -; GFX8-NEXT: s_and_b32 s4, 0xffff, s3 +; GFX8-NEXT: s_lshl_b32 s6, s6, 1 +; GFX8-NEXT: s_lshl_b32 s4, s6, s4 +; GFX8-NEXT: s_lshr_b32 s2, s7, s2 +; GFX8-NEXT: s_or_b32 s2, s4, s2 +; GFX8-NEXT: s_and_b32 s4, s5, 15 +; GFX8-NEXT: s_andn2_b32 s5, 15, s5 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1 -; GFX8-NEXT: s_lshr_b32 s4, s4, 15 -; GFX8-NEXT: s_lshl_b32 s3, s3, 1 -; GFX8-NEXT: s_or_b32 s1, s1, s4 -; GFX8-NEXT: s_xor_b32 s4, s5, -1 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX8-NEXT: s_and_b32 s5, s4, 15 -; GFX8-NEXT: s_andn2_b32 s4, 15, s4 -; GFX8-NEXT: s_lshr_b32 s3, s3, 1 ; GFX8-NEXT: s_lshl_b32 s1, s1, s5 ; GFX8-NEXT: s_lshr_b32 s3, s3, s4 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 @@ -4325,92 +4168,58 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) ; GFX6-LABEL: v_fshr_v3i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 -; GFX6-NEXT: v_or_b32_e32 v6, v6, v7 -; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v8 -; GFX6-NEXT: v_bfe_u32 v8, v3, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX6-NEXT: v_bfe_u32 v8, v4, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v8 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v6 ; GFX6-NEXT: v_and_b32_e32 v9, 15, v6 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 -; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, v9, v3 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX6-NEXT: v_and_b32_e32 v3, 15, v8 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8 +; GFX6-NEXT: v_and_b32_e32 v3, 15, v7 +; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1 -; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 -; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3 -; GFX6-NEXT: v_xor_b32_e32 v4, -1, v7 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v5 -; GFX6-NEXT: v_and_b32_e32 v5, 15, v4 -; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v4 +; GFX6-NEXT: v_xor_b32_e32 v4, -1, v8 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, v6, v1 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 -; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX6-NEXT: v_and_b32_e32 v3, 15, v8 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5 +; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v4 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fshr_v3i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 -; GFX8-NEXT: v_mov_b32_e32 v7, 1 -; GFX8-NEXT: v_mov_b32_e32 v8, 15 -; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 -; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 -; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_and_b32_e32 v7, 15, v4 -; GFX8-NEXT: v_xor_b32_e32 v10, -1, v4 -; GFX8-NEXT: v_and_b32_e32 v10, 15, v10 -; GFX8-NEXT: v_lshlrev_b16_e32 v6, v7, v6 -; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v9 -; GFX8-NEXT: v_lshrrev_b16_e32 v7, v10, v7 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 -; GFX8-NEXT: v_and_b32_sdwa v7, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_e32 v7, -1, v4 +; GFX8-NEXT: v_and_b32_e32 v6, 15, v4 +; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 +; GFX8-NEXT: v_lshlrev_b16_e32 v8, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_e32 v7, v7, v8 +; GFX8-NEXT: v_lshrrev_b16_e32 v6, v6, v2 +; GFX8-NEXT: v_or_b32_e32 v6, v7, v6 +; GFX8-NEXT: v_mov_b32_e32 v7, 15 ; GFX8-NEXT: v_mov_b32_e32 v8, -1 +; GFX8-NEXT: v_and_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v8, 1 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 +; GFX8-NEXT: v_and_b32_e32 v2, 15, v5 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v1 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, 15, v3 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v3 -; GFX8-NEXT: v_xor_b32_e32 v3, -1, v5 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v3 -; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3 -; GFX8-NEXT: v_and_b32_e32 v3, 15, v3 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v3, v2 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v3 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -4484,64 +4293,38 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) { ; GFX6-LABEL: s_fshr_v4i16: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_lshl_b32 s9, s9, 16 -; GFX6-NEXT: s_and_b32 s8, s8, 0xffff -; GFX6-NEXT: s_or_b32 s8, s9, s8 -; GFX6-NEXT: s_lshl_b32 s9, s11, 16 -; GFX6-NEXT: s_and_b32 s10, s10, 0xffff -; GFX6-NEXT: s_or_b32 s9, s9, s10 -; GFX6-NEXT: s_bfe_u32 s10, s4, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s0, s0, 1 -; GFX6-NEXT: s_lshr_b32 s10, s10, 14 -; GFX6-NEXT: s_or_b32 s0, s0, s10 -; GFX6-NEXT: s_bfe_u32 s10, s5, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s1, s1, 1 -; GFX6-NEXT: s_lshr_b32 s10, s10, 14 -; GFX6-NEXT: s_lshl_b32 s4, s4, 1 -; GFX6-NEXT: s_xor_b32 s8, s8, -1 -; GFX6-NEXT: s_or_b32 s1, s1, s10 -; GFX6-NEXT: s_lshr_b32 s10, s8, 16 -; GFX6-NEXT: s_and_b32 s11, s8, 15 +; GFX6-NEXT: s_and_b32 s12, s8, 15 ; GFX6-NEXT: s_andn2_b32 s8, 15, s8 -; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s0, s0, s11 -; GFX6-NEXT: s_lshr_b32 s4, s4, s8 -; GFX6-NEXT: s_lshl_b32 s5, s5, 1 +; GFX6-NEXT: s_lshl_b32 s0, s0, 1 +; GFX6-NEXT: s_and_b32 s4, s4, 0xffff +; GFX6-NEXT: s_lshl_b32 s0, s0, s8 +; GFX6-NEXT: s_lshr_b32 s4, s4, s12 ; GFX6-NEXT: s_or_b32 s0, s0, s4 -; GFX6-NEXT: s_and_b32 s4, s10, 15 -; GFX6-NEXT: s_andn2_b32 s8, 15, s10 -; GFX6-NEXT: s_lshl_b32 s1, s1, s4 -; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001 -; GFX6-NEXT: s_lshr_b32 s4, s4, s8 +; GFX6-NEXT: s_and_b32 s4, s9, 15 +; GFX6-NEXT: s_andn2_b32 s8, 15, s9 +; GFX6-NEXT: s_lshl_b32 s1, s1, 1 +; GFX6-NEXT: s_and_b32 s5, s5, 0xffff +; GFX6-NEXT: s_lshl_b32 s1, s1, s8 +; GFX6-NEXT: s_lshr_b32 s4, s5, s4 ; GFX6-NEXT: s_or_b32 s1, s1, s4 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16 +; GFX6-NEXT: s_andn2_b32 s4, 15, s10 +; GFX6-NEXT: s_lshl_b32 s2, s2, 1 ; GFX6-NEXT: s_or_b32 s0, s0, s1 -; GFX6-NEXT: s_lshl_b32 s1, s2, 1 -; GFX6-NEXT: s_bfe_u32 s2, s6, 0xf0001 -; GFX6-NEXT: s_lshr_b32 s2, s2, 14 -; GFX6-NEXT: s_or_b32 s1, s1, s2 -; GFX6-NEXT: s_lshl_b32 s2, s3, 1 -; GFX6-NEXT: s_bfe_u32 s3, s7, 0xf0001 -; GFX6-NEXT: s_lshr_b32 s3, s3, 14 -; GFX6-NEXT: s_or_b32 s2, s2, s3 -; GFX6-NEXT: s_lshl_b32 s3, s6, 1 -; GFX6-NEXT: s_xor_b32 s5, s9, -1 -; GFX6-NEXT: s_lshl_b32 s4, s7, 1 -; GFX6-NEXT: s_lshr_b32 s6, s5, 16 -; GFX6-NEXT: s_and_b32 s7, s5, 15 -; GFX6-NEXT: s_andn2_b32 s5, 15, s5 -; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001 -; GFX6-NEXT: s_lshl_b32 s1, s1, s7 -; GFX6-NEXT: s_lshr_b32 s3, s3, s5 -; GFX6-NEXT: s_or_b32 s1, s1, s3 -; GFX6-NEXT: s_and_b32 s3, s6, 15 -; GFX6-NEXT: s_andn2_b32 s5, 15, s6 -; GFX6-NEXT: s_lshl_b32 s2, s2, s3 -; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001 -; GFX6-NEXT: s_lshr_b32 s3, s3, s5 -; GFX6-NEXT: s_or_b32 s2, s2, s3 +; GFX6-NEXT: s_and_b32 s1, s10, 15 +; GFX6-NEXT: s_lshl_b32 s2, s2, s4 +; GFX6-NEXT: s_and_b32 s4, s6, 0xffff +; GFX6-NEXT: s_lshr_b32 s1, s4, s1 +; GFX6-NEXT: s_andn2_b32 s4, 15, s11 +; GFX6-NEXT: s_lshl_b32 s3, s3, 1 +; GFX6-NEXT: s_or_b32 s1, s2, s1 +; GFX6-NEXT: s_and_b32 s2, s11, 15 +; GFX6-NEXT: s_lshl_b32 s3, s3, s4 +; GFX6-NEXT: s_and_b32 s4, s7, 0xffff +; GFX6-NEXT: s_lshr_b32 s2, s4, s2 +; GFX6-NEXT: s_or_b32 s2, s3, s2 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16 @@ -4550,63 +4333,41 @@ define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg % ; ; GFX8-LABEL: s_fshr_v4i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_and_b32 s8, 0xffff, s2 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16 -; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: s_lshr_b32 s8, s8, 15 -; GFX8-NEXT: s_lshl_b32 s2, s2, 1 -; GFX8-NEXT: s_or_b32 s0, s0, s8 -; GFX8-NEXT: s_lshl_b32 s6, s6, 1 -; GFX8-NEXT: s_lshr_b32 s8, s7, 15 -; GFX8-NEXT: s_xor_b32 s4, s4, -1 -; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX8-NEXT: s_or_b32 s6, s6, s8 ; GFX8-NEXT: s_lshr_b32 s8, s4, 16 ; GFX8-NEXT: s_and_b32 s9, s4, 15 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4 -; GFX8-NEXT: s_lshr_b32 s2, s2, 1 -; GFX8-NEXT: s_lshl_b32 s0, s0, s9 -; GFX8-NEXT: s_lshr_b32 s2, s2, s4 -; GFX8-NEXT: s_lshl_b32 s7, s7, 1 +; GFX8-NEXT: s_lshl_b32 s0, s0, 1 +; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 +; GFX8-NEXT: s_lshl_b32 s0, s0, s4 +; GFX8-NEXT: s_lshr_b32 s2, s2, s9 ; GFX8-NEXT: s_or_b32 s0, s0, s2 ; GFX8-NEXT: s_and_b32 s2, s8, 15 -; GFX8-NEXT: s_lshl_b32 s2, s6, s2 -; GFX8-NEXT: s_and_b32 s6, 0xffff, s7 ; GFX8-NEXT: s_andn2_b32 s4, 15, s8 -; GFX8-NEXT: s_lshr_b32 s6, s6, 1 -; GFX8-NEXT: s_lshr_b32 s4, s6, s4 -; GFX8-NEXT: s_or_b32 s2, s2, s4 +; GFX8-NEXT: s_lshl_b32 s6, s6, 1 +; GFX8-NEXT: s_lshl_b32 s4, s6, s4 +; GFX8-NEXT: s_lshr_b32 s2, s7, s2 +; GFX8-NEXT: s_or_b32 s2, s4, s2 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16 -; GFX8-NEXT: s_and_b32 s6, 0xffff, s3 ; GFX8-NEXT: s_or_b32 s0, s0, s2 ; GFX8-NEXT: s_lshr_b32 s2, s1, 16 ; GFX8-NEXT: s_lshr_b32 s4, s3, 16 -; GFX8-NEXT: s_lshl_b32 s1, s1, 1 -; GFX8-NEXT: s_lshr_b32 s6, s6, 15 -; GFX8-NEXT: s_lshl_b32 s3, s3, 1 -; GFX8-NEXT: s_or_b32 s1, s1, s6 -; GFX8-NEXT: s_lshl_b32 s2, s2, 1 -; GFX8-NEXT: s_lshr_b32 s6, s4, 15 -; GFX8-NEXT: s_xor_b32 s5, s5, -1 -; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX8-NEXT: s_or_b32 s2, s2, s6 ; GFX8-NEXT: s_lshr_b32 s6, s5, 16 ; GFX8-NEXT: s_and_b32 s7, s5, 15 ; GFX8-NEXT: s_andn2_b32 s5, 15, s5 -; GFX8-NEXT: s_lshr_b32 s3, s3, 1 -; GFX8-NEXT: s_lshl_b32 s1, s1, s7 -; GFX8-NEXT: s_lshr_b32 s3, s3, s5 -; GFX8-NEXT: s_lshl_b32 s4, s4, 1 +; GFX8-NEXT: s_lshl_b32 s1, s1, 1 +; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 +; GFX8-NEXT: s_lshl_b32 s1, s1, s5 +; GFX8-NEXT: s_lshr_b32 s3, s3, s7 ; GFX8-NEXT: s_or_b32 s1, s1, s3 ; GFX8-NEXT: s_and_b32 s3, s6, 15 -; GFX8-NEXT: s_lshl_b32 s2, s2, s3 -; GFX8-NEXT: s_and_b32 s3, 0xffff, s4 ; GFX8-NEXT: s_andn2_b32 s5, 15, s6 -; GFX8-NEXT: s_lshr_b32 s3, s3, 1 -; GFX8-NEXT: s_lshr_b32 s3, s3, s5 +; GFX8-NEXT: s_lshl_b32 s2, s2, 1 +; GFX8-NEXT: s_lshl_b32 s2, s2, s5 +; GFX8-NEXT: s_lshr_b32 s3, s4, s3 ; GFX8-NEXT: s_or_b32 s2, s2, s3 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 @@ -4742,120 +4503,76 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) ; GFX6-LABEL: v_fshr_v4i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8 -; GFX6-NEXT: v_or_b32_e32 v8, v9, v8 -; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11 -; GFX6-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX6-NEXT: v_or_b32_e32 v9, v9, v10 -; GFX6-NEXT: v_bfe_u32 v10, v4, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10 -; GFX6-NEXT: v_or_b32_e32 v0, v0, v10 -; GFX6-NEXT: v_bfe_u32 v10, v5, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 -; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10 -; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v10 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 -; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8 -; GFX6-NEXT: v_and_b32_e32 v11, 15, v8 +; GFX6-NEXT: v_and_b32_e32 v12, 15, v8 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 -; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, v8, v0 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v12, v4 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX6-NEXT: v_and_b32_e32 v4, 15, v10 -; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 +; GFX6-NEXT: v_and_b32_e32 v4, 15, v9 +; GFX6-NEXT: v_xor_b32_e32 v8, -1, v9 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 -; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 -; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4 -; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v4, v5 +; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, v8, v1 +; GFX6-NEXT: v_and_b32_e32 v5, 15, v5 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15 +; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 15, v10 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v4, v5 +; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11 +; GFX6-NEXT: v_and_b32_e32 v5, 15, v5 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4 -; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9 -; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v7 -; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 -; GFX6-NEXT: v_and_b32_e32 v8, 15, v6 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 -; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 -; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 -; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX6-NEXT: v_and_b32_e32 v4, 15, v7 -; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7 -; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 -; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 -; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 15, v11 +; GFX6-NEXT: v_lshlrev_b32_e32 v3, v5, v3 +; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v7 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v4, v5 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_fshr_v4i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 -; GFX8-NEXT: v_mov_b32_e32 v7, 1 -; GFX8-NEXT: v_mov_b32_e32 v8, 15 -; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 -; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 -; GFX8-NEXT: v_xor_b32_e32 v11, -1, v4 -; GFX8-NEXT: v_and_b32_e32 v10, 15, v4 -; GFX8-NEXT: v_and_b32_e32 v11, 15, v11 -; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v9 -; GFX8-NEXT: v_lshlrev_b16_e32 v6, v10, v6 -; GFX8-NEXT: v_lshrrev_b16_e32 v9, v11, v9 -; GFX8-NEXT: v_mov_b32_e32 v10, -1 -; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_or_b32_e32 v6, v6, v9 -; GFX8-NEXT: v_and_b32_sdwa v9, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_e32 v7, -1, v4 +; GFX8-NEXT: v_and_b32_e32 v6, 15, v4 +; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 +; GFX8-NEXT: v_lshlrev_b16_e32 v8, 1, v0 +; GFX8-NEXT: v_lshlrev_b16_e32 v7, v7, v8 +; GFX8-NEXT: v_lshrrev_b16_e32 v6, v6, v2 +; GFX8-NEXT: v_or_b32_e32 v6, v7, v6 +; GFX8-NEXT: v_mov_b32_e32 v7, 15 +; GFX8-NEXT: v_mov_b32_e32 v9, -1 +; GFX8-NEXT: v_and_b32_sdwa v8, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v10, 1 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v0, v9, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v1 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v3 -; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_xor_b32_e32 v7, -1, v5 +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 ; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v6, 15, v5 -; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v4 -; GFX8-NEXT: v_lshlrev_b16_e32 v2, v6, v2 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX8-NEXT: v_and_b32_sdwa v4, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_xor_b32_sdwa v5, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v2, 15, v5 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 +; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v1 +; GFX8-NEXT: v_lshlrev_b16_e32 v4, v4, v6 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v3 +; GFX8-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX8-NEXT: v_and_b32_sdwa v4, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 -; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 -; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1 -; GFX8-NEXT: v_lshrrev_b16_e32 v3, v5, v3 +; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v1, v5, v1 +; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir index 0a15cc3824ae7..5f610924a33cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -196,90 +196,49 @@ body: | ; SI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC4]], [[TRUNC5]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32) - ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; ; VI-LABEL: name: test_fshr_v2s16_v2s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -287,68 +246,42 @@ body: | ; VI-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[AND]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C3]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND3]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[AND2]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL3]], [[LSHR4]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR9]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR11]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; ; GFX9-LABEL: name: test_fshr_v2s16_v2s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -699,136 +632,75 @@ body: | ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY6]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL4]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST4]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[LSHR7]], [[ZEXT5]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY12]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[LSHR9]], [[ZEXT7]](s32) - ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) - ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY13]](s32) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[LSHR11]], [[ZEXT9]](s32) - ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL9]] - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]] - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) - ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C2]] - ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C3]] - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR5]](s16) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) - ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[SHL8]], [[C5]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY17]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[ZEXT11]](s32) - ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR14]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC13]], [[TRUNC14]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC3]], [[TRUNC4]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC5]], [[TRUNC6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[COPY7]](s32) + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC7]], [[TRUNC8]] ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL11]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL12]] - ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[C]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]] - ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x s16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL6]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) ; ; VI-LABEL: name: test_fshr_v3s16_v3s16 ; VI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -852,94 +724,59 @@ body: | ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C1]](s16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY4]], [[BITCAST4]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C1]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR8]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C1]](s16) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[LSHR9]], [[AND7]](s16) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR10]] - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C1]], [[C2]] - ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C1]], [[C3]] - ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND8]](s16) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[LSHR11]], [[AND9]](s16) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL7]], [[LSHR12]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C1]](s16) - ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL9]] - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY5]], [[BITCAST6]] - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C2]] - ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C3]] - ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C2]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[OR5]], [[AND10]](s16) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[SHL8]], [[C1]](s16) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[LSHR13]], [[AND11]](s16) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL10]], [[LSHR14]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[AND]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C3]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND3]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[AND2]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL3]], [[LSHR4]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C3]](s16) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[SHL4]], [[AND5]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[AND4]](s16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR5]] ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL11]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; VI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C4]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL12]] - ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C4]] - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[LSHR15]], [[SHL13]] - ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST11]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST12]](<2 x s16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL7]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR6]], [[SHL8]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) ; ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -1026,168 +863,87 @@ body: | ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[LSHR2]], [[ZEXT1]](s32) - ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC]], [[TRUNC1]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[COPY3]](s32) - ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[LSHR4]], [[ZEXT3]](s32) - ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[LSHR1]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC4]], [[TRUNC5]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL4]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY8]](s32) - ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) - ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR8]], [[ZEXT5]](s32) - ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC6]], [[TRUNC7]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C2]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST4]], [[COPY4]](s32) + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND6]](s16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[SHL5]], [[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY9]](s32) - ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[LSHR10]], [[ZEXT7]](s32) - ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) - ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C2]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[COPY5]](s32) + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[SHL7]], [[ZEXT8]](s32) ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY10]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[LSHR14]], [[ZEXT11]](s32) - ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND14]](s16) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) - ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[LSHR13]], [[COPY11]](s32) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[LSHR16]], [[ZEXT13]](s32) - ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC12]], [[TRUNC13]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]] - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) - ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] - ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) - ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY16]](s32) - ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[LSHR20]], [[ZEXT15]](s32) - ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC16]], [[TRUNC17]] - ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] - ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) - ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) - ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] - ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY17]](s32) - ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) - ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[LSHR22]], [[ZEXT17]](s32) - ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC18]], [[TRUNC19]] - ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C1]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL15]] - ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[LSHR6]], [[ZEXT9]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL9]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; VI-LABEL: name: test_fshr_v4s16_v4s16 @@ -1199,125 +955,73 @@ body: | ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) - ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL]], [[LSHR3]] - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) - ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR5]] - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[AND]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL1]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C3]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND3]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[AND2]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL3]], [[LSHR4]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) - ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) - ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL5]], [[LSHR9]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) - ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR11]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC8]], [[AND8]](s16) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C]](s16) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND9]](s16) - ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL8]], [[LSHR15]] - ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC9]], [[AND10]](s16) - ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C]](s16) - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[LSHR16]], [[AND11]](s16) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL9]], [[LSHR17]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16) - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16) - ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]] - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) - ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] - ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[OR6]], [[AND12]](s16) - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C]](s16) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[LSHR20]], [[AND13]](s16) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL13]], [[LSHR21]] - ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] - ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND14]](s16) - ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[SHL11]], [[C]](s16) - ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[LSHR22]], [[AND15]](s16) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL14]], [[LSHR23]] - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL15]] - ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC10]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC6]], [[C3]](s16) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[SHL5]], [[AND5]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[AND4]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL6]], [[LSHR8]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC11]], [[C2]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[TRUNC7]], [[C3]](s16) + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[SHL7]], [[AND7]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[AND6]](s16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = disjoint G_OR [[SHL8]], [[LSHR9]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; GFX9-LABEL: name: test_fshr_v4s16_v4s16