diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 415164fc9e2cb..e10890087bf06 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4570,6 +4570,18 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
     if (Subtarget->hasSPE())
       return false;
 
+    // Optimise integer 'not equal to zero-vector' comparisons into a single
+    // unsigned compare, so no separate negation of an equality is needed:
+    //   k != 0  ==>  k >u 0  (SETUGT)
+    //   0 != k  ==>  0 <u k  (SETULT)
+    // An unsigned lane is non-zero iff it is greater than zero. That is not
+    // true for floating-point lanes (e.g. negative values), so skip those.
+    if (CC == ISD::SETNE && LHS.getValueType().isInteger()) {
+      if (ISD::isBuildVectorAllZeros(RHS.getNode()))
+        CC = ISD::SETUGT;
+      else if (ISD::isBuildVectorAllZeros(LHS.getNode()))
+        CC = ISD::SETULT;
+    }
     EVT VecVT = LHS.getValueType();
     bool Swap, Negate;
     unsigned int VCmpInst =
diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
index d8e66d6500f5f..2e3704a7f9ae2 100644
--- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
@@ -1,17 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \
 ; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
 ; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32
 
 define i32 @test_Greater_than(ptr %colauths) {
 ; This testcase is for the special case of zero-vector comparisons.
-; Currently the generated code does a comparison (vcmpequh) and then a negation (xxlnor).
-; This pattern is expected to be optimized in a future patch.
+; Optimised version using vcmpgtuh.
 ; POWERPC_64LE-LABEL: test_Greater_than:
 ; POWERPC_64LE: # %bb.0: # %entry
 ; POWERPC_64LE-NEXT: lfd 0, 0(3)
@@ -19,8 +18,7 @@ define i32 @test_Greater_than(ptr %colauths) {
 ; POWERPC_64LE-NEXT: li 4, 0
 ; POWERPC_64LE-NEXT: li 3, 4
 ; POWERPC_64LE-NEXT: xxswapd 34, 0
-; POWERPC_64LE-NEXT: vcmpequh 2, 2, 3
-; POWERPC_64LE-NEXT: xxlnor 34, 34, 34
+; POWERPC_64LE-NEXT: vcmpgtuh 2, 2, 3
 ; POWERPC_64LE-NEXT: vmrglh 3, 2, 2
 ; POWERPC_64LE-NEXT: vextuwrx 4, 4, 2
 ; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3
@@ -42,8 +40,7 @@ define i32 @test_Greater_than(ptr %colauths) {
 ; POWERPC_64-NEXT: xxlxor 35, 35, 35
 ; POWERPC_64-NEXT: li 4, 12
 ; POWERPC_64-NEXT: li 3, 8
-; POWERPC_64-NEXT: vcmpequh 2, 2, 3
-; POWERPC_64-NEXT: xxlnor 34, 34, 34
+; POWERPC_64-NEXT: vcmpgtuh 2, 2, 3
 ; POWERPC_64-NEXT: vmrghh 2, 2, 2
 ; POWERPC_64-NEXT: vextuwlx 4, 4, 2
 ; POWERPC_64-NEXT: vextuwlx 3, 3, 2
@@ -66,8 +63,7 @@ define i32 @test_Greater_than(ptr %colauths) {
 ; POWERPC_32-NEXT: xxlxor 35, 35, 35
 ; POWERPC_32-NEXT: lxvwsx 0, 3, 4
 ; POWERPC_32-NEXT: xxmrghw 34, 1, 0
-; POWERPC_32-NEXT: vcmpequh 2, 2, 3
-; POWERPC_32-NEXT: xxlnor 34, 34, 34
+; POWERPC_32-NEXT: vcmpgtuh 2, 2, 3
 ; POWERPC_32-NEXT: vmrghh 2, 2, 2
 ; POWERPC_32-NEXT: stxv 34, -32(1)
 ; POWERPC_32-NEXT: lwz 3, -20(1)
diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll
index 87208691eb047..b8a1d46afc3b2 100644
--- a/llvm/test/CodeGen/PowerPC/pr61315.ll
+++ b/llvm/test/CodeGen/PowerPC/pr61315.ll
@@ -1,23 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI0_0:
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
@@ -29,8 +13,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
 ; CHECK-NEXT: xxperm v2, v3, vs0
 ; CHECK-NEXT: lxv vs0, 0(r3)
 ; CHECK-NEXT: xxland v2, v2, vs0
-; CHECK-NEXT: vcmpequb v2, v2, v3
-; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: vcmpgtub v2, v2, v3
 ; CHECK-NEXT: blr
   %a4 = extractelement <16 x i8> %0, i64 7
   %a5 = zext i8 %a4 to i16
@@ -44,23 +27,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
 }
 
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI1_0:
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
@@ -72,8 +38,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
 ; CHECK-NEXT: xxperm v2, v3, vs0
 ; CHECK-NEXT: lxv vs0, 0(r3)
 ; CHECK-NEXT: xxland v2, v2, vs0
-; CHECK-NEXT: vcmpequb v2, v2, v3
-; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: vcmpgtub v2, v2, v3
 ; CHECK-NEXT: blr
   %a4 = extractelement <16 x i8> %0, i64 7
   %a5 = zext i8 %a4 to i32
@@ -87,23 +52,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
 }
 
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI2_0:
-; CHECK-NEXT: .byte 22 # 0x16
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 22 # 0x16
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 22 # 0x16
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 22 # 0x16
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
@@ -115,8 +63,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0
 ; CHECK-NEXT: xxperm v2, v3, vs0
 ; CHECK-NEXT: lxv vs0, 0(r3)
 ; CHECK-NEXT: xxland v2, v2, vs0
-; CHECK-NEXT: vcmpequb v2, v2, v3
-; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: vcmpgtub v2, v2, v3
 ; CHECK-NEXT: blr
   %a4 = extractelement <8 x i16> %0, i64 3
   %a5 = zext i16 %a4 to i32
diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
index 1c3ac17666e26..90f963de428e9 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
@@ -366,8 +366,7 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xxlor 34, 34, 35
 ; CHECK-NEXT: xxlxor 35, 35, 35
-; CHECK-NEXT: vcmpequw 2, 2, 3
-; CHECK-NEXT: xxlnor 34, 34, 34
+; CHECK-NEXT: vcmpgtuw 2, 2, 3
 ; CHECK-NEXT: blr
   %a = icmp ne <4 x i32> %P, zeroinitializer
   %b = icmp ne <4 x i32> %Q, zeroinitializer
diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
index 43cbc62e0bb1c..986f255df4bd6 100644
--- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
@@ -13,8 +13,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR5-NEXT: vaddubm 3, 2, 3
 ; PWR5-NEXT: vand 2, 2, 3
 ; PWR5-NEXT: vxor 3, 3, 3
-; PWR5-NEXT: vcmpequb 2, 2, 3
-; PWR5-NEXT: vnot 2, 2
+; PWR5-NEXT: vcmpgtub 2, 2, 3
 ; PWR5-NEXT: blr
 ;
 ; PWR6-LABEL: ugt_1_v16i8:
@@ -23,8 +22,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR6-NEXT: vaddubm 3, 2, 3
 ; PWR6-NEXT: vand 2, 2, 3
 ; PWR6-NEXT: vxor 3, 3, 3
-; PWR6-NEXT: vcmpequb 2, 2, 3
-; PWR6-NEXT: vnot 2, 2
+; PWR6-NEXT: vcmpgtub 2, 2, 3
 ; PWR6-NEXT: blr
 ;
 ; PWR7-LABEL: ugt_1_v16i8:
@@ -33,8 +31,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR7-NEXT: vaddubm 3, 2, 3
 ; PWR7-NEXT: xxland 34, 34, 35
 ; PWR7-NEXT: xxlxor 35, 35, 35
-; PWR7-NEXT: vcmpequb 2, 2, 3
-; PWR7-NEXT: xxlnor 34, 34, 34
+; PWR7-NEXT: vcmpgtub 2, 2, 3
 ; PWR7-NEXT: blr
 ;
 ; PWR8-LABEL: ugt_1_v16i8:
@@ -1081,8 +1078,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR5-NEXT: vadduhm 3, 2, 3
 ; PWR5-NEXT: vand 2, 2, 3
 ; PWR5-NEXT: vxor 3, 3, 3
-; PWR5-NEXT: vcmpequh 2, 2, 3
-; PWR5-NEXT: vnot 2, 2
+; PWR5-NEXT: vcmpgtuh 2, 2, 3
 ; PWR5-NEXT: blr
 ;
 ; PWR6-LABEL: ugt_1_v8i16:
@@ -1091,8 +1087,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR6-NEXT: vadduhm 3, 2, 3
 ; PWR6-NEXT: vand 2, 2, 3
 ; PWR6-NEXT: vxor 3, 3, 3
-; PWR6-NEXT: vcmpequh 2, 2, 3
-; PWR6-NEXT: vnot 2, 2
+; PWR6-NEXT: vcmpgtuh 2, 2, 3
 ; PWR6-NEXT: blr
 ;
 ; PWR7-LABEL: ugt_1_v8i16:
@@ -1101,8 +1096,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR7-NEXT: vadduhm 3, 2, 3
 ; PWR7-NEXT: xxland 34, 34, 35
 ; PWR7-NEXT: xxlxor 35, 35, 35
-; PWR7-NEXT: vcmpequh 2, 2, 3
-; PWR7-NEXT: xxlnor 34, 34, 34
+; PWR7-NEXT: vcmpgtuh 2, 2, 3
 ; PWR7-NEXT: blr
 ;
 ; PWR8-LABEL: ugt_1_v8i16:
@@ -4101,8 +4095,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR5-NEXT: vadduwm 3, 2, 3
 ; PWR5-NEXT: vand 2, 2, 3
 ; PWR5-NEXT: vxor 3, 3, 3
-; PWR5-NEXT: vcmpequw 2, 2, 3
-; PWR5-NEXT: vnot 2, 2
+; PWR5-NEXT: vcmpgtuw 2, 2, 3
 ; PWR5-NEXT: blr
 ;
 ; PWR6-LABEL: ugt_1_v4i32:
@@ -4111,8 +4104,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR6-NEXT: vadduwm 3, 2, 3
 ; PWR6-NEXT: vand 2, 2, 3
 ; PWR6-NEXT: vxor 3, 3, 3
-; PWR6-NEXT: vcmpequw 2, 2, 3
-; PWR6-NEXT: vnot 2, 2
+; PWR6-NEXT: vcmpgtuw 2, 2, 3
 ; PWR6-NEXT: blr
 ;
 ; PWR7-LABEL: ugt_1_v4i32:
@@ -4121,8 +4113,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR7-NEXT: vadduwm 3, 2, 3
 ; PWR7-NEXT: xxland 34, 34, 35
 ; PWR7-NEXT: xxlxor 35, 35, 35
-; PWR7-NEXT: vcmpequw 2, 2, 3
-; PWR7-NEXT: xxlnor 34, 34, 34
+; PWR7-NEXT: vcmpgtuw 2, 2, 3
 ; PWR7-NEXT: blr
 ;
 ; PWR8-LABEL: ugt_1_v4i32:
@@ -11967,9 +11958,8 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
 ; PWR7-NEXT: addis 3, 2, .LCPI100_0@toc@ha
 ; PWR7-NEXT: addi 3, 3, .LCPI100_0@toc@l
 ; PWR7-NEXT: xxland 34, 34, 0
-; PWR7-NEXT: vcmpequw 2, 2, 3
+; PWR7-NEXT: vcmpgtuw 2, 2, 3
 ; PWR7-NEXT: lxvw4x 35, 0, 3
-; PWR7-NEXT: xxlnor 34, 34, 34
 ; PWR7-NEXT: vperm 3, 2, 2, 3
 ; PWR7-NEXT: xxlor 34, 35, 34
 ; PWR7-NEXT: blr