diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index c73dc3021eb42..040909949dc1d 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -287,6 +287,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Additional instructions available with z17. if (Subtarget.hasVectorEnhancements3()) { setOperationAction(ISD::ABS, MVT::i128, Legal); + + setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, + MVT::i128, Legal); } } @@ -492,6 +495,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands // and inverting the result as necessary. setOperationAction(ISD::SETCC, VT, Custom); + + setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, VT, + Legal); } } diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td index 10de8b05cf45f..479bab5ce62b8 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td @@ -680,41 +680,41 @@ let Predicates = [FeatureVector] in { let isCommutable = 1 in { // Maximum. 
def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>; - def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; - def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; - def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; - def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; + def VMXB : BinaryVRRc<"vmxb", 0xE7FF, smax, v128b, v128b, 0>; + def VMXH : BinaryVRRc<"vmxh", 0xE7FF, smax, v128h, v128h, 1>; + def VMXF : BinaryVRRc<"vmxf", 0xE7FF, smax, v128f, v128f, 2>; + def VMXG : BinaryVRRc<"vmxg", 0xE7FF, smax, v128g, v128g, 3>; let Predicates = [FeatureVectorEnhancements3] in - def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, null_frag, v128q, v128q, 4>; + def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, smax, v128q, v128q, 4>; // Maximum logical. def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>; - def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; - def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; - def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; - def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; + def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, umax, v128b, v128b, 0>; + def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, umax, v128h, v128h, 1>; + def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, umax, v128f, v128f, 2>; + def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, umax, v128g, v128g, 3>; let Predicates = [FeatureVectorEnhancements3] in - def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, null_frag, v128q, v128q, 4>; + def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, umax, v128q, v128q, 4>; } let isCommutable = 1 in { // Minimum. 
def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>; - def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; - def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; - def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; - def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; + def VMNB : BinaryVRRc<"vmnb", 0xE7FE, smin, v128b, v128b, 0>; + def VMNH : BinaryVRRc<"vmnh", 0xE7FE, smin, v128h, v128h, 1>; + def VMNF : BinaryVRRc<"vmnf", 0xE7FE, smin, v128f, v128f, 2>; + def VMNG : BinaryVRRc<"vmng", 0xE7FE, smin, v128g, v128g, 3>; let Predicates = [FeatureVectorEnhancements3] in - def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, null_frag, v128q, v128q, 4>; + def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, smin, v128q, v128q, 4>; // Minimum logical. def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>; - def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; - def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; - def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; - def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; + def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, umin, v128b, v128b, 0>; + def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, umin, v128h, v128h, 1>; + def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, umin, v128f, v128f, 2>; + def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, umin, v128g, v128g, 3>; let Predicates = [FeatureVectorEnhancements3] in - def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, null_frag, v128q, v128q, 4>; + def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, umin, v128q, v128q, 4>; } let isCommutable = 1 in { @@ -1250,54 +1250,45 @@ defm : IntegerAbsoluteVectorOps; defm : IntegerAbsoluteVectorOps; defm : IntegerAbsoluteVectorOps; -// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the -// signed or unsigned "set if greater than" comparison instruction and -// MIN and MAX are the associated minimum and maximum instructions. 
-multiclass IntegerMinMaxVectorOps { - let Predicates = [FeatureVector] in { - def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)), - (max VR128:$x, VR128:$y)>; - def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)), - (min VR128:$x, VR128:$y)>; - def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), - VR128:$x, VR128:$y)), - (min VR128:$x, VR128:$y)>; - def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), - VR128:$y, VR128:$x)), - (max VR128:$x, VR128:$y)>; - } +// Instantiate packs/packu: recognize a saturating truncation and convert +// into the corresponding packs/packu instruction. +multiclass SignedSaturatingTruncate { + def : Pat< + (output (z_pack + (smin (smax (input VR128:$a), ssat_trunc_min_vec), ssat_trunc_max_vec), + (smin (smax (input VR128:$b), ssat_trunc_min_vec), ssat_trunc_max_vec) + )), + (packs VR128:$a, VR128:$b) + >; + + def : Pat< + (output (z_pack + (smax (smin (input VR128:$a), ssat_trunc_max_vec), ssat_trunc_min_vec), + (smax (smin (input VR128:$b), ssat_trunc_max_vec), ssat_trunc_min_vec) + )), + (packs VR128:$a, VR128:$b) + >; } -// Signed min/max. -defm : IntegerMinMaxVectorOps; -defm : IntegerMinMaxVectorOps; -defm : IntegerMinMaxVectorOps; -defm : IntegerMinMaxVectorOps; - -let Predicates = [FeatureVectorEnhancements3] in { - def : Pat<(i128 (or (and VR128:$x, (z_vicmph VR128:$x, VR128:$y)), - (and VR128:$y, (not (z_vicmph VR128:$x, VR128:$y))))), - (VMXQ VR128:$x, VR128:$y)>; - def : Pat<(i128 (or (and VR128:$y, (z_vicmph VR128:$x, VR128:$y)), - (and VR128:$x, (not (z_vicmph VR128:$x, VR128:$y))))), - (VMNQ VR128:$x, VR128:$y)>; +defm : SignedSaturatingTruncate; +defm : SignedSaturatingTruncate; +defm : SignedSaturatingTruncate; + +multiclass UnsignedSaturatingTruncate { + def : Pat< + (output (z_pack + (umin (input VR128:$a), usat_trunc_max_vec), + (umin (input VR128:$b), usat_trunc_max_vec) + )), + (packu VR128:$a, VR128:$b) + >; } -// Unsigned min/max. 
-defm : IntegerMinMaxVectorOps; -defm : IntegerMinMaxVectorOps; -defm : IntegerMinMaxVectorOps; -defm : IntegerMinMaxVectorOps; - -let Predicates = [FeatureVectorEnhancements3] in { - def : Pat<(i128 (or (and VR128:$x, (z_vicmphl VR128:$x, VR128:$y)), - (and VR128:$y, (not (z_vicmphl VR128:$x, VR128:$y))))), - (VMXLQ VR128:$x, VR128:$y)>; - def : Pat<(i128 (or (and VR128:$y, (z_vicmphl VR128:$x, VR128:$y)), - (and VR128:$x, (not (z_vicmphl VR128:$x, VR128:$y))))), - (VMNLQ VR128:$x, VR128:$y)>; -} +defm : UnsignedSaturatingTruncate; +defm : UnsignedSaturatingTruncate; +defm : UnsignedSaturatingTruncate; // Instantiate comparison patterns to recognize VACC/VSCBI for TYPE. multiclass IntegerComputeCarryOrBorrow; def z_vzext1 : PatFrag<(ops node:$x), (and node:$x, vsplat_imm_eq_1)>; +// Vector constants for saturating truncation, containing the minimum and +// maximum value for the integer type that is half of the element width. +def ssat_trunc_min_vec: PatFrag<(ops), (build_vector), [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + unsigned SizeInBits = EltTy.getSizeInBits(); + APInt min = APInt::getSignedMinValue(SizeInBits / 2).sext(SizeInBits); + return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, min); +}]>; +def ssat_trunc_max_vec: PatFrag<(ops), (build_vector), [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + unsigned SizeInBits = EltTy.getSizeInBits(); + APInt max = APInt::getSignedMaxValue(SizeInBits / 2).sext(SizeInBits); + return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, max); +}]>; + +def usat_trunc_max_vec: PatFrag<(ops), (build_vector), [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + unsigned SizeInBits = EltTy.getSizeInBits(); + APInt max = APInt::getMaxValue(SizeInBits / 2).zext(SizeInBits); + return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, max); +}]>; + // Signed "integer greater than zero" on vectors. 
def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>; diff --git a/llvm/test/CodeGen/SystemZ/int-max-02.ll b/llvm/test/CodeGen/SystemZ/int-max-02.ll index 5f5188c66065d..00fd01a0ccd63 100644 --- a/llvm/test/CodeGen/SystemZ/int-max-02.ll +++ b/llvm/test/CodeGen/SystemZ/int-max-02.ll @@ -7,8 +7,8 @@ define i128 @f1(i128 %val1, i128 %val2) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r4), 3 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: vmxq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -49,8 +49,8 @@ define i128 @f3(i128 %val1, i128 %val2) { define i128 @f4(i128 %val1, i128 %val2) { ; CHECK-LABEL: f4: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r4), 3 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: vmxq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -63,8 +63,8 @@ define i128 @f4(i128 %val1, i128 %val2) { define i128 @f5(i128 %val1, i128 %val2) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r4), 3 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: vmxlq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -105,8 +105,8 @@ define i128 @f7(i128 %val1, i128 %val2) { define i128 @f8(i128 %val1, i128 %val2) { ; CHECK-LABEL: f8: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r4), 3 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: vmxlq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/int-min-02.ll b/llvm/test/CodeGen/SystemZ/int-min-02.ll index 3066af924fb8e..f13db7c4b8995 100644 --- a/llvm/test/CodeGen/SystemZ/int-min-02.ll +++ b/llvm/test/CodeGen/SystemZ/int-min-02.ll @@ -7,8 +7,8 @@ define i128 @f1(i128 %val1, i128 %val2) { ; 
CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: vmnq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -49,8 +49,8 @@ define i128 @f3(i128 %val1, i128 %val2) { define i128 @f4(i128 %val1, i128 %val2) { ; CHECK-LABEL: f4: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: vmnq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -63,8 +63,8 @@ define i128 @f4(i128 %val1, i128 %val2) { define i128 @f5(i128 %val1, i128 %val2) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: vmnlq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -105,8 +105,8 @@ define i128 @f7(i128 %val1, i128 %val2) { define i128 @f8(i128 %val1, i128 %val2) { ; CHECK-LABEL: f8: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: vmnlq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/saturating-truncation.ll b/llvm/test/CodeGen/SystemZ/saturating-truncation.ll new file mode 100644 index 0000000000000..0ea29202c1ef5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/saturating-truncation.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s + +declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2 +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2 + +define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: i16_signed: +; CHECK: # %bb.0: 
# %bb2 +; CHECK-NEXT: vpksh %v24, %v24, %v26 +; CHECK-NEXT: br %r14 +bb2: + %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> + %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128)) + %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127)) + %3 = trunc nsw <16 x i16> %2 to <16 x i8> + ret <16 x i8> %3 +} + +define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: i32_signed: +; CHECK: # %bb.0: # %bb2 +; CHECK-NEXT: vpksf %v24, %v24, %v26 +; CHECK-NEXT: br %r14 +bb2: + %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> + %1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768)) + %2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767)) + %3 = trunc nsw <8 x i32> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define <4 x i32> @i64_signed(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: i64_signed: +; CHECK: # %bb.0: # %bb2 +; CHECK-NEXT: vpksg %v24, %v24, %v26 +; CHECK-NEXT: br %r14 +bb2: + %0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> + %1 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %0, <4 x i64> splat (i64 -2147483648)) + %2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> splat (i64 2147483647)) + %3 = trunc nsw <4 x i64> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define <4 x i32> @i64_signed_flipped(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: i64_signed_flipped: +; CHECK: # %bb.0: # %bb2 +; CHECK-NEXT: vpksg %v24, %v24, %v26 +; CHECK-NEXT: br %r14 +bb2: + %0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> + %1 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> splat (i64 2147483647), <4 x i64> %0) + %2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> splat (i64 -2147483648), <4 x i64> %1) + %3 = trunc nsw <4 x i64> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: i16_unsigned: +; CHECK: # %bb.0: # %bb2 
+; CHECK-NEXT: vpklsh %v24, %v24, %v26 +; CHECK-NEXT: br %r14 +bb2: + %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> + %1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255)) + %2 = trunc nuw <16 x i16> %1 to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: i32_unsigned: +; CHECK: # %bb.0: # %bb2 +; CHECK-NEXT: vpklsf %v24, %v24, %v26 +; CHECK-NEXT: br %r14 +bb2: + %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> + %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535)) + %2 = trunc nuw <8 x i32> %1 to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @i64_unsigned(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: i64_unsigned: +; CHECK: # %bb.0: # %bb2 +; CHECK-NEXT: vpklsg %v24, %v24, %v26 +; CHECK-NEXT: br %r14 +bb2: + %0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> + %1 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %0, <4 x i64> splat (i64 4294967295)) + %2 = trunc nuw <4 x i64> %1 to <4 x i32> + ret <4 x i32> %2 +}