-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[LoongArch] Custom lower vecreduce. #155196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
tangaac
wants to merge
1
commit into
llvm:main
Choose a base branch
from
tangaac:vecreduce
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-loongarch Author: None (tangaac). Changes: Patch is 80.86 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/155196.diff — 17 files affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 18d3e66bc0763..a2b925e3fee6d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -341,6 +341,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
}
@@ -526,6 +533,14 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerBF16_TO_FP(Op, DAG);
case ISD::VECREDUCE_ADD:
return lowerVECREDUCE_ADD(Op, DAG);
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ return lowerVECREDUCE(Op, DAG);
}
return SDValue();
}
@@ -580,6 +595,34 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
+SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ MVT OpVT = Op.getSimpleValueType();
+ SDValue Val = Op.getOperand(0);
+
+ unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
+ unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
+
+  // Legalize the operand type: widen the vector until its type is legal.
+ while (!isTypeLegal(Val.getSimpleValueType())) {
+ Val = DAG.WidenVector(Val, DL);
+ }
+
+ unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
+ MVT VecTy = Val.getSimpleValueType();
+
+ for (int i = NumEles; i > 1; i /= 2) {
+ SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
+ SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
+ Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
+ DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+}
+
SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
SelectionDAG &DAG) const {
unsigned IsData = Op.getConstantOperandVal(4);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 40e237b1c69e4..ff22afa86ea9c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -390,6 +390,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBF16_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index efe898c33072e..f548a8dd0532b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -100,6 +100,13 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
default:
return true;
case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_xor:
return false;
}
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
index a3160f10c8ca8..fd64beab57bf0 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
@@ -5,22 +5,17 @@ define void @vec_reduce_and_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %v)
@@ -32,19 +27,15 @@ define void @vec_reduce_and_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %v)
@@ -56,16 +47,13 @@ define void @vec_reduce_and_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %v)
@@ -77,15 +65,11 @@ define void @vec_reduce_and_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
index bc910c23e4b17..cdb08d9de3821 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
@@ -5,22 +5,17 @@ define void @vec_reduce_or_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %v)
@@ -32,19 +27,15 @@ define void @vec_reduce_or_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %v)
@@ -56,16 +47,13 @@ define void @vec_reduce_or_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %v)
@@ -77,15 +65,11 @@ define void @vec_reduce_or_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
index 378088c9f8280..1d182731c93be 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
@@ -5,22 +5,17 @@ define void @vec_reduce_smax_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
-; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
-; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
-; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
-; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
+; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %v)
@@ -32,19 +27,15 @@ define void @vec_reduce_smax_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 228
-; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.h $xr1, $xr1, 14
-; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.h $xr1, $xr1, 1
-; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %v)
@@ -56,16 +47,13 @@ define void @vec_reduce_smax_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 228
-; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.w $xr1, $xr1, 14
-; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.w $xr1, $xr1, 1
-; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %v)
@@ -77,15 +65,11 @@ define void @vec_reduce_smax_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr2
-; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.d $xr1, $xr1, 1
-; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vmax.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
index 1c7f2054cd4e1..369afdd1fc7bc 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
@@ -5,22 +5,17 @@ define void @vec_reduce_smin_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 78
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 228
-; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvsrli.d $xr1, $xr1, 32
-; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvshuf4i.b $xr1, $xr1, 14
-; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 68
-; CHECK-NEXT: xvrepl128vei.b $xr1, $xr1, 1
-; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1
-; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
+; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
+; CHECK-NEXT: vmin.b $vr...
[truncated]
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.