-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[LoongArch] Optimize extractelement containing variable index for lsx #156792
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-loongarch — Author: ZhaoQi (zhaoqi5). Changes: full diff at https://github.com/llvm/llvm-project/pull/156792.diff. 3 files affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..0fffa50db7dd0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -287,7 +287,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UNDEF, VT, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Legal);
@@ -421,12 +421,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
if (Subtarget.hasExtLSX()) {
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BITCAST);
- }
-
- // Set DAG combine for 'LASX' feature.
-
- if (Subtarget.hasExtLASX())
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ }
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -2834,37 +2830,47 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDLoc DL(Op);
MVT GRLenVT = Subtarget.getGRLenVT();
- assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
-
if (isa<ConstantSDNode>(Idx))
return Op;
switch (VecTy.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("Unexpected type");
+ case MVT::v4f32:
+ case MVT::v2f64:
+ return Op;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
case MVT::v32i8:
case MVT::v16i16:
case MVT::v4i64:
case MVT::v4f64: {
- // Extract the high half subvector and place it to the low half of a new
- // vector. It doesn't matter what the high half of the new vector is.
- EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
- SDValue VecHi =
- DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
- SDValue TmpVec =
- DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
- VecHi, DAG.getConstant(0, DL, GRLenVT));
+ SDValue TmpVec;
+ if (VecTy.is256BitVector()) {
+ // Extract the high half subvector and place it to the low half of a new
+ // vector. It doesn't matter what the high half of the new vector is.
+ EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
+ SDValue VecHi = DAG.getExtractSubvector(DL, HalfTy, Vec,
+ HalfTy.getVectorNumElements());
+ TmpVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
+ VecHi, DAG.getConstant(0, DL, GRLenVT));
+ }
// Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
// of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
// desired element.
SDValue IdxCp =
DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx);
- SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
+ SDValue IdxVec =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL,
+ (VecTy.is128BitVector() ? MVT::v4f32 : MVT::v8f32), IdxCp);
SDValue MaskVec =
- DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
- SDValue ResVec =
- DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
+ DAG.getBitcast(VecTy.changeVectorElementTypeToInteger(), IdxVec);
+ SDValue ResVec = DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec,
+ (VecTy.is128BitVector() ? Vec : TmpVec), Vec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
DAG.getConstant(0, DL, GRLenVT));
@@ -6254,12 +6260,11 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
MVT EltVT = N->getSimpleValueType(0);
SDValue Vec = N->getOperand(0);
- EVT VecTy = Vec->getValueType(0);
SDValue Idx = N->getOperand(1);
unsigned IdxOp = Idx.getOpcode();
SDLoc DL(N);
- if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
+ if (isa<ConstantSDNode>(Idx))
return SDValue();
// Combine:
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index eb7120ffb41a6..deb86513859f5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2086,24 +2086,6 @@ def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)),
(f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>;
// Vector extraction with variable index.
-def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)),
- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
- i64:$rk),
- sub_32)),
- GPR), (i64 24))>;
-def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)),
- (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
- i64:$rk),
- sub_32)),
- GPR), (i64 16))>;
-def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)),
- (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk),
- sub_32)),
- GPR)>;
-def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
- sub_64)),
- GPR)>;
def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
(f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>;
def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index c9c95f19c26f8..73fda3174a224 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -77,11 +77,9 @@ define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_16xi8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: srai.w $a0, $a0, 24
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: movgr2fr.w $fa1, $a2
+; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%e = extractelement <16 x i8> %v, i32 %idx
@@ -93,11 +91,9 @@ define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_8xi16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: srai.w $a0, $a0, 16
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: movgr2fr.w $fa1, $a2
+; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT: vstelm.h $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%e = extractelement <8 x i16> %v, i32 %idx
@@ -109,10 +105,9 @@ define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xi32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0
-; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: movgr2fr.w $fa1, $a2
+; CHECK-NEXT: vshuf.w $vr1, $vr0, $vr0
+; CHECK-NEXT: vstelm.w $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%e = extractelement <4 x i32> %v, i32 %idx
@@ -124,10 +119,9 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_2xi64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0
-; CHECK-NEXT: movfr2gr.d $a0, $fa0
-; CHECK-NEXT: st.d $a0, $a1, 0
+; CHECK-NEXT: movgr2fr.w $fa1, $a2
+; CHECK-NEXT: vshuf.d $vr1, $vr0, $vr0
+; CHECK-NEXT: vstelm.d $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%e = extractelement <2 x i64> %v, i32 %idx
@@ -139,8 +133,7 @@ define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_4xfloat_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0
+; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2
; CHECK-NEXT: fst.s $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x float>, ptr %src
@@ -153,8 +146,7 @@ define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; CHECK-LABEL: extract_2xdouble_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0
+; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2
; CHECK-NEXT: fst.d $fa0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <2 x double>, ptr %src
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Very little regression. |
So do we need to handle the result differently depending on whether it goes into a GPR or a VR? |
No description provided.