diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 634914d3b3fd0..0fffa50db7dd0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -287,7 +287,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UNDEF, VT, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::SETCC, VT, Legal); @@ -421,12 +421,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, if (Subtarget.hasExtLSX()) { setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::BITCAST); - } - - // Set DAG combine for 'LASX' feature. - - if (Subtarget.hasExtLASX()) setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + } // Compute derived properties from the register classes. computeRegisterProperties(Subtarget.getRegisterInfo()); @@ -2834,37 +2830,47 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SDLoc DL(Op); MVT GRLenVT = Subtarget.getGRLenVT(); - assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type"); - if (isa(Idx)) return Op; switch (VecTy.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected type"); + case MVT::v4f32: + case MVT::v2f64: + return Op; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: case MVT::v32i8: case MVT::v16i16: case MVT::v4i64: case MVT::v4f64: { - // Extract the high half subvector and place it to the low half of a new - // vector. It doesn't matter what the high half of the new vector is. - EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext()); - SDValue VecHi = - DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements()); - SDValue TmpVec = - DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy), - VecHi, DAG.getConstant(0, DL, GRLenVT)); + SDValue TmpVec; + if (VecTy.is256BitVector()) { + // Extract the high half subvector and place it to the low half of a new + // vector. It doesn't matter what the high half of the new vector is. + EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext()); + SDValue VecHi = DAG.getExtractSubvector(DL, HalfTy, Vec, + HalfTy.getVectorNumElements()); + TmpVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy), + VecHi, DAG.getConstant(0, DL, GRLenVT)); + } // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the // desired element. SDValue IdxCp = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx); - SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp); + SDValue IdxVec = + DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, + (VecTy.is128BitVector() ? MVT::v4f32 : MVT::v8f32), IdxCp); SDValue MaskVec = - DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec); - SDValue ResVec = - DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec); + DAG.getBitcast(VecTy.changeVectorElementTypeToInteger(), IdxVec); + SDValue ResVec = DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, + (VecTy.is128BitVector() ? Vec : TmpVec), Vec); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec, DAG.getConstant(0, DL, GRLenVT)); @@ -6254,12 +6260,11 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, MVT EltVT = N->getSimpleValueType(0); SDValue Vec = N->getOperand(0); - EVT VecTy = Vec->getValueType(0); SDValue Idx = N->getOperand(1); unsigned IdxOp = Idx.getOpcode(); SDLoc DL(N); - if (!VecTy.is256BitVector() || isa(Idx)) + if (isa(Idx)) return SDValue(); // Combine: diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index eb7120ffb41a6..deb86513859f5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -2086,24 +2086,6 @@ def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)), (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>; // Vector extraction with variable index. -def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)), - (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, - i64:$rk), - sub_32)), - GPR), (i64 24))>; -def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)), - (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, - i64:$rk), - sub_32)), - GPR), (i64 16))>; -def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)), - (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk), - sub_32)), - GPR)>; -def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)), - (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk), - sub_64)), - GPR)>; def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>; def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll index c9c95f19c26f8..73fda3174a224 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll @@ -77,11 +77,9 @@ define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_16xi8_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: srai.w $a0, $a0, 24 -; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: movgr2fr.w $fa1, $a2 +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1 +; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <16 x i8>, ptr %src %e = extractelement <16 x i8> %v, i32 %idx @@ -93,11 +91,9 @@ define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_8xi16_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: srai.w $a0, $a0, 16 -; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: movgr2fr.w $fa1, $a2 +; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0 +; CHECK-NEXT: vstelm.h $vr1, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <8 x i16>, ptr %src %e = extractelement <8 x i16> %v, i32 %idx @@ -109,10 +105,9 @@ define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_4xi32_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 -; CHECK-NEXT: movfr2gr.s $a0, $fa0 -; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: movgr2fr.w $fa1, $a2 +; CHECK-NEXT: vshuf.w $vr1, $vr0, $vr0 +; CHECK-NEXT: vstelm.w $vr1, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <4 x i32>, ptr %src %e = extractelement <4 x i32> %v, i32 %idx @@ -124,10 +119,9 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_2xi64_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 -; CHECK-NEXT: movfr2gr.d $a0, $fa0 -; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: movgr2fr.w $fa1, $a2 +; CHECK-NEXT: vshuf.d $vr1, $vr0, $vr0 +; CHECK-NEXT: vstelm.d $vr1, $a1, 0, 0 ; CHECK-NEXT: ret %v = load volatile <2 x i64>, ptr %src %e = extractelement <2 x i64> %v, i32 %idx @@ -139,8 +133,7 @@ define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_4xfloat_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 +; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 ; CHECK-NEXT: fst.s $fa0, $a1, 0 ; CHECK-NEXT: ret %v = load volatile <4 x float>, ptr %src @@ -153,8 +146,7 @@ define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; CHECK-LABEL: extract_2xdouble_idx: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a0, 0 -; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0 -; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 +; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 ; CHECK-NEXT: fst.d $fa0, $a1, 0 ; CHECK-NEXT: ret %v = load volatile <2 x double>, ptr %src