Skip to content

Conversation

zhaoqi5
Copy link
Contributor

@zhaoqi5 zhaoqi5 commented Sep 4, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 4, 2025

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/156792.diff

3 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+27-22)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (-18)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll (+14-22)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 634914d3b3fd0..0fffa50db7dd0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -287,7 +287,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::UNDEF, VT, Legal);
 
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
-      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
 
       setOperationAction(ISD::SETCC, VT, Legal);
@@ -421,12 +421,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasExtLSX()) {
     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
     setTargetDAGCombine(ISD::BITCAST);
-  }
-
-  // Set DAG combine for 'LASX' feature.
-
-  if (Subtarget.hasExtLASX())
     setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+  }
 
   // Compute derived properties from the register classes.
   computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -2834,37 +2830,47 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   SDLoc DL(Op);
   MVT GRLenVT = Subtarget.getGRLenVT();
 
-  assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
-
   if (isa<ConstantSDNode>(Idx))
     return Op;
 
   switch (VecTy.getSimpleVT().SimpleTy) {
   default:
     llvm_unreachable("Unexpected type");
+  case MVT::v4f32:
+  case MVT::v2f64:
+    return Op;
+  case MVT::v16i8:
+  case MVT::v8i16:
+  case MVT::v4i32:
+  case MVT::v2i64:
   case MVT::v32i8:
   case MVT::v16i16:
   case MVT::v4i64:
   case MVT::v4f64: {
-    // Extract the high half subvector and place it to the low half of a new
-    // vector. It doesn't matter what the high half of the new vector is.
-    EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
-    SDValue VecHi =
-        DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
-    SDValue TmpVec =
-        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
-                    VecHi, DAG.getConstant(0, DL, GRLenVT));
+    SDValue TmpVec;
+    if (VecTy.is256BitVector()) {
+      // Extract the high half subvector and place it to the low half of a new
+      // vector. It doesn't matter what the high half of the new vector is.
+      EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
+      SDValue VecHi = DAG.getExtractSubvector(DL, HalfTy, Vec,
+                                              HalfTy.getVectorNumElements());
+      TmpVec =
+          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
+                      VecHi, DAG.getConstant(0, DL, GRLenVT));
+    }
 
     // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
     // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
     // desired element.
     SDValue IdxCp =
         DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx);
-    SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
+    SDValue IdxVec =
+        DAG.getNode(ISD::SCALAR_TO_VECTOR, DL,
+                    (VecTy.is128BitVector() ? MVT::v4f32 : MVT::v8f32), IdxCp);
     SDValue MaskVec =
-        DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
-    SDValue ResVec =
-        DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
+        DAG.getBitcast(VecTy.changeVectorElementTypeToInteger(), IdxVec);
+    SDValue ResVec = DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec,
+                                 (VecTy.is128BitVector() ? Vec : TmpVec), Vec);
 
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
                        DAG.getConstant(0, DL, GRLenVT));
@@ -6254,12 +6260,11 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
 
   MVT EltVT = N->getSimpleValueType(0);
   SDValue Vec = N->getOperand(0);
-  EVT VecTy = Vec->getValueType(0);
   SDValue Idx = N->getOperand(1);
   unsigned IdxOp = Idx.getOpcode();
   SDLoc DL(N);
 
-  if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
+  if (isa<ConstantSDNode>(Idx))
     return SDValue();
 
   // Combine:
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index eb7120ffb41a6..deb86513859f5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -2086,24 +2086,6 @@ def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)),
           (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>;
 
 // Vector extraction with variable index.
-def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)),
-          (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
-                                                                    i64:$rk),
-                                                         sub_32)),
-                                    GPR), (i64 24))>;
-def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)),
-          (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
-                                                                    i64:$rk),
-                                                         sub_32)),
-                                    GPR), (i64 16))>;
-def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)),
-          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk),
-                                                 sub_32)),
-                            GPR)>;
-def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
-          (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
-                                                 sub_64)),
-                            GPR)>;
 def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
           (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>;
 def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index c9c95f19c26f8..73fda3174a224 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -77,11 +77,9 @@ define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_16xi8_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT:    vreplve.b $vr0, $vr0, $a0
-; CHECK-NEXT:    movfr2gr.s $a0, $fa0
-; CHECK-NEXT:    srai.w $a0, $a0, 24
-; CHECK-NEXT:    st.b $a0, $a1, 0
+; CHECK-NEXT:    movgr2fr.w $fa1, $a2
+; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT:    vstelm.b $vr0, $a1, 0, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <16 x i8>, ptr %src
   %e = extractelement <16 x i8> %v, i32 %idx
@@ -93,11 +91,9 @@ define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_8xi16_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT:    vreplve.h $vr0, $vr0, $a0
-; CHECK-NEXT:    movfr2gr.s $a0, $fa0
-; CHECK-NEXT:    srai.w $a0, $a0, 16
-; CHECK-NEXT:    st.h $a0, $a1, 0
+; CHECK-NEXT:    movgr2fr.w $fa1, $a2
+; CHECK-NEXT:    vshuf.h $vr1, $vr0, $vr0
+; CHECK-NEXT:    vstelm.h $vr1, $a1, 0, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <8 x i16>, ptr %src
   %e = extractelement <8 x i16> %v, i32 %idx
@@ -109,10 +105,9 @@ define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_4xi32_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT:    vreplve.w $vr0, $vr0, $a0
-; CHECK-NEXT:    movfr2gr.s $a0, $fa0
-; CHECK-NEXT:    st.w $a0, $a1, 0
+; CHECK-NEXT:    movgr2fr.w $fa1, $a2
+; CHECK-NEXT:    vshuf.w $vr1, $vr0, $vr0
+; CHECK-NEXT:    vstelm.w $vr1, $a1, 0, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <4 x i32>, ptr %src
   %e = extractelement <4 x i32> %v, i32 %idx
@@ -124,10 +119,9 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_2xi64_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT:    vreplve.d $vr0, $vr0, $a0
-; CHECK-NEXT:    movfr2gr.d $a0, $fa0
-; CHECK-NEXT:    st.d $a0, $a1, 0
+; CHECK-NEXT:    movgr2fr.w $fa1, $a2
+; CHECK-NEXT:    vshuf.d $vr1, $vr0, $vr0
+; CHECK-NEXT:    vstelm.d $vr1, $a1, 0, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <2 x i64>, ptr %src
   %e = extractelement <2 x i64> %v, i32 %idx
@@ -139,8 +133,7 @@ define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_4xfloat_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT:    vreplve.w $vr0, $vr0, $a0
+; CHECK-NEXT:    vreplve.w $vr0, $vr0, $a2
 ; CHECK-NEXT:    fst.s $fa0, $a1, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <4 x float>, ptr %src
@@ -153,8 +146,7 @@ define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
 ; CHECK-LABEL: extract_2xdouble_idx:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT:    vreplve.d $vr0, $vr0, $a0
+; CHECK-NEXT:    vreplve.d $vr0, $vr0, $a2
 ; CHECK-NEXT:    fst.d $fa0, $a1, 0
 ; CHECK-NEXT:    ret
   %v = load volatile <2 x double>, ptr %src

Copy link
Member

@heiher heiher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@tangaac
Copy link
Member

tangaac commented Sep 5, 2025

Very little regression.
tangaac/loong-opt-cov-ts@9805f6b

@heiher
Copy link
Member

heiher commented Sep 5, 2025

> Very little regression. tangaac/loong-opt-cov-ts@9805f6b

So do we need to handle the result differently depending on whether it goes into a GPR or a VR?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants