Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 75 additions & 35 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2485,6 +2485,43 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
return SDValue();
}

// Sequentially insert elements from Ops into Vector, from low to high indices.
// Note: Ops can have fewer elements than Vector.
//
// NOTE(review): element 0 is materialized with SCALAR_TO_VECTOR, which
// produces a fresh vector and discards any prior contents of Vector — this
// assumes the incoming Vector lanes are undef (true for both current callers,
// which pass DAG.getUNDEF or a subvector extracted from undef).
static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
                       const LoongArchSubtarget &Subtarget, SDValue &Vector,
                       EVT ResTy) {
  // Ops[0] is read unconditionally below; an empty Ops would be UB.
  assert(!Ops.empty() && "expected at least one element");
  assert(Ops.size() <= ResTy.getVectorNumElements());

  // Element 0: SCALAR_TO_VECTOR is cheaper than a generic insert.
  SDValue Op0 = Ops[0];
  if (!Op0.isUndef())
    Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
  // Remaining elements: insert one by one, skipping undefs so the
  // corresponding lanes simply stay undef.
  for (unsigned i = 1; i < Ops.size(); ++i) {
    SDValue Opi = Ops[i];
    if (Opi.isUndef())
      continue;
    Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
                         DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
  }
}

// Build a ResTy subvector from Node, taking NumElts elements starting at index
// 'first'. Lanes whose source operand is undef are left undef.
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
                                            SelectionDAG &DAG, SDLoc DL,
                                            const LoongArchSubtarget &Subtarget,
                                            EVT ResTy, unsigned first) {
  unsigned NumElts = ResTy.getVectorNumElements();

  // 'first' is unsigned, so 'first >= 0' is always true (and triggers
  // -Wtautological-unsigned-zero-compare); only check that the requested
  // slice [first, first + NumElts) fits inside Node's vector.
  assert(first + NumElts <=
         Node->getSimpleValueType(0).getVectorNumElements());

  // Copy the operand slice and insert it into a fresh undef vector.
  SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
                               Node->op_begin() + first + NumElts);
  SDValue Vector = DAG.getUNDEF(ResTy);
  fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
  return Vector;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
Expand Down Expand Up @@ -2583,29 +2620,18 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
BitVector UndefElements;
if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
UndefElements.count() == 0) {
SDValue Vector = DAG.getUNDEF(ResTy);
SDValue FillVec = Vector;
EVT FillTy = ResTy;

// Using LSX instructions to fill the sub-sequence of 256-bits vector,
// because the high part can be simply treated as undef.
if (Is256Vec) {
FillTy = ResTy.getHalfNumVectorElementsVT(*DAG.getContext());
FillVec = DAG.getExtractSubvector(DL, FillTy, Vector, 0);
}
SDValue Vector = DAG.getUNDEF(ResTy);
EVT FillTy = Is256Vec
? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
: ResTy;
SDValue FillVec =
Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;

SDValue Op0 = Sequence[0];
unsigned SeqLen = Sequence.size();
if (!Op0.isUndef())
FillVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, FillTy, Op0);
for (unsigned i = 1; i < SeqLen; ++i) {
SDValue Opi = Sequence[i];
if (Opi.isUndef())
continue;
FillVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, FillTy, FillVec, Opi,
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
}
fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);

unsigned SeqLen = Sequence.size();
unsigned SplatLen = NumElts / SeqLen;
MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
Expand All @@ -2632,24 +2658,38 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
return DAG.getBitcast(ResTy, SplatVec);
}

// Use INSERT_VECTOR_ELT operations rather than expand to stores.
// The resulting code is the same length as the expansion, but it doesn't
// use memory operations.
assert(ResTy.isVector());
// Use INSERT_VECTOR_ELT operations rather than expand to stores, because
// using memory operations is much slower.
//
// For 256-bit vectors, normally split into two halves and concatenate.
// Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
// one non-undef element, skip splitting to avoid a worse result.
if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
ResTy == MVT::v4f64) {
unsigned NonUndefCount = 0;
for (unsigned i = NumElts / 2; i < NumElts; ++i) {
if (!Node->getOperand(i).isUndef()) {
++NonUndefCount;
if (NonUndefCount > 1)
break;
}
}
if (NonUndefCount == 1)
return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
}

SDValue Op0 = Node->getOperand(0);
SDValue Vector = DAG.getUNDEF(ResTy);
EVT VecTy =
Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
SDValue Vector =
fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);

if (!Op0.isUndef())
Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
for (unsigned i = 1; i < NumElts; ++i) {
SDValue Opi = Node->getOperand(i);
if (Opi.isUndef())
continue;
Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
}
return Vector;
if (Is128Vec)
return Vector;

SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
VecTy, NumElts / 2);

return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
}

return SDValue();
Expand Down
72 changes: 38 additions & 34 deletions llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,19 @@ declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: test_bitreverse_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: bitrev.8b $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: bitrev.8b $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
; CHECK-NEXT: bitrev.8b $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2
; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: bitrev.8b $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3
; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: bitrev.8b $a0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: bitrev.8b $a0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
; CHECK-NEXT: ret
%b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
Expand All @@ -30,19 +31,20 @@ declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
; CHECK-LABEL: test_bitreverse_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3
; CHECK-NEXT: xvshuf4i.h $xr0, $xr1, 27
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
; CHECK-NEXT: xvshuf4i.h $xr0, $xr2, 27
; CHECK-NEXT: ret
%b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
ret <16 x i16> %b
Expand All @@ -53,19 +55,20 @@ declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: test_bitreverse_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3
; CHECK-NEXT: xvshuf4i.w $xr0, $xr1, 177
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
; CHECK-NEXT: xvshuf4i.w $xr0, $xr2, 177
; CHECK-NEXT: ret
%b = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
ret <8 x i32> %b
Expand All @@ -76,18 +79,19 @@ declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: test_bitreverse_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 2
; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 3
; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: bitrev.d $a0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
; CHECK-NEXT: ret
%b = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
Expand Down
Loading
Loading