diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 675a230bd2c94..af5273bd210d3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7949,6 +7949,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
       VPValue *One =
           Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
       auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
+      SafeRHS->setUnderlyingValue(I);
       Ops[1] = SafeRHS;
       return new VPWidenRecipe(*I, Ops);
     }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fa62547d374cd..da11f922b5362 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -972,6 +972,19 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
     return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                       Ctx.CostKind);
   }
+  case Instruction::Select: {
+    // TODO: Compute cost for VPInstructions without underlying values once
+    // the legacy cost model has been retired.
+    if (!getUnderlyingValue())
+      return 0;
+    Type *ResTy = Ctx.Types.inferScalarType(this);
+    if (!vputils::onlyFirstLaneUsed(this))
+      ResTy = toVectorTy(ResTy, VF);
+    return Ctx.TTI.getCmpSelInstrCost(
+        Instruction::Select, ResTy,
+        ResTy->getWithNewType(Type::getInt1Ty(ResTy->getContext())),
+        CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);
+  }
   case VPInstruction::AnyOf: {
     auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
     return Ctx.TTI.getArithmeticReductionCost(
@@ -2037,8 +2050,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
   case Instruction::SDiv:
   case Instruction::SRem:
   case Instruction::URem:
-    // More complex computation, let the legacy cost-model handle this for now.
-    return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
   case Instruction::Add:
   case Instruction::FAdd:
   case Instruction::Sub:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll
new file mode 100644
index 0000000000000..d4b3cfffa4501
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr154103.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v < %s -S | FileCheck %s
+
+define void @pr154103(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d) {
+; CHECK-LABEL: define void @pr154103(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[B]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[C]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <vscale x 4 x i64> [[TMP0]], splat (i64 7)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> splat (i64 1), [[TMP1]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AVL:%.*]] = phi i64 [ -7905747460161236406, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 7, [[TMP3]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult <vscale x 4 x i32> [[TMP5]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[A]], <vscale x 4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP7]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i64>
+; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i64> [[TMP8]], <vscale x 4 x i64> splat (i64 1), i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP10:%.*]] = sdiv <vscale x 4 x i64> zeroinitializer, [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt <vscale x 4 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> zeroinitializer
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 4 x i1> [[TMP11]], i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_GATHER7]] to <vscale x 4 x i64>
+; CHECK-NEXT:    [[TMP14:%.*]] = xor <vscale x 4 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i64> [[TMP14]], <vscale x 4 x i64> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = trunc <vscale x 4 x i64> [[PREDPHI]] to <vscale x 4 x i16>
+; CHECK-NEXT:    call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> [[TMP15]], <vscale x 4 x ptr> align 2 [[BROADCAST_SPLAT2]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP2]])
+; CHECK-NEXT:    store i32 0, ptr [[D]], align 4
+; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], -7905747460161236406
+; CHECK-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 1, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[X:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[X]] to i64
+; CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 0, [[CONV]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[DIV]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[THEN:.*]], label %[[LATCH]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[Y:%.*]] = load i8, ptr [[B]], align 1
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[Y]] to i64
+; CHECK-NEXT:    [[NOT:%.*]] = xor i64 [[ZEXT]], 0
+; CHECK-NEXT:    br label %[[LATCH]]
+; CHECK:       [[LATCH]]:
+; CHECK-NEXT:    [[COND:%.*]] = phi i64 [ [[NOT]], %[[THEN]] ], [ 0, %[[LOOP]] ]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 [[COND]] to i16
+; CHECK-NEXT:    store i16 [[TRUNC]], ptr [[C]], align 2
+; CHECK-NEXT:    store i32 0, ptr [[D]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 7
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[IV]], 0
+; CHECK-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 1, %entry ], [ %iv.next, %latch ]
+  %gep = getelementptr i8, ptr %a, i64 %iv
+  %x = load i8, ptr %gep, align 1
+  %conv = zext i8 %x to i64
+  %div = sdiv i64 0, %conv
+  %cmp = icmp sgt i64 %div, 0
+  br i1 %cmp, label %then, label %latch
+
+then:
+  %y = load i8, ptr %b
+  %zext = zext i8 %y to i64
+  %not = xor i64 %zext, 0
+  br label %latch
+
+latch:
+  %cond = phi i64 [ %not, %then ], [ 0, %loop ]
+  %trunc = trunc i64 %cond to i16
+  store i16 %trunc, ptr %c
+  store i32 0, ptr %d
+  %iv.next = add i64 %iv, 7
+  %done = icmp eq i64 %iv, 0
+  br i1 %done, label %exit, label %loop
+
+exit:
+  ret void
+}
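
Note (commentary, not part of the patch): the select made costable here is the
safe-divisor guard that tryToWiden builds around a predicated division, so
inactive lanes divide by 1 instead of trapping. A minimal sketch of that shape,
with illustrative value names (%mask, %rhs, %lhs are placeholders):

  ; hypothetical IR mirroring createSelect(Mask, Ops[1], One) above
  %safe.rhs = select <vscale x 4 x i1> %mask, <vscale x 4 x i64> %rhs, <vscale x 4 x i64> splat (i64 1)
  %div = sdiv <vscale x 4 x i64> %lhs, %safe.rhs

With SafeRHS->setUnderlyingValue(I), this select carries an underlying
instruction, so the new Instruction::Select case in VPInstruction::computeCost
can price it via TTI::getCmpSelInstrCost (selects without an underlying value
still return 0, per the TODO), which in turn allows dropping the SDiv/SRem/URem
fallback to the legacy cost model in VPWidenRecipe::computeCost.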