
Commit 541e511

[LV] Use getFixedValue instead of getKnownMinValue when appropriate (#143526)
There are many places in VPlan and LoopVectorize where we use getKnownMinValue to discover the number of elements in a vector. Where we expect the vector to have a fixed length, I have used the stronger getFixedValue call. I believe this is clearer and adds extra protection in the form of an assert in getFixedValue that the vector is not scalable.

While looking at VPFirstOrderRecurrencePHIRecipe::computeCost I also took the liberty of simplifying the code.

In theory I believe this patch should be NFC, but I'm reluctant to add that to the title in case we're just missing tests for some of the VPlan changes. I built and ran the LLVM test suite when targeting neoverse-v1 and it seemed ok.
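For readers unfamiliar with the two accessors, the distinction this patch leans on lives in llvm/Support/TypeSize.h: getKnownMinValue() returns only the minimum element count (a lower bound when the vector is scalable), while getFixedValue() asserts that the count is not scalable before returning it. The standalone sketch below illustrates that behaviour; the driver program is illustrative only, assumes an LLVM build with headers on the include path, and is not part of the patch.

// Minimal sketch (not part of the patch) of the ElementCount semantics the
// commit relies on. Assumes LLVM headers are available on the include path.
#include "llvm/Support/TypeSize.h"
#include <cstdio>

int main() {
  llvm::ElementCount Fixed = llvm::ElementCount::getFixed(4);       // <4 x ty>
  llvm::ElementCount Scalable = llvm::ElementCount::getScalable(4); // <vscale x 4 x ty>

  // getKnownMinValue() returns 4 for both; for the scalable count it is only
  // a lower bound, since the real element count is 4 * vscale.
  std::printf("%u %u\n", Fixed.getKnownMinValue(), Scalable.getKnownMinValue());

  // getFixedValue() returns the same number but additionally asserts that the
  // count is not scalable, documenting and enforcing the fixed-width assumption.
  std::printf("%u\n", Fixed.getFixedValue());
  // Scalable.getFixedValue() would trip that assertion in an asserts build.
  return 0;
}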

3 files changed: +31 lines, -28 lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 19 additions & 14 deletions
@@ -3116,12 +3116,13 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
     // that we will create. This cost is likely to be zero. The phi node
     // cost, if any, should be scaled by the block probability because it
     // models a copy at the end of each predicated block.
-    ScalarizationCost += VF.getKnownMinValue() *
-                         TTI.getCFInstrCost(Instruction::PHI, CostKind);
+    ScalarizationCost +=
+        VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
 
     // The cost of the non-predicated instruction.
-    ScalarizationCost += VF.getKnownMinValue() *
-                         TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
+    ScalarizationCost +=
+        VF.getFixedValue() *
+        TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
 
     // The cost of insertelement and extractelement instructions needed for
     // scalarization.
@@ -4289,7 +4290,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       return NumLegalParts <= VF.getKnownMinValue();
     }
     // Two or more elements that share a register - are vectorized.
-    return NumLegalParts < VF.getKnownMinValue();
+    return NumLegalParts < VF.getFixedValue();
   };
 
   // If no def nor is a store, e.g., branches, continue - no value to check.
@@ -4574,8 +4575,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
     assert(!isa<SCEVCouldNotCompute>(TC) &&
            "Trip count SCEV must be computable");
     RemainingIterations = SE.getURemExpr(
-        TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
-    MaxTripCount = MainLoopVF.getKnownMinValue() * IC - 1;
+        TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
+    MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
     if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
                             SE.getConstant(TCType, MaxTripCount))) {
       MaxTripCount =
@@ -4586,7 +4587,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
     }
     if (SE.isKnownPredicate(
             CmpInst::ICMP_UGT,
-            SE.getConstant(TCType, NextVF.Width.getKnownMinValue()),
+            SE.getConstant(TCType, NextVF.Width.getFixedValue()),
            RemainingIterations))
       continue;
   }
@@ -5257,14 +5258,14 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
 
   // Get the cost of the scalar memory instruction and address computation.
   InstructionCost Cost =
-      VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+      VF.getFixedValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
 
   // Don't pass *I here, since it is scalar but will actually be part of a
   // vectorized loop where the user of it is a vectorized instruction.
   const Align Alignment = getLoadStoreAlignment(I);
-  Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),
-                                                      ValTy->getScalarType(),
-                                                      Alignment, AS, CostKind);
+  Cost += VF.getFixedValue() * TTI.getMemoryOpCost(I->getOpcode(),
+                                                   ValTy->getScalarType(),
+                                                   Alignment, AS, CostKind);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5280,7 +5281,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
     auto *VecI1Ty =
         VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
     Cost += TTI.getScalarizationOverhead(
-        VecI1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
+        VecI1Ty, APInt::getAllOnes(VF.getFixedValue()),
        /*Insert=*/false, /*Extract=*/true, CostKind);
     Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);
 
@@ -5341,6 +5342,10 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   StoreInst *SI = cast<StoreInst>(I);
 
   bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
+  // TODO: We have existing tests that request the cost of extracting element
+  // VF.getKnownMinValue() - 1 from a scalable vector. This does not represent
+  // the actual generated code, which involves extracting the last element of
+  // a scalable vector where the lane to extract is unknown at compile time.
   return TTI.getAddressComputationCost(ValTy) +
          TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
                              CostKind) +
@@ -5623,7 +5628,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
 
   for (Type *VectorTy : getContainedTypes(RetTy)) {
     Cost += TTI.getScalarizationOverhead(
-        cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
+        cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
        /*Insert=*/true,
        /*Extract=*/false, CostKind);
   }

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 4 deletions
@@ -331,7 +331,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
 
   bool IsSingleScalar = vputils::isSingleScalar(Def);
 
-  VPLane LastLane(IsSingleScalar ? 0 : VF.getKnownMinValue() - 1);
+  VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
   // Check if there is a scalar value for the selected lane.
   if (!hasScalarValue(Def, LastLane)) {
     // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
@@ -368,7 +368,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
     assert(!VF.isScalable() && "VF is assumed to be non scalable.");
     Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
     set(Def, Undef);
-    for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
+    for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
       packScalarIntoVectorizedValue(Def, Lane);
     VectorValue = get(Def);
   }
@@ -789,8 +789,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       Entry);
   State->Lane = VPLane(0);
-  for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
-       ++Lane) {
+  for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
     State->Lane = VPLane(Lane, VPLane::Kind::First);
     // Visit the VPBlocks connected to \p this, starting from it.
     for (VPBlockBase *Block : RPOT) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 9 additions & 10 deletions
@@ -871,7 +871,7 @@ void VPInstruction::execute(VPTransformState &State) {
          isVectorToScalar() || isSingleScalar());
   bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
   if (GeneratesPerAllLanes) {
-    for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
+    for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
          Lane != NumLanes; ++Lane) {
       Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
       assert(GeneratedValue && "generatePerLane must produce a value");
@@ -2787,8 +2787,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   }
 
   // Generate scalar instances for all VF lanes.
-  assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
-  const unsigned EndLane = State.VF.getKnownMinValue();
+  const unsigned EndLane = State.VF.getFixedValue();
   for (unsigned Lane = 0; Lane < EndLane; ++Lane)
     scalarizeInstruction(UI, this, VPLane(Lane), State);
 }
@@ -2841,7 +2840,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
               UI->getOpcode(), ResultTy, CostKind,
               {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
               Op2Info, Operands, UI, &Ctx.TLI) *
-           (isSingleScalar() ? 1 : VF.getKnownMinValue());
+           (isSingleScalar() ? 1 : VF.getFixedValue());
   }
 }
 
@@ -3390,7 +3389,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
       Value *ResBlockInMask = State.get(BlockInMask);
       Value *ShuffledMask = State.Builder.CreateShuffleVector(
           ResBlockInMask,
-          createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
+          createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
           "interleaved.mask");
       return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
                                                      ShuffledMask, MaskForGaps)
@@ -3402,8 +3401,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
   if (isa<LoadInst>(Instr)) {
     Value *MaskForGaps = nullptr;
     if (NeedsMaskForGaps) {
-      MaskForGaps = createBitMaskForGaps(State.Builder,
-                                         State.VF.getKnownMinValue(), *Group);
+      MaskForGaps =
+          createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
       assert(MaskForGaps && "Mask for Gaps is required but it is null");
     }
 
@@ -3454,6 +3453,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
 
     return;
   }
+  assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
 
   // For each member in the group, shuffle out the appropriate data from the
   // wide loads.
@@ -3466,13 +3466,12 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
       continue;
 
     auto StrideMask =
-        createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
+        createStrideMask(I, InterleaveFactor, State.VF.getFixedValue());
     Value *StridedVec =
        State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
 
    // If this member has different type, cast the result type.
    if (Member->getType() != ScalarTy) {
-      assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
      VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
      StridedVec =
          createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
@@ -3808,7 +3807,7 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
   if (VF.isScalar())
     return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 
-  if (VF.isScalable() && VF.getKnownMinValue() == 1)
+  if (VF == ElementCount::getScalable(1))
     return InstructionCost::getInvalid();
 
   return 0;
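A note on the last hunk: the rewritten guard in VPFirstOrderRecurrencePHIRecipe::computeCost relies on ElementCount's operator==, which compares both the scalability flag and the minimum element count, so the new condition is equivalent to the old VF.isScalable() && VF.getKnownMinValue() == 1 spelling. A minimal illustrative check follows; the standalone driver is an assumption for demonstration and is not part of the patch.

// Illustrative only: checks that the simplified condition from the last hunk
// matches the original spelling. Assumes LLVM headers on the include path.
#include "llvm/Support/TypeSize.h"
#include <cassert>

int main() {
  llvm::ElementCount VF = llvm::ElementCount::getScalable(1); // <vscale x 1 x ty>

  // Old form: spell out both components of the comparison.
  bool Old = VF.isScalable() && VF.getKnownMinValue() == 1;
  // New form: operator== on ElementCount already compares both the minimum
  // count and the scalable flag, so this is equivalent and shorter.
  bool New = (VF == llvm::ElementCount::getScalable(1));

  assert(Old == New && "the two spellings must agree");
  (void)Old;
  (void)New;
  return 0;
}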
