Skip to content

[LV] Use getFixedValue instead of getKnownMinValue when appropriate #143526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 19 additions & 14 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3107,12 +3107,13 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
// that we will create. This cost is likely to be zero. The phi node
// cost, if any, should be scaled by the block probability because it
// models a copy at the end of each predicated block.
ScalarizationCost += VF.getKnownMinValue() *
TTI.getCFInstrCost(Instruction::PHI, CostKind);
ScalarizationCost +=
VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);

// The cost of the non-predicated instruction.
ScalarizationCost += VF.getKnownMinValue() *
TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
ScalarizationCost +=
VF.getFixedValue() *
TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);

// The cost of insertelement and extractelement instructions needed for
// scalarization.
Expand Down Expand Up @@ -4280,7 +4281,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
return NumLegalParts <= VF.getKnownMinValue();
}
// Two or more elements that share a register - are vectorized.
return NumLegalParts < VF.getKnownMinValue();
return NumLegalParts < VF.getFixedValue();
};

// If no def nor is a store, e.g., branches, continue - no value to check.
Expand Down Expand Up @@ -4565,8 +4566,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
assert(!isa<SCEVCouldNotCompute>(TC) &&
"Trip count SCEV must be computable");
RemainingIterations = SE.getURemExpr(
TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
MaxTripCount = MainLoopVF.getKnownMinValue() * IC - 1;
TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
SE.getConstant(TCType, MaxTripCount))) {
MaxTripCount =
Expand All @@ -4577,7 +4578,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
}
if (SE.isKnownPredicate(
CmpInst::ICMP_UGT,
SE.getConstant(TCType, NextVF.Width.getKnownMinValue()),
SE.getConstant(TCType, NextVF.Width.getFixedValue()),
RemainingIterations))
continue;
}
Expand Down Expand Up @@ -5248,14 +5249,14 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,

// Get the cost of the scalar memory instruction and address computation.
InstructionCost Cost =
VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
VF.getFixedValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);

// Don't pass *I here, since it is scalar but will actually be part of a
// vectorized loop where the user of it is a vectorized instruction.
const Align Alignment = getLoadStoreAlignment(I);
Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),
ValTy->getScalarType(),
Alignment, AS, CostKind);
Cost += VF.getFixedValue() * TTI.getMemoryOpCost(I->getOpcode(),
ValTy->getScalarType(),
Alignment, AS, CostKind);

// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
Expand All @@ -5271,7 +5272,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
auto *VecI1Ty =
VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
Cost += TTI.getScalarizationOverhead(
VecI1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
VecI1Ty, APInt::getAllOnes(VF.getFixedValue()),
/*Insert=*/false, /*Extract=*/true, CostKind);
Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);

Expand Down Expand Up @@ -5332,6 +5333,10 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
StoreInst *SI = cast<StoreInst>(I);

bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
// TODO: We have existing tests that request the cost of extracting element
// VF.getKnownMinValue() - 1 from a scalable vector. This does not represent
// the actual generated code, which involves extracting the last element of
// a scalable vector where the lane to extract is unknown at compile time.
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +
Expand Down Expand Up @@ -5614,7 +5619,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,

for (Type *VectorTy : getContainedTypes(RetTy)) {
Cost += TTI.getScalarizationOverhead(
cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
/*Insert=*/true,
/*Extract=*/false, CostKind);
}
Expand Down
7 changes: 3 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {

bool IsSingleScalar = vputils::isSingleScalar(Def);

VPLane LastLane(IsSingleScalar ? 0 : VF.getKnownMinValue() - 1);
VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May I ask where it is guaranteed that the VF is not scalable?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an existing invariant, in the bit below there's an assertion that !VF.isScalable() if IsSingleScalar is false

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question, but no tests broke (including LLVM test suite) when I made this change so either:

  1. It's not possible to get here for scalable vectors, or
  2. We're missing tests.

and also I think the code is already broken for scalable vectors because the last lane isn't VF.getKnownMinValue() - 1.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I reasoned that we may as well strength the code, since it looks like we're effectively assuming fixed-width already.

// Check if there is a scalar value for the selected lane.
if (!hasScalarValue(Def, LastLane)) {
// At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
Expand Down Expand Up @@ -368,7 +368,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
packScalarIntoVectorizedValue(Def, Lane);
VectorValue = get(Def);
}
Expand Down Expand Up @@ -789,8 +789,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
Entry);
State->Lane = VPLane(0);
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
++Lane) {
for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
State->Lane = VPLane(Lane, VPLane::Kind::First);
// Visit the VPBlocks connected to \p this, starting from it.
for (VPBlockBase *Block : RPOT) {
Expand Down
19 changes: 9 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ void VPInstruction::execute(VPTransformState &State) {
isVectorToScalar() || isSingleScalar());
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
if (GeneratesPerAllLanes) {
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
Lane != NumLanes; ++Lane) {
Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
assert(GeneratedValue && "generatePerLane must produce a value");
Expand Down Expand Up @@ -2792,8 +2792,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
}

// Generate scalar instances for all VF lanes.
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
const unsigned EndLane = State.VF.getKnownMinValue();
const unsigned EndLane = State.VF.getFixedValue();
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
scalarizeInstruction(UI, this, VPLane(Lane), State);
}
Expand Down Expand Up @@ -2846,7 +2845,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
UI->getOpcode(), ResultTy, CostKind,
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
Op2Info, Operands, UI, &Ctx.TLI) *
(isSingleScalar() ? 1 : VF.getKnownMinValue());
(isSingleScalar() ? 1 : VF.getFixedValue());
}
}

Expand Down Expand Up @@ -3399,7 +3398,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
Value *ResBlockInMask = State.get(BlockInMask);
Value *ShuffledMask = State.Builder.CreateShuffleVector(
ResBlockInMask,
createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
"interleaved.mask");
return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
ShuffledMask, MaskForGaps)
Expand All @@ -3411,8 +3410,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
if (isa<LoadInst>(Instr)) {
Value *MaskForGaps = nullptr;
if (NeedsMaskForGaps) {
MaskForGaps = createBitMaskForGaps(State.Builder,
State.VF.getKnownMinValue(), *Group);
MaskForGaps =
createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
assert(MaskForGaps && "Mask for Gaps is required but it is null");
}

Expand Down Expand Up @@ -3463,6 +3462,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {

return;
}
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");

// For each member in the group, shuffle out the appropriate data from the
// wide loads.
Expand All @@ -3475,13 +3475,12 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
continue;

auto StrideMask =
createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
createStrideMask(I, InterleaveFactor, State.VF.getFixedValue());
Value *StridedVec =
State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");

// If this member has different type, cast the result type.
if (Member->getType() != ScalarTy) {
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it would be fine to drop this here as it is covered by the getFixedValue above, but it may be clearer to keep the assert here or add it somewhere above?

VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
StridedVec =
createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
Expand Down Expand Up @@ -3817,7 +3816,7 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
if (VF.isScalar())
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);

if (VF.isScalable() && VF.getKnownMinValue() == 1)
if (VF == ElementCount::getScalable(1))
return InstructionCost::getInvalid();

return 0;
Expand Down