diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8177b76ad5bdf..86bea6afdb738 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3107,12 +3107,13 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
       // that we will create. This cost is likely to be zero. The phi node
       // cost, if any, should be scaled by the block probability because it
       // models a copy at the end of each predicated block.
-      ScalarizationCost += VF.getKnownMinValue() *
-          TTI.getCFInstrCost(Instruction::PHI, CostKind);
+      ScalarizationCost +=
+          VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
 
       // The cost of the non-predicated instruction.
-      ScalarizationCost += VF.getKnownMinValue() *
-          TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
+      ScalarizationCost +=
+          VF.getFixedValue() *
+          TTI.getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind);
 
       // The cost of insertelement and extractelement instructions needed for
       // scalarization.
@@ -4280,7 +4281,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       return NumLegalParts <= VF.getKnownMinValue();
     }
     // Two or more elements that share a register - are vectorized.
-    return NumLegalParts < VF.getKnownMinValue();
+    return NumLegalParts < VF.getFixedValue();
   };
 
   // If no def nor is a store, e.g., branches, continue - no value to check.
@@ -4565,8 +4566,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
       assert(!isa<SCEVCouldNotCompute>(TC) &&
              "Trip count SCEV must be computable");
       RemainingIterations = SE.getURemExpr(
-          TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
-      MaxTripCount = MainLoopVF.getKnownMinValue() * IC - 1;
+          TC, SE.getConstant(TCType, MainLoopVF.getFixedValue() * IC));
+      MaxTripCount = MainLoopVF.getFixedValue() * IC - 1;
       if (SE.isKnownPredicate(CmpInst::ICMP_ULT, RemainingIterations,
                               SE.getConstant(TCType, MaxTripCount))) {
         MaxTripCount =
@@ -4577,7 +4578,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
       }
       if (SE.isKnownPredicate(
               CmpInst::ICMP_UGT,
-              SE.getConstant(TCType, NextVF.Width.getKnownMinValue()),
+              SE.getConstant(TCType, NextVF.Width.getFixedValue()),
              RemainingIterations))
         continue;
     }
@@ -5248,14 +5249,14 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
 
   // Get the cost of the scalar memory instruction and address computation.
   InstructionCost Cost =
-      VF.getKnownMinValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+      VF.getFixedValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
 
   // Don't pass *I here, since it is scalar but will actually be part of a
   // vectorized loop where the user of it is a vectorized instruction.
   const Align Alignment = getLoadStoreAlignment(I);
-  Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(),
-                                                      ValTy->getScalarType(),
-                                                      Alignment, AS, CostKind);
+  Cost += VF.getFixedValue() * TTI.getMemoryOpCost(I->getOpcode(),
+                                                   ValTy->getScalarType(),
+                                                   Alignment, AS, CostKind);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5271,7 +5272,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
     auto *VecI1Ty =
         VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
     Cost += TTI.getScalarizationOverhead(
-        VecI1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
+        VecI1Ty, APInt::getAllOnes(VF.getFixedValue()),
         /*Insert=*/false, /*Extract=*/true, CostKind);
     Cost += TTI.getCFInstrCost(Instruction::Br, CostKind);
 
@@ -5332,6 +5333,10 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   StoreInst *SI = cast<StoreInst>(I);
 
   bool IsLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
+  // TODO: We have existing tests that request the cost of extracting element
+  // VF.getKnownMinValue() - 1 from a scalable vector. This does not represent
+  // the actual generated code, which involves extracting the last element of
+  // a scalable vector where the lane to extract is unknown at compile time.
   return TTI.getAddressComputationCost(ValTy) +
          TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
                              CostKind) +
@@ -5614,7 +5619,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
 
   for (Type *VectorTy : getContainedTypes(RetTy)) {
     Cost += TTI.getScalarizationOverhead(
-        cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
+        cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
         /*Insert=*/true, /*Extract=*/false, CostKind);
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1838562f26b82..b5fa98d341756 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -331,7 +331,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
 
   bool IsSingleScalar = vputils::isSingleScalar(Def);
 
-  VPLane LastLane(IsSingleScalar ? 0 : VF.getKnownMinValue() - 1);
+  VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
   // Check if there is a scalar value for the selected lane.
   if (!hasScalarValue(Def, LastLane)) {
     // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
@@ -368,7 +368,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
     assert(!VF.isScalable() && "VF is assumed to be non scalable.");
     Value *Undef = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
     set(Def, Undef);
-    for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
+    for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
       packScalarIntoVectorizedValue(Def, Lane);
     VectorValue = get(Def);
   }
@@ -789,8 +789,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
       RPOT(Entry);
   State->Lane = VPLane(0);
-  for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
-       ++Lane) {
+  for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
     State->Lane = VPLane(Lane, VPLane::Kind::First);
     // Visit the VPBlocks connected to \p this, starting from it.
     for (VPBlockBase *Block : RPOT) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index aa6b13c217bd1..c4c6141cb62c5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -872,7 +872,7 @@ void VPInstruction::execute(VPTransformState &State) {
          isVectorToScalar() || isSingleScalar());
   bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
   if (GeneratesPerAllLanes) {
-    for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
+    for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
          Lane != NumLanes; ++Lane) {
       Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
       assert(GeneratedValue && "generatePerLane must produce a value");
@@ -2792,8 +2792,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
   }
 
   // Generate scalar instances for all VF lanes.
-  assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
-  const unsigned EndLane = State.VF.getKnownMinValue();
+  const unsigned EndLane = State.VF.getFixedValue();
   for (unsigned Lane = 0; Lane < EndLane; ++Lane)
     scalarizeInstruction(UI, this, VPLane(Lane), State);
 }
@@ -2846,7 +2845,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                UI->getOpcode(), ResultTy, CostKind,
                {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
                Op2Info, Operands, UI, &Ctx.TLI) *
-           (isSingleScalar() ? 1 : VF.getKnownMinValue());
+           (isSingleScalar() ? 1 : VF.getFixedValue());
   }
 }
 
@@ -3399,7 +3398,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
     Value *ResBlockInMask = State.get(BlockInMask);
     Value *ShuffledMask = State.Builder.CreateShuffleVector(
         ResBlockInMask,
-        createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
+        createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
         "interleaved.mask");
     return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
                                                    ShuffledMask, MaskForGaps)
@@ -3411,8 +3410,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
   if (isa<LoadInst>(Instr)) {
     Value *MaskForGaps = nullptr;
     if (NeedsMaskForGaps) {
-      MaskForGaps = createBitMaskForGaps(State.Builder,
-                                         State.VF.getKnownMinValue(), *Group);
+      MaskForGaps =
+          createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
       assert(MaskForGaps && "Mask for Gaps is required but it is null");
     }
 
@@ -3463,6 +3462,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
       return;
     }
 
+    assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
    // For each member in the group, shuffle out the appropriate data from the
    // wide loads.
    unsigned J = 0;
@@ -3475,13 +3475,12 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
        continue;
 
      auto StrideMask =
-         createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
+         createStrideMask(I, InterleaveFactor, State.VF.getFixedValue());
      Value *StridedVec =
          State.Builder.CreateShuffleVector(NewLoad, StrideMask, "strided.vec");
 
      // If this member has different type, cast the result type.
      if (Member->getType() != ScalarTy) {
-       assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
        VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
        StridedVec =
            createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
@@ -3817,7 +3816,7 @@ VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
   if (VF.isScalar())
     return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 
-  if (VF.isScalable() && VF.getKnownMinValue() == 1)
+  if (VF == ElementCount::getScalable(1))
     return InstructionCost::getInvalid();
 
   return 0;
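
---

Note (commentary, not part of the patch): the change is mechanical, but it leans on the `ElementCount` contract from `llvm/Support/TypeSize.h` that `getFixedValue()` returns the same value as `getKnownMinValue()` while additionally asserting `!isScalable()`. Each converted call site therefore self-documents, and in asserts builds enforces, that it is only reached for fixed-width VFs. A minimal standalone sketch of that contract and of the last-lane arithmetic behind the new TODO in `getUniformMemOpCost`; `lastLane` is a hypothetical helper for illustration, not an LLVM API:

```cpp
#include "llvm/Support/TypeSize.h"
#include <cassert>

using llvm::ElementCount;

// Hypothetical helper (illustration only): the runtime index of the last
// lane. For a scalable VF it depends on vscale, so it is not a compile-time
// constant and, in particular, is not getKnownMinValue() - 1 in general.
static unsigned lastLane(ElementCount VF, unsigned VScale) {
  return (VF.isScalable() ? VF.getKnownMinValue() * VScale
                          : VF.getFixedValue()) -
         1;
}

int main() {
  // Fixed VF, e.g. <4 x i32>: both accessors return 4, and getFixedValue()
  // additionally asserts !isScalable().
  ElementCount FixedVF = ElementCount::getFixed(4);
  assert(FixedVF.getKnownMinValue() == 4 && FixedVF.getFixedValue() == 4);
  assert(lastLane(FixedVF, /*VScale=*/1) == 3);

  // Scalable VF, e.g. <vscale x 4 x i32>: only the minimum lane count is
  // known statically; calling getFixedValue() here would trip an assertion,
  // which is exactly the guard the converted call sites gain.
  ElementCount ScalableVF = ElementCount::getScalable(4);
  assert(ScalableVF.getKnownMinValue() == 4);
  assert(lastLane(ScalableVF, /*VScale=*/2) == 7); // lane 7, not lane 3
  return 0;
}
```

The same distinction explains the one behavioral tweak in `VPFirstOrderRecurrencePHIRecipe::computeCost`: comparing `VF == ElementCount::getScalable(1)` matches only the scalable single-lane case, whereas the old `VF.isScalable() && VF.getKnownMinValue() == 1` spelled the same predicate less directly.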