From 11870a25909b02efb5c7b2b12a71c651b839c708 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 8 Jun 2025 21:46:21 +0100 Subject: [PATCH 01/20] [LV] Vectorize select min/max index. Add support for vectorizing loops that select the index of the minimum or maximum element. The patch implements vectorizing those patterns by combining Min/Max and FindFirstIV reductions. It extends matching Min/Max reductions to allow in-loop users that are FindLastIV reductions. It records a flag indicating that the Min/Max reduction is used by another reduction. When creating reduction recipes, we process any reduction that has other reduction users. The reduction using the min/max reduction needs adjusting to compute the correct result: 1. We need to find the first IV for which the condition based on the min/max reduction is true, 2. Compare the partial min/max reduction result to its final value and, 3. Select the lanes of the partial FindLastIV reductions which correspond to the lanes matching the min/max reduction result. --- .../Vectorize/LoopVectorizationLegality.cpp | 21 +- .../Transforms/Vectorize/LoopVectorize.cpp | 30 +- llvm/lib/Transforms/Vectorize/VPlan.h | 9 +- .../Vectorize/VPlanConstruction.cpp | 145 ++++++++++ .../Transforms/Vectorize/VPlanTransforms.h | 5 + .../LoopVectorize/AArch64/select-index.ll | 264 ++++++++++++++--- .../select-index-interleaving.ll | 264 ++++++++++++++--- .../LoopVectorize/select-smax-last-index.ll | 218 +++++++++++--- .../LoopVectorize/select-smin-last-index.ll | 228 ++++++++++++--- .../LoopVectorize/select-umax-last-index.ll | 218 +++++++++++--- .../LoopVectorize/select-umin-last-index.ll | 272 +++++++++++++++--- 11 files changed, 1433 insertions(+), 241 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index c47fd9421fddd..1ecb120cf1ed7 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -798,6 +798,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // For each block in the loop. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the instructions in the block and look for hazards. + PHINode *UnclassifiedPhi = nullptr; for (Instruction &I : *BB) { if (auto *Phi = dyn_cast(&I)) { Type *PhiTy = Phi->getType(); @@ -887,12 +888,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { addInductionPhi(Phi, ID, AllowedExit); continue; } - - reportVectorizationFailure("Found an unidentified PHI", - "value that could not be identified as " - "reduction is used outside the loop", - "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); - return false; + UnclassifiedPhi = Phi; } // end of PHI handling // We handle calls that: @@ -1043,6 +1039,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; } } // next instr. + if (UnclassifiedPhi && none_of(BB->phis(), [this](PHINode &P) { + auto I = Reductions.find(&P); + return I != Reductions.end() && + RecurrenceDescriptor::isFindLastIVRecurrenceKind( + I->second.getRecurrenceKind()); + })) { + reportVectorizationFailure("Found an unidentified PHI", + "value that could not be identified as " + "reduction is used outside the loop", + "NonReductionValueUsedOutsideLoop", ORE, + TheLoop, UnclassifiedPhi); + return false; + } } if (!PrimaryInduction) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 850c4a11edc67..cfd09dce3e6a4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7250,6 +7250,9 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( Value *StartV = getStartValueFromReductionResult(EpiRedResult); Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue(); using namespace llvm::PatternMatch; + MainResumeValue = cast(EpiRedHeaderPhi->getStartValue()) + ->getOperand(0) + ->getUnderlyingValue(); Value *Cmp, *OrigResumeV, *CmpOp; [[maybe_unused]] bool IsExpectedPattern = match(MainResumeValue, @@ -7260,7 +7263,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( ((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp)))); assert(IsExpectedPattern && "Unexpected reduction resume pattern"); MainResumeValue = OrigResumeV; + } else { + if (auto *VPI = dyn_cast(EpiRedHeaderPhi->getStartValue())) + MainResumeValue = VPI->getOperand(0)->getUnderlyingValue(); } + PHINode *MainResumePhi = cast(MainResumeValue); // When fixing reductions in the epilogue loop we should already have @@ -8276,9 +8283,6 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, return Recipe; VPHeaderPHIRecipe *PhiRecipe = nullptr; - assert((Legal->isReductionVariable(Phi) || - Legal->isFixedOrderRecurrence(Phi)) && - "can only widen reductions and fixed-order recurrences here"); VPValue *StartV = Operands[0]; if (Legal->isReductionVariable(Phi)) { const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi); @@ -8291,12 +8295,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, PhiRecipe = new VPReductionPHIRecipe( Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi), CM.useOrderedReductions(RdxDesc), ScaleFactor); - } else { + } else if (Legal->isFixedOrderRecurrence(Phi)) { // TODO: Currently fixed-order recurrences are modeled as chains of // first-order recurrences. If there are no users of the intermediate // recurrences in the chain, the fixed order recurrence should be modeled // directly, enabling more efficient codegen. PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV); + } else { + // Failed to identify phi as reduction or fixed-order recurrence. Keep the + // original VPWidenPHIRecipe for now, to be legalized later if possible. + setRecipe(Phi, R); + return nullptr; } // Add backedge value. PhiRecipe->addOperand(Operands[1]); @@ -8481,7 +8490,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, // TODO: Extract final value from induction recipe initially, optimize to // pre-computed end value together in optimizeInductionExitUsers. auto *VectorPhiR = - cast(Builder.getRecipe(&ScalarPhiIRI->getIRPhi())); + cast(Builder.getRecipe(&ScalarPhiIRI->getIRPhi())); if (auto *WideIVR = dyn_cast(VectorPhiR)) { if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction( WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, @@ -8503,7 +8512,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, // which for FORs is a vector whose last element needs to be extracted. The // start value provides the value if the loop is bypassed. bool IsFOR = isa(VectorPhiR); - auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue(); + auto *ResumeFromVectorLoop = VectorPhiR->getOperand(1); assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() && "Cannot handle loops with uncountable early exits"); if (IsFOR) @@ -8512,7 +8521,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, "vector.recur.extract"); StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx"; auto *ResumePhiR = ScalarPHBuilder.createScalarPhi( - {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name); + {ResumeFromVectorLoop, VectorPhiR->getOperand(0)}, {}, Name); ScalarPhiIRI->addOperand(ResumePhiR); } } @@ -8783,6 +8792,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range); if (!Recipe) { + if (isa(SingleDef)) + continue; SmallVector Operands(R.operands()); Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range); } @@ -8845,6 +8856,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // Adjust the recipes for any inloop reductions. adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start); + // Try to convert remaining VPWidenPHIRecipes to reduction recipes. + if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis, + *Plan)) + return nullptr; // Apply mandatory transformation to handle FP maxnum/minnum reduction with // NaNs if possible, bail out otherwise. if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions, @@ -9317,6 +9332,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( PhiR->setOperand(0, StartV); } } + for (VPRecipeBase *R : ToDelete) R->eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a5de5933d5ff1..89d797520100b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1896,7 +1896,8 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe, ~VPHeaderPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *B) { + static inline bool classof(const VPUser *U) { + auto *B = cast(U); return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC && B->getVPDefID() <= VPDef::VPLastHeaderPHISC; } @@ -1905,6 +1906,10 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe, return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC && B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC; } + static inline bool classof(const VPSingleDefRecipe *B) { + return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC && + B->getVPDefID() <= VPDef::VPLastHeaderPHISC; + } /// Generate the phi nodes. void execute(VPTransformState &State) override = 0; @@ -1966,7 +1971,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe { return R && classof(R); } - static inline bool classof(const VPHeaderPHIRecipe *R) { + static inline bool classof(const VPSingleDefRecipe *R) { return classof(static_cast(R)); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 6c1f53b4eaa24..ad67b107562ae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -815,3 +815,148 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { MiddleTerm->setOperand(0, NewCond); return true; } + +bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) { + using namespace VPlanPatternMatch; + for (auto &PhiR : make_early_inc_range( + Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) { + if (!isa(&PhiR)) + continue; + + // Check if PhiR is a min/max reduction that has a user inside the loop + // outside the min/max reduction chain. The other user must be the compare + // of a FindLastIV reduction chain. + auto *MinMaxPhiR = cast(&PhiR); + auto *MinMaxOp = dyn_cast_or_null( + MinMaxPhiR->getOperand(1)->getDefiningRecipe()); + if (!MinMaxOp) + return false; + + // The incoming value must be a min/max instrinsic. + // TODO: Also handle the select variant. + Intrinsic::ID ID = Intrinsic::not_intrinsic; + if (auto *WideInt = dyn_cast(MinMaxOp)) + ID = WideInt->getVectorIntrinsicID(); + else { + auto *RepR = dyn_cast(MinMaxOp); + if (!RepR || !isa(RepR->getUnderlyingInstr())) + return false; + ID = cast(RepR->getUnderlyingInstr())->getIntrinsicID(); + } + RecurKind RdxKind = RecurKind::None; + switch (ID) { + case Intrinsic::umax: + RdxKind = RecurKind::UMax; + break; + case Intrinsic::umin: + RdxKind = RecurKind::UMin; + break; + case Intrinsic::smax: + RdxKind = RecurKind::SMax; + break; + case Intrinsic::smin: + RdxKind = RecurKind::SMin; + break; + default: + return false; + } + + // The min/max intrinsic must use the phi and itself must only be used by + // the phi and a resume-phi in the scalar preheader. + if (MinMaxOp->getOperand(0) != MinMaxPhiR && + MinMaxOp->getOperand(1) != MinMaxPhiR) + return false; + if (MinMaxPhiR->getNumUsers() != 2 || + any_of(MinMaxOp->users(), [MinMaxPhiR, &Plan](VPUser *U) { + auto *Phi = dyn_cast(U); + return MinMaxPhiR != U && + (!Phi || Phi->getParent() != Plan.getScalarPreheader()); + })) + return false; + + // One user of MinMaxPhiR is MinMaxOp, the other users must be a compare + // that's part of a FindLastIV chain. + auto MinMaxUsers = to_vector(MinMaxPhiR->users()); + auto *Cmp = dyn_cast( + MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]); + VPValue *CmpOpA; + VPValue *CmpOpB; + if (!Cmp || Cmp->getNumUsers() != 1 || + !match(Cmp, m_Binary(m_VPValue(CmpOpA), + m_VPValue(CmpOpB)))) + return false; + + // Normalize the predicate so MinMaxPhiR is on the right side. + CmpInst::Predicate Pred = Cmp->getPredicate(); + if (CmpOpA == MinMaxPhiR) + Pred = CmpInst::getSwappedPredicate(Pred); + + // Determine if the predicate is not strict. + bool IsNonStrictPred = ICmpInst::isLE(Pred) || ICmpInst::isGE(Pred); + // Account for a mis-match between RdxKind and the predicate. + switch (RdxKind) { + case RecurKind::UMin: + case RecurKind::SMin: + IsNonStrictPred |= ICmpInst::isGT(Pred); + break; + case RecurKind::UMax: + case RecurKind::SMax: + IsNonStrictPred |= ICmpInst::isLT(Pred); + break; + default: + llvm_unreachable("unsupported kind"); + } + + // TODO: Strict predicates need to find the first IV value for which the + // predicate holds, not the last. + if (Pred == CmpInst::ICMP_NE || !IsNonStrictPred) + return false; + + // Cmp must be used by the select of a FindLastIV chain. + VPValue *Sel = dyn_cast(*Cmp->user_begin()); + VPValue *IVOp, *FindIV; + if (!Sel || + !match(Sel, + m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) || + Sel->getNumUsers() != 2 || !isa(IVOp)) + return false; + auto *FindIVPhiR = dyn_cast(FindIV); + if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind( + FindIVPhiR->getRecurrenceKind())) + return false; + + assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() && + "cannot handle inloop/ordered reductions yet"); + + auto NewPhiR = new VPReductionPHIRecipe( + cast(MinMaxPhiR->getUnderlyingInstr()), RdxKind, + *MinMaxPhiR->getOperand(0), false, false, 1); + NewPhiR->insertBefore(MinMaxPhiR); + MinMaxPhiR->replaceAllUsesWith(NewPhiR); + NewPhiR->addOperand(MinMaxPhiR->getOperand(1)); + MinMaxPhiR->eraseFromParent(); + + // The reduction using MinMaxPhiR needs adjusting to compute the correct + // result: + // 1. We need to find the last IV for which the condition based on the + // min/max recurrence is true, + // 2. Compare the partial min/max reduction result to its final value and, + // 3. Select the lanes of the partial FindLastIV reductions which + // correspond to the lanes matching the min/max reduction result. + VPInstruction *FindIVResult = cast( + *(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0))); + VPBuilder B(FindIVResult); + VPInstruction *MinMaxResult = + B.createNaryOp(VPInstruction::ComputeReductionResult, + {NewPhiR, NewPhiR->getBackedgeValue()}, VPIRFlags(), {}); + NewPhiR->getBackedgeValue()->replaceUsesWithIf( + MinMaxResult, [](VPUser &U, unsigned) { return isa(&U); }); + auto *FinalMinMaxCmp = B.createICmp( + CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult); + auto *FinalIVSelect = + B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3), + FindIVResult->getOperand(2)); + FindIVResult->setOperand(3, FinalIVSelect); + } + return true; +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 880159f760922..35f6447715bc7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -93,6 +93,11 @@ struct VPlanTransforms { GetIntOrFpInductionDescriptor, ScalarEvolution &SE, const TargetLibraryInfo &TLI); + /// Try to legalize unclassified phis by converting VPWidenPHIRecipes to + /// min-max reductions used by FindLastIV reductions if possible. Returns + /// false if the VPlan contains VPWidenPHIRecipes that cannot be legalized. + static bool legalizeUnclassifiedPhis(VPlan &Plan); + /// Try to have all users of fixed-order recurrences appear after the recipe /// defining their previous value, by either sinking users or hoisting recipes /// defining their previous value (and its operands). Then introduce diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll index cc7b9e26ca256..05a79678a4de9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll @@ -47,21 +47,69 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umin_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP5]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[VEC_PHI2]], <2 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP6]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[VEC_PHI3]], <2 x i64> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP7]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <2 x i1> [[TMP4]], <2 x i64> [[STEP_ADD]], <2 x i64> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[RDX_MINMAX]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <2 x i64> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <2 x i64> [[TMP6]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP11]], <2 x i64> [[TMP7]], <2 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP12]], <2 x i64> [[TMP8]], <2 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP13]], <2 x i64> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -131,21 +179,69 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smin_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP5]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[VEC_PHI2]], <2 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP6]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[VEC_PHI3]], <2 x i64> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP7]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <2 x i1> [[TMP4]], <2 x i64> [[STEP_ADD]], <2 x i64> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> [[RDX_MINMAX]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <2 x i64> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <2 x i64> [[TMP6]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP11]], <2 x i64> [[TMP7]], <2 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP12]], <2 x i64> [[TMP8]], <2 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP13]], <2 x i64> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -215,21 +311,69 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umax_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP5]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[VEC_PHI2]], <2 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP6]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[VEC_PHI3]], <2 x i64> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP7]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <2 x i1> [[TMP4]], <2 x i64> [[STEP_ADD]], <2 x i64> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> [[RDX_MINMAX]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <2 x i64> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <2 x i64> [[TMP6]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP11]], <2 x i64> [[TMP7]], <2 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP12]], <2 x i64> [[TMP8]], <2 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP13]], <2 x i64> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -299,21 +443,69 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smax_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sle <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <2 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP5]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[VEC_PHI2]], <2 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP6]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[VEC_PHI3]], <2 x i64> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP7]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <2 x i1> [[TMP4]], <2 x i64> [[STEP_ADD]], <2 x i64> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <2 x i64> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <2 x i64> [[TMP6]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP11]], <2 x i64> [[TMP7]], <2 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP12]], <2 x i64> [[TMP8]], <2 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP13]], <2 x i64> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll index 6b44586286c7b..9298cf24013b1 100644 --- a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll @@ -47,21 +47,69 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umin_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp uge <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP5]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI2]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP6]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI3]], <4 x i64> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[TMP6]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP7]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x i64> [[TMP8]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -131,21 +179,69 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smin_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP5]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI2]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP6]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI3]], <4 x i64> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[TMP6]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP7]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x i64> [[TMP8]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -215,21 +311,69 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umax_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP5]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI2]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP6]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI3]], <4 x i64> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[RDX_MINMAX]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[TMP6]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP7]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x i64> [[TMP8]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -299,21 +443,69 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smax_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sle <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP5]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI2]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP6]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI3]], <4 x i64> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[TMP6]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP7]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x i64> [[TMP8]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[RDX_MINMAX5:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll index 48d2eee600151..a1a825fbc473e 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll @@ -5,21 +5,57 @@ define i64 @test_vectorize_select_smax_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smax_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MAX_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -47,21 +83,57 @@ define i64 @test_vectorize_select_smax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smax_idx_cond_flipped( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MAX_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -219,21 +291,57 @@ define i64 @test_vectorize_select_smax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smax_idx_min_ops_switched( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smax.i64(i64 [[L]], i64 [[MIN_VAL]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MAX_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -303,21 +411,59 @@ define i64 @test_cmp_and_smax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_and_smax_use_different_values( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX4]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[X]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MAX_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll index bca6bf38da8d9..d9d241213ba4f 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll @@ -7,21 +7,57 @@ define i64 @test_vectorize_select_smin_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smin_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -49,21 +85,57 @@ define i64 @test_vectorize_select_smin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smin_idx_cond_flipped( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -93,15 +165,15 @@ define i64 @test_vectorize_select_smin_idx_select_ops_flipped(ptr %src, i64 %n) ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[MIN_IDX]], i64 [[IV]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[MIN_IDX]], i64 [[IV1]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: @@ -221,21 +293,57 @@ define i64 @test_vectorize_select_smin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smin_idx_min_ops_switched( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[L]], i64 [[MIN_VAL]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -305,21 +413,59 @@ define i64 @test_cmp_and_smin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_and_smin_use_different_values( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX4]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[X]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll index cabb868a1dbe5..696264238a860 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll @@ -5,21 +5,57 @@ define i64 @test_vectorize_select_umax_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umax_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MAX_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -47,21 +83,57 @@ define i64 @test_vectorize_select_umax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umax_idx_cond_flipped( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MAX_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -219,21 +291,57 @@ define i64 @test_vectorize_select_umax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umax_idx_min_ops_switched( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umax.i64(i64 [[L]], i64 [[MIN_VAL]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MAX_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -303,21 +411,59 @@ define i64 @test_cmp_and_umax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_and_umax_use_different_values( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX4]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[X]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umax.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MAX_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll index 549ee0167f2f2..9d36af3216520 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll @@ -7,21 +7,57 @@ define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -49,21 +85,57 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_cond_flipped( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -93,15 +165,15 @@ define i64 @test_vectorize_select_umin_idx_select_ops_flipped(ptr %src, i64 %n) ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[MIN_IDX]], i64 [[IV]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[MIN_IDX]], i64 [[IV1]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: @@ -221,21 +293,57 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_min_ops_switched( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[L]], i64 [[MIN_VAL]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -305,21 +413,59 @@ define i64 @test_cmp_and_umin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_and_umin_use_different_values( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT3]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX4]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[X]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -553,5 +699,49 @@ exit: ret i64 %res } +define i64 @test_cmp_use_find_iv_phi_twice(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_cmp_use_find_iv_phi_twice( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[ADD]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr i64, ptr %src, i64 %iv + %l = load i64, ptr %gep + %add = add i64 %min.val, %l + %cmp = icmp uge i64 %min.val, %add + %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + declare i64 @llvm.umin.i64(i64, i64) declare i16 @llvm.umin.i16(i16, i16) From 2688d03e11f51fed359fa6616fa0ed218c023692 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 22 Jul 2025 15:00:24 +0100 Subject: [PATCH 02/20] !fixup address review comments, thanks --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +++--- llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 4 ++-- .../Transforms/LoopVectorize/AArch64/select-index.ll | 12 ++++-------- .../LoopVectorize/select-index-interleaving.ll | 12 ++++-------- .../LoopVectorize/select-smax-last-index.ll | 12 ++++-------- .../LoopVectorize/select-smin-last-index.ll | 12 ++++-------- .../LoopVectorize/select-umax-last-index.ll | 12 ++++-------- .../LoopVectorize/select-umin-last-index.ll | 12 ++++-------- 8 files changed, 29 insertions(+), 53 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cfd09dce3e6a4..ef0d6ec3ee04d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7263,9 +7263,9 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( ((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp)))); assert(IsExpectedPattern && "Unexpected reduction resume pattern"); MainResumeValue = OrigResumeV; - } else { - if (auto *VPI = dyn_cast(EpiRedHeaderPhi->getStartValue())) - MainResumeValue = VPI->getOperand(0)->getUnderlyingValue(); + } else if (auto *VPI = + dyn_cast(EpiRedHeaderPhi->getStartValue())) { + MainResumeValue = VPI->getOperand(0)->getUnderlyingValue(); } PHINode *MainResumePhi = cast(MainResumeValue); diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index ad67b107562ae..6a8c5daa99245 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -835,9 +835,9 @@ bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) { // The incoming value must be a min/max instrinsic. // TODO: Also handle the select variant. Intrinsic::ID ID = Intrinsic::not_intrinsic; - if (auto *WideInt = dyn_cast(MinMaxOp)) + if (auto *WideInt = dyn_cast(MinMaxOp)) { ID = WideInt->getVectorIntrinsicID(); - else { + } else { auto *RepR = dyn_cast(MinMaxOp); if (!RepR || !isa(RepR->getUnderlyingInstr())) return false; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll index 05a79678a4de9..09302a310a397 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll @@ -62,9 +62,8 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp uge <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] @@ -194,9 +193,8 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] @@ -326,9 +324,8 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] @@ -458,9 +455,8 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sle <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <2 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] diff --git a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll index 9298cf24013b1..2ce99341b5215 100644 --- a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll @@ -62,9 +62,8 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp uge <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] @@ -194,9 +193,8 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] @@ -326,9 +324,8 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] @@ -458,9 +455,8 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sle <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD4]] diff --git a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll index a1a825fbc473e..3d36133b8cd9d 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll @@ -17,8 +17,7 @@ define i64 @test_vectorize_select_smax_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -95,8 +94,7 @@ define i64 @test_vectorize_select_smax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -303,8 +301,7 @@ define i64 @test_vectorize_select_smax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -425,8 +422,7 @@ define i64 @test_cmp_and_smax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll index d9d241213ba4f..6a43810ffb051 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll @@ -19,8 +19,7 @@ define i64 @test_vectorize_select_smin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -97,8 +96,7 @@ define i64 @test_vectorize_select_smin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -305,8 +303,7 @@ define i64 @test_vectorize_select_smin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -427,8 +424,7 @@ define i64 @test_cmp_and_smin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll index 696264238a860..2b7bec09b9eb0 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll @@ -17,8 +17,7 @@ define i64 @test_vectorize_select_umax_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -95,8 +94,7 @@ define i64 @test_vectorize_select_umax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -303,8 +301,7 @@ define i64 @test_vectorize_select_umax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -425,8 +422,7 @@ define i64 @test_cmp_and_umax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll index 9d36af3216520..2970f02b5c800 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll @@ -19,8 +19,7 @@ define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -97,8 +96,7 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -305,8 +303,7 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] @@ -427,8 +424,7 @@ define i64 @test_cmp_and_umin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] From 26ebf5aa93bc987674469109bcbbee3d838f1e32 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 1 Aug 2025 11:54:42 +0100 Subject: [PATCH 03/20] !fixup remove stray new line --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ef0d6ec3ee04d..4d6ba39997a5c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9332,7 +9332,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( PhiR->setOperand(0, StartV); } } - for (VPRecipeBase *R : ToDelete) R->eraseFromParent(); From 2871d6cd0aecf62b475712cdca7bc3b8895e2233 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 11 Sep 2025 19:15:33 +0100 Subject: [PATCH 04/20] !fixup fix build after merge --- .../Vectorize/LoopVectorizationLegality.cpp | 18 ++++++++++++++++-- .../lib/Transforms/Vectorize/LoopVectorize.cpp | 3 ++- .../Transforms/Vectorize/VPlanConstruction.cpp | 4 ++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 55aa099b713fc..3abc8e4b496a4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -801,7 +801,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // For each block in the loop. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the instructions in the block and look for hazards. - PHINode *UnclassifiedPhi = nullptr; for (Instruction &I : *BB) { Result &= canVectorizeInstr(I); if (!DoExtraAnalysis && !Result) @@ -809,7 +808,22 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } } - if (size(TheLoop->getHeader()->phis()): + if (range_size(TheLoop->getHeader()->phis()) != FixedOrderRecurrences.size() + + Inductions.size() + + Reductions.size() && + none_of(TheLoop->getHeader()->phis(), [this](PHINode &P) { + auto I = Reductions.find(&P); + return I != Reductions.end() && + RecurrenceDescriptor::isFindLastIVRecurrenceKind( + I->second.getRecurrenceKind()); + })) { + reportVectorizationFailure("Found an unidentified PHI", + "value that could not be identified as " + "reduction is used outside the loop", + "NonReductionValueUsedOutsideLoop", ORE, TheLoop, + &*TheLoop->getHeader()->phis().begin()); + return false; + } if (!PrimaryInduction) { if (Inductions.empty()) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7cd5702b49e5f..fe71baa3bb158 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8593,9 +8593,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range); if (!Recipe) { - if (isa(SingleDef)) + if (isa(SingleDef)) continue; Recipe = RecipeBuilder.handleReplication(Instr, R.operands(), Range); + } RecipeBuilder.setRecipe(Instr, Recipe); if (isa(Recipe) && isa(Instr)) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 74c1e67b5e8e6..d692e07ea59c5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -920,13 +920,13 @@ bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) { using namespace VPlanPatternMatch; for (auto &PhiR : make_early_inc_range( Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) { - if (!isa(&PhiR)) + if (!isa(&PhiR)) continue; // Check if PhiR is a min/max reduction that has a user inside the loop // outside the min/max reduction chain. The other user must be the compare // of a FindLastIV reduction chain. - auto *MinMaxPhiR = cast(&PhiR); + auto *MinMaxPhiR = cast(&PhiR); auto *MinMaxOp = dyn_cast_or_null( MinMaxPhiR->getOperand(1)->getDefiningRecipe()); if (!MinMaxOp) From 98fb1f7d20bd372fa6d733c7f2a75d08c88be46a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 11 Sep 2025 22:05:58 +0100 Subject: [PATCH 05/20] !fixup detect multi-use min/max recurrences. --- llvm/include/llvm/Analysis/IVDescriptors.h | 24 +++++++- llvm/lib/Analysis/IVDescriptors.cpp | 39 ++++++++++++ .../Vectorize/LoopVectorizationLegality.cpp | 26 +++----- .../Transforms/Vectorize/LoopVectorize.cpp | 3 + .../Vectorize/VPlanConstruction.cpp | 59 +++---------------- .../LoopVectorize/AArch64/select-index.ll | 24 +++++--- .../select-index-interleaving.ll | 24 +++++--- .../LoopVectorize/select-smax-last-index.ll | 12 ++-- .../LoopVectorize/select-umax-last-index.ll | 12 ++-- .../LoopVectorize/select-umin-last-index.ll | 58 +++++++++--------- 10 files changed, 159 insertions(+), 122 deletions(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index f9e6da6d0846a..e183db5349d96 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -42,9 +42,13 @@ enum class RecurKind { And, ///< Bitwise or logical AND of integers. Xor, ///< Bitwise or logical XOR of integers. SMin, ///< Signed integer min implemented in terms of select(cmp()). + SMinMultiUse, ///< Signed integer min implemented in terms of select(cmp()). SMax, ///< Signed integer max implemented in terms of select(cmp()). + SMaxMultiUse, ///< Signed integer max implemented in terms of select(cmp()). UMin, ///< Unsigned integer min implemented in terms of select(cmp()). + UMinMultiUse, ///< Unsigned integer min implemented in terms of select(cmp()). UMax, ///< Unsigned integer max implemented in terms of select(cmp()). + UMaxMultiUse, ///< Unsigned integer max implemented in terms of select(cmp()). FAdd, ///< Sum of floats. FMul, ///< Product of floats. FMin, ///< FP min implemented in terms of select(cmp()). @@ -247,8 +251,24 @@ class RecurrenceDescriptor { /// Returns true if the recurrence kind is an integer min/max kind. static bool isIntMinMaxRecurrenceKind(RecurKind Kind) { - return Kind == RecurKind::UMin || Kind == RecurKind::UMax || - Kind == RecurKind::SMin || Kind == RecurKind::SMax; + return Kind == RecurKind::UMin || Kind == RecurKind::UMinMultiUse || + Kind == RecurKind::UMax || Kind == RecurKind::UMaxMultiUse || + Kind == RecurKind::SMin || Kind == RecurKind::SMinMultiUse || + Kind == RecurKind::SMax || Kind == RecurKind::SMaxMultiUse; + } + + static RecurKind convertFromMultiUseKind(RecurKind Kind) { + switch (Kind) { + case RecurKind::UMaxMultiUse: + return RecurKind::UMax; + case RecurKind::UMinMultiUse: + return RecurKind::UMin; + case RecurKind::SMinMultiUse: + return RecurKind::SMin; + case RecurKind::SMaxMultiUse: + return RecurKind::SMax; + } + return Kind; } /// Returns true if the recurrence kind is a floating-point min/max kind. diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index b8c540ce4b99d..ade2871a6c281 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -214,6 +214,41 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, return true; } +static std::optional +getMultiUseMinMax(PHINode *Phi, RecurKind Kind, Loop *TheLoop) { + BasicBlock *Latch = TheLoop->getLoopLatch(); + if (!Latch) + return std::nullopt; + Value *Inc = Phi->getIncomingValueForBlock(Latch); + RecurKind RK; + if (Phi->hasOneUse() || + !RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) + return std::nullopt; + + Value *A, *B; + if (match(Inc, m_OneUse(m_UMin(m_Value(A), m_Value(B))))) + RK = RecurKind::UMinMultiUse; + else if (match(Inc, m_OneUse(m_UMax(m_Value(A), m_Value(B))))) + RK = RecurKind::UMaxMultiUse; + else if (match(Inc, m_OneUse(m_SMax(m_Value(A), m_Value(B))))) + RK = RecurKind::SMaxMultiUse; + else if (match(Inc, m_OneUse(m_SMin(m_Value(A), m_Value(B))))) + RK = RecurKind::SMinMultiUse; + else + return std::nullopt; + + if (RecurrenceDescriptor::convertFromMultiUseKind(RK) != Kind || A == B || + (A != Phi && B != Phi)) + return std::nullopt; + + SmallPtrSet CastInsts; + Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); + RecurrenceDescriptor RD(RdxStart, nullptr, nullptr, RK, FastMathFlags(), + nullptr, Phi->getType(), false, false, CastInsts, + -1U); + return {RD}; +} + bool RecurrenceDescriptor::AddReductionVar( PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC, @@ -225,6 +260,10 @@ bool RecurrenceDescriptor::AddReductionVar( if (Phi->getParent() != TheLoop->getHeader()) return false; + if (auto RD = getMultiUseMinMax(Phi, Kind, TheLoop)) { + RedDes = *RD; + return true; + } // Obtain the reduction start value from the value that comes from the loop // preheader. Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 3abc8e4b496a4..85a6fcaf3ecdd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -808,23 +808,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } } - if (range_size(TheLoop->getHeader()->phis()) != FixedOrderRecurrences.size() + - Inductions.size() + - Reductions.size() && - none_of(TheLoop->getHeader()->phis(), [this](PHINode &P) { - auto I = Reductions.find(&P); - return I != Reductions.end() && - RecurrenceDescriptor::isFindLastIVRecurrenceKind( - I->second.getRecurrenceKind()); - })) { - reportVectorizationFailure("Found an unidentified PHI", - "value that could not be identified as " - "reduction is used outside the loop", - "NonReductionValueUsedOutsideLoop", ORE, TheLoop, - &*TheLoop->getHeader()->phis().begin()); - return false; - } - if (!PrimaryInduction) { if (Inductions.empty()) { reportVectorizationFailure( @@ -944,8 +927,15 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) { if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) && !IsDisallowedStridedPointerInduction(ID)) { addInductionPhi(Phi, ID, AllowedExit); + return true; } - return true; + + reportVectorizationFailure("Found an unidentified PHI", + "value that could not be identified as " + "reduction is used outside the loop", + "NonReductionValueUsedOutsideLoop", ORE, TheLoop, + Phi); + return false; } // end of PHI handling // We handle calls that: diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index fe71baa3bb158..567f963648b56 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7929,6 +7929,9 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) { SmallVector> PartialReductionChains; for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) { + if (RecurrenceDescriptor::isMinMaxRecurrenceKind( + RdxDesc.getRecurrenceKind())) + continue; getScaledReductions(Phi, RdxDesc.getLoopExitInstr(), Range, PartialReductionChains); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index d692e07ea59c5..f5fa461d1214b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -920,62 +920,19 @@ bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) { using namespace VPlanPatternMatch; for (auto &PhiR : make_early_inc_range( Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) { - if (!isa(&PhiR)) + auto *MinMaxPhiR = dyn_cast(&PhiR); + if (!MinMaxPhiR || !RecurrenceDescriptor::isIntMinMaxRecurrenceKind( + MinMaxPhiR->getRecurrenceKind())) continue; - // Check if PhiR is a min/max reduction that has a user inside the loop - // outside the min/max reduction chain. The other user must be the compare - // of a FindLastIV reduction chain. - auto *MinMaxPhiR = cast(&PhiR); - auto *MinMaxOp = dyn_cast_or_null( - MinMaxPhiR->getOperand(1)->getDefiningRecipe()); - if (!MinMaxOp) - return false; - - // The incoming value must be a min/max instrinsic. - // TODO: Also handle the select variant. - Intrinsic::ID ID = Intrinsic::not_intrinsic; - if (auto *WideInt = dyn_cast(MinMaxOp)) { - ID = WideInt->getVectorIntrinsicID(); - } else { - auto *RepR = dyn_cast(MinMaxOp); - if (!RepR || !isa(RepR->getUnderlyingInstr())) - return false; - ID = cast(RepR->getUnderlyingInstr())->getIntrinsicID(); - } - RecurKind RdxKind = RecurKind::None; - switch (ID) { - case Intrinsic::umax: - RdxKind = RecurKind::UMax; - break; - case Intrinsic::umin: - RdxKind = RecurKind::UMin; - break; - case Intrinsic::smax: - RdxKind = RecurKind::SMax; - break; - case Intrinsic::smin: - RdxKind = RecurKind::SMin; - break; - default: - return false; - } - - // The min/max intrinsic must use the phi and itself must only be used by - // the phi and a resume-phi in the scalar preheader. - if (MinMaxOp->getOperand(0) != MinMaxPhiR && - MinMaxOp->getOperand(1) != MinMaxPhiR) - return false; - if (MinMaxPhiR->getNumUsers() != 2 || - any_of(MinMaxOp->users(), [MinMaxPhiR, &Plan](VPUser *U) { - auto *Phi = dyn_cast(U); - return MinMaxPhiR != U && - (!Phi || Phi->getParent() != Plan.getScalarPreheader()); - })) - return false; + RecurKind RdxKind = RecurrenceDescriptor::convertFromMultiUseKind( + MinMaxPhiR->getRecurrenceKind()); + if (RdxKind == MinMaxPhiR->getRecurrenceKind()) + continue; // One user of MinMaxPhiR is MinMaxOp, the other users must be a compare // that's part of a FindLastIV chain. + auto *MinMaxOp = cast(MinMaxPhiR->getBackedgeValue()); auto MinMaxUsers = to_vector(MinMaxPhiR->users()); auto *Cmp = dyn_cast( MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll index 09302a310a397..644a332918954 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll @@ -88,17 +88,19 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -219,17 +221,19 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] @@ -350,17 +354,19 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[L]] @@ -481,17 +487,19 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[L]] diff --git a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll index 2ce99341b5215..7bc067d3a8abd 100644 --- a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll @@ -88,17 +88,19 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -219,17 +221,19 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] @@ -350,17 +354,19 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[L]] @@ -481,17 +487,19 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[L]] diff --git a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll index 3d36133b8cd9d..1dd383d4a17bd 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll @@ -34,12 +34,13 @@ define i64 @test_vectorize_select_smax_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -111,12 +112,13 @@ define i64 @test_vectorize_select_smax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -318,12 +320,13 @@ define i64 @test_vectorize_select_smax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -439,12 +442,13 @@ define i64 @test_cmp_and_smax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll index 2b7bec09b9eb0..398fc32a5287f 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll @@ -34,12 +34,13 @@ define i64 @test_vectorize_select_umax_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -111,12 +112,13 @@ define i64 @test_vectorize_select_umax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -318,12 +320,13 @@ define i64 @test_vectorize_select_umax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -439,12 +442,13 @@ define i64 @test_cmp_and_umax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll index 2970f02b5c800..a8b9a16f30042 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll @@ -36,12 +36,13 @@ define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -113,12 +114,13 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -298,45 +300,46 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP3]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP7]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP2]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[L]], i64 [[MIN_VAL]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: @@ -441,12 +444,13 @@ define i64 @test_cmp_and_umin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] From ad994962f80e4c07f51597d07f794f927cd90fb4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 2 Oct 2025 14:07:07 +0100 Subject: [PATCH 06/20] !fixup add additional uses. --- llvm/include/llvm/Analysis/IVDescriptors.h | 4 +++- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index e183db5349d96..f1dab96fc5f1d 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -267,8 +267,10 @@ class RecurrenceDescriptor { return RecurKind::SMin; case RecurKind::SMaxMultiUse: return RecurKind::SMax; + default: + return Kind; } - return Kind; + llvm_unreachable("all cases must be handled above"); } /// Returns true if the recurrence kind is a floating-point min/max kind. diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fedca65d241e8..4d32ebbed3c93 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -24956,6 +24956,10 @@ class HorizontalReduction { case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: + case RecurKind::UMaxMultiUse: + case RecurKind::UMinMultiUse: + case RecurKind::SMinMultiUse: + case RecurKind::SMaxMultiUse: case RecurKind::None: llvm_unreachable("Unexpected reduction kind for repeated scalar."); } @@ -25097,6 +25101,10 @@ class HorizontalReduction { case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: + case RecurKind::UMaxMultiUse: + case RecurKind::UMinMultiUse: + case RecurKind::SMinMultiUse: + case RecurKind::SMaxMultiUse: case RecurKind::None: llvm_unreachable("Unexpected reduction kind for repeated scalar."); } @@ -25203,6 +25211,10 @@ class HorizontalReduction { case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: + case RecurKind::UMaxMultiUse: + case RecurKind::UMinMultiUse: + case RecurKind::SMinMultiUse: + case RecurKind::SMaxMultiUse: case RecurKind::None: llvm_unreachable("Unexpected reduction kind for reused scalars."); } From df7e6b845b18a912d3f664b5f3693ecc7e1d2ce7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 26 Oct 2025 07:07:17 +0000 Subject: [PATCH 07/20] !fixup replace MultiUse kinds with boolean flag --- llvm/include/llvm/Analysis/IVDescriptors.h | 40 +++++++------------ llvm/lib/Analysis/IVDescriptors.cpp | 15 ++++--- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 4 +- .../Vectorize/LoopVectorizationLegality.cpp | 5 +++ .../Transforms/Vectorize/LoopVectorize.cpp | 12 ++++-- .../Transforms/Vectorize/SLPVectorizer.cpp | 12 ------ llvm/lib/Transforms/Vectorize/VPlan.h | 14 +++++-- .../Vectorize/VPlanConstruction.cpp | 38 ++++++++---------- .../Transforms/Vectorize/VPlanTransforms.h | 8 ++-- 9 files changed, 70 insertions(+), 78 deletions(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index fe5ae21b56d04..e784bb0a8a587 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -42,13 +42,9 @@ enum class RecurKind { And, ///< Bitwise or logical AND of integers. Xor, ///< Bitwise or logical XOR of integers. SMin, ///< Signed integer min implemented in terms of select(cmp()). - SMinMultiUse, ///< Signed integer min implemented in terms of select(cmp()). SMax, ///< Signed integer max implemented in terms of select(cmp()). - SMaxMultiUse, ///< Signed integer max implemented in terms of select(cmp()). UMin, ///< Unsigned integer min implemented in terms of select(cmp()). - UMinMultiUse, ///< Unsigned integer min implemented in terms of select(cmp()). UMax, ///< Unsigned integer max implemented in terms of select(cmp()). - UMaxMultiUse, ///< Unsigned integer max implemented in terms of select(cmp()). FAdd, ///< Sum of floats. FMul, ///< Product of floats. FMin, ///< FP min implemented in terms of select(cmp()). @@ -99,12 +95,15 @@ class RecurrenceDescriptor { RecurKind K, FastMathFlags FMF, Instruction *ExactFP, Type *RT, bool Signed, bool Ordered, SmallPtrSetImpl &CI, - unsigned MinWidthCastToRecurTy) + unsigned MinWidthCastToRecurTy, bool PhiMultiUse = false) : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT), - IsSigned(Signed), IsOrdered(Ordered), + IsSigned(Signed), IsOrdered(Ordered), IsPhiMultiUse(PhiMultiUse), MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) { CastInsts.insert_range(CI); + assert( + (!PhiMultiUse || isMinMaxRecurrenceKind(K)) && + "Only min/max recurrences are allowed to have multiple uses currently"); } /// This POD struct holds information about a potential recurrence operation. @@ -251,26 +250,8 @@ class RecurrenceDescriptor { /// Returns true if the recurrence kind is an integer min/max kind. static bool isIntMinMaxRecurrenceKind(RecurKind Kind) { - return Kind == RecurKind::UMin || Kind == RecurKind::UMinMultiUse || - Kind == RecurKind::UMax || Kind == RecurKind::UMaxMultiUse || - Kind == RecurKind::SMin || Kind == RecurKind::SMinMultiUse || - Kind == RecurKind::SMax || Kind == RecurKind::SMaxMultiUse; - } - - static RecurKind convertFromMultiUseKind(RecurKind Kind) { - switch (Kind) { - case RecurKind::UMaxMultiUse: - return RecurKind::UMax; - case RecurKind::UMinMultiUse: - return RecurKind::UMin; - case RecurKind::SMinMultiUse: - return RecurKind::SMin; - case RecurKind::SMaxMultiUse: - return RecurKind::SMax; - default: - return Kind; - } - llvm_unreachable("all cases must be handled above"); + return Kind == RecurKind::UMin || Kind == RecurKind::UMax || + Kind == RecurKind::SMin || Kind == RecurKind::SMax; } /// Returns true if the recurrence kind is a floating-point minnum/maxnum @@ -361,6 +342,10 @@ class RecurrenceDescriptor { /// Expose an ordered FP reduction to the instance users. bool isOrdered() const { return IsOrdered; } + /// Returns true if the reduction PHI has multiple in-loop users. This is + /// relevant for min/max reductions that are part of a FindLastIV pattern. + bool isPhiMultiUse() const { return IsPhiMultiUse; } + /// Attempts to find a chain of operations from Phi to LoopExitInst that can /// be treated as a set of reductions instructions for in-loop reductions. LLVM_ABI SmallVector getReductionOpChain(PHINode *Phi, @@ -398,6 +383,9 @@ class RecurrenceDescriptor { // Currently only a non-reassociative FAdd can be considered in-order, // if it is also the only FAdd in the PHI's use chain. bool IsOrdered = false; + // True if the reduction PHI has multiple in-loop users. This is relevant + // for min/max reductions that are part of a FindLastIV pattern. + bool IsPhiMultiUse = false; // Instructions used for type-promoting the recurrence. SmallPtrSet CastInsts; // The minimum width used by the recurrence. diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 70a3c5f735a20..2242339a467cb 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -227,25 +227,24 @@ getMultiUseMinMax(PHINode *Phi, RecurKind Kind, Loop *TheLoop) { Value *A, *B; if (match(Inc, m_OneUse(m_UMin(m_Value(A), m_Value(B))))) - RK = RecurKind::UMinMultiUse; + RK = RecurKind::UMin; else if (match(Inc, m_OneUse(m_UMax(m_Value(A), m_Value(B))))) - RK = RecurKind::UMaxMultiUse; + RK = RecurKind::UMax; else if (match(Inc, m_OneUse(m_SMax(m_Value(A), m_Value(B))))) - RK = RecurKind::SMaxMultiUse; + RK = RecurKind::SMax; else if (match(Inc, m_OneUse(m_SMin(m_Value(A), m_Value(B))))) - RK = RecurKind::SMinMultiUse; + RK = RecurKind::SMin; else return std::nullopt; - if (RecurrenceDescriptor::convertFromMultiUseKind(RK) != Kind || A == B || - (A != Phi && B != Phi)) + if (A == B || (A != Phi && B != Phi)) return std::nullopt; SmallPtrSet CastInsts; Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); RecurrenceDescriptor RD(RdxStart, nullptr, nullptr, RK, FastMathFlags(), - nullptr, Phi->getType(), false, false, CastInsts, - -1U); + nullptr, Phi->getType(), false, false, CastInsts, -1U, + true); return {RD}; } diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 4fe736ac29b0a..62dabeb7283ec 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -1217,7 +1217,9 @@ llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L, RecurrenceDescriptor RdxDesc; if (!RecurrenceDescriptor::isReductionPHI(&Phi, L, RdxDesc, /*DemandedBits=*/nullptr, - /*AC=*/nullptr, /*DT=*/nullptr, SE)) + /*AC=*/nullptr, /*DT=*/nullptr, + SE) || + RdxDesc.isPhiMultiUse()) return std::nullopt; RecurKind RK = RdxDesc.getRecurrenceKind(); // Skip unsupported reductions. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index fdfff16132093..aa2ee5355a1fc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -878,6 +878,11 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) { Requirements->addExactFPMathInst(RedDes.getExactFPMathInst()); AllowedExit.insert(RedDes.getLoopExitInstr()); Reductions[Phi] = RedDes; + assert((!RedDes.isPhiMultiUse() || + RecurrenceDescriptor::isMinMaxRecurrenceKind( + RedDes.getRecurrenceKind())) && + "Only min/max recurrences are allowed to have multiple uses " + "currently"); return true; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d90b8cc40e30f..addf7cf7d9044 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6515,6 +6515,11 @@ void LoopVectorizationCostModel::collectInLoopReductions() { PHINode *Phi = Reduction.first; const RecurrenceDescriptor &RdxDesc = Reduction.second; + // Multi-use reductions (e.g., used in FindLastIV patterns) are handled + // separately and should not be considered for in-loop reductions. + if (RdxDesc.isPhiMultiUse()) + continue; + // We don't collect reductions that are type promoted (yet). if (RdxDesc.getRecurrenceType() != Phi->getType()) continue; @@ -8072,7 +8077,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1); PhiRecipe = new VPReductionPHIRecipe( Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi), - CM.useOrderedReductions(RdxDesc), ScaleFactor); + CM.useOrderedReductions(RdxDesc), ScaleFactor, + RdxDesc.isPhiMultiUse()); } else if (Legal->isFixedOrderRecurrence(Phi)) { // TODO: Currently fixed-order recurrences are modeled as chains of // first-order recurrences. If there are no users of the intermediate @@ -8429,8 +8435,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // Adjust the recipes for any inloop reductions. adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start); - // Try to convert remaining VPWidenPHIRecipes to reduction recipes. - if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis, + // Try to legalize reductions with multiple in-loop uses. + if (!VPlanTransforms::runPass(VPlanTransforms::legalizeMultiUseReductions, *Plan)) return nullptr; // Apply mandatory transformation to handle FP maxnum/minnum reduction with diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 89d8737d1ab5c..cdb9e7e381121 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -25091,10 +25091,6 @@ class HorizontalReduction { case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: - case RecurKind::UMaxMultiUse: - case RecurKind::UMinMultiUse: - case RecurKind::SMinMultiUse: - case RecurKind::SMaxMultiUse: case RecurKind::None: llvm_unreachable("Unexpected reduction kind for repeated scalar."); } @@ -25236,10 +25232,6 @@ class HorizontalReduction { case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: - case RecurKind::UMaxMultiUse: - case RecurKind::UMinMultiUse: - case RecurKind::SMinMultiUse: - case RecurKind::SMaxMultiUse: case RecurKind::None: llvm_unreachable("Unexpected reduction kind for repeated scalar."); } @@ -25346,10 +25338,6 @@ class HorizontalReduction { case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: - case RecurKind::UMaxMultiUse: - case RecurKind::UMinMultiUse: - case RecurKind::SMinMultiUse: - case RecurKind::SMaxMultiUse: case RecurKind::None: llvm_unreachable("Unexpected reduction kind for reused scalars."); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8f99dd7141eba..943a38cfc7152 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2346,6 +2346,10 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// The phi is part of an ordered reduction. Requires IsInLoop to be true. bool IsOrdered; + /// The phi is part of a multi-use reduction (e.g., used in FindLastIV + /// patterns). + bool IsPhiMultiUse; + /// When expanding the reduction PHI, the plan's VF element count is divided /// by this factor to form the reduction phi's VF. unsigned VFScaleFactor = 1; @@ -2354,9 +2358,10 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// Create a new VPReductionPHIRecipe for the reduction \p Phi. VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop = false, bool IsOrdered = false, - unsigned VFScaleFactor = 1) + unsigned VFScaleFactor = 1, bool IsPhiMultiUse = false) : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind), - IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) { + IsInLoop(IsInLoop), IsOrdered(IsOrdered), IsPhiMultiUse(IsPhiMultiUse), + VFScaleFactor(VFScaleFactor) { assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop"); } @@ -2365,7 +2370,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, VPReductionPHIRecipe *clone() override { auto *R = new VPReductionPHIRecipe( dyn_cast_or_null(getUnderlyingValue()), getRecurrenceKind(), - *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor); + *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor, IsPhiMultiUse); R->addOperand(getBackedgeValue()); return R; } @@ -2398,6 +2403,9 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// Returns true, if the phi is part of an in-loop reduction. bool isInLoop() const { return IsInLoop; } + /// Returns true, if the phi is part of a multi-use reduction. + bool isPhiMultiUse() const { return IsPhiMultiUse; } + /// Returns true if the recipe only uses the first lane of operand \p Op. bool onlyFirstLaneUsed(const VPValue *Op) const override { assert(is_contained(operands(), Op) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 9106be4df83c8..d3886fd7b7bba 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -950,23 +950,25 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { return true; } -bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) { - using namespace VPlanPatternMatch; +bool VPlanTransforms::legalizeMultiUseReductions(VPlan &Plan) { for (auto &PhiR : make_early_inc_range( Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) { auto *MinMaxPhiR = dyn_cast(&PhiR); - if (!MinMaxPhiR || !RecurrenceDescriptor::isIntMinMaxRecurrenceKind( - MinMaxPhiR->getRecurrenceKind())) + if (!MinMaxPhiR) continue; - RecurKind RdxKind = RecurrenceDescriptor::convertFromMultiUseKind( - MinMaxPhiR->getRecurrenceKind()); - if (RdxKind == MinMaxPhiR->getRecurrenceKind()) + RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind(); + // TODO: check for multi-uses in VPlan directly. + if (!RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind) || + !MinMaxPhiR->isPhiMultiUse()) continue; // One user of MinMaxPhiR is MinMaxOp, the other users must be a compare // that's part of a FindLastIV chain. - auto *MinMaxOp = cast(MinMaxPhiR->getBackedgeValue()); + auto *MinMaxOp = + dyn_cast(MinMaxPhiR->getBackedgeValue()); + if (!MinMaxOp || MinMaxOp->getNumUsers() != 2) + return false; auto MinMaxUsers = to_vector(MinMaxPhiR->users()); auto *Cmp = dyn_cast( MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]); @@ -1019,14 +1021,6 @@ bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) { assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() && "cannot handle inloop/ordered reductions yet"); - auto NewPhiR = new VPReductionPHIRecipe( - cast(MinMaxPhiR->getUnderlyingInstr()), RdxKind, - *MinMaxPhiR->getOperand(0), false, false, 1); - NewPhiR->insertBefore(MinMaxPhiR); - MinMaxPhiR->replaceAllUsesWith(NewPhiR); - NewPhiR->addOperand(MinMaxPhiR->getOperand(1)); - MinMaxPhiR->eraseFromParent(); - // The reduction using MinMaxPhiR needs adjusting to compute the correct // result: // 1. We need to find the last IV for which the condition based on the @@ -1034,13 +1028,15 @@ bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) { // 2. Compare the partial min/max reduction result to its final value and, // 3. Select the lanes of the partial FindLastIV reductions which // correspond to the lanes matching the min/max reduction result. - VPInstruction *FindIVResult = cast( + VPInstruction *FindIVResult = dyn_cast( *(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0))); + if (!FindIVResult) + return false; VPBuilder B(FindIVResult); - VPInstruction *MinMaxResult = - B.createNaryOp(VPInstruction::ComputeReductionResult, - {NewPhiR, NewPhiR->getBackedgeValue()}, VPIRFlags(), {}); - NewPhiR->getBackedgeValue()->replaceUsesWithIf( + VPInstruction *MinMaxResult = B.createNaryOp( + VPInstruction::ComputeReductionResult, + {MinMaxPhiR, MinMaxPhiR->getBackedgeValue()}, VPIRFlags(), {}); + MinMaxPhiR->getBackedgeValue()->replaceUsesWithIf( MinMaxResult, [](VPUser &U, unsigned) { return isa(&U); }); auto *FinalMinMaxCmp = B.createICmp( CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 3759c1c5d1358..18a0f8f9af0ef 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -143,10 +143,10 @@ struct VPlanTransforms { GetIntOrFpInductionDescriptor, const TargetLibraryInfo &TLI); - /// Try to legalize unclassified phis by converting VPWidenPHIRecipes to - /// min-max reductions used by FindLastIV reductions if possible. Returns - /// false if the VPlan contains VPWidenPHIRecipes that cannot be legalized. - static bool legalizeUnclassifiedPhis(VPlan &Plan); + /// Try to legalize reductions with multiple in-loop uses. Currently only + /// min/max reductions used by FindLastIV reductions are supported. Otherwise + /// return false. + static bool legalizeMultiUseReductions(VPlan &Plan); /// Try to have all users of fixed-order recurrences appear after the recipe /// defining their previous value, by either sinking users or hoisting recipes From 76e661ac8ca1e2ce474cb6933e74dbc9b8a0bb81 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 19 Nov 2025 15:02:12 +0000 Subject: [PATCH 08/20] !fixup address comments, thanks --- llvm/include/llvm/Analysis/IVDescriptors.h | 20 ++++--- llvm/lib/Analysis/IVDescriptors.cpp | 57 +++++++++++-------- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 6 +- .../Vectorize/LoopVectorizationLegality.cpp | 2 +- .../Transforms/Vectorize/LoopVectorize.cpp | 36 ++++-------- llvm/lib/Transforms/Vectorize/VPlan.h | 20 ++++--- .../Vectorize/VPlanConstruction.cpp | 41 ++++++------- .../Transforms/Vectorize/VPlanTransforms.h | 2 +- .../LoopVectorize/AArch64/select-index.ll | 8 +-- .../select-index-interleaving.ll | 8 +-- .../LoopVectorize/select-smax-last-index.ll | 8 +-- .../LoopVectorize/select-smin-last-index.ll | 8 +-- .../LoopVectorize/select-umax-last-index.ll | 8 +-- .../LoopVectorize/select-umin-last-index.ll | 8 +-- 14 files changed, 116 insertions(+), 116 deletions(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index e784bb0a8a587..3df2146ab0b71 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -95,14 +95,17 @@ class RecurrenceDescriptor { RecurKind K, FastMathFlags FMF, Instruction *ExactFP, Type *RT, bool Signed, bool Ordered, SmallPtrSetImpl &CI, - unsigned MinWidthCastToRecurTy, bool PhiMultiUse = false) + unsigned MinWidthCastToRecurTy, + bool PhiHasLoopUsesOutsideReductionChain = false) : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT), - IsSigned(Signed), IsOrdered(Ordered), IsPhiMultiUse(PhiMultiUse), + IsSigned(Signed), IsOrdered(Ordered), + PhiHasLoopUsesOutsideReductionChain( + PhiHasLoopUsesOutsideReductionChain), MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) { CastInsts.insert_range(CI); assert( - (!PhiMultiUse || isMinMaxRecurrenceKind(K)) && + (!PhiHasLoopUsesOutsideReductionChain || isMinMaxRecurrenceKind(K)) && "Only min/max recurrences are allowed to have multiple uses currently"); } @@ -344,7 +347,9 @@ class RecurrenceDescriptor { /// Returns true if the reduction PHI has multiple in-loop users. This is /// relevant for min/max reductions that are part of a FindLastIV pattern. - bool isPhiMultiUse() const { return IsPhiMultiUse; } + bool hasLoopUsesOutsideReductionChain() const { + return PhiHasLoopUsesOutsideReductionChain; + } /// Attempts to find a chain of operations from Phi to LoopExitInst that can /// be treated as a set of reductions instructions for in-loop reductions. @@ -383,9 +388,10 @@ class RecurrenceDescriptor { // Currently only a non-reassociative FAdd can be considered in-order, // if it is also the only FAdd in the PHI's use chain. bool IsOrdered = false; - // True if the reduction PHI has multiple in-loop users. This is relevant - // for min/max reductions that are part of a FindLastIV pattern. - bool IsPhiMultiUse = false; + // True if the reduction PHI has in-loop users outside the reduction chain. + // This is relevant for min/max reductions that are part of a FindLastIV + // pattern. + bool PhiHasLoopUsesOutsideReductionChain = false; // Instructions used for type-promoting the recurrence. SmallPtrSet CastInsts; // The minimum width used by the recurrence. diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index a022a6a3e70b6..8a495a5fb6ee2 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -214,38 +214,49 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, return true; } -static std::optional -getMultiUseMinMax(PHINode *Phi, RecurKind Kind, Loop *TheLoop) { +/// Returns true if \p Phi is a min/max reduction matching \p Kind where \p Phi +/// is used in the loop outside the reduction chain. This is common for +static bool isMinMaxReductionWithLoopUsersOutsideReductionChain( + PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes) { BasicBlock *Latch = TheLoop->getLoopLatch(); if (!Latch) - return std::nullopt; + return false; + assert(Phi->getNumIncomingValues() == 2 && + "phi must have exactly 2 incoming values"); Value *Inc = Phi->getIncomingValueForBlock(Latch); - RecurKind RK; if (Phi->hasOneUse() || !RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) - return std::nullopt; + return false; Value *A, *B; - if (match(Inc, m_OneUse(m_UMin(m_Value(A), m_Value(B))))) - RK = RecurKind::UMin; - else if (match(Inc, m_OneUse(m_UMax(m_Value(A), m_Value(B))))) - RK = RecurKind::UMax; - else if (match(Inc, m_OneUse(m_SMax(m_Value(A), m_Value(B))))) - RK = RecurKind::SMax; - else if (match(Inc, m_OneUse(m_SMin(m_Value(A), m_Value(B))))) - RK = RecurKind::SMin; - else - return std::nullopt; + bool Matched = [&]() { + switch (Kind) { + case RecurKind::UMax: + return match(Inc, m_OneUse(m_UMax(m_Value(A), m_Value(B)))); + case RecurKind::UMin: + return match(Inc, m_OneUse(m_UMin(m_Value(A), m_Value(B)))); + case RecurKind::SMax: + return match(Inc, m_OneUse(m_SMax(m_Value(A), m_Value(B)))); + case RecurKind::SMin: + return match(Inc, m_OneUse(m_SMin(m_Value(A), m_Value(B)))); + default: + llvm_unreachable("all min/max kinds must be handled"); + } + }(); + if (!Matched) + return false; if (A == B || (A != Phi && B != Phi)) - return std::nullopt; + return false; SmallPtrSet CastInsts; Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); - RecurrenceDescriptor RD(RdxStart, nullptr, nullptr, RK, FastMathFlags(), - nullptr, Phi->getType(), false, false, CastInsts, -1U, - true); - return {RD}; + RedDes = + RecurrenceDescriptor(RdxStart, /*Exit=*/nullptr, /*Store=*/nullptr, Kind, + FastMathFlags(), /*ExactFP=*/nullptr, Phi->getType(), + /*Signed=*/false, /*Ordered=*/false, CastInsts, + /*MinWidthCastToRecurTy=*/-1U, /*PhiMultiUse=*/true); + return true; } bool RecurrenceDescriptor::AddReductionVar( @@ -259,10 +270,10 @@ bool RecurrenceDescriptor::AddReductionVar( if (Phi->getParent() != TheLoop->getHeader()) return false; - if (auto RD = getMultiUseMinMax(Phi, Kind, TheLoop)) { - RedDes = *RD; + if (isMinMaxReductionWithLoopUsersOutsideReductionChain(Phi, Kind, TheLoop, + RedDes)) return true; - } + // Obtain the reduction start value from the value that comes from the loop // preheader. Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 4ff5f174e2d38..1795591763c6d 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -1251,9 +1251,9 @@ llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L, RecurrenceDescriptor RdxDesc; if (!RecurrenceDescriptor::isReductionPHI(&Phi, L, RdxDesc, /*DemandedBits=*/nullptr, - /*AC=*/nullptr, /*DT=*/nullptr, - SE) || - RdxDesc.isPhiMultiUse()) + /*AC=*/nullptr, /*DT=*/nullptr, SE)) + return std::nullopt; + if (RdxDesc.hasLoopUsesOutsideReductionChain()) return std::nullopt; RecurKind RK = RdxDesc.getRecurrenceKind(); // Skip unsupported reductions. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 99cc1c61aeec3..1b7ccc07f305c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -877,7 +877,7 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) { Requirements->addExactFPMathInst(RedDes.getExactFPMathInst()); AllowedExit.insert(RedDes.getLoopExitInstr()); Reductions[Phi] = RedDes; - assert((!RedDes.isPhiMultiUse() || + assert((!RedDes.hasLoopUsesOutsideReductionChain() || RecurrenceDescriptor::isMinMaxRecurrenceKind( RedDes.getRecurrenceKind())) && "Only min/max recurrences are allowed to have multiple uses " diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f124dc77e7177..1f5f792bfbe93 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6591,7 +6591,7 @@ void LoopVectorizationCostModel::collectInLoopReductions() { // Multi-use reductions (e.g., used in FindLastIV patterns) are handled // separately and should not be considered for in-loop reductions. - if (RdxDesc.isPhiMultiUse()) + if (RdxDesc.hasLoopUsesOutsideReductionChain()) continue; // We don't collect reductions that are type promoted (yet). @@ -7253,9 +7253,6 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( Value *StartV = getStartValueFromReductionResult(EpiRedResult); Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue(); using namespace llvm::PatternMatch; - MainResumeValue = cast(EpiRedHeaderPhi->getStartValue()) - ->getOperand(0) - ->getUnderlyingValue(); Value *Cmp, *OrigResumeV, *CmpOp; [[maybe_unused]] bool IsExpectedPattern = match(MainResumeValue, @@ -7266,9 +7263,6 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( ((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp)))); assert(IsExpectedPattern && "Unexpected reduction resume pattern"); MainResumeValue = OrigResumeV; - } else if (auto *VPI = - dyn_cast(EpiRedHeaderPhi->getStartValue())) { - MainResumeValue = VPI->getOperand(0)->getUnderlyingValue(); } PHINode *MainResumePhi = cast(MainResumeValue); @@ -8008,11 +8002,9 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) { SmallVector> PartialReductionChains; for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) { - if (RecurrenceDescriptor::isMinMaxRecurrenceKind( - RdxDesc.getRecurrenceKind())) - continue; - getScaledReductions(Phi, RdxDesc.getLoopExitInstr(), Range, - PartialReductionChains); + if (Instruction *RdxExitInstr = RdxDesc.getLoopExitInstr()) { + getScaledReductions(Phi, RdxExitInstr, Range, PartialReductionChains); + } } // A partial reduction is invalid if any of its extends are used by @@ -8193,6 +8185,9 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, return Recipe; VPHeaderPHIRecipe *PhiRecipe = nullptr; + assert((Legal->isReductionVariable(Phi) || + Legal->isFixedOrderRecurrence(Phi)) && + "can only widen reductions and fixed-order recurrences here"); VPValue *StartV = R->getOperand(0); if (Legal->isReductionVariable(Phi)) { const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi); @@ -8205,18 +8200,13 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, PhiRecipe = new VPReductionPHIRecipe( Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi), CM.useOrderedReductions(RdxDesc), ScaleFactor, - RdxDesc.isPhiMultiUse()); + RdxDesc.hasLoopUsesOutsideReductionChain()); } else if (Legal->isFixedOrderRecurrence(Phi)) { // TODO: Currently fixed-order recurrences are modeled as chains of // first-order recurrences. If there are no users of the intermediate // recurrences in the chain, the fixed order recurrence should be modeled // directly, enabling more efficient codegen. PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV); - } else { - // Failed to identify phi as reduction or fixed-order recurrence. Keep the - // original VPWidenPHIRecipe for now, to be legalized later if possible. - setRecipe(Phi, R); - return nullptr; } // Add backedge value. PhiRecipe->addOperand(R->getOperand(1)); @@ -8490,12 +8480,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range); - if (!Recipe) { - if (isa(SingleDef)) - continue; + if (!Recipe) Recipe = RecipeBuilder.handleReplication(cast(SingleDef), Range); - } RecipeBuilder.setRecipe(Instr, Recipe); if (isa(Recipe) && isa(Instr)) { @@ -8543,8 +8530,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // Adjust the recipes for any inloop reductions. adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start); - // Try to legalize reductions with multiple in-loop uses. - if (!VPlanTransforms::runPass(VPlanTransforms::legalizeMultiUseReductions, + // Apply mandatory transformation to handle reductions with multiple in-loop + // uses if possible, bail out otherwise. + if (!VPlanTransforms::runPass(VPlanTransforms::handleMultiUseReductions, *Plan)) return nullptr; // Apply mandatory transformation to handle FP maxnum/minnum reduction with diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 44deed712a91e..5cbc309496f83 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2072,9 +2072,8 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe, static inline bool classof(const VPValue *V) { return isa(V->getDefiningRecipe()); } - static inline bool classof(const VPSingleDefRecipe *B) { - return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC && - B->getVPDefID() <= VPDef::VPLastHeaderPHISC; + static inline bool classof(const VPSingleDefRecipe *R) { + return isa(static_cast(R)); } /// Generate the phi nodes. @@ -2425,7 +2424,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// The phi is part of a multi-use reduction (e.g., used in FindLastIV /// patterns). - bool IsPhiMultiUse; + bool HasLoopUsesOutsideReductionChain; /// When expanding the reduction PHI, the plan's VF element count is divided /// by this factor to form the reduction phi's VF. @@ -2435,9 +2434,11 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// Create a new VPReductionPHIRecipe for the reduction \p Phi. VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop = false, bool IsOrdered = false, - unsigned VFScaleFactor = 1, bool IsPhiMultiUse = false) + unsigned VFScaleFactor = 1, + bool HasLoopUsesOutsideReductionChain = false) : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind), - IsInLoop(IsInLoop), IsOrdered(IsOrdered), IsPhiMultiUse(IsPhiMultiUse), + IsInLoop(IsInLoop), IsOrdered(IsOrdered), + HasLoopUsesOutsideReductionChain(HasLoopUsesOutsideReductionChain), VFScaleFactor(VFScaleFactor) { assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop"); } @@ -2447,7 +2448,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, VPReductionPHIRecipe *clone() override { auto *R = new VPReductionPHIRecipe( dyn_cast_or_null(getUnderlyingValue()), getRecurrenceKind(), - *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor, IsPhiMultiUse); + *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor, + HasLoopUsesOutsideReductionChain); R->addOperand(getBackedgeValue()); return R; } @@ -2475,7 +2477,9 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, bool isInLoop() const { return IsInLoop; } /// Returns true, if the phi is part of a multi-use reduction. - bool isPhiMultiUse() const { return IsPhiMultiUse; } + bool hasLoopUsesOutsideReductionChain() const { + return HasLoopUsesOutsideReductionChain; + } /// Returns true if the recipe only uses the first lane of operand \p Op. bool usesFirstLaneOnly(const VPValue *Op) const override { diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 8fbf0ba9c6491..2e3eda40cd789 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -993,7 +993,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { return true; } -bool VPlanTransforms::legalizeMultiUseReductions(VPlan &Plan) { +bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { for (auto &PhiR : make_early_inc_range( Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) { auto *MinMaxPhiR = dyn_cast(&PhiR); @@ -1002,10 +1002,13 @@ bool VPlanTransforms::legalizeMultiUseReductions(VPlan &Plan) { RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind(); // TODO: check for multi-uses in VPlan directly. - if (!RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind) || - !MinMaxPhiR->isPhiMultiUse()) + if (!MinMaxPhiR->hasLoopUsesOutsideReductionChain()) continue; + assert( + RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind) && + "only min/max recurrences support users outside the reduction chain"); + // One user of MinMaxPhiR is MinMaxOp, the other users must be a compare // that's part of a FindLastIV chain. auto *MinMaxOp = @@ -1022,45 +1025,33 @@ bool VPlanTransforms::legalizeMultiUseReductions(VPlan &Plan) { m_VPValue(CmpOpB)))) return false; - // Normalize the predicate so MinMaxPhiR is on the right side. CmpInst::Predicate Pred = Cmp->getPredicate(); - if (CmpOpA == MinMaxPhiR) - Pred = CmpInst::getSwappedPredicate(Pred); - - // Determine if the predicate is not strict. - bool IsNonStrictPred = ICmpInst::isLE(Pred) || ICmpInst::isGE(Pred); - // Account for a mis-match between RdxKind and the predicate. - switch (RdxKind) { - case RecurKind::UMin: - case RecurKind::SMin: - IsNonStrictPred |= ICmpInst::isGT(Pred); - break; - case RecurKind::UMax: - case RecurKind::SMax: - IsNonStrictPred |= ICmpInst::isLT(Pred); - break; - default: - llvm_unreachable("unsupported kind"); - } - // TODO: Strict predicates need to find the first IV value for which the // predicate holds, not the last. - if (Pred == CmpInst::ICMP_NE || !IsNonStrictPred) + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE || + ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred)) return false; + // Normalize the predicate so MinMaxPhiR is on the right side. + if (CmpOpA == MinMaxPhiR) + Pred = CmpInst::getSwappedPredicate(Pred); + // Cmp must be used by the select of a FindLastIV chain. VPValue *Sel = dyn_cast(*Cmp->user_begin()); VPValue *IVOp, *FindIV; if (!Sel || !match(Sel, m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) || - Sel->getNumUsers() != 2 || !isa(IVOp)) + Sel->getNumUsers() != 2) return false; + auto *FindIVPhiR = dyn_cast(FindIV); if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind( FindIVPhiR->getRecurrenceKind())) return false; + assert(isa(IVOp) && + "IVOp must be a wide induction"); assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() && "cannot handle inloop/ordered reductions yet"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index be9d615b97278..b98d21866dfd2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -148,7 +148,7 @@ struct VPlanTransforms { /// Try to legalize reductions with multiple in-loop uses. Currently only /// min/max reductions used by FindLastIV reductions are supported. Otherwise /// return false. - static bool legalizeMultiUseReductions(VPlan &Plan); + static bool handleMultiUseReductions(VPlan &Plan); /// Try to have all users of fixed-order recurrences appear after the recipe /// defining their previous value, by either sinking users or hoisting recipes diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll index 644a332918954..3a50995b3b94c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll @@ -72,7 +72,7 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = select <2 x i1> [[TMP4]], <2 x i64> [[STEP_ADD]], <2 x i64> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -205,7 +205,7 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = select <2 x i1> [[TMP4]], <2 x i64> [[STEP_ADD]], <2 x i64> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -338,7 +338,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = select <2 x i1> [[TMP4]], <2 x i64> [[STEP_ADD]], <2 x i64> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -471,7 +471,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_IND]], <2 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = select <2 x i1> [[TMP4]], <2 x i64> [[STEP_ADD]], <2 x i64> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i64> [[STEP_ADD]], splat (i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll index 7bc067d3a8abd..9ec16311744c9 100644 --- a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll @@ -72,7 +72,7 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -205,7 +205,7 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -338,7 +338,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -471,7 +471,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll index 1dd383d4a17bd..cd18114ce5ea0 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll @@ -22,7 +22,7 @@ define i64 @test_vectorize_select_smax_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -100,7 +100,7 @@ define i64 @test_vectorize_select_smax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -308,7 +308,7 @@ define i64 @test_vectorize_select_smax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -430,7 +430,7 @@ define i64 @test_cmp_and_smax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll index dd441abf5576a..5bbb8266e050d 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll @@ -24,7 +24,7 @@ define i64 @test_vectorize_select_smin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -102,7 +102,7 @@ define i64 @test_vectorize_select_smin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -310,7 +310,7 @@ define i64 @test_vectorize_select_smin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -432,7 +432,7 @@ define i64 @test_cmp_and_smin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll index 398fc32a5287f..f719a8e9840ba 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll @@ -22,7 +22,7 @@ define i64 @test_vectorize_select_umax_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -100,7 +100,7 @@ define i64 @test_vectorize_select_umax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -308,7 +308,7 @@ define i64 @test_vectorize_select_umax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -430,7 +430,7 @@ define i64 @test_cmp_and_umax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll index a8b9a16f30042..693216a97bddc 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll @@ -24,7 +24,7 @@ define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -102,7 +102,7 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -310,7 +310,7 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) ; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -432,7 +432,7 @@ define i64 @test_cmp_and_umin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: From a057dd3f769d7ad1e2f62cc93db7ce3e8ccfd4d7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 20 Nov 2025 20:07:29 +0000 Subject: [PATCH 09/20] !fixup address comments, thanks --- llvm/lib/Analysis/IVDescriptors.cpp | 21 ++++++------- .../Transforms/Vectorize/LoopVectorize.cpp | 3 +- .../Vectorize/VPlanConstruction.cpp | 30 +++++++++---------- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 8a495a5fb6ee2..d94bed88f50c7 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -215,35 +215,35 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, } /// Returns true if \p Phi is a min/max reduction matching \p Kind where \p Phi -/// is used in the loop outside the reduction chain. This is common for +/// is used in the loop outside the reduction chain. This is common for loops +/// selecting the index of a minimum/maximum value (argmax). static bool isMinMaxReductionWithLoopUsersOutsideReductionChain( PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes) { BasicBlock *Latch = TheLoop->getLoopLatch(); if (!Latch) return false; - assert(Phi->getNumIncomingValues() == 2 && - "phi must have exactly 2 incoming values"); + assert(Phi->getNumIncomingValues() == 2 && "phi must have 2 incoming values"); Value *Inc = Phi->getIncomingValueForBlock(Latch); - if (Phi->hasOneUse() || + if (Phi->hasOneUse() || !Inc->hasOneUse() || !RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) return false; Value *A, *B; - bool Matched = [&]() { + bool IsMinMax = [&]() { switch (Kind) { case RecurKind::UMax: - return match(Inc, m_OneUse(m_UMax(m_Value(A), m_Value(B)))); + return match(Inc, m_UMax(m_Value(A), m_Value(B))); case RecurKind::UMin: - return match(Inc, m_OneUse(m_UMin(m_Value(A), m_Value(B)))); + return match(Inc, m_UMin(m_Value(A), m_Value(B))); case RecurKind::SMax: - return match(Inc, m_OneUse(m_SMax(m_Value(A), m_Value(B)))); + return match(Inc, m_SMax(m_Value(A), m_Value(B))); case RecurKind::SMin: - return match(Inc, m_OneUse(m_SMin(m_Value(A), m_Value(B)))); + return match(Inc, m_SMin(m_Value(A), m_Value(B))); default: llvm_unreachable("all min/max kinds must be handled"); } }(); - if (!Matched) + if (!IsMinMax) return false; if (A == B || (A != Phi && B != Phi)) @@ -270,6 +270,7 @@ bool RecurrenceDescriptor::AddReductionVar( if (Phi->getParent() != TheLoop->getHeader()) return false; + // Check for min/max reduction variables that feed other users in the loop. if (isMinMaxReductionWithLoopUsersOutsideReductionChain(Phi, Kind, TheLoop, RedDes)) return true; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8ccf1918e1b26..1f09a0110aa32 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7264,7 +7264,6 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( assert(IsExpectedPattern && "Unexpected reduction resume pattern"); MainResumeValue = OrigResumeV; } - PHINode *MainResumePhi = cast(MainResumeValue); // When fixing reductions in the epilogue loop we should already have @@ -8227,7 +8226,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi), CM.useOrderedReductions(RdxDesc), ScaleFactor, RdxDesc.hasLoopUsesOutsideReductionChain()); - } else if (Legal->isFixedOrderRecurrence(Phi)) { + } else { // TODO: Currently fixed-order recurrences are modeled as chains of // first-order recurrences. If there are no users of the intermediate // recurrences in the chain, the fixed order recurrence should be modeled diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 2e3eda40cd789..eb61081b0efcc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -818,13 +818,14 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck( Branch->setMetadata(LLVMContext::MD_prof, BranchWeights); } -/// If \p RedPhiR is used by a ComputeReductionResult recipe, return it. -/// Otherwise return nullptr. -static VPInstruction * -findComputeReductionResult(VPReductionPHIRecipe *RedPhiR) { - auto It = find_if(RedPhiR->users(), [](VPUser *U) { +/// If \p RedPhiR is used to compute are reduction result via a +/// ComputeReductionResult, ComputeFindIVResult or ComputeAnyOfResult +/// VPInstruction, return it. Otherwise return nullptr. +static VPInstruction *findComputeReductionResult(VPReductionPHIRecipe *RedPhiR, + unsigned Opcode) { + auto It = find_if(RedPhiR->users(), [Opcode](VPUser *U) { auto *VPI = dyn_cast(U); - return VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult; + return VPI && VPI->getOpcode() == Opcode; }); return It == RedPhiR->user_end() ? nullptr : cast(*It); } @@ -933,7 +934,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { // If we exit early due to NaNs, compute the final reduction result based on // the reduction phi at the beginning of the last vector iteration. - auto *RdxResult = findComputeReductionResult(RedPhiR); + auto *RdxResult = findComputeReductionResult( + RedPhiR, VPInstruction::ComputeReductionResult); auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR, RdxResult->getOperand(1)); @@ -997,14 +999,11 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { for (auto &PhiR : make_early_inc_range( Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) { auto *MinMaxPhiR = dyn_cast(&PhiR); - if (!MinMaxPhiR) - continue; - - RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind(); // TODO: check for multi-uses in VPlan directly. - if (!MinMaxPhiR->hasLoopUsesOutsideReductionChain()) + if (!MinMaxPhiR || !MinMaxPhiR->hasLoopUsesOutsideReductionChain()) continue; + RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind(); assert( RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind) && "only min/max recurrences support users outside the reduction chain"); @@ -1062,10 +1061,9 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { // 2. Compare the partial min/max reduction result to its final value and, // 3. Select the lanes of the partial FindLastIV reductions which // correspond to the lanes matching the min/max reduction result. - VPInstruction *FindIVResult = dyn_cast( - *(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0))); - if (!FindIVResult) - return false; + VPInstruction *FindIVResult = + cast(findComputeReductionResult( + FindIVPhiR, VPInstruction::ComputeFindIVResult)); VPBuilder B(FindIVResult); VPInstruction *MinMaxResult = B.createNaryOp( VPInstruction::ComputeReductionResult, From fd90ad91ede7f4dd855f84cecd7fa852bca45e5f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 21 Nov 2025 11:40:39 +0000 Subject: [PATCH 10/20] !fixup address comments, thanks --- llvm/lib/Analysis/IVDescriptors.cpp | 2 +- .../Transforms/Vectorize/LoopVectorize.cpp | 6 +- llvm/lib/Transforms/Vectorize/VPlan.h | 4 +- .../Vectorize/VPlanConstruction.cpp | 30 +++++----- .../LoopVectorize/AArch64/select-index.ll | 58 ++++++++----------- .../select-index-interleaving.ll | 34 +++++------ .../LoopVectorize/select-smax-last-index.ll | 12 ++-- .../LoopVectorize/select-smin-last-index.ll | 12 ++-- .../LoopVectorize/select-umax-last-index.ll | 12 ++-- .../LoopVectorize/select-umin-last-index.ll | 12 ++-- 10 files changed, 74 insertions(+), 108 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index d94bed88f50c7..fd562a4ca93fd 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -216,7 +216,7 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, /// Returns true if \p Phi is a min/max reduction matching \p Kind where \p Phi /// is used in the loop outside the reduction chain. This is common for loops -/// selecting the index of a minimum/maximum value (argmax). +/// selecting the index of a minimum/maximum value (argmin/argmax). static bool isMinMaxReductionWithLoopUsersOutsideReductionChain( PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes) { BasicBlock *Latch = TheLoop->getLoopLatch(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d2ac4e11e800a..794991a44016e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7999,10 +7999,8 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) { SmallVector>> ChainsByPhi; for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) - if (Instruction *RdxExitInstr = RdxDesc.getLoopExitInstr()) { - getScaledReductions(Phi, RdxDesc.getLoopExitInstr(), Range, - ChainsByPhi[Phi]); - } + if (Instruction *RdxExitInstr = RdxDesc.getLoopExitInstr()) + getScaledReductions(Phi, RdxExitInstr, Range, ChainsByPhi[Phi]); // A partial reduction is invalid if any of its extends are used by // something that isn't another partial reduction. This is because the diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 778c94c9f6c39..5c182742828b8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2418,7 +2418,9 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, bool IsOrdered; /// The phi is part of a multi-use reduction (e.g., used in FindLastIV - /// patterns). + /// patterns for argmin/argmax). + /// TODO: Also support cases where the phi itself has a single use, but its + /// compare has multiple uses. bool HasLoopUsesOutsideReductionChain; /// When expanding the reduction PHI, the plan's VF element count is divided diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index eb61081b0efcc..85596ac61f560 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -818,11 +818,10 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck( Branch->setMetadata(LLVMContext::MD_prof, BranchWeights); } -/// If \p RedPhiR is used to compute are reduction result via a -/// ComputeReductionResult, ComputeFindIVResult or ComputeAnyOfResult -/// VPInstruction, return it. Otherwise return nullptr. -static VPInstruction *findComputeReductionResult(VPReductionPHIRecipe *RedPhiR, - unsigned Opcode) { +/// If \p RedPhiR is used by a VPInstruction with \p Opcode, return it. +/// Otherwise return nullptr; +static VPInstruction *findReductionUser(VPReductionPHIRecipe *RedPhiR, + unsigned Opcode) { auto It = find_if(RedPhiR->users(), [Opcode](VPUser *U) { auto *VPI = dyn_cast(U); return VPI && VPI->getOpcode() == Opcode; @@ -934,8 +933,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { // If we exit early due to NaNs, compute the final reduction result based on // the reduction phi at the beginning of the last vector iteration. - auto *RdxResult = findComputeReductionResult( - RedPhiR, VPInstruction::ComputeReductionResult); + auto *RdxResult = + findReductionUser(RedPhiR, VPInstruction::ComputeReductionResult); auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR, RdxResult->getOperand(1)); @@ -1008,7 +1007,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind) && "only min/max recurrences support users outside the reduction chain"); - // One user of MinMaxPhiR is MinMaxOp, the other users must be a compare + // One user of MinMaxPhiR is MinMaxOp, the other user must be a compare // that's part of a FindLastIV chain. auto *MinMaxOp = dyn_cast(MinMaxPhiR->getBackedgeValue()); @@ -1061,15 +1060,14 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { // 2. Compare the partial min/max reduction result to its final value and, // 3. Select the lanes of the partial FindLastIV reductions which // correspond to the lanes matching the min/max reduction result. - VPInstruction *FindIVResult = - cast(findComputeReductionResult( - FindIVPhiR, VPInstruction::ComputeFindIVResult)); + auto *FindIVResult = cast( + findReductionUser(FindIVPhiR, VPInstruction::ComputeFindIVResult)); + auto *MinMaxResult = cast( + findReductionUser(MinMaxPhiR, VPInstruction::ComputeReductionResult)); + MinMaxResult->moveBefore(*FindIVResult->getParent(), + FindIVResult->getIterator()); + VPBuilder B(FindIVResult); - VPInstruction *MinMaxResult = B.createNaryOp( - VPInstruction::ComputeReductionResult, - {MinMaxPhiR, MinMaxPhiR->getBackedgeValue()}, VPIRFlags(), {}); - MinMaxPhiR->getBackedgeValue()->replaceUsesWithIf( - MinMaxResult, [](VPUser &U, unsigned) { return isa(&U); }); auto *FinalMinMaxCmp = B.createICmp( CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult); auto *FinalIVSelect = diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll index 3a50995b3b94c..559f72f881678 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll @@ -9,7 +9,7 @@ define i64 @test_vectorize_select_umin_first_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -28,7 +28,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -58,8 +58,8 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 @@ -88,19 +88,17 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 100, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -119,7 +117,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -142,7 +140,7 @@ define i64 @test_vectorize_select_smin_first_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[MIN_VAL]], [[L]] @@ -161,7 +159,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp sgt i64 %min.val, %l @@ -221,19 +219,17 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] @@ -275,7 +271,7 @@ define i64 @test_vectorize_select_umax_first_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[MIN_VAL]], [[L]] @@ -294,7 +290,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ult i64 %min.val, %l @@ -324,8 +320,8 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 @@ -354,19 +350,17 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 100, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[L]] @@ -385,7 +379,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ule i64 %min.val, %l @@ -408,7 +402,7 @@ define i64 @test_vectorize_select_smax_first_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[MIN_VAL]], [[L]] @@ -427,7 +421,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp slt i64 %min.val, %l @@ -457,8 +451,8 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 @@ -487,19 +481,17 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[TMP5]], <2 x i64> [[TMP6]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 100, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[L]] @@ -518,7 +510,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp sle i64 %min.val, %l diff --git a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll index 9ec16311744c9..48ea761f7365d 100644 --- a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll @@ -9,7 +9,7 @@ define i64 @test_vectorize_select_umin_first_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 50, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] @@ -28,7 +28,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 50, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ugt i64 %min.val, %l @@ -58,8 +58,8 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 50), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 50), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 @@ -88,19 +88,17 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 50, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -119,7 +117,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 50, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -221,19 +219,17 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] @@ -354,19 +350,17 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[L]] @@ -487,19 +481,17 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX5]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 -; CHECK-NEXT: [[RDX_MINMAX6:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP6]]) -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX6]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX7:%.*]] = phi i64 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX7]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[L]] diff --git a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll index cd18114ce5ea0..9634e4d736f0d 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll @@ -34,13 +34,12 @@ define i64 @test_vectorize_select_smax_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -112,13 +111,12 @@ define i64 @test_vectorize_select_smax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -320,13 +318,12 @@ define i64 @test_vectorize_select_smax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -442,13 +439,12 @@ define i64 @test_cmp_and_smax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll index 5bbb8266e050d..7e4a232d8dadf 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll @@ -36,13 +36,12 @@ define i64 @test_vectorize_select_smin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -114,13 +113,12 @@ define i64 @test_vectorize_select_smin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -322,13 +320,12 @@ define i64 @test_vectorize_select_smin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -444,13 +441,12 @@ define i64 @test_cmp_and_smin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll index f719a8e9840ba..e8fa9f8e353eb 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll @@ -34,13 +34,12 @@ define i64 @test_vectorize_select_umax_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -112,13 +111,12 @@ define i64 @test_vectorize_select_umax_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -320,13 +318,12 @@ define i64 @test_vectorize_select_umax_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -442,13 +439,12 @@ define i64 @test_cmp_and_umax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll index 693216a97bddc..c6722f53f1a78 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll @@ -36,13 +36,12 @@ define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -114,13 +113,12 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -322,13 +320,12 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP7]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP2]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -444,13 +441,12 @@ define i64 @test_cmp_and_umin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) ; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] From da55075c3f643a9ea20d48b861a644ce9580bf91 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 21 Nov 2025 16:41:54 +0000 Subject: [PATCH 11/20] !fixup address latest comments, thanks --- .../Vectorize/VPlanConstruction.cpp | 47 ++++++++-------- .../LoopVectorize/select-umin-last-index.ll | 54 +++++++++++++++---- 2 files changed, 70 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 85596ac61f560..a05586df33c6f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -818,15 +818,19 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck( Branch->setMetadata(LLVMContext::MD_prof, BranchWeights); } -/// If \p RedPhiR is used by a VPInstruction with \p Opcode, return it. -/// Otherwise return nullptr; -static VPInstruction *findReductionUser(VPReductionPHIRecipe *RedPhiR, - unsigned Opcode) { - auto It = find_if(RedPhiR->users(), [Opcode](VPUser *U) { - auto *VPI = dyn_cast(U); - return VPI && VPI->getOpcode() == Opcode; - }); - return It == RedPhiR->user_end() ? nullptr : cast(*It); +/// If \p V is used by a recipe matching pattern \p P, return it. Otherwise +/// return nullptr; +template +static VPRecipeBase *findUser(VPValue *V, const MatchT &P) { + auto It = find_if(V->users(), match_fn(P)); + return It == V->user_end() ? nullptr : cast(*It); +} + +/// If \p V is used by a VPInstruction with \p Opcode, return it. Otherwise +/// return nullptr. +template +static VPInstruction *findUserVPInstruction(VPValue *V) { + return cast_or_null(findUser(V, m_VPInstruction())); } bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { @@ -934,7 +938,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { // If we exit early due to NaNs, compute the final reduction result based on // the reduction phi at the beginning of the last vector iteration. auto *RdxResult = - findReductionUser(RedPhiR, VPInstruction::ComputeReductionResult); + findUserVPInstruction(RedPhiR); auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR, RdxResult->getOperand(1)); @@ -1013,17 +1017,15 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { dyn_cast(MinMaxPhiR->getBackedgeValue()); if (!MinMaxOp || MinMaxOp->getNumUsers() != 2) return false; - auto MinMaxUsers = to_vector(MinMaxPhiR->users()); - auto *Cmp = dyn_cast( - MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]); VPValue *CmpOpA; VPValue *CmpOpB; - if (!Cmp || Cmp->getNumUsers() != 1 || - !match(Cmp, m_Binary(m_VPValue(CmpOpA), - m_VPValue(CmpOpB)))) + CmpInst::Predicate Pred; + auto *Cmp = dyn_cast_or_null( + findUser(MinMaxPhiR, m_Cmp(m_VPValue(CmpOpA), m_VPValue(CmpOpB)))); + if (!Cmp || Cmp->getNumUsers() != 1) return false; - CmpInst::Predicate Pred = Cmp->getPredicate(); + Pred = Cmp->getPredicate(); // TODO: Strict predicates need to find the first IV value for which the // predicate holds, not the last. if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE || @@ -1035,7 +1037,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { Pred = CmpInst::getSwappedPredicate(Pred); // Cmp must be used by the select of a FindLastIV chain. - VPValue *Sel = dyn_cast(*Cmp->user_begin()); + VPValue *Sel = dyn_cast(Cmp->getSingleUser()); VPValue *IVOp, *FindIV; if (!Sel || !match(Sel, @@ -1060,10 +1062,11 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { // 2. Compare the partial min/max reduction result to its final value and, // 3. Select the lanes of the partial FindLastIV reductions which // correspond to the lanes matching the min/max reduction result. - auto *FindIVResult = cast( - findReductionUser(FindIVPhiR, VPInstruction::ComputeFindIVResult)); - auto *MinMaxResult = cast( - findReductionUser(MinMaxPhiR, VPInstruction::ComputeReductionResult)); + VPInstruction *FindIVResult = + findUserVPInstruction(FindIVPhiR); + VPInstruction *MinMaxResult = + findUserVPInstruction( + MinMaxPhiR); MinMaxResult->moveBefore(*FindIVResult->getParent(), FindIVResult->getIterator()); diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll index c6722f53f1a78..4299082701244 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll @@ -699,22 +699,58 @@ define i64 @test_cmp_use_find_iv_phi_twice(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_use_find_iv_phi_twice( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[TMP1]] +; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[ADD]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: From 5a442b276e2e33fbaf54a0b2685481cec2ed881e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 21 Nov 2025 19:37:50 +0000 Subject: [PATCH 12/20] !fixup add argmin/argmax tests with fmin/fmax. --- .../LoopVectorize/select-fmax-last-index.ll | 132 ++++++++++++++++++ .../LoopVectorize/select-fmin-last-index.ll | 132 ++++++++++++++++++ 2 files changed, 264 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/select-fmax-last-index.ll create mode 100644 llvm/test/Transforms/LoopVectorize/select-fmin-last-index.ll diff --git a/llvm/test/Transforms/LoopVectorize/select-fmax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-fmax-last-index.ll new file mode 100644 index 0000000000000..a115775c50486 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/select-fmax-last-index.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s + +define i64 @test_vectorize_select_fmaxnum_idx(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmaxnum_idx( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_VAL:%.*]] = phi float [ -3.000000e+00, %[[ENTRY]] ], [ [[MAX_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MAX_VAL]], [[L]] +; CHECK-NEXT: [[MAX_VAL_NEXT]] = call float @llvm.maxnum.f32(float [[L]], float [[MAX_VAL]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ] + %gep = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep + %cmp = fcmp uge float %max.val, %l + %max.val.next = call float @llvm.maxnum.f32(float %l, float %max.val) + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + +define i64 @test_vectorize_select_fmax_idx_without_fmfs(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmax_idx_without_fmfs( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_VAL:%.*]] = phi float [ -3.000000e+00, %[[ENTRY]] ], [ [[MAX_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MAX_VAL]], [[L]] +; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp ogt float [[L]], [[MAX_VAL]] +; CHECK-NEXT: [[MAX_VAL_NEXT]] = select i1 [[MAX_CMP]], float [[L]], float [[MAX_VAL]] +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ] + %gep = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep + %cmp = fcmp uge float %max.val, %l + %max.cmp = fcmp ogt float %l, %max.val + %max.val.next = select i1 %max.cmp, float %l, float %max.val + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + +define i64 @test_vectorize_select_fmax_idx_with_fmfs(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmax_idx_with_fmfs( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_VAL:%.*]] = phi float [ -3.000000e+00, %[[ENTRY]] ], [ [[MAX_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MAX_VAL]], [[L]] +; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp fast ogt float [[L]], [[MAX_VAL]] +; CHECK-NEXT: [[MAX_VAL_NEXT]] = select fast i1 [[MAX_CMP]], float [[L]], float [[MAX_VAL]] +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ] + %gep = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep + %cmp = fcmp uge float %max.val, %l + %max.cmp = fcmp fast ogt float %l, %max.val + %max.val.next = select fast i1 %max.cmp, float %l, float %max.val + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} diff --git a/llvm/test/Transforms/LoopVectorize/select-fmin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-fmin-last-index.ll new file mode 100644 index 0000000000000..0c0dd43338442 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/select-fmin-last-index.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s + +define i64 @test_vectorize_select_fminnum_idx(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fminnum_idx( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi float [ -3.000000e+00, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = call float @llvm.minnum.f32(float [[L]], float [[MIN_VAL]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi float [ -3.0000, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep + %cmp = fcmp uge float %min.val, %l + %min.val.next = call float @llvm.minnum.f32(float %l, float %min.val) + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + +define i64 @test_vectorize_select_fmin_idx_without_fmfs(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmin_idx_without_fmfs( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi float [ 1.000000e+01, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_CMP:%.*]] = fcmp olt float [[L]], [[MIN_VAL]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = select i1 [[MIN_CMP]], float [[L]], float [[MIN_VAL]] +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi float [ 10.0000, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep + %cmp = fcmp uge float %min.val, %l + %min.cmp = fcmp olt float %l, %min.val + %min.val.next = select i1 %min.cmp, float %l, float %min.val + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} + +define i64 @test_vectorize_select_fmin_idx_with_fmfs(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmin_idx_with_fmfs( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi float [ -5.000000e+00, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MIN_VAL]], [[L]] +; CHECK-NEXT: [[MIN_CMP:%.*]] = fcmp fast olt float [[L]], [[MIN_VAL]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = select fast i1 [[MIN_CMP]], float [[L]], float [[MIN_VAL]] +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %min.val = phi float [ -5.0000, %entry ], [ %min.val.next, %loop ] + %gep = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep + %cmp = fcmp uge float %min.val, %l + %min.cmp = fcmp fast olt float %l, %min.val + %min.val.next = select fast i1 %min.cmp, float %l, float %min.val + %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + %res = phi i64 [ %min.idx.next, %loop ] + ret i64 %res +} From 87325fdfeff11197a6410aaf81c6f87bfdaa5dcf Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 22 Nov 2025 14:50:04 +0000 Subject: [PATCH 13/20] !fixup address latest comments, thanks --- llvm/lib/Analysis/IVDescriptors.cpp | 1 + .../Transforms/Vectorize/LoopVectorize.cpp | 3 +- .../Vectorize/VPlanConstruction.cpp | 37 +++-- .../LoopVectorize/AArch64/select-index.ll | 28 ++-- .../LoopVectorize/select-fmax-last-index.ll | 58 ++++--- .../LoopVectorize/select-fmin-last-index.ll | 24 ++- .../LoopVectorize/select-smax-last-index.ll | 47 +----- .../LoopVectorize/select-smin-last-index.ll | 47 +----- .../LoopVectorize/select-umax-last-index.ll | 47 +----- .../LoopVectorize/select-umin-last-index.ll | 151 +++++------------- 10 files changed, 132 insertions(+), 311 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index fd562a4ca93fd..dfadcdb075098 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -222,6 +222,7 @@ static bool isMinMaxReductionWithLoopUsersOutsideReductionChain( BasicBlock *Latch = TheLoop->getLoopLatch(); if (!Latch) return false; + assert(Phi->getNumIncomingValues() == 2 && "phi must have 2 incoming values"); Value *Inc = Phi->getIncomingValueForBlock(Latch); if (Phi->hasOneUse() || !Inc->hasOneUse() || diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 794991a44016e..05fd837f516d3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7998,9 +7998,10 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) { MapVector>> ChainsByPhi; - for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) + for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) { if (Instruction *RdxExitInstr = RdxDesc.getLoopExitInstr()) getScaledReductions(Phi, RdxExitInstr, Range, ChainsByPhi[Phi]); + } // A partial reduction is invalid if any of its extends are used by // something that isn't another partial reduction. This is because the diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index a05586df33c6f..2ac9951bb3bfb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -821,16 +821,15 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck( /// If \p V is used by a recipe matching pattern \p P, return it. Otherwise /// return nullptr; template -static VPRecipeBase *findUser(VPValue *V, const MatchT &P) { +static VPRecipeBase *findUserOf(VPValue *V, const MatchT &P) { auto It = find_if(V->users(), match_fn(P)); return It == V->user_end() ? nullptr : cast(*It); } /// If \p V is used by a VPInstruction with \p Opcode, return it. Otherwise /// return nullptr. -template -static VPInstruction *findUserVPInstruction(VPValue *V) { - return cast_or_null(findUser(V, m_VPInstruction())); +template static VPInstruction *findUserOf(VPValue *V) { + return cast_or_null(findUserOf(V, m_VPInstruction())); } bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { @@ -938,7 +937,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { // If we exit early due to NaNs, compute the final reduction result based on // the reduction phi at the beginning of the last vector iteration. auto *RdxResult = - findUserVPInstruction(RedPhiR); + findUserOf(RedPhiR); auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR, RdxResult->getOperand(1)); @@ -1017,15 +1016,28 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { dyn_cast(MinMaxPhiR->getBackedgeValue()); if (!MinMaxOp || MinMaxOp->getNumUsers() != 2) return false; + + assert((isa(MinMaxOp) || + (isa(MinMaxOp) && + isa( + cast(MinMaxOp)->getUnderlyingValue()))) && + "MinMaxOp must be a wide or scalar intrinsic"); + VPValue *MinMaxOpA = MinMaxOp->getOperand(0); + VPValue *MinMaxOpB = MinMaxOp->getOperand(1); + if (MinMaxOpA != MinMaxPhiR) + std::swap(MinMaxOpA, MinMaxOpB); + if (MinMaxOpA != MinMaxPhiR) + return false; + VPValue *CmpOpA; VPValue *CmpOpB; - CmpInst::Predicate Pred; - auto *Cmp = dyn_cast_or_null( - findUser(MinMaxPhiR, m_Cmp(m_VPValue(CmpOpA), m_VPValue(CmpOpB)))); - if (!Cmp || Cmp->getNumUsers() != 1) + CmpPredicate Pred; + auto *Cmp = dyn_cast_or_null(findUserOf( + MinMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB)))); + if (!Cmp || Cmp->getNumUsers() != 1 || + (CmpOpA != MinMaxOpB && CmpOpB != MinMaxOpB)) return false; - Pred = Cmp->getPredicate(); // TODO: Strict predicates need to find the first IV value for which the // predicate holds, not the last. if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE || @@ -1063,10 +1075,9 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { // 3. Select the lanes of the partial FindLastIV reductions which // correspond to the lanes matching the min/max reduction result. VPInstruction *FindIVResult = - findUserVPInstruction(FindIVPhiR); + findUserOf(FindIVPhiR); VPInstruction *MinMaxResult = - findUserVPInstruction( - MinMaxPhiR); + findUserOf(MinMaxPhiR); MinMaxResult->moveBefore(*FindIVResult->getParent(), FindIVResult->getIterator()); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll index 559f72f881678..fea09221a4a80 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll @@ -140,7 +140,7 @@ define i64 @test_vectorize_select_smin_first_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[MIN_VAL]], [[L]] @@ -159,7 +159,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp sgt i64 %min.val, %l @@ -271,7 +271,7 @@ define i64 @test_vectorize_select_umax_first_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[MIN_VAL]], [[L]] @@ -290,7 +290,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ult i64 %min.val, %l @@ -320,8 +320,8 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 @@ -355,7 +355,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 100, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -379,7 +379,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ule i64 %min.val, %l @@ -402,7 +402,7 @@ define i64 @test_vectorize_select_smax_first_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[MIN_VAL]], [[L]] @@ -421,7 +421,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp slt i64 %min.val, %l @@ -451,8 +451,8 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 @@ -486,7 +486,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 100, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -510,7 +510,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp sle i64 %min.val, %l diff --git a/llvm/test/Transforms/LoopVectorize/select-fmax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-fmax-last-index.ll index a115775c50486..4d8784836b00f 100644 --- a/llvm/test/Transforms/LoopVectorize/select-fmax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-fmax-last-index.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s -define i64 @test_vectorize_select_fmaxnum_idx(ptr %src, i64 %n) { -; CHECK-LABEL: define i64 @test_vectorize_select_fmaxnum_idx( +define i64 @test_vectorize_select_fmaxnum_last_idx(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmaxnum_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -27,42 +27,41 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %max.idx = phi i64 [ 0, %entry ], [ %max.idx.next, %loop ] %max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ] %gep = getelementptr float, ptr %src, i64 %iv %l = load float, ptr %gep %cmp = fcmp uge float %max.val, %l %max.val.next = call float @llvm.maxnum.f32(float %l, float %max.val) - %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %max.idx.next = select i1 %cmp, i64 %iv, i64 %max.idx %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n br i1 %exitcond.not, label %exit, label %loop exit: - %res = phi i64 [ %min.idx.next, %loop ] + %res = phi i64 [ %max.idx.next, %loop ] ret i64 %res } -define i64 @test_vectorize_select_fmax_idx_without_fmfs(ptr %src, i64 %n) { -; CHECK-LABEL: define i64 @test_vectorize_select_fmax_idx_without_fmfs( +define i64 @test_vectorize_select_fmax_last_idx_without_fmfs(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmax_last_idx_without_fmfs( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MAX_VAL:%.*]] = phi float [ -3.000000e+00, %[[ENTRY]] ], [ [[MAX_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MAX_VAL]], [[L]] -; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp ogt float [[L]], [[MAX_VAL]] +; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp ule float [[MAX_VAL]], [[L]] ; CHECK-NEXT: [[MAX_VAL_NEXT]] = select i1 [[MAX_CMP]], float [[L]], float [[MAX_VAL]] -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[MAX_CMP]], i64 [[IV]], i64 [[MAX_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -70,43 +69,41 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %max.idx = phi i64 [ 0, %entry ], [ %max.idx.next, %loop ] %max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ] %gep = getelementptr float, ptr %src, i64 %iv %l = load float, ptr %gep - %cmp = fcmp uge float %max.val, %l - %max.cmp = fcmp ogt float %l, %max.val - %max.val.next = select i1 %max.cmp, float %l, float %max.val - %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %cmp = fcmp ule float %max.val, %l + %max.val.next = select i1 %cmp, float %l, float %max.val + %max.idx.next = select i1 %cmp, i64 %iv, i64 %max.idx %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n br i1 %exitcond.not, label %exit, label %loop exit: - %res = phi i64 [ %min.idx.next, %loop ] + %res = phi i64 [ %max.idx.next, %loop ] ret i64 %res } -define i64 @test_vectorize_select_fmax_idx_with_fmfs(ptr %src, i64 %n) { -; CHECK-LABEL: define i64 @test_vectorize_select_fmax_idx_with_fmfs( +define i64 @test_vectorize_select_fmax_last_idx_with_fmfs(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmax_last_idx_with_fmfs( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MAX_VAL:%.*]] = phi float [ -3.000000e+00, %[[ENTRY]] ], [ [[MAX_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MAX_VAL]], [[L]] -; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp fast ogt float [[L]], [[MAX_VAL]] +; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp ule float [[MAX_VAL]], [[L]] ; CHECK-NEXT: [[MAX_VAL_NEXT]] = select fast i1 [[MAX_CMP]], float [[L]], float [[MAX_VAL]] -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[MAX_CMP]], i64 [[IV]], i64 [[MAX_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -114,19 +111,18 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] + %max.idx = phi i64 [ 0, %entry ], [ %max.idx.next, %loop ] %max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ] %gep = getelementptr float, ptr %src, i64 %iv %l = load float, ptr %gep - %cmp = fcmp uge float %max.val, %l - %max.cmp = fcmp fast ogt float %l, %max.val - %max.val.next = select fast i1 %max.cmp, float %l, float %max.val - %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx + %cmp = fcmp ule float %max.val, %l + %max.val.next = select fast i1 %cmp, float %l, float %max.val + %max.idx.next = select i1 %cmp, i64 %iv, i64 %max.idx %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n br i1 %exitcond.not, label %exit, label %loop exit: - %res = phi i64 [ %min.idx.next, %loop ] + %res = phi i64 [ %max.idx.next, %loop ] ret i64 %res } diff --git a/llvm/test/Transforms/LoopVectorize/select-fmin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-fmin-last-index.ll index 0c0dd43338442..59e31f74e0609 100644 --- a/llvm/test/Transforms/LoopVectorize/select-fmin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-fmin-last-index.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s -define i64 @test_vectorize_select_fminnum_idx(ptr %src, i64 %n) { -; CHECK-LABEL: define i64 @test_vectorize_select_fminnum_idx( +define i64 @test_vectorize_select_fminnum_last_idx(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fminnum_last_idx( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -43,8 +43,8 @@ exit: ret i64 %res } -define i64 @test_vectorize_select_fmin_idx_without_fmfs(ptr %src, i64 %n) { -; CHECK-LABEL: define i64 @test_vectorize_select_fmin_idx_without_fmfs( +define i64 @test_vectorize_select_fmin_last_idx_without_fmfs(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmin_last_idx_without_fmfs( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -55,8 +55,7 @@ define i64 @test_vectorize_select_fmin_idx_without_fmfs(ptr %src, i64 %n) { ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MIN_VAL]], [[L]] -; CHECK-NEXT: [[MIN_CMP:%.*]] = fcmp olt float [[L]], [[MIN_VAL]] -; CHECK-NEXT: [[MIN_VAL_NEXT]] = select i1 [[MIN_CMP]], float [[L]], float [[MIN_VAL]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = select i1 [[CMP]], float [[L]], float [[MIN_VAL]] ; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] @@ -75,8 +74,7 @@ loop: %gep = getelementptr float, ptr %src, i64 %iv %l = load float, ptr %gep %cmp = fcmp uge float %min.val, %l - %min.cmp = fcmp olt float %l, %min.val - %min.val.next = select i1 %min.cmp, float %l, float %min.val + %min.val.next = select i1 %cmp, float %l, float %min.val %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n @@ -87,8 +85,8 @@ exit: ret i64 %res } -define i64 @test_vectorize_select_fmin_idx_with_fmfs(ptr %src, i64 %n) { -; CHECK-LABEL: define i64 @test_vectorize_select_fmin_idx_with_fmfs( +define i64 @test_vectorize_select_fmin_last_idx_with_fmfs(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_vectorize_select_fmin_last_idx_with_fmfs( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -99,8 +97,7 @@ define i64 @test_vectorize_select_fmin_idx_with_fmfs(ptr %src, i64 %n) { ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MIN_VAL]], [[L]] -; CHECK-NEXT: [[MIN_CMP:%.*]] = fcmp fast olt float [[L]], [[MIN_VAL]] -; CHECK-NEXT: [[MIN_VAL_NEXT]] = select fast i1 [[MIN_CMP]], float [[L]], float [[MIN_VAL]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = select fast i1 [[CMP]], float [[L]], float [[MIN_VAL]] ; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] @@ -119,8 +116,7 @@ loop: %gep = getelementptr float, ptr %src, i64 %iv %l = load float, ptr %gep %cmp = fcmp uge float %min.val, %l - %min.cmp = fcmp fast olt float %l, %min.val - %min.val.next = select fast i1 %min.cmp, float %l, float %min.val + %min.val.next = select fast i1 %cmp, float %l, float %min.val %min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx %iv.next = add nuw nsw i64 %iv, 1 %exitcond.not = icmp eq i64 %iv.next, %n diff --git a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll index 9634e4d736f0d..a6ff83e926140 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll @@ -408,48 +408,11 @@ define i64 @test_cmp_and_smax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_and_smax_use_different_values( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT3]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX4]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MIN_VAL]], [[X]] @@ -457,9 +420,9 @@ define i64 @test_cmp_and_smax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MAX_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll index 7e4a232d8dadf..b133d4e5843d4 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll @@ -410,48 +410,11 @@ define i64 @test_cmp_and_smin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_and_smin_use_different_values( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT3]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX4]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[X]] @@ -459,9 +422,9 @@ define i64 @test_cmp_and_smin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll index e8fa9f8e353eb..b7638dda4a037 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umax-last-index.ll @@ -408,48 +408,11 @@ define i64 @test_cmp_and_umax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_and_umax_use_different_values( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT3]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX4]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MIN_VAL]], [[X]] @@ -457,9 +420,9 @@ define i64 @test_cmp_and_umax_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MAX_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll index 4299082701244..dd23634f73402 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll @@ -17,7 +17,7 @@ define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 140), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] @@ -41,7 +41,7 @@ define i64 @test_vectorize_select_umin_idx(ptr %src, i64 %n) { ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 140, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -65,7 +65,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 140, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -94,7 +94,7 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 130), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] @@ -118,7 +118,7 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 130, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -142,7 +142,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 130, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ule i64 %l, %min.val @@ -165,7 +165,7 @@ define i64 @test_vectorize_select_umin_idx_select_ops_flipped(ptr %src, i64 %n) ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 120, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[L]], [[MIN_VAL]] @@ -184,7 +184,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 120, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp ule i64 %l, %min.val @@ -207,7 +207,7 @@ define i64 @test_vectorize_select_umin_via_select_idx(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 110, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -226,7 +226,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 110, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -249,7 +249,7 @@ define i64 @test_vectorize_select_umin_idx_all_exit_inst(ptr %src, ptr %umin, i6 ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -270,7 +270,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -301,7 +301,7 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 90), %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] @@ -325,7 +325,7 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 90, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -349,7 +349,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 90, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -372,7 +372,7 @@ define i64 @test_not_vectorize_select_no_min_reduction(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RED_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED_VAL:%.*]] = phi i64 [ 80, %[[ENTRY]] ], [ [[RED_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[RED_VAL]], [[L]] @@ -391,7 +391,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %red.val = phi i64 [ 0, %entry ], [ %red.val.next, %loop ] + %red.val = phi i64 [ 80, %entry ], [ %red.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %red.val, %l @@ -410,48 +410,11 @@ define i64 @test_cmp_and_umin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_and_umin_use_different_values( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT3]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX4:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX4]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 70, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[X]] @@ -459,9 +422,9 @@ define i64 @test_cmp_and_umin_use_different_values(ptr %src, i64 %x, i64 %n) { ; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -470,7 +433,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 70, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %x @@ -493,7 +456,7 @@ define i32 @test_vectorize_select_umin_idx_with_trunc(ptr %src, i64 %n) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 60, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -513,7 +476,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i32 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 60, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -537,7 +500,7 @@ define ptr @test_with_ptr_index(ptr %start, ptr %end) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi ptr [ null, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 60, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[IV]], align 4 ; CHECK-NEXT: [[CMP7_US:%.*]] = icmp ule i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) @@ -555,7 +518,7 @@ entry: loop: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] %min.idx = phi ptr [ null, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 60, %entry ], [ %min.val.next, %loop ] %l = load i64, ptr %iv %cmp7.us = icmp ule i64 %l, %min.val %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) @@ -577,7 +540,7 @@ define i64 @test_no_vectorize_select_iv_decrement(ptr %src) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1000, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 50, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -596,7 +559,7 @@ entry: loop: %iv = phi i64 [ 1000, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 50, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -619,7 +582,7 @@ define i64 @test_no_vectorize_select_iv_sub(ptr %src) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1000, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 40, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -638,7 +601,7 @@ entry: loop: %iv = phi i64 [ 1000, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 40, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -661,7 +624,7 @@ define i64 @test_no_vectorize_select_iv_mul(ptr %src) { ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 30, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[MIN_VAL]], [[L]] @@ -680,7 +643,7 @@ entry: loop: %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 30, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %cmp = icmp uge i64 %min.val, %l @@ -699,47 +662,11 @@ define i64 @test_cmp_use_find_iv_phi_twice(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_cmp_use_find_iv_phi_twice( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i64> [[VEC_PHI1]], [[TMP1]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 20, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[MIN_VAL]], [[L]] @@ -748,9 +675,9 @@ define i64 @test_cmp_use_find_iv_phi_twice(ptr %src, i64 %n) { ; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -759,7 +686,7 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ] - %min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ] + %min.val = phi i64 [ 20, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep %add = add i64 %min.val, %l From 6cc3953a9975b683e785e2e8d94b0b5fc9d4d057 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 24 Nov 2025 18:45:53 +0000 Subject: [PATCH 14/20] !fixup address latest comments, thanks --- .../Vectorize/VPlanConstruction.cpp | 74 ++++++++++++++----- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 89c0bab26a345..9bc2e9216690c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -1014,18 +1014,27 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { if (!MinMaxPhiR || !MinMaxPhiR->hasLoopUsesOutsideReductionChain()) continue; + // MinMaxPhiR has users outside the reduction cycle in the loop. Check if + // the only other user is a FindLastIV reduction. MinMaxPhiR must have + // exactly 3 users: 1) the min/max operation, the compare of a FindLastIV + // reduction and ComputeReductionResult. The comparisom must compare + // MinMaxPhiR against the min/max operand used for the min/max reduction + // and only be used by the select of the FindLastIV reduction. RecurKind RdxKind = MinMaxPhiR->getRecurrenceKind(); assert( RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind) && "only min/max recurrences support users outside the reduction chain"); - // One user of MinMaxPhiR is MinMaxOp, the other user must be a compare - // that's part of a FindLastIV chain. auto *MinMaxOp = dyn_cast(MinMaxPhiR->getBackedgeValue()); - if (!MinMaxOp || MinMaxOp->getNumUsers() != 2) + if (!MinMaxOp) return false; + // MinMaxOp must have 2 users: 1) MinMaxPhiR and 2) ComputeReductionResult + // (asserted below). + assert(MinMaxOp->getNumUsers() == 2 && + "MinMaxOp must have exactly 2 users"); + assert((isa(MinMaxOp) || (isa(MinMaxOp) && isa( @@ -1035,8 +1044,8 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { VPValue *MinMaxOpB = MinMaxOp->getOperand(1); if (MinMaxOpA != MinMaxPhiR) std::swap(MinMaxOpA, MinMaxOpB); - if (MinMaxOpA != MinMaxPhiR) - return false; + assert(MinMaxOpA == MinMaxPhiR && + "one of MinMaxOp's operands must be the phi"); VPValue *CmpOpA; VPValue *CmpOpB; @@ -1047,16 +1056,31 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { (CmpOpA != MinMaxOpB && CmpOpB != MinMaxOpB)) return false; + // MinMaxPhiR must have exactly 3 users: + // * MinMaxOp, + // * Cmp (that's part of a FindLastIV chain), + // * ComputeReductionResult. + if (MinMaxPhiR->getNumUsers() != 3) + return false; + + VPInstruction *MinMaxResult = + findUserOf(MinMaxPhiR); + assert(is_contained(MinMaxPhiR->users(), MinMaxOp) && + "one user must be MinMaxOp"); + assert(is_contained(MinMaxPhiR->users(), Cmp) && "one user must be Cmp"); + assert(is_contained(MinMaxPhiR->users(), MinMaxResult) && + "one user must be MinMaxResult"); + assert(is_contained(MinMaxOp->users(), MinMaxPhiR) && + "one user must be MinMaxPhiR"); + assert(is_contained(MinMaxOp->users(), MinMaxResult) && + "one user must be MinMaxResult"); + // TODO: Strict predicates need to find the first IV value for which the // predicate holds, not the last. if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE || ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred)) return false; - // Normalize the predicate so MinMaxPhiR is on the right side. - if (CmpOpA == MinMaxPhiR) - Pred = CmpInst::getSwappedPredicate(Pred); - // Cmp must be used by the select of a FindLastIV chain. VPValue *Sel = dyn_cast(Cmp->getSingleUser()); VPValue *IVOp, *FindIV; @@ -1079,23 +1103,39 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { // The reduction using MinMaxPhiR needs adjusting to compute the correct // result: // 1. We need to find the last IV for which the condition based on the - // min/max recurrence is true, + // min/max recurrence is true, // 2. Compare the partial min/max reduction result to its final value and, // 3. Select the lanes of the partial FindLastIV reductions which - // correspond to the lanes matching the min/max reduction result. + // correspond to the lanes matching the min/max reduction result. + // + // For example, this transforms + // vp<%min.result> = compute-reduction-result ir<%min.val>, + // ir<%min.val.next> + // vp<%find.iv.result = compute-find-iv-result ir<%min.idx>, ir<0>, + // SENTINEL, vp<%min.idx.next> + // + // into: + // + // vp = compute-reduction-result ir<%min.val>, ir<%min.val.next> + // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp + // vp<%final.iv> = select vp<%final.min.cmp>, ir<%min.idx.next>, SENTINEL + // vp<%find.iv.result> = compute-find-iv-result ir<%min.idx>, ir<0>, + // SENTINEL, vp<%final.iv> VPInstruction *FindIVResult = findUserOf(FindIVPhiR); - VPInstruction *MinMaxResult = - findUserOf(MinMaxPhiR); + assert(FindIVResult->getParent() == MinMaxResult->getParent() && + "both results must be computed in the same block"); MinMaxResult->moveBefore(*FindIVResult->getParent(), FindIVResult->getIterator()); VPBuilder B(FindIVResult); - auto *FinalMinMaxCmp = B.createICmp( - CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult); + VPValue *MinMaxExiting = MinMaxResult->getOperand(1); + auto *FinalMinMaxCmp = + B.createICmp(CmpInst::ICMP_EQ, MinMaxExiting, MinMaxResult); + VPValue *Sentinel = FindIVResult->getOperand(2); + VPValue *LastIVExiting = FindIVResult->getOperand(3); auto *FinalIVSelect = - B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3), - FindIVResult->getOperand(2)); + B.createSelect(FinalMinMaxCmp, LastIVExiting, Sentinel); FindIVResult->setOperand(3, FinalIVSelect); } return true; From b1ff1a48b51c9fdd7d81aa4864edb8be6eeeb1e5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 25 Nov 2025 21:51:21 +0000 Subject: [PATCH 15/20] !fixup address comments, thanks --- llvm/include/llvm/Analysis/IVDescriptors.h | 17 +++++++-------- llvm/lib/Analysis/IVDescriptors.cpp | 8 +++---- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 2 +- .../Vectorize/LoopVectorizationLegality.cpp | 2 +- .../Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- llvm/lib/Transforms/Vectorize/VPlan.h | 12 +++++------ .../Vectorize/VPlanConstruction.cpp | 21 +++++++------------ 7 files changed, 29 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 3ec872607f45c..5e32e9a2fc83a 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -96,16 +96,16 @@ class RecurrenceDescriptor { Type *RT, bool Signed, bool Ordered, SmallPtrSetImpl &CI, unsigned MinWidthCastToRecurTy, - bool PhiHasLoopUsesOutsideReductionChain = false) + bool PhiHasUsesOutsideReductionChain = false) : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed), IsOrdered(Ordered), - PhiHasLoopUsesOutsideReductionChain( - PhiHasLoopUsesOutsideReductionChain), + PhiHasUsesOutsideReductionChain( + PhiHasUsesOutsideReductionChain), MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) { CastInsts.insert_range(CI); assert( - (!PhiHasLoopUsesOutsideReductionChain || isMinMaxRecurrenceKind(K)) && + (!PhiHasUsesOutsideReductionChain || isMinMaxRecurrenceKind(K)) && "Only min/max recurrences are allowed to have multiple uses currently"); } @@ -345,10 +345,9 @@ class RecurrenceDescriptor { /// Expose an ordered FP reduction to the instance users. bool isOrdered() const { return IsOrdered; } - /// Returns true if the reduction PHI has multiple in-loop users. This is - /// relevant for min/max reductions that are part of a FindLastIV pattern. - bool hasLoopUsesOutsideReductionChain() const { - return PhiHasLoopUsesOutsideReductionChain; + /// Returns true if the reduction PHI has any uses outside the reduction chain. This is relevant for min/max reductions that are part of a FindLastIV pattern. + bool hasUsesOutsideReductionChain() const { + return PhiHasUsesOutsideReductionChain; } /// Attempts to find a chain of operations from Phi to LoopExitInst that can @@ -391,7 +390,7 @@ class RecurrenceDescriptor { // True if the reduction PHI has in-loop users outside the reduction chain. // This is relevant for min/max reductions that are part of a FindLastIV // pattern. - bool PhiHasLoopUsesOutsideReductionChain = false; + bool PhiHasUsesOutsideReductionChain = false; // Instructions used for type-promoting the recurrence. SmallPtrSet CastInsts; // The minimum width used by the recurrence. diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index d46214507f55e..f89f97d068d4a 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -217,9 +217,9 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, } /// Returns true if \p Phi is a min/max reduction matching \p Kind where \p Phi -/// is used in the loop outside the reduction chain. This is common for loops -/// selecting the index of a minimum/maximum value (argmin/argmax). -static bool isMinMaxReductionWithLoopUsersOutsideReductionChain( +/// is used outside the reduction chain. This is common for loops selecting the +/// index of a minimum/maximum value (argmin/argmax). +static bool isMinMaxReductionPhiWithUsersOutsideReductionChain( PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes) { BasicBlock *Latch = TheLoop->getLoopLatch(); if (!Latch) @@ -274,7 +274,7 @@ bool RecurrenceDescriptor::AddReductionVar( return false; // Check for min/max reduction variables that feed other users in the loop. - if (isMinMaxReductionWithLoopUsersOutsideReductionChain(Phi, Kind, TheLoop, + if (isMinMaxReductionPhiWithUsersOutsideReductionChain(Phi, Kind, TheLoop, RedDes)) return true; diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 1795591763c6d..6e6c146d79f05 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -1253,7 +1253,7 @@ llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L, /*DemandedBits=*/nullptr, /*AC=*/nullptr, /*DT=*/nullptr, SE)) return std::nullopt; - if (RdxDesc.hasLoopUsesOutsideReductionChain()) + if (RdxDesc.hasUsesOutsideReductionChain()) return std::nullopt; RecurKind RK = RdxDesc.getRecurrenceKind(); // Skip unsupported reductions. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index fbba91e0c1811..cfadcfe675ce7 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -877,7 +877,7 @@ bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) { Requirements->addExactFPMathInst(RedDes.getExactFPMathInst()); AllowedExit.insert(RedDes.getLoopExitInstr()); Reductions[Phi] = RedDes; - assert((!RedDes.hasLoopUsesOutsideReductionChain() || + assert((!RedDes.hasUsesOutsideReductionChain() || RecurrenceDescriptor::isMinMaxRecurrenceKind( RedDes.getRecurrenceKind())) && "Only min/max recurrences are allowed to have multiple uses " diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index aad7c6c31a5c5..4691708e10960 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6590,7 +6590,7 @@ void LoopVectorizationCostModel::collectInLoopReductions() { // Multi-use reductions (e.g., used in FindLastIV patterns) are handled // separately and should not be considered for in-loop reductions. - if (RdxDesc.hasLoopUsesOutsideReductionChain()) + if (RdxDesc.hasUsesOutsideReductionChain()) continue; // We don't collect reductions that are type promoted (yet). @@ -8218,7 +8218,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, PhiRecipe = new VPReductionPHIRecipe( Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi), CM.useOrderedReductions(RdxDesc), ScaleFactor, - RdxDesc.hasLoopUsesOutsideReductionChain()); + RdxDesc.hasUsesOutsideReductionChain()); } else { // TODO: Currently fixed-order recurrences are modeled as chains of // first-order recurrences. If there are no users of the intermediate diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 13fec51bf356a..02d9d54ad2b10 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2413,7 +2413,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, /// patterns for argmin/argmax). /// TODO: Also support cases where the phi itself has a single use, but its /// compare has multiple uses. - bool HasLoopUsesOutsideReductionChain; + bool HasUsesOutsideReductionChain; /// When expanding the reduction PHI, the plan's VF element count is divided /// by this factor to form the reduction phi's VF. @@ -2424,10 +2424,10 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop = false, bool IsOrdered = false, unsigned VFScaleFactor = 1, - bool HasLoopUsesOutsideReductionChain = false) + bool HasUsesOutsideReductionChain = false) : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind), IsInLoop(IsInLoop), IsOrdered(IsOrdered), - HasLoopUsesOutsideReductionChain(HasLoopUsesOutsideReductionChain), + HasUsesOutsideReductionChain(HasUsesOutsideReductionChain), VFScaleFactor(VFScaleFactor) { assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop"); } @@ -2438,7 +2438,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, auto *R = new VPReductionPHIRecipe( dyn_cast_or_null(getUnderlyingValue()), getRecurrenceKind(), *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor, - HasLoopUsesOutsideReductionChain); + HasUsesOutsideReductionChain); R->addOperand(getBackedgeValue()); return R; } @@ -2466,8 +2466,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, bool isInLoop() const { return IsInLoop; } /// Returns true, if the phi is part of a multi-use reduction. - bool hasLoopUsesOutsideReductionChain() const { - return HasLoopUsesOutsideReductionChain; + bool hasUsesOutsideReductionChain() const { + return HasUsesOutsideReductionChain; } /// Returns true if the recipe only uses the first lane of operand \p Op. diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 9bc2e9216690c..18c43bb79081c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -1011,7 +1011,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) { auto *MinMaxPhiR = dyn_cast(&PhiR); // TODO: check for multi-uses in VPlan directly. - if (!MinMaxPhiR || !MinMaxPhiR->hasLoopUsesOutsideReductionChain()) + if (!MinMaxPhiR || !MinMaxPhiR->hasUsesOutsideReductionChain()) continue; // MinMaxPhiR has users outside the reduction cycle in the loop. Check if @@ -1040,12 +1040,9 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { isa( cast(MinMaxOp)->getUnderlyingValue()))) && "MinMaxOp must be a wide or scalar intrinsic"); - VPValue *MinMaxOpA = MinMaxOp->getOperand(0); - VPValue *MinMaxOpB = MinMaxOp->getOperand(1); - if (MinMaxOpA != MinMaxPhiR) - std::swap(MinMaxOpA, MinMaxOpB); - assert(MinMaxOpA == MinMaxPhiR && - "one of MinMaxOp's operands must be the phi"); + VPValue *MinMaxOpValue = MinMaxOp->getOperand(0); + if (MinMaxOpValue == MinMaxPhiR) + MinMaxOpValue= MinMaxOp->getOperand(1); VPValue *CmpOpA; VPValue *CmpOpB; @@ -1053,7 +1050,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { auto *Cmp = dyn_cast_or_null(findUserOf( MinMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB)))); if (!Cmp || Cmp->getNumUsers() != 1 || - (CmpOpA != MinMaxOpB && CmpOpB != MinMaxOpB)) + (CmpOpA != MinMaxOpValue&& CmpOpB != MinMaxOpValue)) return false; // MinMaxPhiR must have exactly 3 users: @@ -1067,13 +1064,9 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { findUserOf(MinMaxPhiR); assert(is_contained(MinMaxPhiR->users(), MinMaxOp) && "one user must be MinMaxOp"); - assert(is_contained(MinMaxPhiR->users(), Cmp) && "one user must be Cmp"); - assert(is_contained(MinMaxPhiR->users(), MinMaxResult) && - "one user must be MinMaxResult"); - assert(is_contained(MinMaxOp->users(), MinMaxPhiR) && - "one user must be MinMaxPhiR"); + assert(MinMaxResult && "MinMaxResult must be a user of MinMaxPhiR"); assert(is_contained(MinMaxOp->users(), MinMaxResult) && - "one user must be MinMaxResult"); + "MinMaxResult must be a user of MinMaxOp (and of MinMaxPhiR"); // TODO: Strict predicates need to find the first IV value for which the // predicate holds, not the last. From a073c9bf39ea894705818acda1c9f13ceacacf10 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 27 Nov 2025 10:12:05 +0000 Subject: [PATCH 16/20] !fixup --- .../Vectorize/VPlanConstruction.cpp | 38 +++-- .../LoopVectorize/AArch64/select-index.ll | 8 +- .../select-index-interleaving.ll | 8 +- .../LoopVectorize/select-smin-last-index.ll | 143 +++++++----------- .../LoopVectorize/select-umin-first-index.ll | 10 +- .../LoopVectorize/select-umin-last-index.ll | 109 ++++++++----- 6 files changed, 168 insertions(+), 148 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 18c43bb79081c..df00c0aab247d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -1068,19 +1068,39 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { assert(is_contained(MinMaxOp->users(), MinMaxResult) && "MinMaxResult must be a user of MinMaxOp (and of MinMaxPhiR"); - // TODO: Strict predicates need to find the first IV value for which the - // predicate holds, not the last. - if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE || - ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred)) - return false; - // Cmp must be used by the select of a FindLastIV chain. VPValue *Sel = dyn_cast(Cmp->getSingleUser()); VPValue *IVOp, *FindIV; - if (!Sel || + if (!Sel || Sel->getNumUsers() != 2 || !match(Sel, - m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) || - Sel->getNumUsers() != 2) + m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV)))) + return false; + + if (!isa(FindIV)) { + std::swap(FindIV, IVOp); + Pred = CmpInst::getInversePredicate(Pred); + } + if (MinMaxOpValue != CmpOpB) + Pred = CmpInst::getSwappedPredicate(Pred); + + CmpInst::Predicate RdxPredicate = [RdxKind]() { + switch (RdxKind) { + case RecurKind::UMin: + return CmpInst::ICMP_UGE; + case RecurKind::UMax: + return CmpInst::ICMP_ULE; + case RecurKind::SMax: + return CmpInst::ICMP_SLE; + case RecurKind::SMin: + return CmpInst::ICMP_SGE; + default: + llvm_unreachable("unhandled recurrence kind"); + } + }(); + + // TODO: Strict predicates need to find the first IV value for which the + // predicate holds, not the last. + if (Pred != RdxPredicate) return false; auto *FindIVPhiR = dyn_cast(FindIV); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll index fea09221a4a80..d7147e17731be 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll @@ -62,7 +62,7 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp uge <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] @@ -193,7 +193,7 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] @@ -324,7 +324,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] @@ -455,7 +455,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sle <2 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll index 48ea761f7365d..1638360d08e99 100644 --- a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll @@ -62,7 +62,7 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 50), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp uge <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] @@ -193,7 +193,7 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] @@ -324,7 +324,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] @@ -455,7 +455,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sle <4 x i64> [[VEC_PHI2]], [[WIDE_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll index b133d4e5843d4..5b94fe1356035 100644 --- a/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-smin-last-index.ll @@ -14,45 +14,45 @@ define i64 @test_vectorize_select_smin_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP3]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP7]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: @@ -84,46 +84,11 @@ define i64 @test_vectorize_select_smin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-LABEL: define i64 @test_vectorize_select_smin_idx_cond_flipped( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[L]], [[MIN_VAL]] @@ -131,9 +96,9 @@ define i64 @test_vectorize_select_smin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -298,47 +263,47 @@ define i64 @test_vectorize_select_smin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP2]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[WIDE_LOAD]], <4 x i64> [[VEC_PHI1]]) +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP3]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP7]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[MIN_VAL]], [[L]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.smin.i64(i64 [[L]], i64 [[MIN_VAL]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll index 3011fe3e73930..283dc075a9aee 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-first-index.ll @@ -93,15 +93,15 @@ define i64 @test_vectorize_select_umin_idx_select_ops_flipped(ptr %src, i64 %n) ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[MIN_IDX]], i64 [[IV]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[MIN_IDX]], i64 [[IV1]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll index dd23634f73402..407a27054d764 100644 --- a/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-umin-last-index.ll @@ -91,45 +91,45 @@ define i64 @test_vectorize_select_umin_idx_cond_flipped(ptr %src, i64 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 130), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] -; CHECK-NEXT: [[TMP3]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 130), %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] +; CHECK-NEXT: [[TMP2]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP3]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP4]], <4 x i64> splat (i64 -9223372036854775808) -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP8]]) -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP3]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP7]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 130, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 130, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: @@ -161,21 +161,56 @@ define i64 @test_vectorize_select_umin_idx_select_ops_flipped(ptr %src, i64 %n) ; CHECK-LABEL: define i64 @test_vectorize_select_umin_idx_select_ops_flipped( ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 120), %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[WIDE_LOAD]], [[VEC_PHI1]] +; CHECK-NEXT: [[TMP2]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP2]]) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP3]], <4 x i64> splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP7]]) +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 120, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 120, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[L]], [[MIN_VAL]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], %[[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[L]], [[MIN_VAL]] ; CHECK-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) -; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[MIN_IDX]], i64 [[IV1]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[MIN_IDX]], i64 [[IV]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] ; entry: @@ -187,7 +222,7 @@ loop: %min.val = phi i64 [ 120, %entry ], [ %min.val.next, %loop ] %gep = getelementptr i64, ptr %src, i64 %iv %l = load i64, ptr %gep - %cmp = icmp ule i64 %l, %min.val + %cmp = icmp ugt i64 %l, %min.val %min.val.next = tail call i64 @llvm.umin.i64(i64 %min.val, i64 %l) %min.idx.next = select i1 %cmp, i64 %min.idx, i64 %iv %iv.next = add nuw nsw i64 %iv, 1 @@ -310,7 +345,7 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP2]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i64 0 @@ -338,7 +373,7 @@ define i64 @test_vectorize_select_umin_idx_min_ops_switched(ptr %src, i64 %n) { ; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV1]], i64 [[MIN_IDX]] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[RES]] From 06713710a49d7d91981795aa2468a7e6ad5577df Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 27 Nov 2025 11:29:05 +0000 Subject: [PATCH 17/20] Step --- .../Vectorize/VPlanConstruction.cpp | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index df00c0aab247d..30dada4497184 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #define DEBUG_TYPE "vplan" @@ -1030,16 +1031,26 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { if (!MinMaxOp) return false; + // Check that MinMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe + // with an intrinsic that matches the reduction kind. + Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind); + auto *RepR = dyn_cast(MinMaxOp); + auto *WideR = dyn_cast(MinMaxOp); + + if (WideR && WideR->getVectorIntrinsicID() != ExpectedIntrinsicID) + return false; + if (RepR) { + auto *II = dyn_cast(RepR->getUnderlyingInstr()); + if (!II || II->getIntrinsicID() != ExpectedIntrinsicID) + return false; + } + if (!WideR && !RepR) + return false; + // MinMaxOp must have 2 users: 1) MinMaxPhiR and 2) ComputeReductionResult // (asserted below). assert(MinMaxOp->getNumUsers() == 2 && "MinMaxOp must have exactly 2 users"); - - assert((isa(MinMaxOp) || - (isa(MinMaxOp) && - isa( - cast(MinMaxOp)->getUnderlyingValue()))) && - "MinMaxOp must be a wide or scalar intrinsic"); VPValue *MinMaxOpValue = MinMaxOp->getOperand(0); if (MinMaxOpValue == MinMaxPhiR) MinMaxOpValue= MinMaxOp->getOperand(1); From 450d6a0dc391ce85e96ada195da2707026ba8ad1 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 27 Nov 2025 12:22:06 +0000 Subject: [PATCH 18/20] [VPlan] Use m_Intrinsic to match assumes/noalias_scope_decl (NFC). --- llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 5 +++-- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 9 ++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index ee2a2a1de761e..ea38a8b16ebc7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -11,6 +11,7 @@ #include "VPlanCFG.h" #include "VPlanDominatorTree.h" #include "VPlanHelpers.h" +#include "VPlanPatternMatch.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -19,6 +20,7 @@ #include "llvm/IR/PatternMatch.h" using namespace llvm; +using namespace VPlanPatternMatch; #define DEBUG_TYPE "vplan" @@ -327,8 +329,7 @@ void llvm::collectEphemeralRecipesForVPlan( vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) { for (VPRecipeBase &R : *VPBB) { auto *RepR = dyn_cast(&R); - if (!RepR || !match(RepR->getUnderlyingInstr(), - PatternMatch::m_Intrinsic())) + if (!RepR || !match(RepR, m_Intrinsic())) continue; Worklist.push_back(RepR); EphRecipes.insert(RepR); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index c497c5925c277..f215476b1e163 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -275,11 +275,10 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { remapOperands(&R, UF - 1); return; } - if (auto *II = dyn_cast(RepR->getUnderlyingValue())) { - if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) { - addUniformForAllParts(RepR); - return; - } + if (match(RepR, + m_Intrinsic())) { + addUniformForAllParts(RepR); + return; } } From cb25d0c35e6bc624f9308bf6508568c45fb1575d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 27 Nov 2025 12:23:11 +0000 Subject: [PATCH 19/20] [VPlan] Add matcher --- .../Vectorize/VPlanConstruction.cpp | 20 +++++-------------- .../Transforms/Vectorize/VPlanPatternMatch.h | 5 +++++ 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 30dada4497184..7015400cce828 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -849,23 +849,13 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { if (!MinMaxR) return nullptr; - auto *RepR = dyn_cast(MinMaxR); - if (!isa(MinMaxR) && - !(RepR && isa(RepR->getUnderlyingInstr()))) + // Check that MinMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe + // with an intrinsic that matches the reduction kind. + Intrinsic::ID ExpectedIntrinsicID = + getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind()); + if (!match(MinMaxR, m_Intrinsic(ExpectedIntrinsicID))) return nullptr; -#ifndef NDEBUG - Intrinsic::ID RdxIntrinsicId = - RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum - : Intrinsic::minnum; - assert(((isa(MinMaxR) && - cast(MinMaxR)->getVectorIntrinsicID() == - RdxIntrinsicId) || - (RepR && cast(RepR->getUnderlyingInstr()) - ->getIntrinsicID() == RdxIntrinsicId)) && - "Intrinsic did not match recurrence kind"); -#endif - if (MinMaxR->getOperand(0) == RedPhiR) return MinMaxR->getOperand(1); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 79275e7bffdd0..07dfe31eea46d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -856,6 +856,11 @@ template inline IntrinsicID_match m_Intrinsic() { return IntrinsicID_match(IntrID); } +/// Match intrinsic calls with a runtime intrinsic ID. +inline IntrinsicID_match m_Intrinsic(Intrinsic::ID IntrID) { + return IntrinsicID_match(IntrID); +} + template inline typename m_Intrinsic_Ty::Ty m_Intrinsic(const T0 &Op0) { return m_CombineAnd(m_Intrinsic(), m_Argument<0>(Op0)); From 7d349741abd97fa28310c18fb9ff5db97a46e3e4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 27 Nov 2025 18:15:09 +0000 Subject: [PATCH 20/20] Fix crash --- .../Transforms/Vectorize/VPlanConstruction.cpp | 17 ++++------------- .../Transforms/Vectorize/VPlanPatternMatch.h | 6 ++++++ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 7015400cce828..b23ddb56158aa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -1024,17 +1024,7 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { // Check that MinMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe // with an intrinsic that matches the reduction kind. Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind); - auto *RepR = dyn_cast(MinMaxOp); - auto *WideR = dyn_cast(MinMaxOp); - - if (WideR && WideR->getVectorIntrinsicID() != ExpectedIntrinsicID) - return false; - if (RepR) { - auto *II = dyn_cast(RepR->getUnderlyingInstr()); - if (!II || II->getIntrinsicID() != ExpectedIntrinsicID) - return false; - } - if (!WideR && !RepR) + if (!match(MinMaxOp, m_Intrinsic(ExpectedIntrinsicID))) return false; // MinMaxOp must have 2 users: 1) MinMaxPhiR and 2) ComputeReductionResult @@ -1081,6 +1071,9 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { std::swap(FindIV, IVOp); Pred = CmpInst::getInversePredicate(Pred); } + if (match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) && !isa(IVOp)) + return false; + if (MinMaxOpValue != CmpOpB) Pred = CmpInst::getSwappedPredicate(Pred); @@ -1109,8 +1102,6 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) { FindIVPhiR->getRecurrenceKind())) return false; - assert(isa(IVOp) && - "IVOp must be a wide induction"); assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() && "cannot handle inloop/ordered reductions yet"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index 07dfe31eea46d..750ef8edd94bb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -468,6 +468,12 @@ inline AllRecipe_match m_Trunc(const Op0_t &Op0) { return m_Unary(Op0); } +template +inline match_combine_or, Op0_t> +m_TruncOrSelf(const Op0_t &Op0) { + return m_CombineOr(m_Trunc(Op0), Op0); +} + template inline AllRecipe_match m_ZExt(const Op0_t &Op0) { return m_Unary(Op0);