diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 765c613b04a44..86eb78dc70372 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -163,6 +163,9 @@ LLVM_ABI bool computeUnrollCount( TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound); +LLVM_ABI std::optional +canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L, + ScalarEvolution *SE); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 8be5de3bf356f..fd1c40fd28368 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -268,10 +268,12 @@ bool RecurrenceDescriptor::AddReductionVar( // resulting from the type promotion performed by InstCombine. Vector // operations are not limited to the legal integer widths, so we may be able // to evaluate the reduction in the narrower width. - if (RecurrenceType->isFloatingPointTy()) { + // Check the scalar type to handle both scalar and vector types. + Type *ScalarTy = RecurrenceType->getScalarType(); + if (ScalarTy->isFloatingPointTy()) { if (!isFloatingPointRecurrenceKind(Kind)) return false; - } else if (RecurrenceType->isIntegerTy()) { + } else if (ScalarTy->isIntegerTy()) { if (!isIntegerRecurrenceKind(Kind)) return false; if (!isMinMaxRecurrenceKind(Kind)) diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 86b268de43cf6..1a0b0082faf29 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -108,6 +109,9 @@ UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden, #endif ); +static cl::opt UnrollAddParallelReductions( + "unroll-add-parallel-reductions", cl::init(false), cl::Hidden, + cl::desc("Allow unrolling to add parallel reduction phis.")); /// Check if unrolling created a situation where we need to insert phi nodes to /// preserve LCSSA form. @@ -660,6 +664,39 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, OrigPHINode.push_back(cast(I)); } + // Collect phi nodes for reductions for which we can introduce multiple + // parallel reduction phis and compute the final reduction result after the + // loop. This requires a single exit block after unrolling. This is ensured by + // restricting to single-block loops where the unrolled iterations are known + // to not exit. + DenseMap Reductions; + bool CanAddAdditionalAccumulators = + UnrollAddParallelReductions && !CompletelyUnroll && + L->getNumBlocks() == 1 && + (ULO.Runtime || + (ExitInfos.contains(Header) && ((ExitInfos[Header].TripCount != 0 && + ExitInfos[Header].BreakoutTrip == 0)))); + + // Limit parallelizing reductions to unroll counts of 4 or less for now. + // TODO: The number of parallel reductions should depend on the number of + // execution units. We also don't have to add a parallel reduction phi per + // unrolled iteration, but could for example add a parallel phi for every 2 + // unrolled iterations. + if (CanAddAdditionalAccumulators && ULO.Count <= 4) { + for (PHINode &Phi : Header->phis()) { + auto RdxDesc = canParallelizeReductionWhenUnrolling(Phi, L, SE); + if (!RdxDesc) + continue; + + // Only handle duplicate phis for a single reduction for now. + // TODO: Handle any number of reductions + if (!Reductions.empty()) + continue; + + Reductions[&Phi] = *RdxDesc; + } + } + std::vector Headers; std::vector Latches; Headers.push_back(Header); @@ -710,6 +747,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // latch. This is a reasonable default placement if we don't have block // frequencies, and if we do, well the layout will be adjusted later. auto BlockInsertPt = std::next(LatchBlock->getIterator()); + SmallVector PartialReductions; for (unsigned It = 1; It != ULO.Count; ++It) { SmallVector NewBlocks; SmallDenseMap NewLoops; @@ -733,6 +771,31 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, for (PHINode *OrigPHI : OrigPHINode) { PHINode *NewPHI = cast(VMap[OrigPHI]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); + + // Use cloned phis as parallel phis for partial reductions, which will + // get combined to the final reduction result after the loop. + if (Reductions.contains(OrigPHI)) { + // Collect partial reduction results. + if (PartialReductions.empty()) + PartialReductions.push_back(cast(InVal)); + PartialReductions.push_back(cast(VMap[InVal])); + + // Update the start value for the cloned phis to use the identity + // value for the reduction. + const RecurrenceDescriptor &RdxDesc = Reductions[OrigPHI]; + NewPHI->setIncomingValueForBlock( + L->getLoopPreheader(), + getRecurrenceIdentity(RdxDesc.getRecurrenceKind(), + OrigPHI->getType(), + RdxDesc.getFastMathFlags())); + + // Update NewPHI to use the cloned value for the iteration and move + // to header. + NewPHI->replaceUsesOfWith(InVal, VMap[InVal]); + NewPHI->moveBefore(OrigPHI->getIterator()); + continue; + } + if (Instruction *InValI = dyn_cast(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; @@ -832,6 +895,9 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader)); PN->eraseFromParent(); } else if (ULO.Count > 1) { + if (Reductions.contains(PN)) + continue; + Value *InVal = PN->removeIncomingValue(LatchBlock, false); // If this value was defined in the loop, take the value defined by the // last iteration of the loop. @@ -1010,6 +1076,39 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } + // If there are partial reductions, create code in the exit block to compute + // the final result and update users of the final result. + if (!PartialReductions.empty()) { + BasicBlock *ExitBlock = L->getExitBlock(); + assert(ExitBlock && + "Can only introduce parallel reduction phis with single exit block"); + assert(Reductions.size() == 1 && + "currently only a single reduction is supported"); + Value *FinalRdxValue = PartialReductions.back(); + Value *RdxResult = nullptr; + for (PHINode &Phi : ExitBlock->phis()) { + if (Phi.getIncomingValueForBlock(L->getLoopLatch()) != FinalRdxValue) + continue; + if (!RdxResult) { + RdxResult = PartialReductions.front(); + IRBuilder Builder(ExitBlock, ExitBlock->getFirstNonPHIIt()); + Builder.setFastMathFlags(Reductions.begin()->second.getFastMathFlags()); + RecurKind RK = Reductions.begin()->second.getRecurrenceKind(); + for (Instruction *RdxPart : drop_begin(PartialReductions)) { + RdxResult = Builder.CreateBinOp( + (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK), + RdxPart, RdxResult, "bin.rdx"); + } + NeedToFixLCSSA = true; + for (Instruction *RdxPart : PartialReductions) + RdxPart->dropPoisonGeneratingFlags(); + } + + Phi.replaceAllUsesWith(RdxResult); + continue; + } + } + if (DTUToUse) { // Apply updates to the DomTree. DT = &DTU.getDomTree(); @@ -1111,3 +1210,42 @@ MDNode *llvm::GetUnrollMetadata(MDNode *LoopID, StringRef Name) { } return nullptr; } + +std::optional +llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L, + ScalarEvolution *SE) { + RecurrenceDescriptor RdxDesc; + if (!RecurrenceDescriptor::isReductionPHI(&Phi, L, RdxDesc, + /*DemandedBits=*/nullptr, + /*AC=*/nullptr, /*DT=*/nullptr, SE)) + return std::nullopt; + RecurKind RK = RdxDesc.getRecurrenceKind(); + // Skip unsupported reductions. + // TODO: Handle additional reductions, including min-max reductions. + if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || + RecurrenceDescriptor::isFindIVRecurrenceKind(RK) || + RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) + return std::nullopt; + + if (RdxDesc.hasExactFPMath()) + return std::nullopt; + + if (RdxDesc.IntermediateStore) + return std::nullopt; + + // Don't unroll reductions with constant ops; those can be folded to a + // single induction update. + if (any_of(cast(Phi.getIncomingValueForBlock(L->getLoopLatch())) + ->operands(), + IsaPred)) + return std::nullopt; + + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch || + !is_contained( + cast(Phi.getIncomingValueForBlock(Latch))->operands(), + &Phi)) + return std::nullopt; + + return RdxDesc; +} diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll index 1a091e847ca34..0b78beea54aa9 100644 --- a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll @@ -578,8 +578,323 @@ loop.latch: exit: ret void } + +define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) { +; APPLE-LABEL: define i32 @test_add_reduction_unroll_partial( +; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; APPLE-NEXT: [[ENTRY:.*]]: +; APPLE-NEXT: br label %[[LOOP:.*]] +; APPLE: [[LOOP]]: +; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE: [[EXIT]]: +; APPLE-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; APPLE-NEXT: ret i32 [[BIN_RDX2]] +; +; OTHER-LABEL: define i32 @test_add_reduction_unroll_partial( +; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; OTHER-NEXT: [[ENTRY:.*]]: +; OTHER-NEXT: br label %[[LOOP:.*]] +; OTHER: [[LOOP]]: +; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] +; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2 +; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]] +; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]] +; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2 +; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP2]] +; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]] +; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2 +; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP3]] +; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; OTHER-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024 +; OTHER-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]] +; OTHER: [[EXIT]]: +; OTHER-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; OTHER-NEXT: ret i32 [[BIN_RDX2]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv + %1 = load i32, ptr %gep.a, align 2 + %rdx.next = add nuw nsw i32 %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1024 + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ %rdx.next, %loop ] + ret i32 %res +} + +declare i1 @cond() + +define i32 @test_add_reduction_multi_block(ptr %a, i64 noundef %n) { +; APPLE-LABEL: define i32 @test_add_reduction_multi_block( +; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; APPLE-NEXT: [[ENTRY:.*]]: +; APPLE-NEXT: br label %[[LOOP:.*]] +; APPLE: [[LOOP]]: +; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP_LATCH]] ] +; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; APPLE-NEXT: [[C:%.*]] = call i1 @cond() +; APPLE-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; APPLE: [[THEN]]: +; APPLE-NEXT: store i32 0, ptr [[GEP_A]], align 4 +; APPLE-NEXT: br label %[[LOOP_LATCH]] +; APPLE: [[LOOP_LATCH]]: +; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE: [[EXIT]]: +; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ] +; APPLE-NEXT: ret i32 [[RES]] +; +; OTHER-LABEL: define i32 @test_add_reduction_multi_block( +; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; OTHER-NEXT: [[ENTRY:.*]]: +; OTHER-NEXT: br label %[[LOOP:.*]] +; OTHER: [[LOOP]]: +; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP_LATCH]] ] +; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; OTHER-NEXT: [[C:%.*]] = call i1 @cond() +; OTHER-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; OTHER: [[THEN]]: +; OTHER-NEXT: store i32 0, ptr [[GEP_A]], align 4 +; OTHER-NEXT: br label %[[LOOP_LATCH]] +; OTHER: [[LOOP_LATCH]]: +; OTHER-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; OTHER-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; OTHER-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; OTHER-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; OTHER: [[EXIT]]: +; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ] +; OTHER-NEXT: ret i32 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop.latch ] + %gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv + %1 = load i32, ptr %gep.a, align 2 + %c = call i1 @cond() + br i1 %c, label %then, label %loop.latch + +then: + store i32 0, ptr %gep.a + br label %loop.latch + +loop.latch: + %rdx.next = add nuw nsw i32 %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1024 + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ %rdx.next, %loop.latch ] + ret i32 %res +} + +define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) { +; APPLE-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial( +; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; APPLE-NEXT: [[ENTRY:.*]]: +; APPLE-NEXT: br label %[[LOOP:.*]] +; APPLE: [[LOOP]]: +; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; APPLE-NEXT: [[RDX_2_NEXT]] = mul i32 [[RDX_2]], [[TMP0]] +; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE: [[EXIT]]: +; APPLE-NEXT: [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; APPLE-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT]], %[[LOOP]] ] +; APPLE-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX3]], [[RES_2]] +; APPLE-NEXT: ret i32 [[SUM]] +; +; OTHER-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial( +; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; OTHER-NEXT: [[ENTRY:.*]]: +; OTHER-NEXT: br label %[[LOOP:.*]] +; OTHER: [[LOOP]]: +; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_1:%.*]], %[[LOOP]] ] +; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2 +; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]] +; OTHER-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]] +; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] +; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2 +; OTHER-NEXT: [[RDX_NEXT_1]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]] +; OTHER-NEXT: [[RDX_2_NEXT_1]] = mul i32 [[RDX_2_NEXT]], [[TMP1]] +; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; OTHER-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_NEXT_1]], 1024 +; OTHER-NEXT: br i1 [[EC_1]], label %[[EXIT:.*]], label %[[LOOP]] +; OTHER: [[EXIT]]: +; OTHER-NEXT: [[BIN_RDX:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ] +; OTHER-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT_1]], %[[LOOP]] ] +; OTHER-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX]], [[RES_2]] +; OTHER-NEXT: ret i32 [[SUM]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %rdx.2 = phi i32 [ 0, %entry ], [ %rdx.2.next, %loop ] + %gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv + %1 = load i32, ptr %gep.a, align 2 + %rdx.next = add nuw nsw i32 %rdx, %1 + %rdx.2.next = mul i32 %rdx.2, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1024 + br i1 %ec, label %exit, label %loop + +exit: + %res.1 = phi i32 [ %rdx.next, %loop ] + %res.2 = phi i32 [ %rdx.2.next, %loop ] + %sum = add i32 %res.1, %res.2 + ret i32 %sum +} + + +define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) { +; APPLE-LABEL: define i32 @test_add_reduction_runtime( +; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; APPLE-NEXT: [[ENTRY:.*]]: +; APPLE-NEXT: br label %[[LOOP:.*]] +; APPLE: [[LOOP]]: +; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP]] ] +; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]] +; APPLE-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2 +; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]] +; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1 +; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]] +; APPLE-NEXT: br i1 [[EC_EPIL]], label %[[EXIT:.*]], label %[[LOOP]] +; APPLE: [[EXIT]]: +; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP]] ] +; APPLE-NEXT: ret i32 [[RES]] +; +; OTHER-LABEL: define i32 @test_add_reduction_runtime( +; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +; OTHER-NEXT: [[ENTRY:.*]]: +; OTHER-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; OTHER-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 3 +; OTHER-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3 +; OTHER-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; OTHER: [[ENTRY_NEW]]: +; OTHER-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; OTHER-NEXT: br label %[[LOOP:.*]] +; OTHER: [[LOOP]]: +; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ] +; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2 +; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP2]] +; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] +; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2 +; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP3]] +; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]] +; OTHER-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2 +; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP4]] +; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]] +; OTHER-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2 +; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP5]] +; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; OTHER-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4 +; OTHER-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]] +; OTHER-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]] +; OTHER: [[EXIT_UNR_LCSSA_LOOPEXIT]]: +; OTHER-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; OTHER-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ] +; OTHER-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; OTHER-NEXT: br label %[[EXIT_UNR_LCSSA]] +; OTHER: [[EXIT_UNR_LCSSA]]: +; OTHER-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; OTHER-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]] +; OTHER: [[LOOP_EPIL_PREHEADER]]: +; OTHER-NEXT: br label %[[LOOP_EPIL:.*]] +; OTHER: [[LOOP_EPIL]]: +; OTHER-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ] +; OTHER-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ] +; OTHER-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ] +; OTHER-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]] +; OTHER-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2 +; OTHER-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]] +; OTHER-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1 +; OTHER-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]] +; OTHER-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 +; OTHER-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] +; OTHER-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]] +; OTHER: [[EXIT_EPILOG_LCSSA]]: +; OTHER-NEXT: [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; OTHER-NEXT: br label %[[EXIT]] +; OTHER: [[EXIT]]: +; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ] +; OTHER-NEXT: ret i32 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv + %1 = load i32, ptr %gep.a, align 2 + %rdx.next = add nuw nsw i32 %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i32 [ %rdx.next, %loop ] + ret i32 %res +} ;. ; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} ; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"} ; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} ;. +; OTHER: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} +; OTHER: [[META1]] = !{!"llvm.loop.unroll.disable"} +;. diff --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll new file mode 100644 index 0000000000000..e94a368d3ded0 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll @@ -0,0 +1,685 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s + +define i32 @test_add(ptr %src, i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_24:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_1]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_NEXT_1]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 +; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_3]], [[L_24]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA1:%.*]] = phi i32 [ [[RDX_NEXT_24]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_3]], [[RDX_NEXT]] +; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = add i32 [[RDX_NEXT_24]], [[BIN_RDX1]] +; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr i32, ptr %src, i64 %iv + %l = load i32 , ptr %gep.src, align 1 + %rdx.next = add i32 %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx.next +} + +define i32 @test_add_tc_not_multiple_of_4(ptr %src, i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add_tc_not_multiple_of_4( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP_1:.*]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP_1]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]] +; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 1001 +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_1]], label %[[EXIT:.*]] +; CHECK: [[LOOP_1]]: +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_12:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_12:%.*]] = load i32, ptr [[GEP_SRC_12]], align 1 +; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_12]] +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr i32, ptr %src, i64 %iv + %l = load i32 , ptr %gep.src, align 1 + %rdx.next = add i32 %rdx, %l + %ec = icmp ne i64 %iv.next, 1001 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx.next +} + +define i32 @test_add_rdx_used_in_loop(ptr %src, i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add_rdx_used_in_loop( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_24:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]] +; CHECK-NEXT: store i32 [[RDX_NEXT]], ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]] +; CHECK-NEXT: store i32 [[RDX_NEXT_1]], ptr [[GEP_SRC_1]], align 4 +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]] +; CHECK-NEXT: store i32 [[RDX_NEXT_2]], ptr [[GEP_SRC_2]], align 4 +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 +; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_NEXT_2]], [[L_24]] +; CHECK-NEXT: store i32 [[RDX_NEXT_24]], ptr [[GEP_SRC_24]], align 4 +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_24]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr i32, ptr %src, i64 %iv + %l = load i32 , ptr %gep.src, align 1 + %rdx.next = add i32 %rdx, %l + store i32 %rdx.next, ptr %gep.src + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx.next +} + +define i32 @test_add_phi_used_outside_loop(ptr %src, i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add_phi_used_outside_loop( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 +; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_24]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_2]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[RDX_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr i32, ptr %src, i64 %iv + %l = load i32 , ptr %gep.src, align 1 + %rdx.next = add i32 %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx +} + +define i32 @test_add_and_mul_reduction(ptr %src, i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add_and_mul_reduction( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_1_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_1_NEXT_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_1_NEXT_24:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_1_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_2:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_2_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[RDX_1_NEXT]] = add i32 [[RDX_1]], [[L]] +; CHECK-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[L]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[RDX_1_NEXT_1]] = add i32 [[RDX_1_1]], [[L_1]] +; CHECK-NEXT: [[RDX_2_2:%.*]] = mul i32 [[RDX_2_NEXT]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[RDX_1_NEXT_2]] = add i32 [[RDX_1_2]], [[L_2]] +; CHECK-NEXT: [[RDX_2_NEXT_2:%.*]] = mul i32 [[RDX_2_2]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 +; CHECK-NEXT: [[RDX_1_NEXT_24]] = add i32 [[RDX_1_3]], [[L_24]] +; CHECK-NEXT: [[RDX_2_NEXT_3]] = mul i32 [[RDX_2_NEXT_2]], [[L_24]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_1_NEXT_LCSSA1:%.*]] = phi i32 [ [[RDX_1_NEXT_24]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX5:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_1_NEXT_1]], [[RDX_1_NEXT]] +; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_1_NEXT_2]], [[BIN_RDX]] +; CHECK-NEXT: [[RDX_1_NEXT_LCSSA:%.*]] = add i32 [[RDX_1_NEXT_24]], [[BIN_RDX1]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[RDX_1_NEXT_LCSSA]], [[BIN_RDX5]] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx.1 = phi i32 [ %start, %entry ], [ %rdx.1.next, %loop ] + %rdx.2 = phi i32 [ %start, %entry ], [ %rdx.2.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr i32, ptr %src, i64 %iv + %l = load i32 , ptr %gep.src, align 1 + %rdx.1.next = add i32 %rdx.1, %l + %rdx.2.next = mul i32 %rdx.2, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + %res = add i32 %rdx.1.next, %rdx.2.next + ret i32 %res +} + +define float @test_fadd_no_fmfs(ptr %src, i64 %n, float %start) { +; CHECK-LABEL: define float @test_fadd_no_fmfs( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], float [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[L]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = fadd float [[RDX_NEXT]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = fadd float [[RDX_NEXT_1]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1 +; CHECK-NEXT: [[RDX_NEXT_3]] = fadd float [[RDX_NEXT_2]], [[L_24]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: ret float [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi float [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep.src, align 1 + %rdx.next = fadd float %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret float %rdx.next +} + +define float @test_fadd_with_ressaoc(ptr %src, i64 %n, float %start) { +; CHECK-LABEL: define float @test_fadd_with_ressaoc( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], float [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_2:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_3:%.*]] = phi float [ -0.000000e+00, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[RDX_NEXT]] = fadd reassoc float [[RDX]], [[L]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[RDX_NEXT_1]] = fadd reassoc float [[RDX_1]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[RDX_NEXT_2]] = fadd reassoc float [[RDX_2]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1 +; CHECK-NEXT: [[RDX_NEXT_3]] = fadd reassoc float [[RDX_3]], [[L_24]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA1:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: [[BIN_RDX1:%.*]] = fadd reassoc float [[RDX_NEXT_2]], [[BIN_RDX]] +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = fadd reassoc float [[RDX_NEXT_3]], [[BIN_RDX1]] +; CHECK-NEXT: ret float [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi float [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr float, ptr %src, i64 %iv + %l = load float, ptr %gep.src, align 1 + %rdx.next = fadd reassoc float %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret float %rdx.next +} + +define i32 @test_smin(ptr %src, i64 %n) { +; CHECK-LABEL: define i32 @test_smin( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MIN:%.*]] = phi i32 [ 1000, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = call i32 @llvm.smin.i32(i32 [[MIN]], i32 [[L]]) +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT]], i32 [[L_1]]) +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT_1]], i32 [[L_2]]) +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1 +; CHECK-NEXT: [[RDX_NEXT_3]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT_2]], i32 [[L_24]]) +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %min = phi i32 [ 1000, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr i32, ptr %src, i64 %iv + %l = load i32 , ptr %gep.src, align 1 + %rdx.next = call i32 @llvm.smin(i32 %min, i32 %l) + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx.next +} + +define i64 @test_any_of_reduction(ptr %src, i64 %n) { +; CHECK-LABEL: define i64 @test_any_of_reduction( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ANY_OF_RDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = select i1 [[C]], i64 [[ANY_OF_RDX]], i64 0 +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1 +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L_1]], 0 +; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = select i1 [[C_1]], i64 [[RDX_NEXT]], i64 0 +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_SRC_2]], align 1 +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i8 [[L_2]], 0 +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = select i1 [[C_2]], i64 [[RDX_NEXT_1]], i64 0 +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_24:%.*]] = load i8, ptr [[GEP_SRC_24]], align 1 +; CHECK-NEXT: [[C_24:%.*]] = icmp eq i8 [[L_24]], 0 +; CHECK-NEXT: [[RDX_NEXT_3]] = select i1 [[C_24]], i64 [[RDX_NEXT_2]], i64 0 +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i64 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %any.of.rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ] + %iv.next = add i64 %iv, 1 + %gep.src = getelementptr i8, ptr %src, i64 %iv + %l = load i8, ptr %gep.src, align 1 + %c = icmp eq i8 %l, 0 + %rdx.next = select i1 %c, i64 %any.of.rdx, i64 0 + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i64 %rdx.next +} + +define void @reduction_with_intermediate_store(ptr %src, ptr %sum) { +; CHECK-LABEL: define void @reduction_with_intermediate_store( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[SUM:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SUM_PROMOTED:%.*]] = load i32, ptr [[SUM]], align 4 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[SUM_PROMOTED]], %[[ENTRY]] ], [ [[RED_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[RED_NEXT:%.*]] = add nsw i32 [[RED]], [[L]] +; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[SUM]], align 4 +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4 +; CHECK-NEXT: [[RED_NEXT_1:%.*]] = add nsw i32 [[RED_NEXT]], [[L_1]] +; CHECK-NEXT: store i32 [[RED_NEXT_1]], ptr [[SUM]], align 4 +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 +; CHECK-NEXT: [[RED_NEXT_2:%.*]] = add nsw i32 [[RED_NEXT_1]], [[L_2]] +; CHECK-NEXT: store i32 [[RED_NEXT_2]], ptr [[SUM]], align 4 +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_SRC_3]], align 4 +; CHECK-NEXT: [[RED_NEXT_3]] = add nsw i32 [[RED_NEXT_2]], [[L_3]] +; CHECK-NEXT: store i32 [[RED_NEXT_3]], ptr [[SUM]], align 4 +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 10000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %sum.promoted = load i32, ptr %sum, align 4 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi i32 [ %sum.promoted, %entry ], [ %red.next, %loop ] + %gep.src = getelementptr inbounds nuw i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %red.next = add nsw i32 %red, %l + store i32 %red.next, ptr %sum, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 10000 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +declare i32 @foo() + +; Loop with a call cannot be handled by LoopVectorize, introducing additional +; accumulators when unrolling increases throughput. +define i32 @test_add_with_call(i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add_with_call( +; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[L:%.*]] = call i32 @foo() +; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]] +; CHECK-NEXT: [[L_1:%.*]] = call i32 @foo() +; CHECK-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[L_1]] +; CHECK-NEXT: [[L_2:%.*]] = call i32 @foo() +; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[L_3:%.*]] = call i32 @foo() +; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[L_3]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]] +; CHECK-NEXT: ret i32 [[BIN_RDX2]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %l = call i32 @foo() + %rdx.next = add i32 %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx.next +} + +; Loop with backward dependence cannot be handled LoopVectorize, introducing additional +; accumulators when unrolling increases throughput. +define i32 @test_add_with_backward_dep(ptr %p, i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add_with_backward_dep( +; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 4 +; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_11]], align 4 +; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: store i32 0, ptr [[GEP_1_1]], align 4 +; CHECK-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_2]], align 4 +; CHECK-NEXT: [[GEP_1_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: store i32 0, ptr [[GEP_1_2]], align 4 +; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_3]], align 4 +; CHECK-NEXT: [[GEP_1_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_3]] +; CHECK-NEXT: store i32 0, ptr [[GEP_1_3]], align 4 +; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[L_3]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]] +; CHECK-NEXT: ret i32 [[BIN_RDX3]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep = getelementptr inbounds nuw i32, ptr %p, i64 %iv + %l = load i32, ptr %gep + %gep.1 = getelementptr inbounds nuw i32, ptr %p, i64 %iv.next + store i32 0, ptr %gep.1 + %rdx.next = add i32 %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx.next +} + +define <4 x i32> @test_vector_add(ptr %p, i64 %n, <4 x i32> %start) { +; CHECK-LABEL: define <4 x i32> @test_vector_add( +; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], <4 x i32> [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi <4 x i32> [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[P]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load <4 x i32>, ptr [[GEP]], align 16 +; CHECK-NEXT: [[RDX_NEXT]] = add <4 x i32> [[RDX]], [[L]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load <4 x i32>, ptr [[GEP_1]], align 16 +; CHECK-NEXT: [[RDX_NEXT_1]] = add <4 x i32> [[RDX_1]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load <4 x i32>, ptr [[GEP_2]], align 16 +; CHECK-NEXT: [[RDX_NEXT_2]] = add <4 x i32> [[RDX_2]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_3:%.*]] = load <4 x i32>, ptr [[GEP_3]], align 16 +; CHECK-NEXT: [[RDX_NEXT_3]] = add <4 x i32> [[RDX_3]], [[L_3]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi <4 x i32> [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: [[BIN_RDX1:%.*]] = add <4 x i32> [[RDX_NEXT_2]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[RDX_NEXT_3]], [[BIN_RDX1]] +; CHECK-NEXT: ret <4 x i32> [[BIN_RDX2]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi <4 x i32> [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep = getelementptr inbounds nuw <4 x i32>, ptr %p, i64 %iv + %l = load <4 x i32>, ptr %gep, align 16 + %rdx.next = add <4 x i32> %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret <4 x i32> %rdx.next +} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll new file mode 100644 index 0000000000000..ee187adba77bb --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll @@ -0,0 +1,510 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -S %s | FileCheck %s + +define i32 @test_add_reduction(ptr %a, i64 %n) { +; CHECK-LABEL: define i32 @test_add_reduction( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2 +; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2 +; CHECK-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_UNR]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2 +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = add nuw nsw i32 [[RDX_UNR]], [[TMP4]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv + %1 = load i32, ptr %gep.a, align 2 + %rdx.next = add nuw nsw i32 %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi i32 [ %rdx.next, %loop ] + ret i32 %res +} + +define i32 @test_add_reduction_constant_op(ptr %a, i64 %n) { +; CHECK-LABEL: define i32 @test_add_reduction_constant_op( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_NEXT_1]] = add nuw nsw i32 [[RDX]], 2 +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = add nuw nsw i32 [[RDX_UNR]], 1 +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %rdx.next = add nuw nsw i32 %rdx, 1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi i32 [ %rdx.next, %loop ] + ret i32 %res +} + +define i32 @test_add_reduction_8x_unroll(ptr %a, i64 %n) { +; CHECK-LABEL: define i32 @test_add_reduction_8x_unroll( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_7:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2 +; CHECK-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2 +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP4]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2 +; CHECK-NEXT: [[RDX_4:%.*]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP5]] +; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_A_4:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_4]], align 2 +; CHECK-NEXT: [[RDX_NEXT_4:%.*]] = add nuw nsw i32 [[RDX_4]], [[TMP6]] +; CHECK-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5 +; CHECK-NEXT: [[GEP_A_5:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_4]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[GEP_A_5]], align 2 +; CHECK-NEXT: [[RDX_6:%.*]] = add nuw nsw i32 [[RDX_NEXT_4]], [[TMP7]] +; CHECK-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6 +; CHECK-NEXT: [[GEP_A_6:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_5]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[GEP_A_6]], align 2 +; CHECK-NEXT: [[RDX_NEXT_6:%.*]] = add nuw nsw i32 [[RDX_6]], [[TMP8]] +; CHECK-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7 +; CHECK-NEXT: [[GEP_A_7:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_6]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[GEP_A_7]], align 2 +; CHECK-NEXT: [[RDX_NEXT_7]] = add nuw nsw i32 [[RDX_NEXT_6]], [[TMP9]] +; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8 +; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_7]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_7]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_UNR]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2 +; CHECK-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP10]] +; CHECK-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1 +; CHECK-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]] +; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 +; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[EXIT_EPILOG_LCSSA]]: +; CHECK-NEXT: [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv + %1 = load i32, ptr %gep.a, align 2 + %rdx.next = add nuw nsw i32 %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !2 + +exit: + %res = phi i32 [ %rdx.next, %loop ] + ret i32 %res +} + +define <4 x i32> @test_vector_add_reduction(ptr %a, i64 %n) { +; CHECK-LABEL: define <4 x i32> @test_vector_add_reduction( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[GEP_A]], align 16 +; CHECK-NEXT: [[RDX_NEXT]] = add <4 x i32> [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[GEP_A_1]], align 16 +; CHECK-NEXT: [[RDX_NEXT_1]] = add <4 x i32> [[RDX_1]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi <4 x i32> [ poison, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD2]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV_EPIL_INIT]] +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[GEP_A_EPIL]], align 16 +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = add <4 x i32> [[RDX_EPIL_INIT]], [[TMP4]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi <4 x i32> [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret <4 x i32> [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi <4 x i32> [ zeroinitializer, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw <4 x i32>, ptr %a, i64 %iv + %1 = load <4 x i32>, ptr %gep.a, align 16 + %rdx.next = add <4 x i32> %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi <4 x i32> [ %rdx.next, %loop ] + ret <4 x i32> %res +} + +define float @test_fadd_reduction(ptr %a, i64 %n) { +; CHECK-LABEL: define float @test_fadd_reduction( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_1:%.*]] = phi float [ -0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16 +; CHECK-NEXT: [[RDX_NEXT]] = fadd reassoc float [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16 +; CHECK-NEXT: [[RDX_NEXT_1]] = fadd reassoc float [[RDX_1]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[RDX_NEXT]] +; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ poison, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD2]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16 +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd reassoc float [[RDX_EPIL_INIT]], [[TMP4]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret float [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv + %1 = load float, ptr %gep.a, align 16 + %rdx.next = fadd reassoc float %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi float [ %rdx.next, %loop ] + ret float %res +} + +define float @test_fadd_no_reassoc(ptr %a, i64 %n) { +; CHECK-LABEL: define float @test_fadd_no_reassoc( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16 +; CHECK-NEXT: [[RDX_NEXT_1]] = fadd float [[RDX_NEXT]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD2]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16 +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd float [[RDX_EPIL_INIT]], [[TMP4]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret float [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv + %1 = load float, ptr %gep.a, align 16 + %rdx.next = fadd float %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi float [ %rdx.next, %loop ] + ret float %res +} + +define float @test_fadd_other_fastmath(ptr %a, i64 %n) { +; CHECK-LABEL: define float @test_fadd_other_fastmath( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[ENTRY_NEW:.*]] +; CHECK: [[ENTRY_NEW]]: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A]], align 16 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = fadd contract float [[RDX]], [[TMP2]] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[GEP_A_1]], align 16 +; CHECK-NEXT: [[RDX_NEXT_1]] = fadd contract float [[RDX_NEXT]], [[TMP3]] +; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2 +; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[EXIT_UNR_LCSSA_LOOPEXIT]]: +; CHECK-NEXT: [[RES_PH_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX_UNR_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[EXIT_UNR_LCSSA]] +; CHECK: [[EXIT_UNR_LCSSA]]: +; CHECK-NEXT: [[RES_PH:%.*]] = phi float [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD2]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[LOOP_EPIL_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP_EPIL:.*]] +; CHECK: [[LOOP_EPIL]]: +; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_EPIL_INIT]] +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[GEP_A_EPIL]], align 16 +; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = fadd contract float [[RDX_EPIL_INIT]], [[TMP4]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi float [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ] +; CHECK-NEXT: ret float [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi float [ 0.0, %entry ], [ %rdx.next, %loop ] + %gep.a = getelementptr inbounds nuw float, ptr %a, i64 %iv + %1 = load float, ptr %gep.a, align 16 + %rdx.next = fadd contract float %rdx, %1 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, %n + br i1 %ec, label %exit, label %loop, !llvm.loop !0 + +exit: + %res = phi float [ %rdx.next, %loop ] + ret float %res +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.count", i32 2} + +!2 = distinct !{!2, !3} +!3 = !{!"llvm.loop.unroll.count", i32 8} + +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"} +; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]} +;.