@@ -253,7 +253,7 @@ bool LoopIdiomVectorize::run(Loop *L) {
253253
254254 if (recognizeMinIdxPattern ())
255255 return true ;
256-
256+
257257 return false ;
258258}
259259
@@ -448,7 +448,6 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
448448 auto *VecTy = ScalableVectorType::get (
449449 LoadType, VF); // This is the vector type for i32 values
450450
451-
452451 // High-level overview of the transformation:
453452 // We divide the process in three phases:
454453 // In the first phase, we process a chunk which is not multiple of VF.
@@ -470,6 +469,8 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
470469 // The below basic blocks are used to process the first phase
471470 // and are for processing the chunk which is not multiple of VF.
472471 BasicBlock *VecEntry = BasicBlock::Create (Ctx, " minidx.vec.entry" , F);
472+ BasicBlock *VecScalarForkBlock =
473+ BasicBlock::Create (Ctx, " minidx.vec.scalar.fork" , F);
473474 BasicBlock *MinIdxPartial1If =
474475 BasicBlock::Create (Ctx, " minidx.partial.1.if" , F);
475476 BasicBlock *MinIdxPartial1ProcExit =
@@ -501,8 +502,41 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
501502
502503 LI->addTopLevelLoop (VecLoop);
503504
504- // Start populating preheader.
505+ // In the loop preheader, we add a fail-fast check:
506+ // if FirstIndex is equal to SecondIndex,
507+ // we branch to the exit block and return SecondIndex.
508+ // To do so, the loop preheader is split into two blocks:
509+ // the original one holds the early-exit check,
510+ // and the new one sets up the code for vectorization.
511+ // TODO: Can use splitBasicBlock(...) API to split the loop preheader.
512+
505513 IRBuilder<> Builder (LoopPreheader->getTerminator ());
514+ Value *FirstIndexCmp =
515+ Builder.CreateICmpEQ (FirstIndex, SecondIndex, " first.index.cmp" );
516+ Value *SecondIndexBitCast = Builder.CreateTruncOrBitCast (
517+ SecondIndex, F->getReturnType (), " second.index.bitcast" );
518+ Builder.CreateCondBr (FirstIndexCmp, ExitBB, VecScalarForkBlock);
519+
520+ // Add edges from LoopPreheader to VecScalarForkBlock and ExitBB.
521+ DTU.applyUpdates (
522+ {{DominatorTree::Insert, LoopPreheader, VecScalarForkBlock}});
523+ DTU.applyUpdates ({{DominatorTree::Insert, LoopPreheader, ExitBB}});
524+
525+ DTU.applyUpdates ({{DominatorTree::Insert, VecScalarForkBlock, Header}});
526+ DTU.applyUpdates ({{DominatorTree::Insert, VecScalarForkBlock, ExitBB}});
527+
528+ // We change PHI values in the loop's header to point to the new block.
529+ // This is done to avoid the PHI node being optimized out.
530+ for (PHINode &PHI : Header->phis ()) {
531+ PHI.replaceIncomingBlockWith (LoopPreheader, VecScalarForkBlock);
532+ }
533+
534+ // Change the name as it is no longer the loop preheader.
535+ LoopPreheader->setName (" minidx.early.exit1" );
536+
537+ // Start populating VecScalarForkBlock (the new vectorization setup block).
538+ Builder.SetInsertPoint (VecScalarForkBlock);
539+
506540 // %VScale = tail call i64 @llvm.vscale.i64()
507541 // %VLen = shl nuw nsw i64 %VScale, 2
508542 // %minidx.not = sub nsw i64 0, %VLen
@@ -571,7 +605,7 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
571605 LoopPreheader->getTerminator ()->eraseFromParent ();
572606
573607 // Add edge from VecScalarForkBlock to VecEntry
574- DTU.applyUpdates ({{DominatorTree::Insert, LoopPreheader , VecEntry}});
608+ DTU.applyUpdates ({{DominatorTree::Insert, VecScalarForkBlock , VecEntry}});
575609
576610 // %minidx.entry.cmp = fcmp olt float %minidx.minVal, %init
577611 // br i1 %minidx.entry.cmp, label %minidx.partial.1.if, label
@@ -835,7 +869,6 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
835869 {MaskTy, I64Ty}),
836870 {FirstIndex, MinIdxPartial2IfAdd}, " minidx.partial.2.if.mask" );
837871
838-
839872 Value *FirstIndexMinus1 =
840873 Builder.CreateSub (FirstIndex, ConstantInt::get (I64Ty, 1 ),
841874 " minidx.partial.2.if.firstindex.minus1" );
@@ -856,8 +889,9 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
856889 {MinIdxPartial2IfGEP, ConstantInt::get (I32Ty, 1 ),
857890 MinIdxPartial2IfMask, Constant::getNullValue (VecTy)},
858891 " minidx.partial.2.if.load" );
859- Value *MinIdxPartial2IfSelectVals =
860- Builder.CreateSelect (MinIdxPartial2IfMask, MinIdxPartial2IfLoad, GMax, " minidx.partial2.if.finalVals" );
892+ Value *MinIdxPartial2IfSelectVals =
893+ Builder.CreateSelect (MinIdxPartial2IfMask, MinIdxPartial2IfLoad, GMax,
894+ " minidx.partial2.if.finalVals" );
861895
862896 // Reverse the mask.
863897 MinIdxPartial2IfMask = Builder.CreateCall (
@@ -962,12 +996,14 @@ bool LoopIdiomVectorize::transformMinIdxPattern(
962996 for (PHINode *PHI : PHIsToReplace) {
963997 // Create PHI at the beginning of the block
964998 Builder.SetInsertPoint (ExitBB, ExitBB->getFirstInsertionPt ());
999+ // Two extra incoming values: MinIdxEnd and the early-exit block (LoopPreheader).
9651000 PHINode *ExitPHI =
966- Builder.CreatePHI (F->getReturnType (), PHI->getNumIncomingValues () + 1 );
1001+ Builder.CreatePHI (F->getReturnType (), PHI->getNumIncomingValues () + 2 );
9671002 for (unsigned I = 0 ; I < PHI->getNumIncomingValues (); ++I) {
9681003 ExitPHI->addIncoming (PHI->getIncomingValue (I), PHI->getIncomingBlock (I));
9691004 }
9701005 ExitPHI->addIncoming (MinIdxRetBitCast, MinIdxEnd);
1006+ ExitPHI->addIncoming (SecondIndexBitCast, LoopPreheader);
9711007 // Replace all uses of PHI with ExitPHI.
9721008 PHI->replaceAllUsesWith (ExitPHI);
9731009 PHI->eraseFromParent ();
0 commit comments