Skip to content

Commit 08b4a7a

Browse files
committed
merge main into amd-staging
2 parents 54ffaf5 + 70970d0 commit 08b4a7a

File tree

16 files changed

+526
-32
lines changed

16 files changed

+526
-32
lines changed

flang/lib/Lower/Runtime.cpp

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,12 +169,55 @@ void Fortran::lower::genUnlockStatement(
169169

170170
void Fortran::lower::genPauseStatement(
171171
Fortran::lower::AbstractConverter &converter,
172-
const Fortran::parser::PauseStmt &) {
172+
const Fortran::parser::PauseStmt &stmt) {
173+
173174
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
174175
mlir::Location loc = converter.getCurrentLocation();
175-
mlir::func::FuncOp callee =
176-
fir::runtime::getRuntimeFunc<mkRTKey(PauseStatement)>(loc, builder);
177-
fir::CallOp::create(builder, loc, callee, mlir::ValueRange{});
176+
Fortran::lower::StatementContext stmtCtx;
177+
178+
llvm::SmallVector<mlir::Value> operands;
179+
mlir::func::FuncOp callee;
180+
mlir::FunctionType calleeType;
181+
182+
if (stmt.v.has_value()) {
183+
const auto &code = stmt.v.value();
184+
auto expr =
185+
converter.genExprValue(*Fortran::semantics::GetExpr(code), stmtCtx);
186+
expr.match(
187+
// Character-valued expression -> call PauseStatementText (CHAR, LEN)
188+
[&](const fir::CharBoxValue &x) {
189+
callee = fir::runtime::getRuntimeFunc<mkRTKey(PauseStatementText)>(
190+
loc, builder);
191+
calleeType = callee.getFunctionType();
192+
193+
operands.push_back(
194+
builder.createConvert(loc, calleeType.getInput(0), x.getAddr()));
195+
operands.push_back(
196+
builder.createConvert(loc, calleeType.getInput(1), x.getLen()));
197+
},
198+
// Unboxed value -> call PauseStatementInt which accepts an integer.
199+
[&](fir::UnboxedValue x) {
200+
callee = fir::runtime::getRuntimeFunc<mkRTKey(PauseStatementInt)>(
201+
loc, builder);
202+
calleeType = callee.getFunctionType();
203+
assert(calleeType.getNumInputs() >= 1);
204+
mlir::Value cast =
205+
builder.createConvert(loc, calleeType.getInput(0), x);
206+
operands.push_back(cast);
207+
},
208+
[&](auto) {
209+
fir::emitFatalError(loc, "unhandled expression in PAUSE");
210+
});
211+
} else {
212+
callee =
213+
fir::runtime::getRuntimeFunc<mkRTKey(PauseStatement)>(loc, builder);
214+
calleeType = callee.getFunctionType();
215+
}
216+
217+
fir::CallOp::create(builder, loc, callee, operands);
218+
219+
// NOTE: PAUSE does not terminate the current block. The program may resume
220+
// and continue normal execution, so we do not emit control-flow terminators.
178221
}
179222

180223
void Fortran::lower::genPointerAssociate(fir::FirOpBuilder &builder,

flang/test/Lower/pause-statement.f90

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,31 @@
22

33
! CHECK-LABEL: pause_test
44
subroutine pause_test()
5-
! CHECK: fir.call @_Fortran{{.*}}PauseStatement()
6-
! CHECK-NEXT: return
75
pause
6+
! CHECK: fir.call @_FortranA{{.*}}PauseStatement()
7+
! CHECK-NEXT: return
8+
end subroutine
9+
10+
! CHECK-LABEL: pause_code
11+
subroutine pause_code()
12+
pause 42
13+
! CHECK: %[[c42:.*]] = arith.constant 42 : i32
14+
! CHECK: fir.call @_FortranA{{.*}}PauseStatementInt(%[[c42]])
15+
! CHECK-NEXT: return
816
end subroutine
17+
18+
! CHECK-LABEL: pause_msg
19+
subroutine pause_msg()
20+
pause "hello"
21+
! CHECK-DAG: %[[five:.*]] = arith.constant 5 : index
22+
! CHECK-DAG: %[[addr:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref<!fir.char<1,5>>
23+
! CHECK-DAG: %[[str:.*]]:2 = hlfir.declare %[[addr]] typeparams %[[five]] {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQ{{.*}}"} : (!fir.ref<!fir.char<1,5>>, index) -> (!fir.ref<!fir.char<1,5>>, !fir.ref<!fir.char<1,5>>)
24+
! CHECK-DAG: %[[buff:.*]] = fir.convert %[[str]]#0 : (!fir.ref<!fir.char<1,5>>) -> !fir.ref<i8>
25+
! CHECK-DAG: %[[len:.*]] = fir.convert %[[five]] : (index) -> i64
26+
! CHECK: fir.call @_FortranA{{.*}}PauseStatementText(%[[buff]], %[[len]])
27+
! CHECK-NEXT: return
28+
end subroutine
29+
30+
! CHECK-DAG: func private @_FortranA{{.*}}PauseStatement
31+
! CHECK-DAG: func private @_FortranA{{.*}}PauseStatementInt
32+
! CHECK-DAG: func private @_FortranA{{.*}}PauseStatementText

llvm/include/llvm/MC/MCObjectFileInfo.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,6 @@ class MCSection;
2929

3030
class LLVM_ABI MCObjectFileInfo {
3131
protected:
32-
/// True if target object file supports a weak_definition of constant 0 for an
33-
/// omitted EH frame.
34-
bool SupportsWeakOmittedEHFrame = false;
35-
3632
/// True if the target object file supports emitting a compact unwind section
3733
/// without an associated EH frame section.
3834
bool SupportsCompactUnwindWithoutEHFrame = false;
@@ -260,9 +256,6 @@ class LLVM_ABI MCObjectFileInfo {
260256
virtual ~MCObjectFileInfo();
261257
MCContext &getContext() const { return *Ctx; }
262258

263-
bool getSupportsWeakOmittedEHFrame() const {
264-
return SupportsWeakOmittedEHFrame;
265-
}
266259
bool getSupportsCompactUnwindWithoutEHFrame() const {
267260
return SupportsCompactUnwindWithoutEHFrame;
268261
}

llvm/lib/MC/MCObjectFileInfo.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,6 @@ static bool useCompactUnwind(const Triple &T) {
6161
}
6262

6363
void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
64-
// MachO
65-
SupportsWeakOmittedEHFrame = false;
66-
6764
EHFrameSection = Ctx->getMachOSection(
6865
"__TEXT", "__eh_frame",
6966
MachO::S_COALESCED | MachO::S_ATTR_NO_TOC |
@@ -1090,7 +1087,6 @@ void MCObjectFileInfo::initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
10901087
Ctx = &MCCtx;
10911088

10921089
// Common.
1093-
SupportsWeakOmittedEHFrame = true;
10941090
SupportsCompactUnwindWithoutEHFrame = false;
10951091
OmitDwarfIfHaveCompactUnwind = false;
10961092

llvm/lib/Transforms/Utils/BasicBlockUtils.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,15 @@ emptyAndDetachBlock(BasicBlock *BB,
9292
"applying corresponding DTU updates.");
9393
}
9494

95+
/// Return true if \p BB contains at least one ConvergenceControlInst for which
/// isLoop() or isEntry() holds, and false otherwise. The scan walks the
/// instructions of the block in order and stops at the first match.
static bool HasLoopOrEntryConvergenceToken(const BasicBlock *BB) {
96+
for (const Instruction &I : *BB) {
97+
const ConvergenceControlInst *CCI = dyn_cast<ConvergenceControlInst>(&I);
98+
if (CCI && (CCI->isLoop() || CCI->isEntry()))
99+
return true;
100+
}
101+
return false;
102+
}
103+
95104
void llvm::detachDeadBlocks(ArrayRef<BasicBlock *> BBs,
96105
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
97106
bool KeepOneInputPHIs) {
@@ -259,6 +268,13 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
259268
if (llvm::is_contained(PN.incoming_values(), &PN))
260269
return false;
261270

271+
// Don't merge if both the basic block and the predecessor contain loop or
272+
// entry convergence control intrinsics, since there may only be one
273+
// convergence token per block.
274+
if (HasLoopOrEntryConvergenceToken(BB) &&
275+
HasLoopOrEntryConvergenceToken(PredBB))
276+
return false;
277+
262278
LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
263279
<< PredBB->getName() << "\n");
264280

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7187,17 +7187,29 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
71877187
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
71887188
*CM.PSE.getSE(), OrigLoop);
71897189
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
7190-
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
7191-
// with early exits and plans with additional VPlan simplifications. The
7192-
// legacy cost model doesn't properly model costs for such loops.
7193-
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
7194-
!Legal->getLAI()->getSymbolicStrides().empty() ||
7195-
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
7196-
CostCtx, OrigLoop,
7197-
BestFactor.Width) ||
7198-
planContainsAdditionalSimplifications(
7199-
getPlanFor(LegacyVF.Width), CostCtx, OrigLoop, LegacyVF.Width)) &&
7200-
" VPlan cost model and legacy cost model disagreed");
7190+
// Verify that the VPlan-based and legacy cost models agree, except for
7191+
// * VPlans with early exits,
7192+
// * VPlans with additional VPlan simplifications,
7193+
// * EVL-based VPlans with gather/scatters (the VPlan-based cost model uses
7194+
// vp_scatter/vp_gather).
7195+
// The legacy cost model doesn't properly model costs for such loops.
7196+
bool UsesEVLGatherScatter =
7197+
any_of(VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(
7198+
BestPlan.getVectorLoopRegion()->getEntry())),
7199+
[](VPBasicBlock *VPBB) {
7200+
return any_of(*VPBB, [](VPRecipeBase &R) {
7201+
return isa<VPWidenLoadEVLRecipe, VPWidenStoreEVLRecipe>(&R) &&
7202+
!cast<VPWidenMemoryRecipe>(&R)->isConsecutive();
7203+
});
7204+
});
7205+
assert(
7206+
(BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
7207+
!Legal->getLAI()->getSymbolicStrides().empty() || UsesEVLGatherScatter ||
7208+
planContainsAdditionalSimplifications(
7209+
getPlanFor(BestFactor.Width), CostCtx, OrigLoop, BestFactor.Width) ||
7210+
planContainsAdditionalSimplifications(
7211+
getPlanFor(LegacyVF.Width), CostCtx, OrigLoop, LegacyVF.Width)) &&
7212+
" VPlan cost model and legacy cost model disagreed");
72017213
assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
72027214
"when vectorizing, the scalar cost must be computed.");
72037215
#endif

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,9 +1080,10 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
10801080
FindIVPhiR->getRecurrenceKind()))
10811081
return false;
10821082

1083-
assert(match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) &&
1084-
isa<VPWidenIntOrFpInductionRecipe>(IVOp) &&
1085-
"other select operand must be a (truncated) wide induction");
1083+
// TODO: Support cases where IVOp is the IV increment.
1084+
if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
1085+
!isa<VPWidenIntOrFpInductionRecipe>(IVOp))
1086+
return false;
10861087

10871088
CmpInst::Predicate RdxPredicate = [RdxKind]() {
10881089
switch (RdxKind) {

llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,3 +219,119 @@ loop:
219219
exit:
220220
ret void
221221
}
222+
223+
; Test for https://github.com/llvm/llvm-project/issues/169948.
224+
define i8 @mixed_gather_scatters(ptr %A, ptr %B, ptr %C) #0 {
225+
; RVA23-LABEL: @mixed_gather_scatters(
226+
; RVA23-NEXT: entry:
227+
; RVA23-NEXT: br label [[VECTOR_PH:%.*]]
228+
; RVA23: vector.ph:
229+
; RVA23-NEXT: br label [[VECTOR_BODY:%.*]]
230+
; RVA23: vector.body:
231+
; RVA23-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
232+
; RVA23-NEXT: [[AVL:%.*]] = phi i32 [ 10, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
233+
; RVA23-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 2, i1 true)
234+
; RVA23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A:%.*]], align 8
235+
; RVA23-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP1]], i64 0
236+
; RVA23-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
237+
; RVA23-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
238+
; RVA23-NEXT: [[TMP2:%.*]] = icmp sgt <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], zeroinitializer
239+
; RVA23-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i1> [[TMP2]] to <vscale x 2 x i8>
240+
; RVA23-NEXT: [[TMP4:%.*]] = or <vscale x 2 x i8> [[VEC_PHI]], [[TMP3]]
241+
; RVA23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B:%.*]], align 8
242+
; RVA23-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP5]], i64 0
243+
; RVA23-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
244+
; RVA23-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT2]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
245+
; RVA23-NEXT: [[TMP6:%.*]] = icmp sgt <vscale x 2 x i64> [[WIDE_MASKED_GATHER3]], zeroinitializer
246+
; RVA23-NEXT: [[TMP7:%.*]] = zext <vscale x 2 x i1> [[TMP6]] to <vscale x 2 x i8>
247+
; RVA23-NEXT: [[TMP8:%.*]] = or <vscale x 2 x i8> [[TMP4]], [[TMP7]]
248+
; RVA23-NEXT: [[TMP9:%.*]] = or <vscale x 2 x i8> [[TMP8]], splat (i8 1)
249+
; RVA23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C:%.*]], align 8
250+
; RVA23-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
251+
; RVA23-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT4]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
252+
; RVA23-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT5]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
253+
; RVA23-NEXT: [[TMP11:%.*]] = icmp sgt <vscale x 2 x i64> [[WIDE_MASKED_GATHER6]], zeroinitializer
254+
; RVA23-NEXT: [[TMP12:%.*]] = zext <vscale x 2 x i1> [[TMP11]] to <vscale x 2 x i8>
255+
; RVA23-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i8> [[TMP9]], [[TMP12]]
256+
; RVA23-NEXT: [[TMP14]] = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> [[TMP13]], <vscale x 2 x i8> [[VEC_PHI]], i32 [[TMP0]])
257+
; RVA23-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP0]]
258+
; RVA23-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
259+
; RVA23-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
260+
; RVA23: middle.block:
261+
; RVA23-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> [[TMP14]])
262+
; RVA23-NEXT: br label [[EXIT:%.*]]
263+
; RVA23: exit:
264+
; RVA23-NEXT: ret i8 [[TMP16]]
265+
;
266+
; RVA23ZVL1024B-LABEL: @mixed_gather_scatters(
267+
; RVA23ZVL1024B-NEXT: entry:
268+
; RVA23ZVL1024B-NEXT: br label [[VECTOR_PH:%.*]]
269+
; RVA23ZVL1024B: vector.ph:
270+
; RVA23ZVL1024B-NEXT: br label [[VECTOR_BODY:%.*]]
271+
; RVA23ZVL1024B: vector.body:
272+
; RVA23ZVL1024B-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
273+
; RVA23ZVL1024B-NEXT: [[AVL:%.*]] = phi i32 [ 10, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
274+
; RVA23ZVL1024B-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 1, i1 true)
275+
; RVA23ZVL1024B-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A:%.*]], align 8
276+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[TMP1]], i64 0
277+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
278+
; RVA23ZVL1024B-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 [[BROADCAST_SPLAT]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP0]])
279+
; RVA23ZVL1024B-NEXT: [[TMP2:%.*]] = icmp sgt <vscale x 1 x i64> [[WIDE_MASKED_GATHER]], zeroinitializer
280+
; RVA23ZVL1024B-NEXT: [[TMP3:%.*]] = zext <vscale x 1 x i1> [[TMP2]] to <vscale x 1 x i8>
281+
; RVA23ZVL1024B-NEXT: [[TMP4:%.*]] = or <vscale x 1 x i8> [[VEC_PHI]], [[TMP3]]
282+
; RVA23ZVL1024B-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B:%.*]], align 8
283+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[TMP5]], i64 0
284+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
285+
; RVA23ZVL1024B-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 [[BROADCAST_SPLAT2]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP0]])
286+
; RVA23ZVL1024B-NEXT: [[TMP6:%.*]] = icmp sgt <vscale x 1 x i64> [[WIDE_MASKED_GATHER3]], zeroinitializer
287+
; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = zext <vscale x 1 x i1> [[TMP6]] to <vscale x 1 x i8>
288+
; RVA23ZVL1024B-NEXT: [[TMP8:%.*]] = or <vscale x 1 x i8> [[TMP4]], [[TMP7]]
289+
; RVA23ZVL1024B-NEXT: [[TMP9:%.*]] = or <vscale x 1 x i8> [[TMP8]], splat (i8 1)
290+
; RVA23ZVL1024B-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C:%.*]], align 8
291+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[TMP10]], i64 0
292+
; RVA23ZVL1024B-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT4]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
293+
; RVA23ZVL1024B-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 [[BROADCAST_SPLAT5]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP0]])
294+
; RVA23ZVL1024B-NEXT: [[TMP11:%.*]] = icmp sgt <vscale x 1 x i64> [[WIDE_MASKED_GATHER6]], zeroinitializer
295+
; RVA23ZVL1024B-NEXT: [[TMP12:%.*]] = zext <vscale x 1 x i1> [[TMP11]] to <vscale x 1 x i8>
296+
; RVA23ZVL1024B-NEXT: [[TMP13:%.*]] = or <vscale x 1 x i8> [[TMP9]], [[TMP12]]
297+
; RVA23ZVL1024B-NEXT: [[TMP14]] = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i8> [[TMP13]], <vscale x 1 x i8> [[VEC_PHI]], i32 [[TMP0]])
298+
; RVA23ZVL1024B-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP0]]
299+
; RVA23ZVL1024B-NEXT: [[TMP15:%.*]] = icmp eq i32 [[AVL_NEXT]], 0
300+
; RVA23ZVL1024B-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
301+
; RVA23ZVL1024B: middle.block:
302+
; RVA23ZVL1024B-NEXT: [[TMP16:%.*]] = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> [[TMP14]])
303+
; RVA23ZVL1024B-NEXT: br label [[EXIT:%.*]]
304+
; RVA23ZVL1024B: exit:
305+
; RVA23ZVL1024B-NEXT: ret i8 [[TMP16]]
306+
;
307+
entry:
308+
br label %loop
309+
310+
loop:
311+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
312+
%accum = phi i8 [ 0, %entry ], [ %or.4, %loop ]
313+
%ptr.0 = load ptr, ptr %A, align 8
314+
%val.0 = load i64, ptr %ptr.0, align 8
315+
%cmp.0 = icmp sgt i64 %val.0, 0
316+
%ext.0 = zext i1 %cmp.0 to i8
317+
%or.0 = or i8 %accum, %ext.0
318+
%ptr.1 = load ptr, ptr %B, align 8
319+
%val.1 = load i64, ptr %ptr.1, align 8
320+
%cmp.1 = icmp sgt i64 %val.1, 0
321+
%ext.1 = zext i1 %cmp.1 to i8
322+
%or.1 = or i8 %or.0, %ext.1
323+
%or.2 = or i8 %or.1, 1
324+
%ptr.4 = load ptr, ptr %C, align 8
325+
%val.4 = load i64, ptr %ptr.4, align 8
326+
%cmp.4 = icmp sgt i64 %val.4, 0
327+
%ext.4 = zext i1 %cmp.4 to i8
328+
%or.4 = or i8 %or.2, %ext.4
329+
%iv.next = add i32 %iv, 1
330+
%exitcond = icmp eq i32 %iv, 9
331+
br i1 %exitcond, label %exit, label %loop
332+
333+
exit:
334+
ret i8 %or.4
335+
}
336+
337+
attributes #0 = { "target-features"="+zve64x,+zvl256b" }

llvm/test/Transforms/LoopVectorize/select-smax-last-index.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,5 +656,47 @@ exit:
656656
ret i64 %res
657657
}
658658

659+
define i64 @test_vectorize_select_smax_idx_inc(ptr %src, i64 %n) {
660+
; CHECK-LABEL: define i64 @test_vectorize_select_smax_idx_inc(
661+
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
662+
; CHECK-NEXT: [[ENTRY:.*]]:
663+
; CHECK-NEXT: br label %[[LOOP:.*]]
664+
; CHECK: [[LOOP]]:
665+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
666+
; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ]
667+
; CHECK-NEXT: [[MAX_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MAX_VAL_NEXT:%.*]], %[[LOOP]] ]
668+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
669+
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4
670+
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[MAX_VAL]], [[L]]
671+
; CHECK-NEXT: [[MAX_VAL_NEXT]] = tail call i64 @llvm.smax.i64(i64 [[MAX_VAL]], i64 [[L]])
672+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
673+
; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV_NEXT]], i64 [[MAX_IDX]]
674+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
675+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
676+
; CHECK: [[EXIT]]:
677+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ]
678+
; CHECK-NEXT: ret i64 [[RES]]
679+
;
680+
entry:
681+
br label %loop
682+
683+
loop:
684+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
685+
%max.idx = phi i64 [ 0, %entry ], [ %max.idx.next, %loop ]
686+
%max.val = phi i64 [ 0, %entry ], [ %max.val.next, %loop ]
687+
%gep = getelementptr i64, ptr %src, i64 %iv
688+
%l = load i64, ptr %gep
689+
%cmp = icmp sle i64 %max.val, %l
690+
%max.val.next = tail call i64 @llvm.smax.i64(i64 %max.val, i64 %l)
691+
%iv.next = add nuw nsw i64 %iv, 1
692+
%max.idx.next = select i1 %cmp, i64 %iv.next, i64 %max.idx
693+
%exitcond.not = icmp eq i64 %iv.next, %n
694+
br i1 %exitcond.not, label %exit, label %loop
695+
696+
exit:
697+
%res = phi i64 [ %max.idx.next, %loop ]
698+
ret i64 %res
699+
}
700+
659701
declare i64 @llvm.smax.i64(i64, i64)
660702
declare i16 @llvm.smax.i16(i16, i16)

0 commit comments

Comments
 (0)