Skip to content

Commit 967d5ce

Browse files
authored
merge main into amd-staging (#604)
2 parents d56ec74 + 0c89c0a commit 967d5ce

File tree

11 files changed

+213
-84
lines changed

11 files changed

+213
-84
lines changed

llvm/include/llvm/ADT/DenseMap.h

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,17 @@ class DenseMapBase : public DebugEpochBase {
369369
protected:
370370
DenseMapBase() = default;
371371

372+
struct ExactBucketCount {};
373+
374+
void initWithExactBucketCount(unsigned NewNumBuckets) {
375+
if (derived().allocateBuckets(NewNumBuckets)) {
376+
initEmpty();
377+
} else {
378+
setNumEntries(0);
379+
setNumTombstones(0);
380+
}
381+
}
382+
372383
void destroyAll() {
373384
// No need to iterate through the buckets if both KeyT and ValueT are
374385
// trivially destructible.
@@ -729,9 +740,8 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
729740
unsigned NumTombstones;
730741
unsigned NumBuckets;
731742

732-
struct ExactBucketCount {};
733-
explicit DenseMap(unsigned NumBuckets, ExactBucketCount) {
734-
initWithExactBucketCount(NumBuckets);
743+
explicit DenseMap(unsigned NumBuckets, typename BaseT::ExactBucketCount) {
744+
this->initWithExactBucketCount(NumBuckets);
735745
}
736746

737747
public:
@@ -818,18 +828,9 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
818828
return true;
819829
}
820830

821-
void initWithExactBucketCount(unsigned NewNumBuckets) {
822-
if (allocateBuckets(NewNumBuckets)) {
823-
this->BaseT::initEmpty();
824-
} else {
825-
NumEntries = 0;
826-
NumTombstones = 0;
827-
}
828-
}
829-
830831
void init(unsigned InitNumEntries) {
831832
auto InitBuckets = BaseT::getMinBucketToReserveForEntries(InitNumEntries);
832-
initWithExactBucketCount(InitBuckets);
833+
this->initWithExactBucketCount(InitBuckets);
833834
}
834835

835836
// Put the zombie instance in a known good state after a move.
@@ -841,7 +842,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
841842

842843
void grow(unsigned AtLeast) {
843844
AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast - 1));
844-
DenseMap Tmp(AtLeast, ExactBucketCount{});
845+
DenseMap Tmp(AtLeast, typename BaseT::ExactBucketCount{});
845846
Tmp.moveFrom(*this);
846847
swapImpl(Tmp);
847848
}
@@ -891,10 +892,8 @@ class SmallDenseMap
891892
/// a large bucket. This union will be discriminated by the 'Small' bit.
892893
AlignedCharArrayUnion<BucketT[InlineBuckets], LargeRep> storage;
893894

894-
struct ExactBucketCount {};
895-
SmallDenseMap(unsigned NumBuckets, ExactBucketCount) {
896-
allocateBuckets(NumBuckets);
897-
this->BaseT::initEmpty();
895+
SmallDenseMap(unsigned NumBuckets, typename BaseT::ExactBucketCount) {
896+
this->initWithExactBucketCount(NumBuckets);
898897
}
899898

900899
public:
@@ -1097,8 +1096,7 @@ class SmallDenseMap
10971096

10981097
void init(unsigned InitNumEntries) {
10991098
auto InitBuckets = BaseT::getMinBucketToReserveForEntries(InitNumEntries);
1100-
allocateBuckets(InitBuckets);
1101-
this->BaseT::initEmpty();
1099+
this->initWithExactBucketCount(InitBuckets);
11021100
}
11031101

11041102
// Put the zombie instance in a known good state after a move.
@@ -1112,7 +1110,7 @@ class SmallDenseMap
11121110
if (AtLeast > InlineBuckets)
11131111
AtLeast = std::max<unsigned>(64, NextPowerOf2(AtLeast - 1));
11141112

1115-
SmallDenseMap Tmp(AtLeast, ExactBucketCount{});
1113+
SmallDenseMap Tmp(AtLeast, typename BaseT::ExactBucketCount{});
11161114
Tmp.moveFrom(*this);
11171115

11181116
if (Tmp.Small) {

llvm/lib/CodeGen/TargetPassConfig.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,6 @@ struct InsertedPass {
401401

402402
namespace llvm {
403403

404-
extern cl::opt<bool> EnableFSDiscriminator;
405-
406404
class PassConfigImpl {
407405
public:
408406
// List of passes explicitly substituted by this target. Normally this is

llvm/lib/Transforms/Scalar/LoopDistribute.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ class InstPartitionContainer {
520520
// -1 means belonging to multiple partitions.
521521
else if (Partition == -1)
522522
break;
523-
else if (Partition != (int)ThisPartition)
523+
else if (Partition != ThisPartition)
524524
Partition = -1;
525525
}
526526
assert(Partition != -2 && "Pointer not belonging to any partition");

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5340,6 +5340,7 @@ class BoUpSLP {
53405340
bool IsNonSchedulableWithParentPhiNode =
53415341
TE->doesNotNeedToSchedule() && TE->UserTreeIndex &&
53425342
TE->UserTreeIndex.UserTE->hasState() &&
5343+
TE->UserTreeIndex.UserTE->State != TreeEntry::SplitVectorize &&
53435344
TE->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
53445345
// Count the number of unique phi nodes, which are the parent for
53455346
// parent entry, and exit, if all the unique phis are processed.
@@ -5391,6 +5392,7 @@ class BoUpSLP {
53915392
bool IsNonSchedulableWithParentPhiNode =
53925393
P.first->doesNotNeedToSchedule() && P.first->UserTreeIndex &&
53935394
P.first->UserTreeIndex.UserTE->hasState() &&
5395+
P.first->UserTreeIndex.UserTE->State != TreeEntry::SplitVectorize &&
53945396
P.first->UserTreeIndex.UserTE->getOpcode() == Instruction::PHI;
53955397
auto *It = find(P.first->Scalars, User);
53965398
do {
@@ -5690,6 +5692,8 @@ class BoUpSLP {
56905692
Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
56915693
Bundle->getTreeEntry()->UserTreeIndex &&
56925694
Bundle->getTreeEntry()->UserTreeIndex.UserTE->hasState() &&
5695+
Bundle->getTreeEntry()->UserTreeIndex.UserTE->State !=
5696+
TreeEntry::SplitVectorize &&
56935697
Bundle->getTreeEntry()->UserTreeIndex.UserTE->getOpcode() ==
56945698
Instruction::PHI;
56955699
// Count the number of unique phi nodes, which are the parent for

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,8 +1105,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
11051105
public:
11061106
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
11071107
DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
1108-
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1109-
VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}
1108+
: VPInstruction(Opcode, Operands, {}, {}, DL, Name) {}
11101109

11111110
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
11121111
const VPIRFlags &Flags, const VPIRMetadata &MD = {},

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
538538
case Instruction::Load:
539539
case VPInstruction::AnyOf:
540540
case VPInstruction::BranchOnCond:
541+
case VPInstruction::Broadcast:
541542
case VPInstruction::BuildStructVector:
542543
case VPInstruction::BuildVector:
543544
case VPInstruction::CalculateTripCountMinusVF:
@@ -548,13 +549,16 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
548549
case VPInstruction::ExtractPenultimateElement:
549550
case VPInstruction::FirstActiveLane:
550551
case VPInstruction::Not:
552+
case VPInstruction::ResumeForEpilogue:
551553
case VPInstruction::Unpack:
552554
return 1;
553555
case Instruction::ICmp:
554556
case Instruction::FCmp:
557+
case Instruction::ExtractElement:
555558
case Instruction::Store:
556559
case VPInstruction::BranchOnCount:
557560
case VPInstruction::ComputeReductionResult:
561+
case VPInstruction::ExtractLane:
558562
case VPInstruction::FirstOrderRecurrenceSplice:
559563
case VPInstruction::LogicalAnd:
560564
case VPInstruction::PtrAdd:
@@ -572,6 +576,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
572576
case Instruction::GetElementPtr:
573577
case Instruction::PHI:
574578
case Instruction::Switch:
579+
case VPInstruction::SLPLoad:
580+
case VPInstruction::SLPStore:
575581
// Cannot determine the number of operands from the opcode.
576582
return -1u;
577583
}

llvm/lib/Transforms/Vectorize/VPlanSLP.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,8 @@ VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
464464
LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
465465
// Create dummy VPInstruction, which we will replace later by the
466466
// re-ordered operand.
467-
VPInstruction *Op = new VPInstruction(0, {});
467+
VPInstruction *Op =
468+
new VPInstruction(VPInstruction::Broadcast, {Values[0]});
468469
CombinedOperands.push_back(Op);
469470
MultiNodeOps.emplace_back(Op, Operands);
470471
}
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @main(ptr %c, i32 %0, i1 %tobool4.not, i16 %1) {
5+
; CHECK-LABEL: define i32 @main(
6+
; CHECK-SAME: ptr [[C:%.*]], i32 [[TMP0:%.*]], i1 [[TOBOOL4_NOT:%.*]], i16 [[TMP1:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*]]:
8+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[TMP0]], i32 1
9+
; CHECK-NEXT: br label %[[IF_END:.*]]
10+
; CHECK: [[IF_END]]:
11+
; CHECK-NEXT: [[B_0_PH:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP32:%.*]], %[[WHILE_COND_PREHEADER:.*]] ]
12+
; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP33:%.*]], %[[WHILE_COND_PREHEADER]] ]
13+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[B_0_PH]], i32 0
14+
; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label %[[R:.*]], label %[[IF_END9:.*]]
15+
; CHECK: [[IF_END9]]:
16+
; CHECK-NEXT: [[CONV11:%.*]] = sext i16 [[TMP1]] to i32
17+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[CONV11]], i32 0
18+
; CHECK-NEXT: br label %[[R]]
19+
; CHECK: [[R]]:
20+
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ <i32 1, i32 0>, %[[IF_END9]] ], [ [[TMP2]], %[[IF_END]] ]
21+
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ [[TMP5]], %[[IF_END9]] ], [ [[TMP4]], %[[IF_END]] ]
22+
; CHECK-NEXT: [[TOBOOL12_NOT:%.*]] = icmp eq i32 [[B_0_PH]], 0
23+
; CHECK-NEXT: br i1 [[TOBOOL12_NOT]], label %[[IF_END14:.*]], label %[[IF_THEN13:.*]]
24+
; CHECK: [[IF_THEN13]]:
25+
; CHECK-NEXT: br label %[[IF_END14]]
26+
; CHECK: [[IF_END14]]:
27+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
28+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP8]], 1
29+
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[AND]], 1
30+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
31+
; CHECK-NEXT: [[AND17:%.*]] = and i32 [[TMP9]], 1
32+
; CHECK-NEXT: [[DIV20:%.*]] = sdiv i32 [[AND17]], [[TMP0]]
33+
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[C]], align 4
34+
; CHECK-NEXT: [[AND25:%.*]] = and i32 [[TMP0]], 1
35+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
36+
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[AND17]], i32 1
37+
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP10]], i32 2
38+
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[AND25]], i32 3
39+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> <i32 0, i32 1, i32 poison, i32 1>, i32 [[DIV20]], i32 2
40+
; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP14]], [[TMP15]]
41+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> poison, i32 [[NOT]], i32 2
42+
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP11]], <4 x i32> <i32 poison, i32 poison, i32 2, i32 5>
43+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
44+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
45+
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[B_0_PH]], i32 0
46+
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP0]], i32 2
47+
; CHECK-NEXT: br label %[[AH:.*]]
48+
; CHECK: [[AH]]:
49+
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP21]], %[[AH]] ], [ [[TMP16]], %[[IF_END14]] ]
50+
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP22]], %[[AH]] ], [ [[TMP20]], %[[IF_END14]] ]
51+
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2
52+
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2
53+
; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP24]], <2 x i32> <i32 2, i32 6>
54+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP25]], [[TMP26]]
55+
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1
56+
; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[ADD]], [[TMP28]]
57+
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3
58+
; CHECK-NEXT: [[OR27:%.*]] = or i32 [[TMP29]], [[TMP30]]
59+
; CHECK-NEXT: store i32 [[OR27]], ptr [[C]], align 4
60+
; CHECK-NEXT: br i1 [[TOBOOL4_NOT]], label %[[WHILE_COND_PREHEADER]], label %[[AH]]
61+
; CHECK: [[WHILE_COND_PREHEADER]]:
62+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3
63+
; CHECK-NEXT: [[CALL69:%.*]] = tail call i32 @s(i32 [[TMP31]])
64+
; CHECK-NEXT: [[TMP32]] = extractelement <4 x i32> [[TMP23]], i32 0
65+
; CHECK-NEXT: [[TMP33]] = shufflevector <4 x i32> [[TMP24]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
66+
; CHECK-NEXT: br label %[[IF_END]]
67+
;
68+
entry:
69+
br label %if.end
70+
71+
if.end:
72+
%n.0.ph = phi i32 [ 0, %entry ], [ %.us-phi52, %while.cond.preheader ]
73+
%b.0.ph = phi i32 [ 0, %entry ], [ %b.2, %while.cond.preheader ]
74+
%a.0.ph = phi i32 [ 0, %entry ], [ %a.2, %while.cond.preheader ]
75+
br i1 %tobool4.not, label %r, label %if.end9
76+
77+
if.end9:
78+
%conv11 = sext i16 %1 to i32
79+
br label %r
80+
81+
r:
82+
%.us-phi51642 = phi i32 [ 0, %if.end9 ], [ %0, %if.end ]
83+
%.us-phi415662 = phi i32 [ 0, %if.end9 ], [ 1, %if.end ]
84+
%b.1 = phi i32 [ %conv11, %if.end9 ], [ %b.0.ph, %if.end ]
85+
%a.1 = phi i32 [ 1, %if.end9 ], [ 0, %if.end ]
86+
%tobool12.not = icmp eq i32 %b.0.ph, 0
87+
br i1 %tobool12.not, label %if.end14, label %if.then13
88+
89+
if.then13:
90+
br label %if.end14
91+
92+
if.end14:
93+
%and = and i32 %n.0.ph, 1
94+
%not = xor i32 %and, 1
95+
%and17 = and i32 %a.0.ph, 1
96+
%not18 = xor i32 %and17, 1
97+
%div20 = sdiv i32 %and17, %0
98+
%2 = load i32, ptr %c, align 4
99+
%3 = xor i32 %2, %div20
100+
%and25 = and i32 %0, 1
101+
%not26 = xor i32 %and25, 1
102+
br label %ah
103+
104+
ah:
105+
%.us-phi4154 = phi i32 [ 0, %ah ], [ %.us-phi415662, %if.end14 ]
106+
%.us-phi52 = phi i32 [ 0, %ah ], [ %.us-phi51642, %if.end14 ]
107+
%b.2 = phi i32 [ %b.0.ph, %ah ], [ %b.1, %if.end14 ]
108+
%a.2 = phi i32 [ 0, %ah ], [ %a.1, %if.end14 ]
109+
%l.1 = phi i32 [ %0, %ah ], [ %not, %if.end14 ]
110+
%p16.1 = phi i32 [ 0, %ah ], [ %not18, %if.end14 ]
111+
%q.1 = phi i32 [ 0, %ah ], [ %3, %if.end14 ]
112+
%r23.1 = phi i32 [ 0, %ah ], [ %not26, %if.end14 ]
113+
%add = add i32 %q.1, %l.1
114+
%4 = or i32 %add, %p16.1
115+
%or27 = or i32 %4, %r23.1
116+
store i32 %or27, ptr %c, align 4
117+
br i1 %tobool4.not, label %while.cond.preheader, label %ah
118+
119+
while.cond.preheader:
120+
%call69 = tail call i32 @s(i32 %.us-phi4154)
121+
br label %if.end
122+
}
123+
124+
declare i32 @s(i32)

0 commit comments

Comments
 (0)