Skip to content

Commit f96f35f

Browse files
authored
merge main into amd-staging (#723)
2 parents bea23cf + fc97e06 commit f96f35f

30 files changed

+1526
-146
lines changed

lld/ELF/SyntheticSections.cpp

Lines changed: 70 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -540,43 +540,6 @@ void EhFrameSection::finalizeContents() {
540540
this->size = off;
541541
}
542542

543-
// Returns data for .eh_frame_hdr. .eh_frame_hdr is a binary search table
544-
// to get an FDE from an address to which FDE is applied. This function
545-
// returns a list of such pairs.
546-
SmallVector<EhFrameSection::FdeData, 0> EhFrameSection::getFdeData() const {
547-
uint8_t *buf = ctx.bufferStart + getParent()->offset + outSecOff;
548-
SmallVector<FdeData, 0> ret;
549-
550-
uint64_t va = getPartition(ctx).ehFrameHdr->getVA();
551-
for (CieRecord *rec : cieRecords) {
552-
uint8_t enc = getFdeEncoding(rec->cie);
553-
for (EhSectionPiece *fde : rec->fdes) {
554-
uint64_t pc = getFdePc(buf, fde->outputOff, enc);
555-
uint64_t fdeVA = getParent()->addr + fde->outputOff;
556-
if (!isInt<32>(pc - va)) {
557-
Err(ctx) << fde->sec << ": PC offset is too large: 0x"
558-
<< Twine::utohexstr(pc - va);
559-
continue;
560-
}
561-
ret.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)});
562-
}
563-
}
564-
565-
// Sort the FDE list by their PC and uniqueify. Usually there is only
566-
// one FDE for a PC (i.e. function), but if ICF merges two functions
567-
// into one, there can be more than one FDEs pointing to the address.
568-
auto less = [](const FdeData &a, const FdeData &b) {
569-
return a.pcRel < b.pcRel;
570-
};
571-
llvm::stable_sort(ret, less);
572-
auto eq = [](const FdeData &a, const FdeData &b) {
573-
return a.pcRel == b.pcRel;
574-
};
575-
ret.erase(llvm::unique(ret, eq), ret.end());
576-
577-
return ret;
578-
}
579-
580543
static uint64_t readFdeAddr(Ctx &ctx, uint8_t *buf, int size) {
581544
switch (size) {
582545
case DW_EH_PE_udata2:
@@ -630,14 +593,79 @@ void EhFrameSection::writeTo(uint8_t *buf) {
630593
}
631594
}
632595

633-
// Apply relocations. .eh_frame section contents are not contiguous
634-
// in the output buffer, but relocateAlloc() still works because
635-
// getOffset() takes care of discontiguous section pieces.
596+
// Apply relocations to .eh_frame entries. This includes CIE personality
597+
// pointers, FDE initial_location fields, and LSDA pointers.
636598
for (EhInputSection *s : sections)
637599
ctx.target->relocateEh(*s, buf);
638600

639-
if (getPartition(ctx).ehFrameHdr && getPartition(ctx).ehFrameHdr->getParent())
640-
getPartition(ctx).ehFrameHdr->write();
601+
EhFrameHeader *hdr = getPartition(ctx).ehFrameHdr.get();
602+
if (!hdr || !hdr->getParent())
603+
return;
604+
605+
// Write the .eh_frame_hdr section, which contains a binary search table of
606+
// pointers to FDEs. This must be written after .eh_frame relocation since
607+
// the content depends on relocated initial_location fields in FDEs.
608+
using FdeData = EhFrameSection::FdeData;
609+
SmallVector<FdeData, 0> fdes;
610+
uint64_t va = hdr->getVA();
611+
for (CieRecord *rec : cieRecords) {
612+
uint8_t enc = getFdeEncoding(rec->cie);
613+
for (EhSectionPiece *fde : rec->fdes) {
614+
uint64_t pc = getFdePc(buf, fde->outputOff, enc);
615+
uint64_t fdeVA = getParent()->addr + fde->outputOff;
616+
if (!isInt<32>(pc - va)) {
617+
Err(ctx) << fde->sec << ": PC offset is too large: 0x"
618+
<< Twine::utohexstr(pc - va);
619+
continue;
620+
}
621+
fdes.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)});
622+
}
623+
}
624+
625+
// Sort the FDE list by their PC and uniqueify. Usually there is only
626+
// one FDE for a PC (i.e. function), but if ICF merges two functions
627+
// into one, there can be more than one FDE pointing to the address.
628+
llvm::stable_sort(fdes, [](const FdeData &a, const FdeData &b) {
629+
return a.pcRel < b.pcRel;
630+
});
631+
fdes.erase(
632+
llvm::unique(fdes, [](auto &a, auto &b) { return a.pcRel == b.pcRel; }),
633+
fdes.end());
634+
635+
// Write header.
636+
uint8_t *hdrBuf = ctx.bufferStart + hdr->getParent()->offset + hdr->outSecOff;
637+
hdrBuf[0] = 1; // version
638+
hdrBuf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; // eh_frame_ptr_enc
639+
hdrBuf[2] = DW_EH_PE_udata4; // fde_count_enc
640+
hdrBuf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; // table_enc
641+
write32(ctx, hdrBuf + 4,
642+
getParent()->addr - hdr->getVA() - 4); // eh_frame_ptr
643+
write32(ctx, hdrBuf + 8, fdes.size()); // fde_count
644+
hdrBuf += 12;
645+
646+
// Write binary search table. Each entry describes the starting PC and the FDE
647+
// address.
648+
for (FdeData &fde : fdes) {
649+
write32(ctx, hdrBuf, fde.pcRel);
650+
write32(ctx, hdrBuf + 4, fde.fdeVARel);
651+
hdrBuf += 8;
652+
}
653+
}
654+
655+
EhFrameHeader::EhFrameHeader(Ctx &ctx)
656+
: SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {}
657+
658+
void EhFrameHeader::writeTo(uint8_t *buf) {
659+
// The section content is written during EhFrameSection::writeTo.
660+
}
661+
662+
size_t EhFrameHeader::getSize() const {
663+
// .eh_frame_hdr has a 12-byte header followed by an array of FDEs.
664+
return 12 + getPartition(ctx).ehFrame->numFdes * 8;
665+
}
666+
667+
bool EhFrameHeader::isNeeded() const {
668+
return isLive() && getPartition(ctx).ehFrame->isNeeded();
641669
}
642670

643671
GotSection::GotSection(Ctx &ctx)
@@ -3658,51 +3686,6 @@ void GdbIndexSection::writeTo(uint8_t *buf) {
36583686

36593687
bool GdbIndexSection::isNeeded() const { return !chunks.empty(); }
36603688

3661-
EhFrameHeader::EhFrameHeader(Ctx &ctx)
3662-
: SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {}
3663-
3664-
void EhFrameHeader::writeTo(uint8_t *buf) {
3665-
// Unlike most sections, the EhFrameHeader section is written while writing
3666-
// another section, namely EhFrameSection, which calls the write() function
3667-
// below from its writeTo() function. This is necessary because the contents
3668-
// of EhFrameHeader depend on the relocated contents of EhFrameSection and we
3669-
// don't know which order the sections will be written in.
3670-
}
3671-
3672-
// .eh_frame_hdr contains a binary search table of pointers to FDEs.
3673-
// Each entry of the search table consists of two values,
3674-
// the starting PC from where FDEs covers, and the FDE's address.
3675-
// It is sorted by PC.
3676-
void EhFrameHeader::write() {
3677-
uint8_t *buf = ctx.bufferStart + getParent()->offset + outSecOff;
3678-
using FdeData = EhFrameSection::FdeData;
3679-
SmallVector<FdeData, 0> fdes = getPartition(ctx).ehFrame->getFdeData();
3680-
3681-
buf[0] = 1;
3682-
buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
3683-
buf[2] = DW_EH_PE_udata4;
3684-
buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
3685-
write32(ctx, buf + 4,
3686-
getPartition(ctx).ehFrame->getParent()->addr - this->getVA() - 4);
3687-
write32(ctx, buf + 8, fdes.size());
3688-
buf += 12;
3689-
3690-
for (FdeData &fde : fdes) {
3691-
write32(ctx, buf, fde.pcRel);
3692-
write32(ctx, buf + 4, fde.fdeVARel);
3693-
buf += 8;
3694-
}
3695-
}
3696-
3697-
size_t EhFrameHeader::getSize() const {
3698-
// .eh_frame_hdr has a 12 bytes header followed by an array of FDEs.
3699-
return 12 + getPartition(ctx).ehFrame->numFdes * 8;
3700-
}
3701-
3702-
bool EhFrameHeader::isNeeded() const {
3703-
return isLive() && getPartition(ctx).ehFrame->isNeeded();
3704-
}
3705-
37063689
VersionDefinitionSection::VersionDefinitionSection(Ctx &ctx)
37073690
: SyntheticSection(ctx, ".gnu.version_d", SHT_GNU_verdef, SHF_ALLOC,
37083691
sizeof(uint32_t)) {}

lld/ELF/SyntheticSections.h

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ class EhFrameSection final : public SyntheticSection {
6868
uint32_t fdeVARel;
6969
};
7070

71-
SmallVector<FdeData, 0> getFdeData() const;
7271
ArrayRef<CieRecord *> getCieRecords() const { return cieRecords; }
7372
template <class ELFT>
7473
void iterateFDEWithLSDA(llvm::function_ref<void(InputSection &)> fn);
@@ -95,6 +94,17 @@ class EhFrameSection final : public SyntheticSection {
9594
llvm::DenseMap<std::pair<ArrayRef<uint8_t>, Symbol *>, CieRecord *> cieMap;
9695
};
9796

97+
// .eh_frame_hdr contains a binary search table for .eh_frame FDEs. The section
98+
// is covered by a PT_GNU_EH_FRAME segment, which allows the runtime unwinder to
99+
// locate it via functions like `dl_iterate_phdr`.
100+
class EhFrameHeader final : public SyntheticSection {
101+
public:
102+
EhFrameHeader(Ctx &);
103+
void writeTo(uint8_t *buf) override;
104+
size_t getSize() const override;
105+
bool isNeeded() const override;
106+
};
107+
98108
class GotSection final : public SyntheticSection {
99109
public:
100110
GotSection(Ctx &);
@@ -967,24 +977,6 @@ class GdbIndexSection final : public SyntheticSection {
967977
size_t size;
968978
};
969979

970-
// --eh-frame-hdr option tells linker to construct a header for all the
971-
// .eh_frame sections. This header is placed to a section named .eh_frame_hdr
972-
// and also to a PT_GNU_EH_FRAME segment.
973-
// At runtime the unwinder then can find all the PT_GNU_EH_FRAME segments by
974-
// calling dl_iterate_phdr.
975-
// This section contains a lookup table for quick binary search of FDEs.
976-
// Detailed info about internals can be found in Ian Lance Taylor's blog:
977-
// http://www.airs.com/blog/archives/460 (".eh_frame")
978-
// http://www.airs.com/blog/archives/462 (".eh_frame_hdr")
979-
class EhFrameHeader final : public SyntheticSection {
980-
public:
981-
EhFrameHeader(Ctx &);
982-
void write();
983-
void writeTo(uint8_t *buf) override;
984-
size_t getSize() const override;
985-
bool isNeeded() const override;
986-
};
987-
988980
// For more information about .gnu.version and .gnu.version_r see:
989981
// https://www.akkadia.org/drepper/symbol-versioning
990982

llvm/docs/SPIRVUsage.rst

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ Static Compiler Commands
3030
Description: This command compiles an LLVM IL file (`input.ll`) to a SPIR-V binary (`output.spvt`) for a 32-bit architecture.
3131

3232
2. **Compilation with Extensions and Optimization**
33-
Command: `llc -O1 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers input.ll -o output.spvt`
34-
Description: Compiles an LLVM IL file to SPIR-V with (`-O1`) optimizations, targeting a 64-bit architecture. It enables the SPV_INTEL_arbitrary_precision_integers extension.
33+
Command: `llc -O1 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers input.ll -o output.spvt`
34+
Description: Compiles an LLVM IL file to SPIR-V with (`-O1`) optimizations, targeting a 64-bit architecture. It enables the SPV_ALTERA_arbitrary_precision_integers extension.
3535

3636
3. **Compilation with experimental NonSemantic.Shader.DebugInfo.100 support**
3737
Command: `llc --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info input.ll -o output.spvt`
@@ -136,7 +136,7 @@ extensions to enable or disable, each prefixed with ``+`` or ``-``, respectively
136136

137137
To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use:
138138

139-
``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_INTEL_arbitrary_precision_integers``
139+
``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_ALTERA_arbitrary_precision_integers``
140140

141141
To enable all extensions, use the following option:
142142
``-spirv-ext=all``
@@ -145,7 +145,7 @@ To enable all KHR extensions, use the following option:
145145
``-spirv-ext=khr``
146146

147147
To enable all extensions except specified, specify ``all`` followed by a list of disallowed extensions. For example:
148-
``-spirv-ext=all,-SPV_INTEL_arbitrary_precision_integers``
148+
``-spirv-ext=all,-SPV_ALTERA_arbitrary_precision_integers``
149149

150150
Below is a list of supported SPIR-V extensions, sorted alphabetically by their extension names:
151151

@@ -171,7 +171,7 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e
171171
- Extends the SPV_EXT_shader_atomic_float_add and SPV_EXT_shader_atomic_float_min_max to support addition, minimum and maximum on 16-bit `bfloat16` floating-point numbers in memory.
172172
* - ``SPV_INTEL_2d_block_io``
173173
- Adds additional subgroup block prefetch, load, load transposed, load transformed and store instructions to read two-dimensional blocks of data from a two-dimensional region of memory, or to write two-dimensional blocks of data to a two dimensional region of memory.
174-
* - ``SPV_INTEL_arbitrary_precision_integers``
174+
* - ``SPV_ALTERA_arbitrary_precision_integers``
175175
- Allows generating arbitrary width integer types.
176176
* - ``SPV_INTEL_bindless_images``
177177
- Adds instructions to convert unsigned integer handles to images, samplers and sampled images.
@@ -245,6 +245,9 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e
245245
- Adds execution mode and capability to enable maximal reconvergence.
246246
* - ``SPV_ALTERA_blocking_pipes``
247247
- Adds new pipe read and write functions that have blocking semantics instead of the non-blocking semantics of the existing pipe read/write functions.
248+
* - ``SPV_ALTERA_arbitrary_precision_fixed_point``
249+
- Adds instructions for fixed-point arithmetic. The extension works without SPV_ALTERA_arbitrary_precision_integers, but together they allow greater flexibility in representing arbitrary precision data types.
250+
248251

249252
SPIR-V representation in LLVM IR
250253
================================

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9584,6 +9584,50 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
95849584
if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
95859585
return V;
95869586

9587+
// When there is no cost for GPR <-> FPR, we can use zicond select for
9588+
// floating-point values when CondV is an integer type
9589+
bool FPinGPR = Subtarget.hasStdExtZfinx();
9590+
9591+
// We can handle FGPR without splitting into hi/lo parts
9592+
bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(),
9593+
Subtarget.getXLenVT().getSizeInBits());
9594+
9595+
bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR &&
9596+
VT.isFloatingPoint() && FitsInGPR;
9597+
9598+
if (UseZicondForFPSel) {
9599+
9600+
auto CastToInt = [&](SDValue V) -> SDValue {
9601+
// Treat +0.0 as int 0 to enable single 'czero' instruction generation.
9602+
if (isNullFPConstant(V))
9603+
return DAG.getConstant(0, DL, XLenVT);
9604+
9605+
if (VT == MVT::f16)
9606+
return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V);
9607+
9608+
if (VT == MVT::f32 && Subtarget.is64Bit())
9609+
return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V);
9610+
9611+
return DAG.getBitcast(XLenVT, V);
9612+
};
9613+
9614+
SDValue TrueVInt = CastToInt(TrueV);
9615+
SDValue FalseVInt = CastToInt(FalseV);
9616+
9617+
// Emit integer SELECT (lowers to Zicond)
9618+
SDValue ResultInt =
9619+
DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
9620+
9621+
// Convert back to floating VT
9622+
if (VT == MVT::f32 && Subtarget.is64Bit())
9623+
return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt);
9624+
9625+
if (VT == MVT::f16)
9626+
return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt);
9627+
9628+
return DAG.getBitcast(VT, ResultInt);
9629+
}
9630+
95879631
// When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
95889632
// nodes to implement the SELECT. Performing the lowering here allows for
95899633
// greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
@@ -10699,7 +10743,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1069910743
VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
1070010744
return SDValue();
1070110745
SDValue Extracted = DAG.getBitcast(XLenVT, Vec);
10702-
unsigned ElemWidth = EltVT.getSizeInBits();
10746+
unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits();
1070310747
SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx,
1070410748
DAG.getConstant(ElemWidth, DL, XLenVT));
1070510749
return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt);

0 commit comments

Comments
 (0)