Skip to content

Commit b617908

Browse files
authored
merge main into amd-staging (#701)
Passed https://compiler-ci.amd.com/blue/organizations/jenkins/compiler-psdb-amd-staging/detail/compiler-psdb-amd-staging/3005/pipeline/722/, which failed while trying to land the PR because approval was missing.
2 parents 2520866 + 8ccc861 commit b617908

File tree

163 files changed

+4989
-3019
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

163 files changed

+4989
-3019
lines changed

bolt/lib/Passes/Inliner.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,32 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
491491
}
492492
}
493493

494+
// AArch64 BTI:
495+
// If the callee has an indirect tailcall (BR), we would transform it to
496+
// an indirect call (BLR) in InlineCall. Because of this, we would have to
497+
// update the BTI at the target of the tailcall. However, these targets
498+
// are not known. Instead, we skip inlining blocks with indirect
499+
// tailcalls.
500+
auto HasIndirectTailCall = [&](const BinaryFunction &BF) -> bool {
501+
for (const auto &BB : BF) {
502+
for (const auto &II : BB) {
503+
if (BC.MIB->isIndirectBranch(II) && BC.MIB->isTailCall(II)) {
504+
return true;
505+
}
506+
}
507+
}
508+
return false;
509+
};
510+
511+
if (BC.isAArch64() && BC.usesBTI() &&
512+
HasIndirectTailCall(*TargetFunction)) {
513+
++InstIt;
514+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Skipping inlining block with tailcall"
515+
<< " in " << Function << " : " << BB->getName()
516+
<< " to keep BTIs consistent.\n");
517+
continue;
518+
}
519+
494520
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
495521
<< " in " << Function << " : " << BB->getName()
496522
<< ". Count: " << BB->getKnownExecutionCount()

bolt/test/AArch64/inline-bti.s

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
## This test checks that for AArch64 binaries with BTI, we do not inline blocks with indirect tailcalls.
2+
3+
# REQUIRES: system-linux
4+
5+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
6+
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
7+
# RUN: llvm-bolt --inline-all %t.exe -o %t.bolt --debug 2>&1 | FileCheck %s
8+
9+
# For BTI, we should not inline foo.
10+
# CHECK: BOLT-DEBUG: Skipping inlining block with tailcall in _Z3barP1A : .LBB01 to keep BTIs consistent.
11+
# CHECK-NOT: BOLT-INFO: inlined {{[0-9]+}} calls at {{[0-9]+}} call sites in {{[0-9]+}} iteration(s). Change in binary size: {{[0-9]+}} bytes.
12+
13+
.text
14+
.globl _Z3fooP1A
15+
.type _Z3fooP1A,@function
16+
_Z3fooP1A:
17+
ldr x8, [x0]
18+
ldr w0, [x8]
19+
br x30
20+
.size _Z3fooP1A, .-_Z3fooP1A
21+
22+
.globl _Z3barP1A
23+
.type _Z3barP1A,@function
24+
_Z3barP1A:
25+
stp x29, x30, [sp, #-16]!
26+
mov x29, sp
27+
bl _Z3fooP1A
28+
mul w0, w0, w0
29+
ldp x29, x30, [sp], #16
30+
ret
31+
.size _Z3barP1A, .-_Z3barP1A
32+
33+
.globl main
34+
.p2align 2
35+
.type main,@function
36+
main:
37+
mov w0, wzr
38+
ret
39+
.size main, .-main

clang/docs/ClangStaticAnalyzer.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ Clang Static Analyzer
55
The Clang Static Analyzer is a source code analysis tool that finds bugs in C, C++, and Objective-C programs.
66
It implements *path-sensitive*, *inter-procedural analysis* based on *symbolic execution* technique.
77

8-
This is the Static Analyzer documentation page.
8+
The Static Analyzer is a part of Clang; for downloading and installing Clang visit the `LLVM releases page <https://releases.llvm.org/>`_.
99

10-
See the `Official Tool Page <https://clang-analyzer.llvm.org/>`_.
10+
This is the documentation page of the Static Analyzer; there is also an old `Official Tool Page <https://clang-analyzer.llvm.org/>`_ which provides a short overview of features and limitations.
1111

1212
.. toctree::
1313
:caption: Table of Contents

clang/docs/HIPSupport.rst

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,43 @@ Example Usage
376376
basePtr->virtualFunction(); // Allowed since obj is constructed in device code
377377
}
378378

379+
Alias Attribute Support
380+
=======================
381+
382+
Clang supports alias attributes in HIP code, allowing creation of alternative names for functions and variables.
383+
- Aliases work with ``__host__``, ``__device__``, and ``__host__ __device__`` functions and variables.
384+
- The alias attribute uses the syntax ``__attribute__((alias("target_name")))``. Both weak and strong aliases are supported.
385+
- Outside of ``extern "C"``, the alias target must use the mangled name of the aliasee.
386+
- The alias is only emitted if the aliasee is emitted on the same side (i.e. ``__host__`` or ``__device__``); otherwise it is ignored.
387+
388+
Example Usage
389+
-------------
390+
391+
.. code-block:: c++
392+
393+
extern "C" {
394+
// Host function alias
395+
int __HostFunc(void) { return 0; }
396+
int HostFunc(void) __attribute__((weak, alias("__HostFunc")));
397+
398+
// Device function alias
399+
__device__ int __DeviceFunc(void) { return 1; }
400+
__device__ int DeviceFunc(void) __attribute__((weak, alias("__DeviceFunc")));
401+
402+
// Host-device function alias
403+
__host__ __device__ int __BothFunc(void) { return 2; }
404+
__host__ __device__ int BothFunc(void) __attribute__((alias("__BothFunc")));
405+
406+
// Variable alias
407+
int __host_var = 3;
408+
extern int __attribute__((weak, alias("__host_var"))) host_var;
409+
}
410+
// Mangled / overload alias
411+
__host__ __device__ float __Four(float f) { return 2.0f * f; }
412+
__host__ __device__ int Four(void) __attribute__((weak, alias("_Z6__Fourv")));
413+
__host__ __device__ float Four(float f) __attribute__((weak, alias("_Z6__Fourf")));
414+
415+
379416
Host and Device Attributes of Default Destructors
380417
===================================================
381418

clang/docs/analyzer/user-docs.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ Contents:
66
.. toctree::
77
:maxdepth: 2
88

9-
user-docs/Installation
109
user-docs/CommandLineUsage
1110
user-docs/Options
1211
user-docs/UsingWithXCode

clang/docs/analyzer/user-docs/CommandLineUsage.rst

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,19 @@ It is possible, however, to invoke the static analyzer from the command line in
1616
The following tools are used commonly to run the analyzer from the command line.
1717
Both tools are wrapper scripts to drive the analysis and the underlying invocations of the Clang compiler:
1818

19-
1. scan-build_ is an old and simple command line tool that emits static analyzer warnings as HTML files while compiling your project. You can view the analysis results in your web browser.
19+
1. scan-build_ is an old and simple command line tool that emits static analyzer warnings as HTML files while compiling your project. You can view the analysis results in your web browser; the utility script ``scan-view`` can provide a trivial HTTP server that serves these result files.
20+
- Is available as a part of the LLVM project (together with ``scan-view``).
2021
- Useful for individual developers who simply want to view static analysis results at their desk, or in a very simple collaborative environment.
2122
- Works on all major platforms (Windows, Linux, macOS) and is available as a package in many Linux distributions.
2223
- Does not include support for cross-translation-unit analysis.
2324

2425
2. CodeChecker_ is a driver and web server that runs the static analyzer on your projects on demand and maintains a database of issues.
26+
- Open source, but out-of-tree, i.e. not part of the LLVM project.
2527
- Perfect for managing large amounts of static analyzer warnings in a collaborative environment.
2628
- Generally much more feature-rich than scan-build.
2729
- Supports incremental analysis: Results can be stored in a database, subsequent analysis runs can be compared to list the newly added defects.
2830
- :doc:`CrossTranslationUnit` is supported fully on Linux via CodeChecker.
29-
- Can run clang-tidy checkers too.
30-
- Open source, but out-of-tree, i.e. not part of the LLVM project.
31+
- Can also run clang-tidy checks and various other analysis tools.
3132

3233
scan-build
3334
----------
Lines changed: 3 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,6 @@
1+
:orphan:
2+
13
Obtaining the Static Analyzer
24
=============================
35

4-
This page describes how to download and install the analyzer. Once the analyzer is installed, follow the :doc:`CommandLineUsage` on using the command line to get started analyzing your code.
5-
6-
.. contents::
7-
:local:
8-
9-
10-
Building the Analyzer from Source
11-
---------------------------------
12-
13-
Currently there are no officially supported binary distributions for the static analyzer.
14-
You must build Clang and LLVM manually.
15-
To do so, please follow the instructions for `building Clang from source code <https://clang.llvm.org/get_started.html#build>`_.
16-
17-
Once the Clang is built, you need to add the location of the ``clang`` binary and the locations of the command line utilities (`CodeChecker` or ``scan-build`` and ``scan-view``) to you PATH for :doc:`CommandLineUsage`.
18-
19-
[Legacy] Packaged Builds (Mac OS X)
20-
-----------------------------------
21-
22-
Semi-regular pre-built binaries of the analyzer used to be available on Mac OS X. These were built to run on OS X 10.7 and later.
23-
24-
For older builds for MacOS visit https://clang-analyzer.llvm.org/release_notes.html.
25-
26-
Packaged builds for other platforms may eventually be provided, but we need volunteers who are willing to help provide such regular builds. If you wish to help contribute regular builds of the analyzer on other platforms, please get in touch via `LLVM Discourse <https://discourse.llvm.org/>`_.
27-
28-
[Legacy] Using Packaged Builds
29-
------------------------------
30-
31-
To use the legacy package builds, simply unpack it anywhere. If the build archive has the name **``checker-XXX.tar.bz2``** then the archive will expand to a directory called **``checker-XXX``**. You do not need to place this directory or the contents of this directory in any special place. Uninstalling the analyzer is as simple as deleting this directory.
32-
33-
Most of the files in the **``checker-XXX``** directory will be supporting files for the analyzer that you can simply ignore. Most users will only care about two files, which are located at the top of the **``checker-XXX``** directory:
34-
35-
* **scan-build**: ``scan-build`` is the high-level command line utility for running the analyzer
36-
* **scan-view**: ``scan-view`` a companion command line utility to ``scan-build``, ``scan-view`` is used to view analysis results generated by ``scan-build``. There is an option that one can pass to ``scan-build`` to cause ``scan-view`` to run as soon as it the analysis of a build completes
37-
6+
The Static Analyzer can be obtained as a part of Clang; for downloading and installing Clang visit the `LLVM releases page <https://releases.llvm.org/>`_. Once the analyzer is installed, follow the :doc:`CommandLineUsage` on using the command line to get started analyzing your code.

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 24 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -210,17 +210,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
210210
def _mm_pause : X86LibBuiltin<"void()">;
211211
}
212212

213-
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
214-
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
215-
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
216-
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
217-
def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
218-
def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
219-
def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
220-
def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
221-
def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
222-
}
223-
224213
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
225214
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
226215
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
@@ -261,6 +250,15 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
261250
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
262251
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
263252
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;
253+
254+
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
255+
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
256+
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
257+
def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
258+
def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
259+
def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
260+
def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
261+
def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
264262
}
265263

266264
let Features = "sse3", Attributes = [NoThrow] in {
@@ -579,14 +577,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
579577
def psadbw256
580578
: X86Builtin<
581579
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
582-
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
583-
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
584-
def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
585-
def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
586-
def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
587-
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
588-
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
589-
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
590580
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
591581
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
592582
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
@@ -663,6 +653,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
663653

664654
def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
665655
def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
656+
657+
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
658+
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
659+
def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
660+
def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
661+
def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
662+
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
663+
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
664+
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
666665
}
667666

668667
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -1926,16 +1925,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
19261925
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
19271926
}
19281927

1929-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1930-
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
1931-
}
1932-
19331928
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
19341929
def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">;
19351930
def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">;
19361931
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
19371932
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
19381933
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
1934+
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
19391935
}
19401936

19411937
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
@@ -1991,7 +1987,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
19911987
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
19921988
}
19931989

1994-
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1990+
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
19951991
def psraw512
19961992
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
19971993
def psrlw512
@@ -2308,25 +2304,17 @@ let Features = "avx512f",
23082304
def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
23092305
}
23102306

2311-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2307+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
23122308
def psraq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
2313-
}
2314-
2315-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2316-
def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
2317-
}
2318-
2319-
let Features = "avx512vl",
2320-
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
23212309
def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
23222310
}
23232311

2324-
let Features = "avx512vl",
2325-
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
2312+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
2313+
def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
23262314
def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
23272315
}
23282316

2329-
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2317+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
23302318
def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
23312319
def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
23322320
def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;

0 commit comments

Comments
 (0)