Skip to content

Commit bb59052

Browse files
author
git apple-llvm automerger
committed
Merge commit 'f7e652127772' from llvm.org/main into next
2 parents dd5853d + f7e6521 commit bb59052

File tree

11 files changed

+2461
-95
lines changed

11 files changed

+2461
-95
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Helper function to find out whether the assembler supports a particular
2+
# command-line flag. You'd like to use the standard check_compiler_flag(), but
3+
# that only supports a fixed list of languages, and ASM isn't one of them. So
4+
# we do it ourselves, by trying to assemble an empty source file.
5+
6+
function(check_assembler_flag outvar flag)
  # Probe whether the assembler accepts `flag` by assembling an empty source
  # file with that flag, and record the answer in the cache variable named by
  # `outvar`: 1 if the flag was accepted, the empty string otherwise.
  #
  # Arguments:
  #   outvar - name of the cache variable that receives the result
  #   flag   - the command-line flag to test
  #
  # If `outvar` is already defined, nothing is done, so a result that is
  # already in the cache is reused instead of being probed again.
  #
  # Example:
  #   check_assembler_flag(HAVE_MY_FLAG -my-flag)
  if(NOT DEFINED "${outvar}")
    if(NOT CMAKE_REQUIRED_QUIET)
      message(CHECK_START "Checking for assembler flag ${flag}")
    endif()

    # Stop try_compile from attempting to link the result of the assembly, so
    # that we don't depend on having a working linker, and also don't have to
    # figure out what special symbol like _start needs to be defined in the
    # test input.
    #
    # This change is made within the dynamic scope of this function, so
    # CMAKE_TRY_COMPILE_TARGET_TYPE will be restored to its previous value on
    # return.
    set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

    # Try to assemble an empty file with a .s name, using the provided flag.
    try_compile(success
      SOURCE_FROM_CONTENT "CheckAssemblerFlag.s" ""
      COMPILE_DEFINITIONS ${flag}
      NO_CACHE)

    # Always record the result. CMAKE_REQUIRED_QUIET must only silence the
    # status messages; if it also skipped these set() calls, the caller would
    # see an undefined result and the probe would be repeated on every
    # configure.
    if(success)
      set(${outvar} 1 CACHE INTERNAL "Test assembler flag ${flag}")
    else()
      set(${outvar} "" CACHE INTERNAL "Test assembler flag ${flag}")
    endif()

    # Report the outcome, matching the CHECK_START issued above.
    if(NOT CMAKE_REQUIRED_QUIET)
      if(success)
        message(CHECK_PASS "Accepted")
      else()
        message(CHECK_FAIL "Not accepted")
      endif()
    endif()
  endif()
endfunction()

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ endif()
6060
include(builtin-config-ix)
6161
include(CMakeDependentOption)
6262
include(CMakePushCheckState)
63+
include(CheckAssemblerFlag)
6364

6465
option(COMPILER_RT_BUILTINS_HIDE_SYMBOLS
6566
"Do not export any symbols from the static library." ON)
@@ -423,6 +424,40 @@ set(arm_or_thumb2_base_SOURCES
423424
${GENERIC_SOURCES}
424425
)
425426

427+
# User-facing switch for the optimized assembly FP arithmetic routines.
# Enabled by default.
option(COMPILER_RT_ARM_OPTIMIZED_FP
  "On 32-bit Arm, use optimized assembly implementations of FP arithmetic. Likely to increase code size, but be faster." ON)

if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm")
  # Work out how to pass -mimplicit-it=always to the assembler: first try the
  # bare option, and only if that is rejected try it wrapped in -Wa,. The
  # variable stays empty when neither spelling is accepted.
  set(implicit_it_flag "")
  check_assembler_flag(COMPILER_RT_HAS_MIMPLICIT_IT -mimplicit-it=always)
  if(COMPILER_RT_HAS_MIMPLICIT_IT)
    set(implicit_it_flag -mimplicit-it=always)
  else()
    check_assembler_flag(
      COMPILER_RT_HAS_WA_MIMPLICIT_IT -Wa,-mimplicit-it=always)
    if(COMPILER_RT_HAS_WA_MIMPLICIT_IT)
      set(implicit_it_flag -Wa,-mimplicit-it=always)
    endif()
  endif()

  if(NOT implicit_it_flag)
    message(WARNING "Don't know how to set the -mimplicit-it=always flag in this assembler; not including Arm optimized implementations")
  else()
    # The assembly sources are built with the flag found above.
    set(assembly_files
      arm/mulsf3.S
      arm/divsf3.S)
    set_source_files_properties(${assembly_files}
      PROPERTIES COMPILE_OPTIONS ${implicit_it_flag})

    # Put the optimized routines and their C helpers at the front of the
    # existing source list.
    list(PREPEND arm_or_thumb2_base_SOURCES
      ${assembly_files}
      arm/fnan2.c
      arm/fnorm2.c
      arm/funder.c
    )
  endif()
endif()
460+
426461
set(arm_sync_SOURCES
427462
arm/sync_fetch_and_add_4.S
428463
arm/sync_fetch_and_add_8.S
@@ -456,6 +491,16 @@ set(thumb1_base_SOURCES
456491
${GENERIC_SOURCES}
457492
)
458493

494+
# When the optimized FP routines are enabled, put the Thumb-1 multiply
# implementation and the shared C helpers at the front of the Thumb-1 source
# list.
if(COMPILER_RT_ARM_OPTIMIZED_FP)
  list(PREPEND thumb1_base_SOURCES
    arm/thumb1/mulsf3.S
    arm/fnan2.c
    arm/fnorm2.c
    arm/funder.c
  )
endif()
503+
459504
set(arm_EABI_RT_SOURCES
460505
arm/aeabi_cdcmp.S
461506
arm/aeabi_cdcmpeq_check_nan.c

compiler-rt/lib/builtins/arm/divsf3.S

Lines changed: 608 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
//===-- fnan2.c - Handle single-precision NaN inputs to binary operation --===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This helper function is available for use by single-precision float
10+
// arithmetic implementations to handle propagating NaNs from the input
11+
// operands to the output, in a way that matches Arm hardware FP.
12+
//
13+
// On input, a and b are floating-point numbers in IEEE 754 encoding, and at
14+
// least one of them must be a NaN. The return value is the correct output NaN.
15+
//
16+
// A signalling NaN in the input (with bit 22 clear) takes priority over any
17+
// quiet NaN, and is adjusted on return by setting bit 22 to make it quiet. If
18+
// both inputs are the same type of NaN then the first input takes priority:
19+
// the input a is used instead of b.
20+
//
21+
//===----------------------------------------------------------------------===//
22+
23+
#include <stdint.h>
24+
25+
uint32_t __compiler_rt_fnan2(uint32_t a, uint32_t b) {
  // Setting this bit turns a signalling NaN into a quiet one.
  const uint32_t quiet_bit = 0x00400000;
  // Thresholds applied to the adjusted keys computed below.
  const uint32_t snan_above = 0xff800000;
  const uint32_t qnan_below = 0x00800000;

  // Build a classification key for each operand: shift left by one to drop
  // the sign bit, then add 1 at the position the quiet bit has moved to.
  // After this, every signalling NaN has a key in 0xff800001..0xffffffff,
  // while every quiet NaN has wrapped round to a key in 0..0x007fffff.
  const uint32_t key_a = (a << 1) + 0x00800000;
  const uint32_t key_b = (b << 1) + 0x00800000;

  // Signalling NaNs win over quiet ones, and a wins over b. A signalling NaN
  // is returned with its quiet bit set.
  if (key_a > snan_above)
    return a | quiet_bit;
  if (key_b > snan_above)
    return b | quiet_bit;

  // No signalling NaN: return a if it is a quiet NaN, otherwise b (which is
  // then expected to be the quiet NaN).
  return (key_a < qnan_below) ? a : b;
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
//===-- fnorm2.c - Handle single-precision denormal inputs to binary op ---===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This helper function is available for use by single-precision float
10+
// arithmetic implementations, to handle denormal inputs on entry by
11+
// renormalizing the mantissa and modifying the exponent to match.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#include <stdint.h>
16+
17+
// Structure containing the function's inputs and outputs.
18+
//
19+
// On entry: a, b are two input floating-point numbers, still in IEEE 754
20+
// encoding. expa and expb are the 8-bit exponents of those numbers, extracted
21+
// and shifted down to the low 8 bits of the word, with no other change.
22+
// Neither value should be zero, or have the maximum exponent (indicating an
23+
// infinity or NaN).
24+
//
25+
// On exit: each of a and b contains the mantissa of the input value, with the
26+
// leading 1 bit made explicit, and shifted up to the top of the word. If expa
27+
// was zero (indicating that a was denormal) then it is now represented as a
28+
// normalized number with an out-of-range exponent (zero or negative). The same
29+
// applies to expb and b.
30+
struct fnorm2 {
  uint32_t a, b, expa, expb;
};

// Renormalize a single operand in place. *word holds the operand's encoding
// and *exponent its extracted exponent field.
static void fnorm2_operand(uint32_t *word, uint32_t *exponent) {
  // Move the mantissa up so that it sits directly below the top bit.
  uint32_t mant = *word << 8;

  if (*exponent != 0) {
    // Normal number: the mantissa is already in position, so just make the
    // leading 1 explicit in the top bit.
    mant |= 0x80000000;
  } else {
    // Denormal: shift further up until the highest set bit reaches the top
    // of the word, and adjust the exponent by the same amount. The result
    // of 1 - lead is zero or negative, stored as an unsigned word.
    uint32_t lead = __builtin_clz(mant);
    mant <<= lead;
    *exponent = 1 - lead;
  }

  *word = mant;
}

void __compiler_rt_fnorm2(struct fnorm2 *values) {
  // Apply the same renormalization to each operand and its exponent.
  fnorm2_operand(&values->a, &values->expa);
  fnorm2_operand(&values->b, &values->expb);
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
//===-- funder.c - Handle single-precision floating-point underflow -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This helper function is available for use by single-precision float
10+
// arithmetic implementations to handle underflowed output values, if they were
11+
// computed in the form of a normalized mantissa and an out-of-range exponent.
12+
//
13+
// On input: x should be a complete IEEE 754 floating-point value representing
14+
// the desired output scaled up by 2^192 (the same value that would have been
15+
// passed to an underflow trap handler in IEEE 754:1985).
16+
//
17+
// This isn't enough information to re-round to the correct output denormal
18+
// without also knowing whether x itself has already been rounded, and which
19+
// way. 'errsign' gives this information, by indicating the sign of the value
20+
// (true result - x). That is, if errsign > 0 it means the true value was
21+
// larger (x was rounded down); if errsign < 0 then x was rounded up; if
22+
// errsign == 0 then x represents the _exact_ desired output value.
23+
//
24+
//===----------------------------------------------------------------------===//
25+
26+
#include <stdint.h>
27+
28+
#define SIGNBIT 0x80000000
#define MANTSIZE 23
#define BIAS 0xc0

uint32_t __compiler_rt_funder(uint32_t x, uint32_t errsign) {
  const uint32_t signbit = x & SIGNBIT;
  // Drop the sign bit, then bring the 8-bit exponent field down to bit 0.
  const uint32_t biased_exp = (x << 1) >> 24;

  // An exponent above the bias means the value is not an underflow at all:
  // strip the bias from the exponent field and return.
  if (biased_exp > BIAS)
    return x - (BIAS << MANTSIZE);

  // Number of low-order bits that the denormal result cannot represent.
  const uint32_t dropped = BIAS + 1 - biased_exp;

  // If even the leading 1 lands at least two bits below the lowest denormal
  // mantissa bit, the result rounds to a signed zero.
  if (dropped > MANTSIZE + 1)
    return signbit;

  // Full mantissa with its leading 1 explicit, at the top of the word.
  uint32_t full = (x << 8) | 0x80000000;

  // Nudge by one unit in the last place of this word, in the direction of
  // the true result, so that the rounding below sees the right sticky bits.
  if (errsign >> 31)
    full -= 1;
  if ((-errsign) >> 31)
    full += 1;

  // Split into the bits that are kept and the bits shifted off the bottom.
  // By default everything is shifted off; that is the case where 'dropped'
  // is MANTSIZE + 1.
  uint32_t kept = 0;
  uint32_t residue = full;
  if (dropped != MANTSIZE + 1) {
    const uint32_t down = 8 + dropped;
    kept = full >> down;
    residue = full << (32 - down);
  }

  // Round to nearest, ties to even: the top bit of 'residue' is the half
  // bit, and anything below it means the value is above the halfway point.
  const uint32_t half = residue >> 31;
  const uint32_t below_half = residue << 1;
  if (half) {
    kept++;
    if (below_half == 0)
      kept &= ~1; // exact tie: clear the low bit to round to even
  }

  return signbit | kept;
}

0 commit comments

Comments
 (0)