From c1f697c0ee1997480d7919448f954c6d3217988f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 11:36:06 +0100 Subject: [PATCH] GPU Math: Add memcpy and QuietNaN, fix Finite and IsNaN --- GPU/Common/GPUCommonMath.h | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index ef837658f74d1..5a813b74ed7b6 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -25,6 +25,8 @@ #include #include #include +#include +#include #endif #if !defined(GPUCA_GPUCODE_COMPILEKERNELS) && (!defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__)) @@ -83,9 +85,14 @@ class GPUCommonMath GPUd() static float Modf(float x, float y); GPUd() static bool Finite(float x); GPUd() static bool IsNaN(float x); + GPUd() static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() static bool IsNaNRelaxed(float x); // always false if not using NO_FAST_MATH + GPUd() static float QuietNaN(); GPUd() static uint32_t Clz(uint32_t val); GPUd() static uint32_t Popcount(uint32_t val); + GPUd() static void memcpy(void* dst, const void* src, size_t size); + GPUhdni() static float Hypot(float x, float y); GPUhdni() static float Hypot(float x, float y, float z); GPUhdni() static float Hypot(float x, float y, float z, float w); @@ -181,6 +188,23 @@ typedef GPUCommonMath CAMath; #define CHOICE(c1, c2, c3) (c1) // Select first option for Host #endif // clang-format on +GPUdi() void GPUCommonMath::memcpy(void* dst, const void* src, size_t size) +{ +#ifndef GPUCA_GPUCODE_DEVICE + std::memcpy(dst, src, size); +#elif defined(__CUDACC__) || defined(__HIPCC__) + ::memcpy(dst, src, size); +#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + __builtin_memcpy(dst, src, size); +#else + char* d = (char*)dst; + const char* s = (const char*)src; + for (size_t i = 0; i < size; i++) { + d[i] = s[i]; + } +#endif +} + template GPUdi() 
constexpr T GPUCommonMath::nextMultipleOf(T val) { @@ -222,11 +246,12 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) GPUdi() uint32_t GPUCommonMath::Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); } +GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } +GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), isnan(x)); } +GPUdi() float GPUCommonMath::QuietNaN() { return CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } #ifdef GPUCA_NO_FAST_MATH GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); } // Fixme: fix these 2 for OpenCL -GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), false); } GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } @@ -238,10 +263,11 @@ GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE((float)asin((double)x GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x), (float)log((double)x), log(x)); } GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } +GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } 
+GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } #else GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), true, true); } GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); } GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } @@ -253,6 +279,8 @@ GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE(asinf(x), asinf(x), a GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE(acosf(x), acosf(x), acos(x)); } GPUdi() float GPUCommonMath::Log(float x) { return CHOICE(logf(x), logf(x), log(x)); } GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE(expf(x), expf(x), exp(x)); } +GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return true; } +GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } #endif GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c)